I am trying to collect the activity history of a SharePoint list (comments, creation dates, etc.), but I haven't been able to find a way to do it.
Would there be a way to collect those data?
Below is the code that I am using.
import json
import os
import urllib.parse

from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.client_context import ClientContext

ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# os.path.join picks the correct separator on every platform; joining with a
# hard-coded '\\' only yields a valid path on Windows.
config_path = os.path.join(ROOT_DIR, 'config.json')

# Read the JSON config file and keep only its SharePoint section.
with open(config_path) as config_file:
    config = json.load(config_file)
config = config['share_point']

USERNAME = config['user']
PASSWORD = config['password']
sharepoint_url = config['url']
sharepoint_site = config['site']
sharepoint_list = config['list']

ctx = ClientContext(sharepoint_site).with_credentials(
    UserCredential(USERNAME, PASSWORD))

# NOTE(review): the list title is hard-coded here while `sharepoint_list`
# from the config is never used -- confirm which one is intended.
web = ctx.web.lists.get_by_title('Test')

# Request the list items (paged, 200 per page) and run the query.
items = web.items.paged(True).top(200)
ctx.load(items)
ctx.execute_query()

contents = items.properties
print(contents)
The attached image shows the items that I would like to collect.
Sharepoint List Activities
Related
I have a test.txt on my Desktop and now I want to upload it to a Sharepoint Directory via Python3. How can I do that?
I'll start by saying this example is adapted from the example for Office365-REST-Python-Client. It works with sharepoint online using the rest api.
https://github.com/vgrem/Office365-REST-Python-Client/blob/master/examples/sharepoint/files/upload_file.py
Example url you might want to upload to [baseurl][site][folder][file]. https://your_company.sharepoint.com/path/to/site/Shared Documents/file.txt
import os

from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext

baseurl = 'https://your_company.sharepoint.com'
basesite = '/path/to/site'  # every SharePoint site has a home
siteurl = baseurl + basesite

localpath = './file.txt'
remotepath = 'Shared Documents/file.txt'  # existing folder path under the SharePoint site

# `username` and `password` must be defined before this point.
ctx_auth = AuthenticationContext(siteurl)  # authenticate against the site URL
ctx_auth.acquire_token_for_user(username, password)
ctx = ClientContext(siteurl, ctx_auth)

# Read the whole local file into memory as bytes.
with open(localpath, 'rb') as content_file:
    file_content = content_file.read()

# Split the remote path into the target folder and the file name to create.
folder_url, name = os.path.split(remotepath)
file = ctx.web.get_folder_by_server_relative_url(folder_url).upload_file(name, file_content).execute_query()
I am trying to use the SharePoint API to download files within folders at the location "folder_in_sp" below. I've seen similar questions asked on here before, but none of the answers are working for me. I am using the following code.
from office365.runtime.auth.client_credential import ClientCredential
from office365.runtime.client_request_exception import ClientRequestException
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import io
import datetime
import pandas as pd
import keyring
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.files.file_system_object_type import FileSystemObjectType
username = 'MY USERNAME'
site_url = "https://ORG_NAME.sharepoint.com/"
folder_in_sp = "/a very long/folder path/with lots of spaces/and numbers/Leading to/A Final/folder"

# The password is looked up in the system keyring instead of being stored in
# the script.
credentials = UserCredential(username, keyring.get_password("Sharepoint API", username))
ctx = ClientContext(site_url).with_credentials(credentials)

# Load the web's properties and print its title as a connection check.
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("You're in! Web title: {0}".format(web.properties['Title']))
def folder_details(ctx, folder_in_sharepoint):
    """Return the ``Name`` property of each entry in the folder's ``files`` collection.

    ctx: client context used to load the collection and execute the query.
    folder_in_sharepoint: server-relative URL of the folder to inspect.
    """
    target = ctx.web.get_folder_by_server_relative_url(folder_in_sharepoint)
    entries = target.files
    # NOTE(review): despite the "folder" naming, this reads `.files`; confirm
    # whether `.folders` was intended.
    ctx.load(entries)
    ctx.execute_query()
    return [entry.properties["Name"] for entry in entries]
# Collect the entry names reported for the configured folder path.
file_list = folder_details(ctx, folder_in_sp)
I am trying to access the folders within the final folder at that location. Sometimes there will be multiple folders that I need to access in order to download a single file from each of them. I have tried replacing the spaces in the folder_in_sp path with "%20", as they are formatted online, but that has not worked either.
Any help would be appreciated!
Thanks in advance.
In Python I am utilizing Office 365 REST Python Client library to access and read an excel workbook that contains many sheets.
While the authentication is successful, I am unable to append the right path of sheet name to the file name in order to access the 1st or 2nd worksheet by its name, which is why the output from the sheet is not JSON, rather IO Bytes which my code is not able to process.
My end goal is to simply access the specific work sheet by its name 'employee_list' and transform it into JSON or Pandas Data frame for further usage.
Code snippet below -
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
username = 'abc#a.com'
password = 'abcd'
# Link to the workbook as opened in the browser; the open question is how to
# address a worksheet by its name starting from this URL.
site_url = 'https://sample.sharepoint.com/sites/SAMPLE/_layouts/15/Doc.aspx?OR=teams&action=edit&sourcedoc={739271873}'

credentials = UserCredential(username, password)
ctx = ClientContext(site_url).with_credentials(credentials)

api_url = "{0}/_api/web/".format(site_url)
request = RequestOptions(api_url)
response = ctx.execute_request_direct(request)
# This is the line that fails with a JSON decode error.
json_data = json.loads(response.content)
You can access it by sheet index, check the following code....
import xlrd
# Path of the workbook to open.
loc = "File location"

# Open the workbook and select its first sheet by index.
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)

# Print the value of the cell at row index 1, column index 0.
print(sheet.cell_value(1, 0))
You can try to add the component 'sheetname' to the url like so.
https://site/lib/workbook.xlsx#'Sheet1'!A1
It seems that the URL constructed to access the data is not correct. You should first verify that the full URL works in your browser and then modify the code accordingly. You may try the following with some changes; I have verified that a URL formed with this logic returns JSON data.
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
username = 'abc#a.com'
password = 'abcd'
# Replace RootFolder/ExcelFileName.xlsx with the actual path of the Excel file from the root.
# Replace A1 and A10 with the actual start and end of the cell range.
# The literal is delimited with double quotes because the range expression
# itself contains single quotes.
site_url = "https://sample.sharepoint.com/_vti_bin/ExcelRest.aspx/RootFolder/ExcelFileName.xlsx/Model/Ranges('employee_list!A1%7CA10')?$format=json"

ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
request = RequestOptions(site_url)
response = ctx.execute_request_direct(request)
# Parse the response body as JSON.
json_data = json.loads(response.content)
Source: https://learn.microsoft.com/en-us/sharepoint/dev/general-development/sample-uri-for-excel-services-rest-api
The update I'm using (Office365-REST-Python-Client==2.3.11) allows simpler access to an Excel file in the SharePoint repository.
# Names reused from the original question:
#   pd, username, password, UserCredential, File, BytesIO

# Absolute URL of the Excel file on SharePoint.
file_url = ('https://sample.sharepoint.com'
            '/sites/SAMPLE/{*recursive_folders}'
            '/sample_worksheet.xlsx')

user_credentials = UserCredential(user_name=username, password=password)

# In-memory binary buffer that will receive the file content.
excel_file = BytesIO()

# Build the file request from its URL and attach the credentials to it.
excel_file_online = File.from_url(abs_url=file_url).with_credentials(
    credentials=user_credentials)

# Execute the request, writing the binary response into the buffer.
excel_file_online.download(file_object=excel_file).execute_query()
We now have a binary copy of the Excel file as a BytesIO object named excel_file. From here, reading it as a pd.DataFrame is as straightforward as reading an ordinary Excel file stored on a local drive, e.g.:
# Parse the downloaded buffer with read_excel's default sheet selection.
pd.read_excel(excel_file) # -> pd.DataFrame
Hence, if you are interested in a specific sheet like 'employee_list', you may preferably read it as
# Load only the worksheet named 'employee_list'.
employee_list = pd.read_excel(excel_file, sheet_name='employee_list')  # -> pd.DataFrame
or
# sheet_name=None returns a dict of worksheets instead of a single frame.
data = pd.read_excel(excel_file, sheet_name=None)  # -> dict
# .get() gives None when there is no 'employee_list' entry.
employee_list = data.get('employee_list')  # -> pd.DataFrame or None
I know you stated you can't use a BytesIO object, but for those coming here who are reading the file in as a BytesIO object like I was looking for, you can use the sheet_name arg in pd.read_excel:
from urllib.parse import urlparse

url = "https://sharepoint.site.com/sites/MySite/MySheet.xlsx"
sheet_name = 'Sheet X'

# `relative_url` was previously undefined; take the path component of the
# absolute URL as the server-relative location of the file.
relative_url = urlparse(url).path
response = File.open_binary(ctx, relative_url)

# Wrap the downloaded bytes in an in-memory buffer (positioned at offset 0).
bytes_file_obj = io.BytesIO(response.content)

# Select the worksheet by its name.
df = pd.read_excel(bytes_file_obj, sheet_name=sheet_name)
When I retrieve csv files on Google Drive via api, I get files with no contents.
The code below consists of 3 parts (1: authenticate 2: search for files, 3: download files).
I suspect there is something wrong in step3: download files specifically around while done is False because I have no problem accessing Google Drive and download files. It's just that they are all empty files.
It would be great if someone can show me how I can fix it.
The code below is mostly borrowed from the Google website. Thank you in advance for your time!
Step 1: Authentication
from apiclient import discovery
from httplib2 import Http
import oauth2client
from oauth2client import file, client, tools
import argparse

# Options handed to tools.run_flow as attributes of a flags object. A
# Namespace carries them directly, replacing the earlier trick of setting
# attributes on an empty lambda.
auth = {"auth_host_name":'localhost', 'noauth_local_webserver':'store_true', 'auth_host_port':[8080, 8090], 'logging_level':'ERROR'}
obj = argparse.Namespace(**auth)

scopes = 'https://www.googleapis.com/auth/drive'
store = file.Storage('token_google_drive2.json')
creds = store.get()

# The following takes the user to the authentication link if no valid token
# file is found.
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_id.json', scopes)
    creds = tools.run_flow(flow, store, obj)
Step 2: Search for files and create a dictionary of files to download
from googleapiclient.discovery import build
page_token = None
drive_service = build('drive', 'v3', credentials=creds)

# The accumulators are created once, BEFORE the paging loop; re-creating them
# inside the loop would throw away every page except the last one.
name_list = []
id_list = []
while True:
    response = drive_service.files().list(
        q="mimeType='text/csv' and name contains 'RR' and name contains '20191001'",
        spaces='drive',
        fields='nextPageToken, files(id, name)',
        pageToken=page_token,
    ).execute()
    # The loop variable is not called `file`, so it cannot rebind the `file`
    # module imported from oauth2client.
    for entry in response.get('files', []):
        # ':' is removed from the name because the name is later used as the
        # local file name to download to.
        name_list.append(entry.get('name').replace(':', ''))
        id_list.append(entry.get('id'))
    page_token = response.get('nextPageToken', None)
    if page_token is None:
        break

# Map each cleaned file name to its Drive file id.
temp_dic = dict(zip(name_list, id_list))
Step 3: Download Files (the troublesome part)
import io
from googleapiclient.http import MediaIoBaseDownload
# Download every file in temp_dic (local file name -> Drive file id).
for v, file_id in temp_dic.items():
    request = drive_service.files().get_media(fileId=file_id)
    # The context manager closes the handle once the download finishes (or
    # fails); previously it was never closed.
    with io.FileIO(v, mode='wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        # Keep requesting chunks until the downloader reports completion.
        while done is False:
            status, done = downloader.next_chunk()
            status_complete = int(status.progress()*100)
            print(f'Download of {len(temp_dic)} files, {status_complete}%')
Actually, I figured it out myself. Below is an edit.
All I needed to do was delete done = False
while done is False: and add fh.close() to close the file handle.
The complete revised part 3 is as follows:
from googleapiclient.http import MediaIoBaseDownload
# Download every file in temp_dic (local file name -> Drive file id).
for v, file_id in temp_dic.items():
    request = drive_service.files().get_media(fileId=file_id)
    # `v` is the local file name (including extension) to write to.
    # io.FileIO always writes bytes; the `with` block closes the handle, so
    # no explicit fh.close() is needed.
    with io.FileIO(v, mode='wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        # next_chunk() transfers one chunk per call, so it is repeated until
        # the downloader reports completion; a single call would truncate any
        # file larger than one chunk.
        done = False
        while not done:
            status, done = downloader.next_chunk()
            status_complete = int(status.progress()*100)
            print(f'{v} is {status_complete}% downloaded')
print(f'{len(temp_dic)} files')
I am trying to change my code to support video processing from multiple sites (YouTube, Vimeo, etc.) using the youtube-dl extractors. I don't want to import youtube-dl (unless necessary); I would prefer to call a function. My understanding is that this (youtube-dl http://vimeo.com/channels/YOUR-CHANNEL) is a command-line tool. Please help!
import pymongo
import get_media
import configparser as ConfigParser
def shorten_list(mylist, limit=10):
    """Return the first *limit* items of *mylist* (the first 10 by default).

    mylist: any sliceable sequence; it is not modified.
    limit: maximum number of leading items to keep.
    Returns a new sequence of the same type as *mylist*; sequences shorter
    than *limit* are returned in full.
    """
    return mylist[:limit]
def main():
    """Write the YouTube URLs found in the media collection to the youtube-dl input file.

    The output file name is read from the ``youtubedl_input`` option of the
    ``media`` section in ``settings.cfg``. Every record of the ``raw``
    collection is scanned and the ``Url`` under its ``Data`` field is kept
    when it starts with ``https://www.youtube.com/``. The result is truncated
    with ``shorten_list`` and written one URL per line.
    """
    config = ConfigParser.ConfigParser()
    config.read('settings.cfg')
    youtubedl_filename = config.get('media', 'youtubedl_input')
    print('creating file: %s - to be used as input for youtubedl' % youtubedl_filename)

    db = get_media.connect_to_media_db()
    items = db.raw

    url_list = []
    # Iterate the cursor directly; copying every record into a dict keyed by
    # '_id' first only held the whole collection in memory.
    for item_dict in items.find():
        # Records without a 'Data' field, or without a string 'Url', are
        # skipped instead of raising.
        data = item_dict.get('Data') or {}
        url = data.get('Url')
        if isinstance(url, str) and url.startswith('https://www.youtube.com/'):
            url_list.append(url)

    # for testing purposes
    # shorten list to only download a few files at a time
    url_list = shorten_list(url_list)

    # save list of youtube media file urls
    with open(youtubedl_filename, 'w') as f:
        f.writelines(url + '\n' for url in url_list)


if __name__ == "__main__":
    main()