I am trying to use the SharePoint API to download files within folders at the location "folder_in_sp" below. I've seen similar questions asked on here before, but none of them are working for me. I am using the following code.
from office365.runtime.auth.client_credential import ClientCredential
from office365.runtime.client_request_exception import ClientRequestException
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import io
import datetime
import pandas as pd
import keyring
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.files.file_system_object_type import FileSystemObjectType
username = 'MY USERNAME'
site_url = "https://ORG_NAME.sharepoint.com/"
folder_in_sp = "/a very long/folder path/with lots of spaces/and numbers/Leading to/A Final/folder"
ctx = ClientContext(site_url).with_credentials(UserCredential(username, keyring.get_password("Sharepoint API", username))) # Use keyring to access your password for user credentials
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("You're in! Web title: {0}".format(web.properties['Title']))
def folder_details(ctx, folder_in_sharepoint):
    folder = ctx.web.get_folder_by_server_relative_url(folder_in_sharepoint)
    fold_names = []
    sub_folders = folder.files  # note: .files returns the files in the folder, not its subfolders
    ctx.load(sub_folders)
    ctx.execute_query()
    for s_folder in sub_folders:
        fold_names.append(s_folder.properties["Name"])
    return fold_names
file_list = folder_details(ctx, folder_in_sp)
I am trying to access the folders within the final folder at that location. Sometimes there will be multiple folders, and I need to access each of them to download a single file from each one. I have tried replacing the spaces in the folder_in_sp path with "%20", as they are formatted online, but that has not worked either.
Any help would be appreciated!
Thanks in advance.
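For reference, get_folder_by_server_relative_url expects a decoded, server-relative path (spaces left as spaces, not "%20"), and a folder's subfolders are exposed through its folders property rather than files. Below is a minimal sketch of enumerating the subfolders and downloading the first file from each, reusing the ctx built above; the local download directory is a placeholder:

import os
from office365.sharepoint.files.file import File

def download_first_file_per_subfolder(ctx, parent_folder_url, local_dir):
    # Enumerate the subfolders (folder.folders, not folder.files) of the parent folder
    parent = ctx.web.get_folder_by_server_relative_url(parent_folder_url)
    sub_folders = parent.folders
    ctx.load(sub_folders)
    ctx.execute_query()
    for sub in sub_folders:
        files = sub.files
        ctx.load(files)
        ctx.execute_query()
        for f in files:
            local_path = os.path.join(local_dir, f.properties["Name"])
            with open(local_path, "wb") as fh:
                # open_binary fetches the raw file content over the REST API
                fh.write(File.open_binary(ctx, f.properties["ServerRelativeUrl"]).content)
            break  # only the first file in each subfolder

download_first_file_per_subfolder(ctx, folder_in_sp, "./downloads")  # "./downloads" must exist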
I am trying to read an Excel file from SharePoint into Python, and I get the following error:
ValueError: Excel file format cannot be determined, you must specify an engine manually
My Code:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
url_sp = 'https://company.sharepoint.com/teams/TeamE'
username_sp = 'MyUsername'
password_sp = 'MyPassword'
folder_url_sp = '/Shared%20Documents/02%20Team%20IAP/06_Da-An/Data/E/Edate.xlsx?web=1'
# Authentication
ctx_auth = AuthenticationContext(url_sp)
if ctx_auth.acquire_token_for_user(username_sp, password_sp):
    ctx = ClientContext(url_sp, ctx_auth)
    web = ctx.web
    ctx.load(web)
    ctx.execute_query()
    print('Authentication successful')
else:
    print(ctx_auth.get_last_error())
import io
import pandas as pd  # needed for read_excel below

response = File.open_binary(ctx, folder_url_sp)
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0)
data = pd.read_excel(bytes_file_obj, sheet_name=None)
Can it be related to the fact that the Excel file consists of several worksheets?
Can you help me further?
Thanks in advance
Several sheets should not be a problem. Have you tried specifying an engine in your code, as the error message suggests?
data = pd.read_excel(bytes_file_obj, sheet_name=None, engine= ... )
Possible options can be found in the pandas documentation (scroll down to engine: str, default None). The explanation
If io is not a buffer or path, this must be set to identify io
seems to fit your problem.
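For an .xlsx file, that would look like the sketch below. The choice of openpyxl is an assumption on my part: it is the usual engine for modern .xlsx workbooks and must be installed separately (pip install openpyxl), while legacy .xls files need engine="xlrd" instead.

import pandas as pd

# sheet_name=None returns a dict of DataFrames, one per worksheet
data = pd.read_excel(bytes_file_obj, sheet_name=None, engine="openpyxl")

If the error persists with an explicit engine, it may also be worth checking that folder_url_sp is a plain server-relative path (decoded spaces, no "?web=1" suffix), since open_binary may otherwise return something other than the workbook bytes.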
I have a test.txt on my Desktop, and now I want to upload it to a SharePoint directory via Python 3. How can I do that?
I'll start by saying this example is adapted from the example for Office365-REST-Python-Client. It works with SharePoint Online using the REST API.
https://github.com/vgrem/Office365-REST-Python-Client/blob/master/examples/sharepoint/files/upload_file.py
An example URL you might want to upload to, in the form [baseurl][site][folder][file]: https://your_company.sharepoint.com/path/to/site/Shared Documents/file.txt
import os

from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext

baseurl = 'https://your_company.sharepoint.com'
basesite = '/path/to/site'  # every SharePoint has a home.
siteurl = baseurl + basesite

localpath = './file.txt'
remotepath = 'Shared Documents/file.txt'  # existing folder path under the SharePoint site.

username = 'your_username'  # placeholder credentials
password = 'your_password'

ctx_auth = AuthenticationContext(siteurl)
ctx_auth.acquire_token_for_user(username, password)
ctx = ClientContext(siteurl, ctx_auth)  # make sure you auth to the siteurl.

with open(localpath, 'rb') as content_file:
    file_content = content_file.read()
folder, name = os.path.split(remotepath)
file = ctx.web.get_folder_by_server_relative_url(folder).upload_file(name, file_content).execute_query()
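As a quick check, the uploaded file can be listed back from the target folder with the same API used above; 'Shared Documents' here is the placeholder folder from the example:

# Confirm the upload by listing the files in the target folder
target = ctx.web.get_folder_by_server_relative_url('Shared Documents')
files = target.files
ctx.load(files)
ctx.execute_query()
for f in files:
    print(f.properties["Name"], f.properties["ServerRelativeUrl"])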
I am trying to collect activity histories from a SharePoint list, such as comments, creation dates, etc., but I wasn't able to find a way to achieve it. Is there a way to collect that data?
Below is the code that I am using.
import json
import os
import urllib.parse
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(ROOT_DIR, 'config.json')

# read the JSON config file
with open(config_path) as config_file:
    config = json.load(config_file)
config = config['share_point']

USERNAME = config['user']
PASSWORD = config['password']
sharepoint_url = config['url']
sharepoint_site = config['site']
sharepoint_list = config['list']
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.client_context import ClientContext
ctx = ClientContext(sharepoint_site).with_credentials(UserCredential(USERNAME, PASSWORD))
sp_list = ctx.web.lists.get_by_title('Test')
items = sp_list.items.paged(True).top(200)
ctx.load(items)
ctx.execute_query()
for item in items:  # iterate the items; the collection itself has no useful .properties
    print(item.properties)
The attached image ("Sharepoint List Activities") shows the items that I would like to collect.
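For the version-related part of this (who changed what, and when), the list item version history may help. The sketch below is an assumption-heavy starting point: it relies on the ListItem versions property of Office365-REST-Python-Client (present in recent releases) and reuses the 'Test' list from above, and the property names read from each version are hedged with .get(). Comments are stored separately from field versions, so they would likely need the SharePoint comments endpoint instead.

# Sketch: fetch the version history of each item in the 'Test' list
sp_list = ctx.web.lists.get_by_title('Test')
items = sp_list.items.top(10)
ctx.load(items)
ctx.execute_query()
for item in items:
    versions = item.versions  # assumed available in the installed release
    ctx.load(versions)
    ctx.execute_query()
    for v in versions:
        # each version carries the field values as of that change
        print(item.properties.get("Id"), v.properties.get("Created"))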
I saw this: Python - Download files from SharePoint site
But it does not work for me. I am getting the following error:
assertion_node = dom.getElementsByTagNameNS("urn:oasis:names:tc:SAML:1.0:assertion", 'Assertion')[0].toxml()
IndexError: list index out of range
Here is the code I am using, what am I doing wrong?
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
url = """https://my_url.sharepoint.com/location/AllItems.aspx?otherstuff"""
username = "here_is_my_username"
password = "here_is_my_pw"
ctx_auth = AuthenticationContext(url)
ctx_auth.acquire_token_for_user(username, password) <---this is where it errors out at
ctx = ClientContext(url,ctx_auth)
Edit #1:
The clue is somewhere in here:
'saml': '{urn:oasis:names:tc:SAML:1.0:assertion}',
That line isn't populated for some reason... the question is, how do I populate it, and with what? It comes from the saml_token_provider.py file that is installed with these libraries:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
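One common cause of this IndexError, offered here as a guess rather than a confirmed diagnosis, is passing a page URL (the .../AllItems.aspx?... link copied from the browser) to AuthenticationContext instead of the site URL, so the SAML token request targets the wrong endpoint and no Assertion node comes back. A sketch of the split, with placeholder paths standing in for the real ones:

# Authenticate against the site root, not the AllItems.aspx view URL
site_url = "https://my_url.sharepoint.com"         # placeholder site URL
file_url = "/location/Shared Documents/file.txt"   # placeholder server-relative file path

ctx_auth = AuthenticationContext(site_url)
ctx_auth.acquire_token_for_user("here_is_my_username", "here_is_my_pw")
ctx = ClientContext(site_url, ctx_auth)

response = File.open_binary(ctx, file_url)  # raw bytes of the file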
So, my data is stored as CSV files in an OSS bucket on Alibaba Cloud.
I am currently executing a Python script, wherein:
I download the file into my local machine.
Do the changes using a Python script on my local machine.
Store it in AWS Cloud.
I have to modify this method and schedule a cron job in Alibaba Cloud to automate the running of this script.
The Python script will be uploaded into Task Management of Alibaba Cloud.
So the new steps will be:
Read a file from the OSS bucket into Pandas.
Modify it: merging it with other data, some column changes. This will be done in pandas.
Store the modified file into AWS RDS.
I am stuck at the first step itself.
Error log:
"No module found" for oss2 and pandas.
What is the correct way of doing it?
This is a rough draft of my script (this is how I was able to execute it on my local machine):
import os, re
import oss2            # -- throws an error: no module found
import datetime as dt
import pandas as pd    # -- throws an error: no module found
import tarfile
import mysql.connector
from datetime import datetime
from itertools import islice

dates = (dt.datetime.now() + dt.timedelta(days=-1)).strftime("%Y%m%d")

def download_file(access_key_id, access_key_secret, endpoint, bucket):
    # Authentication
    auth = oss2.Auth(access_key_id, access_key_secret)
    # Bucket name
    bucket = oss2.Bucket(auth, endpoint, bucket)
    # Download the file
    try:
        # List all objects in the given folder and its subfolders.
        for obj in oss2.ObjectIterator(bucket, prefix=dates + 'order'):
            order_file = obj.key
            objectName = order_file.split('/')[1]
            df = pd.read_csv(bucket.get_object(order_file))  # to read into pandas
            # FUNCTION to modify and upload
            print("File downloaded")
    except Exception:
        print("Pls check!!! File not read")
    return objectName
import os, re
import oss2
import datetime as dt
import pandas as pd
import tarfile
import mysql.connector
from datetime import datetime
from itertools import islice
import io  ## include this new library

dates = (dt.datetime.now() + dt.timedelta(days=-1)).strftime("%Y%m%d")

def download_file(access_key_id, access_key_secret, endpoint, bucket):
    # Authentication
    auth = oss2.Auth(access_key_id, access_key_secret)
    # Bucket name
    bucket = oss2.Bucket(auth, endpoint, bucket)
    # Download the file
    try:
        # List all objects in the given folder and its subfolders.
        for obj in oss2.ObjectIterator(bucket, prefix=dates + 'order'):
            order_file = obj.key
            objectName = order_file.split('/')[1]
            bucket_object = bucket.get_object(order_file).read()  ## read the file from OSS
            img_buf = io.BytesIO(bucket_object)
            df = pd.read_csv(img_buf)  # to read into pandas
            # FUNCTION to modify and upload
            print("File downloaded")
    except Exception:
        print("Pls check!!! File not read")
    return objectName
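A usage sketch tying the pieces together. Everything here is a placeholder: the OSS key pair, region endpoint, and bucket name, and the MySQL connection details for the AWS RDS instance. Writing rows with mysql.connector (already imported above) is just one option; pandas' to_sql with SQLAlchemy would work as well.

# Step 1: read the file from OSS (placeholder credentials and endpoint)
object_name = download_file(
    access_key_id="your_access_key_id",
    access_key_secret="your_access_key_secret",
    endpoint="https://oss-cn-hangzhou.aliyuncs.com",  # placeholder region endpoint
    bucket="your-bucket-name",
)

# Step 3 sketch: store the modified DataFrame in AWS RDS (MySQL)
conn = mysql.connector.connect(
    host="your-rds-endpoint.rds.amazonaws.com",  # placeholder RDS host
    user="admin",
    password="your_password",
    database="your_database",
)
cur = conn.cursor()
cur.execute("CREATE TABLE IF NOT EXISTS orders (col_a VARCHAR(64), col_b INT)")  # hypothetical layout
# df would come from the modification step inside download_file:
# for row in df.itertuples(index=False):
#     cur.execute("INSERT INTO orders (col_a, col_b) VALUES (%s, %s)", tuple(row))
conn.commit()
conn.close()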