Format file into an Excel sheet - python

I am new to programming in Python, and I need your help. If I load the following URL in Chrome, https://api.mysportsfeeds.com/v1.1/pull/nhl/2016-2017-regular/cumulative_player_stats.{format}, where {format} can be csv or json, the downloaded file is already well formatted when I open it. How can I convert it into an Excel sheet using pandas or openpyxl?
UPDATE
import base64
import requests
import json
USERNAME, PASSWORD = 'notworking', 'notworking'
def send_request():
    """Fetch the cumulative player stats CSV from the MySportsFeeds API.

    Sends a GET request authenticated with HTTP Basic credentials built from
    the module-level ``USERNAME`` and ``PASSWORD``, then prints the status
    code and the raw body of the reply.

    Returns:
        The ``requests.Response`` received from the server.

    Raises:
        requests.exceptions.RequestException: If the request could not be
            completed. The failure is reported on stdout and then re-raised.
    """
    # Basic auth header value: base64("user:password").
    credentials = '{}:{}'.format(USERNAME, PASSWORD).encode('utf-8')
    try:
        response = requests.get(
            url="https://api.mysportsfeeds.com/v1.1/pull/nhl/2017-2018-regular/cumulative_player_stats.csv",
            params={
                "fordate": "20171009",
            },
            headers={
                "Authorization": "Basic " +
                base64.b64encode(credentials).decode('ascii'),
            },
        )
    except requests.exceptions.RequestException:
        print('HTTP Request failed')
        # Re-raise: there is no response to return here, and falling through
        # to `return response` would fail with an unrelated UnboundLocalError.
        raise
    print('Response HTTP Status Code: {status_code}'.format(
        status_code=response.status_code))
    print('Response HTTP Response Body: {content}'.format(
        content=response.content))
    return response
import pandas as pd
import io
# Download the CSV payload; .content is the raw body of the response.
test = send_request().content
# Decode the body as UTF-8 and parse the resulting text as CSV.
df = pd.read_csv(io.StringIO(test.decode('utf-8')))
# Open an Excel writer on the target path and hand it the frame as 'Sheet1'.
writer = pd.ExcelWriter('/home/jeremie/Projects/Work_Projects/NHL_project/output.xls')
df.to_excel(writer, 'Sheet1')
# NOTE(review): the writer is never saved or closed anywhere in this snippet.
I am struggling with the fact that my code seems to work, but no file has been created.

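You need to add a final .save() to close the pandas Excel writer, which is what actually writes the Excel file to disk (in pandas 1.5 and later .save() is deprecated in favour of .close() or a with block), e.g.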
import pandas as pd
import io
import os
# Set the working folder to the same folder as the script
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# Fetch the CSV payload and parse it into a DataFrame.
test = send_request().content
df = pd.read_csv(io.StringIO(test.decode('utf-8')))
# Leaving the `with` block closes the writer, and closing is what writes the
# workbook to disk. This does the job of the older `writer.save()` call,
# which was deprecated in pandas 1.5 and removed in pandas 2.0.
# NOTE: pandas >= 2.0 no longer writes legacy .xls files; use an .xlsx file
# name with those versions.
with pd.ExcelWriter('output.xls') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
By setting the working folder this way, it should work equally well on a Windows PC.

Related

How to extract application/zip from api response?

I have got an application/octet-stream response whose body is an application/zip archive (a requests.Response object returned from an API call) with a CSV file inside it. I am trying to read the CSV file into pandas without writing to disk, if possible.
And if I want to write the zip file to a path as a zip file, how can I do that?
# Fetch the archive; raise_for_status() turns an HTTP error status into an exception.
resp = requests.get(url, headers=headers)
resp.raise_for_status()
# Try to inflate the body with zlib; wbits = MAX_WBITS|32 asks zlib to
# auto-detect a zlib or gzip header.
# NOTE(review): a ZIP archive is a container format, not a bare zlib/gzip
# stream -- confirm this call can handle the payload.
csv_obj = zlib.decompress(resp.content, wbits=zlib.MAX_WBITS|32)
print(type(csv_obj))
# The decompressed object is passed to read_csv as-is, then re-exported as CSV.
export_file = pd.read_csv(csv_obj)
export_file.to_csv('./Test_export.csv')
Updated version
# step 1: it turns out pandas can read zipped csv files even from urls!
# NOTE(review): `url` is defined elsewhere; this presumably relies on pandas
# inferring the compression from the URL -- verify for your endpoint.
some_dataframe = pandas.read_csv(url)
If pandas can't figure it out by itself there are some parameters you can try to massage.
# fallback: name the compression explicitly instead of relying on inference,
# and state which row holds the header
some_dataframe = pandas.read_csv(zip_filename, compression='zip', header=0) # etc..
Previous version
I will leave the previous version of my answer below for reference.
# step 1: downloading the zip file
zip_filename = 'response.zip'
with open(zip_filename, 'wb') as zip_file:
    # stream the body in 255-byte pieces and skip any empty ones
    zip_file.writelines(
        piece
        for piece in response.iter_content(chunk_size=255)
        if piece
    )
# step 2: turns out pandas can read zipped csv files!
some_dataframe = pandas.read_csv(zip_filename)
import pandas as pd
import io
import zipfile
resp = requests.get(url, headers=headers, stream=True)
resp.raise_for_status()
# Wrap the downloaded bytes in an in-memory file and open it as a zip archive.
zfile = zipfile.ZipFile(io.BytesIO(resp.content))
# I only had one file, so the last entry of zfile.namelist() is that file
member_name = zfile.namelist()[-1]
export_file = pd.read_csv(zfile.open(member_name))

Download xlsx file with Python

Want to download to local directory. This code works for csv but not xlsx. It writes a file but cannot be opened as Excel.
Any help will be appreciated.
url = 'https://some_url'
resp = requests.get(url)
# Fail loudly on an HTTP error status so that an error page is never saved
# under an .xlsx name.
resp.raise_for_status()
# .xlsx is a binary format: write the raw bytes, and let the `with` block
# close the file so everything is flushed to disk before it is opened.
with open('some_filename.xlsx', 'wb') as xlsx_file:
    xlsx_file.write(resp.content)
You could create a dataframe from the resp data and then use pd.to_excel() function to obtain the xlsx file. This is a tested solution, and it worked for me.
import requests
import pandas as pd
import io
url='https://www.google.com' #as an example
urlData = requests.get(url).content #Get the content from the url
# Decode the bytes as latin-1 and parse the resulting text as CSV.
dataframe = pd.read_csv(io.StringIO(urlData.decode('latin-1')))
filename="data.xlsx"
# Write the frame to the file named above.
dataframe.to_excel(filename)
In pandas you could just do:
import pandas as pd
url = 'https://some_url'
# The URL string is passed straight to read_csv; no separate download step.
df = pd.read_csv(url)

How can I get my API upload multiple files with a certain extension to work?

I'm new to programming and am trying to fix this script to upload data, but I can't get it to work and I don't know why. I am modifying a script I already had working with a single file, but I need to upload over 3,000 files with a specific extension (.json) from a specific directory. I am getting an error on the `my_data = f.read(open(files, 'r'))` line. The error I get is that f has no 'read' attribute. I'm not sure what I am doing incorrectly. I've researched it and still can't fix it. Any help would be appreciated.
import requests
import time
import csv
import json
import glob
from glob import glob
# function to post data
def postData(xactData):
    """POST one JSON document to the API and return the response.

    Args:
        xactData: The request body, as ``str`` or ``bytes``. Text is
            encoded as UTF-8 before it is sent.

    Returns:
        The ``requests.Response`` produced by the POST.
    """
    url = 'api address'
    # Content-Length counts bytes, not characters: len() of a str
    # under-reports any payload that contains non-ASCII text, so encode first
    # and measure (and send) the encoded bytes.
    body = xactData.encode('utf-8') if isinstance(xactData, str) else xactData
    headers = {
        'Content-Type': 'application/json',
        'Content-Length': str(len(body)),
        'Request-Timeout': '2000000000',
    }
    return requests.post(url, headers=headers, data=body)
# read data
f = r'path to file'
# iterate over all files with extension path .json
for files in glob(f + '/*.json'):
    # `f` is the directory path (a plain str), so it has no read() method.
    # Read through the file object returned by open() instead; the `with`
    # block closes each file, which matters with thousands of them.
    with open(files, 'r') as json_file:
        my_data = json_file.read()
    print(files) # print the files that have been uploaded to the api
    print(my_data) # print the data uploaded to the api
    # post the data to the database
    result = postData(my_data)
    print(result)

Python - how to read Sharepoint excel sheet specific worksheet

In Python I am utilizing Office 365 REST Python Client library to access and read an excel workbook that contains many sheets.
While the authentication is successful, I am unable to append the right path of sheet name to the file name in order to access the 1st or 2nd worksheet by its name, which is why the output from the sheet is not JSON, rather IO Bytes which my code is not able to process.
My end goal is to simply access the specific work sheet by its name 'employee_list' and transform it into JSON or Pandas Data frame for further usage.
Code snippet below -
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
# Credentials and the document URL used to build the SharePoint client.
username = 'abc#a.com'
password = 'abcd'
site_url = 'https://sample.sharepoint.com/sites/SAMPLE/_layouts/15/Doc.aspx?OR=teams&action=edit&sourcedoc={739271873}'
# HOW TO ACCESS WORKSHEET BY ITS NAME IN ABOVE LINE
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
# The request target is site_url with "/_api/web/" appended.
request = RequestOptions("{0}/_api/web/".format(site_url))
response = ctx.execute_request_direct(request)
json_data = json.loads(response.content) # ERROR ENCOUNTERED JSON DECODE ERROR SINCE DATA IS IN BYTES
You can access it by sheet index, check the following code....
import xlrd
loc = ("File location")
# Open the workbook at `loc` and select the sheet at index 0.
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)
# Print the value of the cell at row index 1, column index 0
print(sheet.cell_value(1, 0))
You can try to add the component 'sheetname' to the url like so.
https://site/lib/workbook.xlsx#'Sheet1'!A1
It seems that URL constructed to access data is not correct. You should test full URL in your browser as working and then modify code to get going. You may try this with some changes, I have verified that URL formed with this logic would return JSON data.
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
username = 'abc#a.com'
password = 'abcd'
# The URL itself contains single quotes around the range, so the literal is
# delimited with double quotes; nesting them inside a single-quoted string is
# a syntax error.
site_url = "https://sample.sharepoint.com/_vti_bin/ExcelRest.aspx/RootFolder/ExcelFileName.xlsx/Model/Ranges('employee_list!A1%7CA10')?$format=json"
# Replace RootFolder/ExcelFileName.xlsx with actual path of excel file from the root.
# Replace A1 and A10 with actual start and end of cell range.
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
# Request the range URL directly and parse the body of the reply as JSON.
request = RequestOptions(site_url)
response = ctx.execute_request_direct(request)
json_data = json.loads(response.content)
Source: https://learn.microsoft.com/en-us/sharepoint/dev/general-development/sample-uri-for-excel-services-rest-api
The update I'm using (Office365-REST-Python-Client==2.3.11) allows simpler access to an Excel file in the SharePoint repository.
# from original_question import pd,\
# username,\
# password,\
# UserCredential,\
# File,\
# BytesIO
# Credentials used to authorise the file request.
user_credentials = UserCredential(user_name=username, password=password)
# Absolute path of the Excel file on SharePoint.
file_url = ('https://sample.sharepoint.com'
            '/sites/SAMPLE/{*recursive_folders}'
            '/sample_worksheet.xlsx')
# Binary in-memory object that will receive the file.
excel_file = BytesIO()
# Reference the file on SharePoint and attach the credentials to the request.
excel_file_online = File.from_url(abs_url=file_url).with_credentials(
    credentials=user_credentials)
# Write the binary response of the file request into the bytes object.
excel_file_online.download(file_object=excel_file).execute_query()
We now have a binary copy of the Excel file in a BytesIO object named excel_file. From here, reading it into a pd.DataFrame is as straightforward as reading an Excel file stored on a local drive, e.g.:
# No sheet_name is given here, so pandas picks its default sheet.
pd.read_excel(excel_file) # -> pd.DataFrame
Hence, if you are interested in a specific sheet like 'employee_list', you may preferably read it as
# Read only the worksheet named 'employee_list' from the in-memory workbook.
employee_list = pd.read_excel(excel_file,
sheet_name='employee_list')
# -> pd.DataFrame
or
# sheet_name=None requests every sheet; the result is a dict that is looked
# up by sheet name below.
data = pd.read_excel(excel_file,
sheet_name=None) # -> dict
# dict.get() yields None instead of raising when the key is missing.
employee_list = data.get('employee_list')
# -> pd.DataFrame, or None if there is no 'employee_list' key
I know you stated you can't use a BytesIO object, but for those coming here who are reading the file in as a BytesIO object like I was looking for, you can use the sheet_name arg in pd.read_excel:
url = "https://sharepoint.site.com/sites/MySite/MySheet.xlsx"
sheet_name = 'Sheet X'
# NOTE(review): `ctx` and `relative_url` are not defined in this snippet;
# they have to come from the surrounding code.
response = File.open_binary(ctx, relative_url)
# Wrap the downloaded bytes in an in-memory file; a BytesIO built from its
# initial content starts at position 0, so no write()/seek(0) is needed.
bytes_file_obj = io.BytesIO(response.content)
# Select the sheet by name. (Python comments start with '#'; the original
# '//' marker is a syntax error.)
df = pd.read_excel(bytes_file_obj, sheet_name=sheet_name)

python and Django export mssql query to excel

I have built an app in Django to extract data from an MSSQL server and display the results in a table on a template.
What I want to do now is export the same SQL query results to an Excel file. I have used the pymssql driver to connect to the db, together with SQLAlchemy.
This is what I did, but somehow the Excel file wasn't created when the function was called:
def download_excel(request):
    """Export the rows matching the posted date and account to an .xls file.

    Reads ``selectdate`` and ``selectaccount`` from ``request.POST``, joins
    the ``gltrxdet`` and ``gltrx_all`` tables on ``journal_ctrl_num``,
    filters on the account code and on the entry date (converted to a
    proleptic Gregorian ordinal), and writes the result to a workbook on the
    machine running this code.

    Args:
        request: Object whose ``POST`` mapping carries the form fields.

    Returns:
        None, in every case. Nothing is exported unless both form fields
        are present.
    """
    # Guard clause: both form fields are required. (The original also
    # compared each value with itself, which is always true and was dropped.)
    if "selectdate" not in request.POST or "selectaccount" not in request.POST:
        return None
    selected_date = request.POST["selectdate"]
    selected_acc = request.POST["selectaccount"]
    convert = datetime.datetime.strptime(selected_date, "%Y-%m-%d").toordinal()

    engine = create_engine('mssql+pymssql://username:password#servername /db')
    metadata = MetaData()
    fund = Table('gltrxdet', metadata, autoload=True, autoload_with=engine)
    rate = Table('gltrx_all', metadata, autoload=True, autoload_with=engine)
    stmt = select([
        fund.columns.account_code,
        fund.columns.description,
        fund.columns.nat_balance,
        fund.columns.rate_type_home,
        rate.columns.date_applied,
        rate.columns.date_entered,
        fund.columns.journal_ctrl_num,
        rate.columns.journal_ctrl_num,
    ])
    stmt = stmt.where(and_(
        rate.columns.journal_ctrl_num == fund.columns.journal_ctrl_num,
        fund.columns.account_code == selected_acc,
        rate.columns.date_entered == convert,
    ))

    # Release the database connection as soon as the rows are loaded.
    with engine.connect() as connection:
        df = pd.read_sql(stmt, connection)

    # Raw string: '\e' is not a valid escape sequence, so the plain literal
    # triggers an invalid-escape warning; the path value is unchanged.
    # Leaving the `with` block closes the writer, which writes the file;
    # this replaces `writer.save()`, removed in pandas 2.0.
    with pd.ExcelWriter(r'C:\excel\export.xls') as writer:
        df.to_excel(writer, sheet_name='bar')
    return None
My code actually worked. I thought it was going to save the Excel file to the 'C:\excel' folder, so I was looking for the file there but couldn't find it. The Excel file was actually exported to my Django project folder instead.
How do I allow the end user to download the file to their desktop instead of exporting it to the server itself?
I managed to get it to work after spending a lot of time on research. This code exports the SQL query results to an Excel file that the end user can download:
import pandas as pd
from django.http import HttpResponse
try:
from io import BytesIO as IO # for modern python
except ImportError:
from StringIO import StringIO as IO # for legacy python
def download_excel(request):
    """Return the query result for the posted date/account as an Excel download.

    Reads ``selectdate`` and ``selectaccount`` from ``request.POST``, joins
    the ``gltrxdet`` and ``gltrx_all`` tables on ``journal_ctrl_num``,
    filters on the account code and on the applied date (converted to a
    proleptic Gregorian ordinal), and streams the rows back as an .xlsx
    attachment built entirely in memory.

    Args:
        request: The incoming request; its ``POST`` mapping carries the
            form fields.

    Returns:
        An ``HttpResponse`` holding the workbook as an attachment, or a
        400 response when either form field is missing.
    """
    # A view must always return a response object, so reject incomplete
    # forms explicitly instead of falling off the end and returning None.
    # (The original also compared each value with itself, which is always
    # true and was dropped.)
    if "selectdate" not in request.POST or "selectaccount" not in request.POST:
        return HttpResponse('selectdate and selectaccount are required',
                            status=400)
    selected_date = request.POST["selectdate"]
    selected_acc = request.POST["selectaccount"]
    convert = datetime.datetime.strptime(selected_date, "%Y-%m-%d").toordinal()

    engine = create_engine('mssql+pymssql://username:password#servername /db')
    # The tables are reflected through `autoload_with=engine`, so the
    # metadata needs no bind (the original passed an undefined `connection`).
    metadata = MetaData()
    fund = Table('gltrxdet', metadata, autoload=True, autoload_with=engine)
    rate = Table('gltrx_all', metadata, autoload=True, autoload_with=engine)
    stmt = select([
        fund.columns.account_code,
        fund.columns.description,
        fund.columns.nat_balance,
        rate.columns.date_applied,
        fund.columns.journal_ctrl_num,
        rate.columns.journal_ctrl_num,
    ])
    stmt = stmt.where(and_(
        rate.columns.journal_ctrl_num == fund.columns.journal_ctrl_num,
        fund.columns.account_code == selected_acc,
        rate.columns.date_applied == convert,
    ))

    # Open the connection here (it was never created in the original) and
    # release it as soon as the rows have been copied into the frame.
    with engine.connect() as connection:
        results = connection.execute(stmt)
        df = pd.DataFrame(data=list(results), columns=list(results.keys()))

    # Build the workbook in memory. Leaving the `with` block closes the
    # writer exactly once, which flushes the workbook into the buffer.
    excel_file = IO()
    with pd.ExcelWriter(excel_file, engine='xlsxwriter') as xlwriter:
        df.to_excel(xlwriter, sheet_name='sheetname')

    # xlsxwriter produces the .xlsx format, so advertise the single matching
    # MIME type and give the download an .xlsx name.
    response = HttpResponse(
        excel_file.getvalue(),
        content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    )
    # set the file name in the Content-Disposition header
    response['Content-Disposition'] = 'attachment; filename=myfile.xlsx'
    return response

Categories

Resources