Hi a learner in python I have written code the extracts a son file from a sports website.
The code is
from bs4 import BeautifulSoup
import requests
import json
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
payload={}
headers = {}
response = requests.request("GET", url, headers=headers, data=payload)
print(response.text)
and the output looks like this (small portion)
sectionaltimes_callback({"Horses":[{"Comment":"Slow Out 1 Lengths , got back 2nd last off tardy start 8 Lengths 800m, still mile off them getting widest from the corner, charged home last 200m for eye catching second spot # powered home widest","FinalPosition":2,"FinalPositionAbbreviation":"2nd","FullName":"Ameerati","SaddleNumber":12,"HorseUrl":"/horses/ameerati","SilkUrl":"//cdn.silks.racing.com/bb/114031.png","Trainer":"Robbie Griffiths & Mathew de Kock","TrainerUrl":"/trainers/robbie-griffiths","Jockey":"P.M.Moloney","JockeyUrl":"/jockeys/patrick-moloney","SectionalTimes":[{"Distance":"1200m","Position":11,"Time":"1:11.43","AvgSpeed":0.0},{"Distance":"1000m","Position":11,"Time":"59.29","AvgSpeed":0.0},{"Distance":"800m","Position":11,"Time":"46.95","AvgSpeed":0.0},{"Distance":"600m","Position":11,"Time":"34.77","AvgSpeed":0.0},{"Distance":"400m","Position":11,"Time":"22.71","AvgSpeed":0.0},{"Distance":"200m","Position":4,"Time":"11.45","AvgSpeed":0.0},{"Distance":"Finish","Position":2,"Time":"","AvgSpeed":0.0}],"SplitTimes":[{"Distance":"1200m-1000m","Position":11,"Time":"12.14","AvgSpeed":0.0},{"Distance":"1000m-800m","Position":11,"Time":"12.34","AvgSpeed":0.0},{"Distance":"800m-600m","Position":11,"Time":"12.18","AvgSpeed":0.0},{"Distance":"600m-400m","Position":11,"Time":"12.06","AvgSpeed":0.0},{"Distance":"400m-200m","Position":11,"Time":"11.26","AvgSpeed":0.0},{"Distance":"200m-Finish","Position":4,"Time":"11.45","AvgSpeed":0.0}],"StartPosition":0,"BarrierNumber":12,"RaceTime":"","TimeVarToWinner":0.0,"BeatenMargin":0.0,"DistanceRun":0,"DistanceVarToWinner":"","SixHundredMetresTime":"34.77","TwoHundredMetresTime":"11.45","Early":0.0,"Mid":0.0,"Late":0.0,"OverallPeakSpeed":0.0,"PeakSpeedLocation":null,"OverallAvgSpeed":0.0,"DistanceFromRail":0.0},
The help I would appreciate now is what do I do to put this in a format that I can open in excel
import pandas as pd
import requests
import json
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.utils.dataframe import dataframe_to_rows
def race_data_to_xslxs(url, fname):
# get data
data = json.loads(requests.get(url).text[24:-1])
# create dataframes
dfs = {}
singulars = pd.DataFrame()
for k, v in data.items():
if isinstance(v, list):
dfs[k] = pd.DataFrame(v)
else:
singulars[k] = [v]
dfs = {'summary': singulars, **dfs}
# create workbook
wb = Workbook()
for k, df in dfs.items():
# create sheet
wsx = wb.create_sheet(title=k)
rows = dataframe_to_rows(df)
for r_idx, row in enumerate(rows, 1):
for c_idx, value in enumerate(row, 1):
wsx.cell(row=r_idx, column=c_idx, value=str(value))
del wb['Sheet']
# write excel file
wb.save(filename=fname)
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
path = 'fname.xlsx'
race_data_to_xslxs(url=url, fname=path)
The API is returning JSONP, not JSON. This is JSON wrapped in a call to a callback function, which can be used by browsers without violating the same-origin rule. You need to remove that function call before parsing it as JSON.
import re
import json
response = requests.request("GET", url, headers=headers, data=payload)
json_string = re.sub(r'^sectionaltimes_callback\((.*)\)$', r'\1', response)
data = json.loads(json_string)
You can try this -
import requests
import json
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
response = requests.get(url)
json.loads(response.text.split("(", 1)[1].strip(")"))
i made a script that compare datas form diferent sheets, all godd, now i want to add this updates sheet instead of the old one on the entire excel and keeping the other sheets.
import numpy as np
import pandas as pd
from timestampdirectory import createdir
import openpyxl
from openpyxl import workbook
from openpyxl import worksheet
import os
import time
def svnanalysis():
dest = createdir()
dfSvnUsers = pd.read_excel(os.path.join(dest, "SvnUsers.xlsx"))
dfSvnGroupMembership = pd.read_excel(os.path.join(dest, "SvnGroupMembership.xlsx"))
dfSvnRepoGroupAccess = pd.read_excel(os.path.join(dest, "SvnRepoGroupAccess.xlsx"))
dfsvnReposSize = pd.read_excel(os.path.join(dest, "svnReposSize.xlsx"))
dfsvnRepoLastChangeDate = pd.read_excel(os.path.join(dest, "svnRepoLastChangeDate.xlsx"))
dfUserDetails = pd.read_excel(r"D:\GIT-files\Automate-Stats\SVN_sample_files\CM_UsersDetails.xlsx")
timestr = time.strftime("%Y-%m-%d-")
xlwriter = pd.ExcelWriter(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'))
dfUserDetails.to_excel(xlwriter, sheet_name='UserDetails',index = False)
dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index = False )
dfSvnRepoGroupAccess.to_excel(xlwriter, sheet_name='SvnRepoGroupAccess', index = False)
dfsvnReposSize.to_excel(xlwriter, sheet_name='svnReposSize', index = False)
dfsvnRepoLastChangeDate.to_excel(xlwriter, sheet_name='svnRepoLastChangeDate',index= False)
xlwriter.close()
whats above its in the same script where i used some xlsx files an create only 1 xlsx with those files as sheets, now below i make some changes in SvnUser sheet and i want to upload it on the excel instead of old sheet SvnUser, and keep the other sheets
# xlwriter = pd.ExcelWriter(os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx'))
svnUsers = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="SvnUsers")
details = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="UserDetails")
svnUsers = svnUsers.assign(SVNaccount=svnUsers["accountName"].isin(details["Account"]).astype(bool))
print(svnUsers)
# dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
# xlwriter.close()
The easiest way to achieve that would be to overwrite the entire excel sheet, for example like this:
import pandas as pd
# create dataframe from excel file
df = pd.read_excel(
'2022-06-15-Usage-SvnAnalysis.xlsx',
engine='openpyxl',
sheet_name='UserDetails'
)
res_df = ..... # calculate the df you want to write
# overwrite excel sheet with dataframe
with pd.ExcelWriter(
'2022-06-15-Usage-SvnAnalysis.xlsx',
engine='openpyxl',
mode='a',
if_sheet_exists='replace'
) as writer:
res_df.to_excel(writer, sheet_name='UserDetails')
I used python 3 and pandas to parse the daily close from WSJ into EXCEL. However, the daily close shown on the web page screen cannot be extracted. Here is the link: "https://quotes.wsj.com/index/COMP/historical-prices"
How to download the close data on screen into excel?
and how to download "DOWNLOAD A SPREADSHEET" button file into excel with another name like comp.xlxs ?
Here are the codes:
import requests
import pandas as pd
url = 'https://quotes.wsj.com/index/COMP/historical-prices'
jsonData = requests.get(url).json()
final_df = pd.DataFrame()
for row in jsonData['data']:
#row = jsonData['data'][1]
data_row = []
for idx, colspan in enumerate(row['colspan']):
colspan_int = int(colspan[0])
data_row.append(row['td'][idx] * colspan_int)
flat_list = [item for sublist in data_row for item in sublist]
temp_row = pd.DataFrame([flat_list])
final_df = final_df.append(temp_row, sort=True).reset_index(drop=True)
wait2 = input("PRESS ENTER TO CONTINUE.")
Follow UP question quotes:
#
url = 'https://quotes.wsj.com/index/HK/XHKG/HSI/historical-prices/download?num_rows=15&range_days=15&endDate=12/06/2019'
response = requests.get(url)
open('HSI.csv', 'wb').write(response.content)
read_file = pd.read_csv (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\HSI.csv')
read_file.to_excel (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\HSI.xlsx', index = None, header=True)
#
url = 'https://quotes.wsj.com/index/SPX/historical-prices/download?num_rows=15&range_days=15&endDate=12/06/2019'
response = requests.get(url)
open('SPX.csv', 'wb').write(response.content)
read_file = pd.read_csv (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\SPX.csv')
read_file.to_excel (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\SPX.xlsx', index = None, header=True)
#
url = 'https://quotes.wsj.com/index/COMP/historical-prices/download?num_rows=15&range_days=15&endDate=12/06/2019'
response = requests.get(url)
open('COMP.csv', 'wb').write(response.content)
read_file = pd.read_csv (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\COMP.csv')
read_file.to_excel (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\COMP.xlsx', index = None, header=True)
the URL is wrong; once downloaded you can do "Get Info" if on a Mac, and you'll see "Where From:". You will see it's of the form below.
import requests
import pandas as pd
import io
#original URL had a bunch of other parameters I omitted, only these seem to matter but YMMV
url = 'https://quotes.wsj.com/index/COMP/historical-prices/download?num_rows=360&range_days=360&endDate=11/06/2019'
response = requests.get(url)
#do this if you want the CSV written to your machine
open('test_file.csv', 'wb').write(response.content)
# this decodes the content of the downloaded response and presents it to pandas
df_test = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
To answer your additional question -- you can simply loop across a list of tickers or symbols, something like:
base_url = 'https://quotes.wsj.com/index/{ticker_name}/historical-prices/download?num_rows=360&range_days=360&endDate=11/06/2019'
ticker_list = ['COMP','SPX','HK/XHKG/HSI']
for ticker in ticker_list:
response = requests.get(base_url.format(ticker_name = ticker))
#do this if you want the CSV written to your machine
open('prices_'+ticker.replace('/','-')+'.csv', 'wb').write(response.content)
Note for HK/XHKG/HSI, we need to replace the slashes with hyphens or it's not a valid filename. You can also use this pattern to make dataframes.
What I'm trying to do:
I'm using Python to populate data to an existing Excel file.
What works:
My code below is successful in exporting the table to Excel file "Futures.xls".
What doesn't work:
The code below extracts table from website and exports to Futures excel file ( 100 Rows )The subsequent code re-opens Futures file and appends to Futures1 excel file ( 200 Rows ). However, if I continue to the run the subsequent code multiple times I cannot get more than 200 rows like 300 rows and so on as an when I run the code. Can someone tell me the issue?
My code:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import os
url = "https://quotes.ino.com/exchanges/contracts.html?r=NYMEX_NG"
res = requests.get(url)
soup = BeautifulSoup(res.text, 'lxml')
Markets = []
Contracts =[]
Opens =[]
Highs =[]
Lows =[]
Lasts=[]
Changes=[]
Pcts=[]
data_rows = soup.findAll('tr')[3:]
for td in data_rows[:100]:
Market = td.findAll ('td')[0].text
Markets.append(Market)
Contract = td.findAll('td')[1].text
Contracts.append(Contract)
Open = td.findAll('td')[2].text
Opens.append(Open)
High = td.findAll('td')[3].text
Highs.append(High)
Low = td.findAll('td')[4].text
Lows.append(Low)
Last = td.findAll('td')[5].text
Lasts.append(Last)
Change = td.findAll('td')[6].text
Changes.append(Change)
Pct = td.findAll('td')[7].text
Pcts.append(Pct)
Time = td.findAll('td')[8].text
df = pd.DataFrame({'Contracts' :Contracts, 'Markets':Market,'Open':Opens, 'High':Highs, 'Low':Lows,'Last':Lasts,'Pct':Pcts})
out_path = "C:\Sid\Futures.xls"
writer = pd.ExcelWriter(out_path , engine='xlsxwriter')
df.to_excel(writer,'Sheet2',index=False)
writer.save()
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = load_workbook("C:\Sid\Futures.xlsx")
ws = wb['Sheet2']
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = load_workbook("C:\Sid\Futures.xlsx")
ws = wb['Sheet2']
for row in dataframe_to_rows(df, header=None, index = True):
ws.append(row)
wb.save('C:\Sid\Futures1.xlsx')
Additional:
Also, what code do I need so that my python runs automatically when my website updates? Prices change every 15 minutes.
I am looking to update my already created Excel worksheet with Json data coming from a browser. The current code generates a new worksheet everytime i run the program.
import requests
import json
import urllib
import xlwt
url = raw_input("Enter url:-")
res = urllib.urlopen(url)
data = res.read()
data1 = json.loads(data)
book = xlwt.Workbook(encoding="utf-8")
sheet1 = book.add_sheet("AssetsReport0")
colunm_count = 0
for title, value in data1.iteritems():
sheet1.write(0, colunm_count, title)
sheet1.write(1, colunm_count, value)
colunm_count += 1
file_name = "test1.xls"%()
book.save(file_name)
What is the best way i could do this?