Loop through dataframe rows

Loop through dataframe rows - python

I want to push data to a sheet.
When I print the values it works, but on my sheet only the last element of my list appears.
Here is my code :
import json
from urllib.request import urlopen
import pygsheets
import pandas as pd
with urlopen("myapiurl") as response: source = response.read()
data = json.loads(source)
#authorization
gc = pygsheets.authorize(service_file='myjsonfile')
#open the google spreadsheet
sh = gc.open('Test')
#select the first sheet
wks = sh[0]
# Create empty dataframe
df = pd.DataFrame()
#update the first sheet with df, starting at cell B2.
wks.set_dataframe(df,(1,1))
for item in data["resultsPage"]["results"]["Entry"]:
id = item["event"]["id"]
print(id)
df['id_string'] = id

You can save the ids in a list id_s and finally copy it into the column of the DataFrame.
It will work because you no longer overwrite the column on every iteration.
import json
from urllib.request import urlopen
import pygsheets
import pandas as pd
# Fetch the API payload and decode it.
with urlopen("myapiurl") as response:
    source = response.read()
data = json.loads(source)

# authorization
gc = pygsheets.authorize(service_file='myjsonfile')
# open the google spreadsheet
sh = gc.open('Test')
# select the first sheet
wks = sh[0]

# Collect every event id first: assigning a scalar to the column inside the
# loop would replace the whole column on each pass, keeping only the last id.
id_s = []
for item in data["resultsPage"]["results"]["Entry"]:
    event_id = item["event"]["id"]  # avoid shadowing the builtin id()
    print(event_id)
    id_s.append(event_id)

# Build the dataframe once, from the complete list.
df = pd.DataFrame({'id_string': id_s})

# Push the dataframe only after it has been filled; (1, 1) is row 1,
# column 1, i.e. the write starts at cell A1.
wks.set_dataframe(df, (1, 1))

Related

Parsing JSON output file

Hi, I am a learner in Python and I have written code that extracts a JSON file from a sports website.
The code is
from bs4 import BeautifulSoup
import requests
import json
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
payload={}
headers = {}
response = requests.request("GET", url, headers=headers, data=payload)
print(response.text)
and the output looks like this (small portion)
sectionaltimes_callback({"Horses":[{"Comment":"Slow Out 1 Lengths , got back 2nd last off tardy start 8 Lengths 800m, still mile off them getting widest from the corner, charged home last 200m for eye catching second spot # powered home widest","FinalPosition":2,"FinalPositionAbbreviation":"2nd","FullName":"Ameerati","SaddleNumber":12,"HorseUrl":"/horses/ameerati","SilkUrl":"//cdn.silks.racing.com/bb/114031.png","Trainer":"Robbie Griffiths & Mathew de Kock","TrainerUrl":"/trainers/robbie-griffiths","Jockey":"P.M.Moloney","JockeyUrl":"/jockeys/patrick-moloney","SectionalTimes":[{"Distance":"1200m","Position":11,"Time":"1:11.43","AvgSpeed":0.0},{"Distance":"1000m","Position":11,"Time":"59.29","AvgSpeed":0.0},{"Distance":"800m","Position":11,"Time":"46.95","AvgSpeed":0.0},{"Distance":"600m","Position":11,"Time":"34.77","AvgSpeed":0.0},{"Distance":"400m","Position":11,"Time":"22.71","AvgSpeed":0.0},{"Distance":"200m","Position":4,"Time":"11.45","AvgSpeed":0.0},{"Distance":"Finish","Position":2,"Time":"","AvgSpeed":0.0}],"SplitTimes":[{"Distance":"1200m-1000m","Position":11,"Time":"12.14","AvgSpeed":0.0},{"Distance":"1000m-800m","Position":11,"Time":"12.34","AvgSpeed":0.0},{"Distance":"800m-600m","Position":11,"Time":"12.18","AvgSpeed":0.0},{"Distance":"600m-400m","Position":11,"Time":"12.06","AvgSpeed":0.0},{"Distance":"400m-200m","Position":11,"Time":"11.26","AvgSpeed":0.0},{"Distance":"200m-Finish","Position":4,"Time":"11.45","AvgSpeed":0.0}],"StartPosition":0,"BarrierNumber":12,"RaceTime":"","TimeVarToWinner":0.0,"BeatenMargin":0.0,"DistanceRun":0,"DistanceVarToWinner":"","SixHundredMetresTime":"34.77","TwoHundredMetresTime":"11.45","Early":0.0,"Mid":0.0,"Late":0.0,"OverallPeakSpeed":0.0,"PeakSpeedLocation":null,"OverallAvgSpeed":0.0,"DistanceFromRail":0.0},
The help I would appreciate now is what do I do to put this in a format that I can open in excel

import pandas as pd
import requests
import json
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.utils.dataframe import dataframe_to_rows
def _unwrap_jsonp(text):
    """Return the JSON payload of *text*, removing a JSONP wrapper if present.

    Plain JSON (starting with ``{`` or ``[``) is returned unchanged. Otherwise
    the text between the first ``(`` and the last ``)`` is returned, so the
    callback name and any trailing ``;`` or whitespace do not matter.

    Raises:
        ValueError: if no ``callback( ... )`` wrapper can be located.
    """
    stripped = text.strip()
    if stripped.startswith(('{', '[')):
        return stripped
    start = stripped.find('(')
    end = stripped.rfind(')')
    if start == -1 or end < start:
        raise ValueError('response is neither JSON nor JSONP')
    return stripped[start + 1:end]


def race_data_to_xslxs(url, fname):
    """Download a JSON/JSONP document and save it as an Excel workbook.

    Every top-level key whose value is a list becomes its own sheet; all
    other top-level values are gathered into a one-row 'summary' sheet,
    which is written first.

    Args:
        url: address of the JSON or JSONP document.
        fname: path of the .xlsx file to write.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the body cannot be unwrapped or parsed as JSON.
    """
    # get data
    response = requests.get(url)
    response.raise_for_status()
    data = json.loads(_unwrap_jsonp(response.text))

    # create dataframes: one per list-valued key, plus a single-row summary
    dfs = {}
    singulars = pd.DataFrame()
    for k, v in data.items():
        if isinstance(v, list):
            dfs[k] = pd.DataFrame(v)
        else:
            singulars[k] = [v]
    dfs = {'summary': singulars, **dfs}

    # create workbook
    wb = Workbook()
    for k, df in dfs.items():
        # create sheet
        wsx = wb.create_sheet(title=k)
        for r_idx, row in enumerate(dataframe_to_rows(df), 1):
            for c_idx, value in enumerate(row, 1):
                # Every value is written as text; presumably this is so that
                # nested lists/dicts inside a cell can still be stored.
                wsx.cell(row=r_idx, column=c_idx, value=str(value))
    # Workbook() starts with a default sheet named 'Sheet'; drop it.
    del wb['Sheet']

    # write excel file
    wb.save(filename=fname)
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
path = 'fname.xlsx'
race_data_to_xslxs(url=url, fname=path)

The API is returning JSONP, not JSON. This is JSON wrapped in a call to a callback function, which can be used by browsers without violating the same-origin rule. You need to remove that function call before parsing it as JSON.
import re
import json
response = requests.request("GET", url, headers=headers, data=payload)
# re.sub works on strings, so pass the body text rather than the Response
# object. DOTALL lets the payload span several lines, and the tail of the
# pattern tolerates whitespace or a ';' after the closing parenthesis.
json_string = re.sub(
    r'^\s*sectionaltimes_callback\((.*)\)\s*;?\s*$',
    r'\1',
    response.text,
    flags=re.DOTALL,
)
data = json.loads(json_string)

You can try this -
import requests
import json
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
response = requests.get(url)
# Keep what follows the first "(" of the JSONP wrapper, then trim the closing
# ")" together with any trailing ';' or whitespace. The parsed document is
# bound to a name so it can be used afterwards.
data = json.loads(response.text.split("(", 1)[1].rstrip("); \t\r\n"))

Overwrite sheets saving and the other sheets on excel

I made a script that compares data from different sheets, and it all works. Now I want to write the updated sheet in place of the old one in the workbook while keeping the other sheets.
import numpy as np
import pandas as pd
from timestampdirectory import createdir
import openpyxl
from openpyxl import workbook
from openpyxl import worksheet
import os
import time
def svnanalysis():
    """Combine the individual SVN report workbooks into one dated workbook.

    Reads five workbooks from the directory returned by ``createdir()`` plus
    the user-details workbook, then writes each one as a sheet of
    ``<YYYY-MM-DD->Usage-SvnAnalysis.xlsx`` in that same directory.
    """
    dest = createdir()

    # (sheet name, source workbook), listed in the order the sheets are written.
    sources = [
        ('UserDetails', r"D:\GIT-files\Automate-Stats\SVN_sample_files\CM_UsersDetails.xlsx"),
        ('SvnUsers', os.path.join(dest, "SvnUsers.xlsx")),
        ('SvnGroupMembership', os.path.join(dest, "SvnGroupMembership.xlsx")),
        ('SvnRepoGroupAccess', os.path.join(dest, "SvnRepoGroupAccess.xlsx")),
        ('svnReposSize', os.path.join(dest, "svnReposSize.xlsx")),
        ('svnRepoLastChangeDate', os.path.join(dest, "svnRepoLastChangeDate.xlsx")),
    ]

    # Load everything before creating the output file, so a missing input
    # does not leave a half-written workbook behind.
    frames = [(sheet, pd.read_excel(path)) for sheet, path in sources]

    timestr = time.strftime("%Y-%m-%d-")
    out_path = os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx')
    # The context manager closes (and thereby saves) the writer even if one
    # of the to_excel calls raises.
    with pd.ExcelWriter(out_path) as xlwriter:
        for sheet, frame in frames:
            frame.to_excel(xlwriter, sheet_name=sheet, index=False)
What's above is in the same script, where I take several xlsx files and create a single xlsx with those files as sheets. Below, I make some changes to the SvnUsers sheet, and I want to write it to the workbook in place of the old SvnUsers sheet while keeping the other sheets.
# xlwriter = pd.ExcelWriter(os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx'))
svnUsers = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="SvnUsers")
details = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="UserDetails")
svnUsers = svnUsers.assign(SVNaccount=svnUsers["accountName"].isin(details["Account"]).astype(bool))
print(svnUsers)
# dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
# xlwriter.close()

The easiest way to achieve that would be to overwrite the entire excel sheet, for example like this:
import pandas as pd
# create dataframe from excel file
df = pd.read_excel(
'2022-06-15-Usage-SvnAnalysis.xlsx',
engine='openpyxl',
sheet_name='UserDetails'
)
res_df = ..... # calculate the df you want to write
# overwrite excel sheet with dataframe
with pd.ExcelWriter(
'2022-06-15-Usage-SvnAnalysis.xlsx',
engine='openpyxl',
mode='a',
if_sheet_exists='replace'
) as writer:
res_df.to_excel(writer, sheet_name='UserDetails')

How to extract daily close from WSJ using Python?

I used python 3 and pandas to parse the daily close from WSJ into EXCEL. However, the daily close shown on the web page screen cannot be extracted. Here is the link: "https://quotes.wsj.com/index/COMP/historical-prices"
How to download the close data on screen into excel?
And how can I download the file behind the "DOWNLOAD A SPREADSHEET" button into Excel under another name, like comp.xlsx?
Here are the codes:
import requests
import pandas as pd
url = 'https://quotes.wsj.com/index/COMP/historical-prices'
jsonData = requests.get(url).json()
final_df = pd.DataFrame()
for row in jsonData['data']:
#row = jsonData['data'][1]
data_row = []
for idx, colspan in enumerate(row['colspan']):
colspan_int = int(colspan[0])
data_row.append(row['td'][idx] * colspan_int)
flat_list = [item for sublist in data_row for item in sublist]
temp_row = pd.DataFrame([flat_list])
final_df = final_df.append(temp_row, sort=True).reset_index(drop=True)
wait2 = input("PRESS ENTER TO CONTINUE.")
Follow UP question quotes:
#
url = 'https://quotes.wsj.com/index/HK/XHKG/HSI/historical-prices/download?num_rows=15&range_days=15&endDate=12/06/2019'
response = requests.get(url)
open('HSI.csv', 'wb').write(response.content)
read_file = pd.read_csv (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\HSI.csv')
read_file.to_excel (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\HSI.xlsx', index = None, header=True)
#
url = 'https://quotes.wsj.com/index/SPX/historical-prices/download?num_rows=15&range_days=15&endDate=12/06/2019'
response = requests.get(url)
open('SPX.csv', 'wb').write(response.content)
read_file = pd.read_csv (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\SPX.csv')
read_file.to_excel (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\SPX.xlsx', index = None, header=True)
#
url = 'https://quotes.wsj.com/index/COMP/historical-prices/download?num_rows=15&range_days=15&endDate=12/06/2019'
response = requests.get(url)
open('COMP.csv', 'wb').write(response.content)
read_file = pd.read_csv (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\COMP.csv')
read_file.to_excel (r'C:\A-CEO\REPORTS\STOCKS\PROFILE\Python\COMP.xlsx', index = None, header=True)

the URL is wrong; once downloaded you can do "Get Info" if on a Mac, and you'll see "Where From:". You will see it's of the form below.
import requests
import pandas as pd
import io
#original URL had a bunch of other parameters I omitted, only these seem to matter but YMMV
url = 'https://quotes.wsj.com/index/COMP/historical-prices/download?num_rows=360&range_days=360&endDate=11/06/2019'
response = requests.get(url)
#do this if you want the CSV written to your machine
# (the with-block guarantees the file is flushed and closed)
with open('test_file.csv', 'wb') as csv_file:
    csv_file.write(response.content)
# this decodes the content of the downloaded response and presents it to pandas
df_test = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
To answer your additional question -- you can simply loop across a list of tickers or symbols, something like:
base_url = 'https://quotes.wsj.com/index/{ticker_name}/historical-prices/download?num_rows=360&range_days=360&endDate=11/06/2019'
ticker_list = ['COMP','SPX','HK/XHKG/HSI']
for ticker in ticker_list:
    response = requests.get(base_url.format(ticker_name=ticker))
    #do this if you want the CSV written to your machine
    # (one file per ticker; the with-block closes each file before the next download)
    with open('prices_'+ticker.replace('/','-')+'.csv', 'wb') as csv_file:
        csv_file.write(response.content)
Note for HK/XHKG/HSI, we need to replace the slashes with hyphens or it's not a valid filename. You can also use this pattern to make dataframes.

Using Python to populate data to existing excel file

What I'm trying to do:
I'm using Python to populate data to an existing Excel file.
What works:
My code below is successful in exporting the table to Excel file "Futures.xls".
What doesn't work:
The code below extracts a table from the website and exports it to the Futures Excel file (100 rows). The subsequent code re-opens the Futures file and appends to the Futures1 Excel file (200 rows). However, if I run the subsequent code multiple times, I never get more than 200 rows (300 rows and so on) as and when I run the code. Can someone tell me the issue?
My code:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import os
url = "https://quotes.ino.com/exchanges/contracts.html?r=NYMEX_NG"
res = requests.get(url)
soup = BeautifulSoup(res.text, 'lxml')
Markets = []
Contracts =[]
Opens =[]
Highs =[]
Lows =[]
Lasts=[]
Changes=[]
Pcts=[]
data_rows = soup.findAll('tr')[3:]
for td in data_rows[:100]:
Market = td.findAll ('td')[0].text
Markets.append(Market)
Contract = td.findAll('td')[1].text
Contracts.append(Contract)
Open = td.findAll('td')[2].text
Opens.append(Open)
High = td.findAll('td')[3].text
Highs.append(High)
Low = td.findAll('td')[4].text
Lows.append(Low)
Last = td.findAll('td')[5].text
Lasts.append(Last)
Change = td.findAll('td')[6].text
Changes.append(Change)
Pct = td.findAll('td')[7].text
Pcts.append(Pct)
Time = td.findAll('td')[8].text
df = pd.DataFrame({'Contracts' :Contracts, 'Markets':Market,'Open':Opens, 'High':Highs, 'Low':Lows,'Last':Lasts,'Pct':Pcts})
out_path = "C:\Sid\Futures.xls"
writer = pd.ExcelWriter(out_path , engine='xlsxwriter')
df.to_excel(writer,'Sheet2',index=False)
writer.save()
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = load_workbook("C:\Sid\Futures.xlsx")
ws = wb['Sheet2']
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = load_workbook("C:\Sid\Futures.xlsx")
ws = wb['Sheet2']
for row in dataframe_to_rows(df, header=None, index = True):
ws.append(row)
wb.save('C:\Sid\Futures1.xlsx')
Additional:
Also, what code do I need so that my python runs automatically when my website updates? Prices change every 15 minutes.

Update Excel worksheet using python xlwt module

I am looking to update my already created Excel worksheet with Json data coming from a browser. The current code generates a new worksheet everytime i run the program.
import requests
import json
import urllib
import xlwt
url = raw_input("Enter url:-")
res = urllib.urlopen(url)
data = res.read()
data1 = json.loads(data)
book = xlwt.Workbook(encoding="utf-8")
sheet1 = book.add_sheet("AssetsReport0")
colunm_count = 0
for title, value in data1.iteritems():
sheet1.write(0, colunm_count, title)
sheet1.write(1, colunm_count, value)
colunm_count += 1
file_name = "test1.xls"%()
book.save(file_name)
What is the best way i could do this?

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Loop through dataframe rows - python

Related

Parsing JSON output file

Overwrite sheets saving and the other sheets on excel

How to extract daily close from WSJ using Python?

Using Python to populate data to existing excel file

Update Excel worksheet using python xlwt module

Categories

Resources