Using Python to populate data to existing excel file - python

What I'm trying to do:
I'm using Python to populate data to an existing Excel file.
What works:
My code below is successful in exporting the table to Excel file "Futures.xls".
What doesn't work:
The code below extracts table from website and exports to Futures excel file ( 100 Rows )The subsequent code re-opens Futures file and appends to Futures1 excel file ( 200 Rows ). However, if I continue to the run the subsequent code multiple times I cannot get more than 200 rows like 300 rows and so on as an when I run the code. Can someone tell me the issue?
My code:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import os
url = "https://quotes.ino.com/exchanges/contracts.html?r=NYMEX_NG"
res = requests.get(url)
soup = BeautifulSoup(res.text, 'lxml')
Markets = []
Contracts =[]
Opens =[]
Highs =[]
Lows =[]
Lasts=[]
Changes=[]
Pcts=[]
data_rows = soup.findAll('tr')[3:]
for td in data_rows[:100]:
Market = td.findAll ('td')[0].text
Markets.append(Market)
Contract = td.findAll('td')[1].text
Contracts.append(Contract)
Open = td.findAll('td')[2].text
Opens.append(Open)
High = td.findAll('td')[3].text
Highs.append(High)
Low = td.findAll('td')[4].text
Lows.append(Low)
Last = td.findAll('td')[5].text
Lasts.append(Last)
Change = td.findAll('td')[6].text
Changes.append(Change)
Pct = td.findAll('td')[7].text
Pcts.append(Pct)
Time = td.findAll('td')[8].text
df = pd.DataFrame({'Contracts' :Contracts, 'Markets':Market,'Open':Opens, 'High':Highs, 'Low':Lows,'Last':Lasts,'Pct':Pcts})
out_path = "C:\Sid\Futures.xls"
writer = pd.ExcelWriter(out_path , engine='xlsxwriter')
df.to_excel(writer,'Sheet2',index=False)
writer.save()
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = load_workbook("C:\Sid\Futures.xlsx")
ws = wb['Sheet2']
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = load_workbook("C:\Sid\Futures.xlsx")
ws = wb['Sheet2']
for row in dataframe_to_rows(df, header=None, index = True):
ws.append(row)
wb.save('C:\Sid\Futures1.xlsx')
Additional:
Also, what code do I need so that my python runs automatically when my website updates? Prices change every 15 minutes.

Related

how to format excel file using python?

I have a script that scrapes data from list of websites using beautifulSoup package and save in an excel file using pandas and xlsxwriter packages.
What i want is to be able to format the excel file as i need like the width of the columns
but when i run the script it crash and display the below error.
AttributeError: 'NoneType' object has no attribute 'write'
code:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import xlsxwriter
def scrap_website():
url_list = ["https://www.bayt.com/en/international/jobs/executive-chef-jobs/",
"https://www.bayt.com/en/international/jobs/head-chef-jobs/",
"https://www.bayt.com/en/international/jobs/executive-sous-chef-jobs/"]
joineddd = []
for url in url_list:
soup = BeautifulSoup(requests.get(url).content,"lxml")
links = []
for a in soup.select("h2.m0.t-regular a"):
if a['href'] not in links:
links.append("https://www.bayt.com"+ a['href'])
for link in links:
s = BeautifulSoup(requests.get(link).content, "lxml")
### update Start ###
alldd = dict()
alldd['link'] = link
dd_div = [i for i in s.select("div[class='card-content is-spaced'] div")
if ('<dd>' in str(i) ) and ( "<dt>" in str(i))]
for div in dd_div:
k = div.select_one('dt').get_text(';', True)
v = div.select_one('dd').get_text(';', True)
alldd[k] = v
### update End ###
joineddd.append(alldd)
# result
df = pd.DataFrame(joineddd)
df_to_excel = df.to_excel(r"F:\\AIenv\web_scrapping\\jobDesc.xlsx", index = False, header=True)
workbook = xlsxwriter.Workbook(df_to_excel)
worksheet = workbook.add_worksheet()
worksheet.set_column(0, 0,50)
workbook.close()
where is the error and how to fix it ?
To access and format the Excel workbook or worksheet created by to_excel() you need to create an ExcelWriter object first. Something like this:
import pandas as pd
# Create a Pandas dataframe from some data.
df = pd.DataFrame({'Data': [10, 20, 30, 20, 15, 30, 45]})
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet1', index=False, header=True)
# Get the xlsxwriter objects from the dataframe writer object.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Set the column width.
worksheet.set_column(0, 0, 50)
# Close the Pandas Excel writer and output the Excel file.
writer.save()
Output:
See Working with Python Pandas and XlsxWriter for more details.
to_excel function returns nothing. It's why you got the error message.
# save excel file
excel_file_name = r"jobDesc.xlsx"
df.to_excel(excel_file_name, index = False, header=True)
# open excel file for change col width or something
workbook = xlsxwriter.Workbook(excel_file_name)
Basically, you can't change existing file with xlsxwriter. There is a way to do so, but it is not recommended. I recommend openpyxl package instead of this. FYI, xlsxwriter: is there a way to open an existing worksheet in my workbook?

Loop through dataframe rows

I want to push data to a sheet.
When i'm printing my code it is working, but on my sheet it prints only the last element of my list.
Here is my code :
import json
from urllib.request import urlopen
import pygsheets
import pandas as pd
with urlopen("myapiurl") as response: source = response.read()
data = json.loads(source)
#authorization
gc = pygsheets.authorize(service_file='myjsonfile')
#open the google spreadsheet
sh = gc.open('Test')
#select the first sheet
wks = sh[0]
# Create empty dataframe
df = pd.DataFrame()
#update the first sheet with df, starting at cell B2.
wks.set_dataframe(df,(1,1))
for item in data["resultsPage"]["results"]["Entry"]:
id = item["event"]["id"]
print(id)
df['id_string'] = id
You can save ids in a list id_s an finally copy it in the colum of DataFrame.
it will work because you don't rewrite the colum.
import json
from urllib.request import urlopen
import pygsheets
import pandas as pd
with urlopen("myapiurl") as response: source = response.read()
data = json.loads(source)
#authorization
gc = pygsheets.authorize(service_file='myjsonfile')
#open the google spreadsheet
sh = gc.open('Test')
#select the first sheet
wks = sh[0]
# Create empty dataframe
df = pd.DataFrame()
#update the first sheet with df, starting at cell B2.
wks.set_dataframe(df,(1,1))
id_s=[]
for item in data["resultsPage"]["results"]["Entry"]:
id = item["event"]["id"]
print(id)
id_s.append(id)
df['id_string'] = id_s

Data validation using openpyxl isnt writing to file - code enclosed

The code to actually write each file runs great. The problem I'm having is that the data validation piece doesn't appear to be doing anything. No drop downs are being created in the range I'm referencing.
Thanks in advance for any and all assistance!
%%time
import pandas as pd
import xlsxwriter as ew
import csv as csv
import os
import glob
import openpyxl
#remove existing files from directory
files = glob.glob(#filename)
for f in files:
os.remove(f)
pendpath = #filename
df = pd.read_sas(pendpath)
allusers = df.UserID_NB.unique()
listuserpath = #filename
listusers = pd.read_csv(listuserpath)
listusers = listusers['USER_ID'].apply(lambda x: str(x).strip())
for id in listusers:
x = df.loc[df['UserID_NB']==id]
path = #filename
x.to_excel(path, sheet_name = str(id), index = False)
from openpyxl import load_workbook
wb = openpyxl.load_workbook(filename = path)
sheet = wb.get_sheet_by_name(str(id))
maxrow = sheet.max_row
from openpyxl.worksheet.datavalidation import DataValidation
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
rangevar = 'R1:T'+ str(maxrow)
dv.ranges.append(rangevar)
wb.save(path)
print str(id), rangevar
Code for Basic Sheet
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
sheet.title = 'testsheet'
path = '#filepath'
from openpyxl.worksheet.datavalidation import DataValidation
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
dv.ranges.append('A1')
wb.save(path)
You are missing to add the dv to the worksheet.
>>> # Add the data-validation object to the worksheet
>>> ws.add_data_validation(dv)
Read the docs about validation

Python - CSV file blank after using CSV writer

I'm very new at this and I'm having trouble writing a CSV file from a webscraping. Can anyone help? Thank you!
import sys
import urllib2
import csv
import requests
from bs4 import BeautifulSoup
#web scraping wind data
r_wind = requests.get('http://w1.weather.gov/data/obhistory/KCQX.html')
html_wind = r_wind.text
soup = BeautifulSoup(html_wind, "html.parser")
table = soup.find('table')
rows_wind = table.findAll('tr')
rows_wind = rows_wind[1:]
#writing to a csv file
csvfile_wind = open("wind.csv","wb")
output_wind = csv.writer(csvfile_wind, delimiter=',',quotechar='"',quoting=csv.QUOTE_MINIMAL)
for row in rows_wind:
Date = cells[0].text.strip()
Time = cells[1].text.strip()
Wind = cells[2].text.strip()
output.writerow([Date,Time,Wind])
csvfile_wind.close()

Update Excel worksheet using python xlwt module

I am looking to update my already created Excel worksheet with Json data coming from a browser. The current code generates a new worksheet everytime i run the program.
import requests
import json
import urllib
import xlwt
url = raw_input("Enter url:-")
res = urllib.urlopen(url)
data = res.read()
data1 = json.loads(data)
book = xlwt.Workbook(encoding="utf-8")
sheet1 = book.add_sheet("AssetsReport0")
colunm_count = 0
for title, value in data1.iteritems():
sheet1.write(0, colunm_count, title)
sheet1.write(1, colunm_count, value)
colunm_count += 1
file_name = "test1.xls"%()
book.save(file_name)
What is the best way i could do this?

Categories

Resources