I've successfully Extracted vbaProject.bin but I'm still getting an error. My goal is to be able to use VBA after the information is passed to the xlsm file. This is a workaround provided by xlswriter.
import pandas as pd
def export(self):
sql = "SELECT WellID,AFEno,AFEsuppno,AccountCode,AFEAmount FROM WellAFEDetails WHERE WellID = ?"
df = pd.read_sql_query(sql,self.con, params=([self.wellid_bx.get()]))
writer = pd.ExcelWriter (r'C:\Users\bjenks\Desktop\Macro.xlsm')
df.to_excel(writer, sheet_name='Raw')
workbook = writer.book
workbook.filename = (r'C:\Users\bjenks\Desktop\Macro.xlsm')
workbook.add_vba_project(r'C:\Users\bjenks\Desktop/vbaProject.bin')
writer.save()
Related
Here is a multi sheet excel file opened and operated on one sheet taken in a dataframe and then copied back. Now, a new sheet (sheet1) is being created while doing this. Objective however is to overwrite the old target sheet. When I am trying deleting the sheet before pasting data from dataframe, it says 'sheet' does not exist.
Here is the code:
import openpyxl as op
import pandas as pd
basePath = filePath
wbk = op.load_workbook(basePath + "file.xlsx")
writer = pd.ExcelWriter(basePath + "file.xlsx", engine = 'openpyxl', mode="a", if_sheet_exists="replace")
writer.book = wbk
df = pd.read_excel(basePath + "file.xlsx", sheet_name="sheet")
df.insert(0,"newCol2","")
#wbk.remove_sheet(wbk.get_sheet_by_name('sheet'))
df.to_excel(writer, sheet_name = 'sheet', index=False)
writer.save()
writer.close()
What am I doing wrong?
I have a script that scrapes data from list of websites using beautifulSoup package and save in an excel file using pandas and xlsxwriter packages.
What i want is to be able to format the excel file as i need like the width of the columns
but when i run the script it crash and display the below error.
AttributeError: 'NoneType' object has no attribute 'write'
code:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import xlsxwriter
def scrap_website():
url_list = ["https://www.bayt.com/en/international/jobs/executive-chef-jobs/",
"https://www.bayt.com/en/international/jobs/head-chef-jobs/",
"https://www.bayt.com/en/international/jobs/executive-sous-chef-jobs/"]
joineddd = []
for url in url_list:
soup = BeautifulSoup(requests.get(url).content,"lxml")
links = []
for a in soup.select("h2.m0.t-regular a"):
if a['href'] not in links:
links.append("https://www.bayt.com"+ a['href'])
for link in links:
s = BeautifulSoup(requests.get(link).content, "lxml")
### update Start ###
alldd = dict()
alldd['link'] = link
dd_div = [i for i in s.select("div[class='card-content is-spaced'] div")
if ('<dd>' in str(i) ) and ( "<dt>" in str(i))]
for div in dd_div:
k = div.select_one('dt').get_text(';', True)
v = div.select_one('dd').get_text(';', True)
alldd[k] = v
### update End ###
joineddd.append(alldd)
# result
df = pd.DataFrame(joineddd)
df_to_excel = df.to_excel(r"F:\\AIenv\web_scrapping\\jobDesc.xlsx", index = False, header=True)
workbook = xlsxwriter.Workbook(df_to_excel)
worksheet = workbook.add_worksheet()
worksheet.set_column(0, 0,50)
workbook.close()
where is the error and how to fix it ?
To access and format the Excel workbook or worksheet created by to_excel() you need to create an ExcelWriter object first. Something like this:
import pandas as pd
# Create a Pandas dataframe from some data.
df = pd.DataFrame({'Data': [10, 20, 30, 20, 15, 30, 45]})
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet1', index=False, header=True)
# Get the xlsxwriter objects from the dataframe writer object.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Set the column width.
worksheet.set_column(0, 0, 50)
# Close the Pandas Excel writer and output the Excel file.
writer.save()
Output:
See Working with Python Pandas and XlsxWriter for more details.
to_excel function returns nothing. It's why you got the error message.
# save excel file
excel_file_name = r"jobDesc.xlsx"
df.to_excel(excel_file_name, index = False, header=True)
# open excel file for change col width or something
workbook = xlsxwriter.Workbook(excel_file_name)
Basically, you can't change existing file with xlsxwriter. There is a way to do so, but it is not recommended. I recommend openpyxl package instead of this. FYI, xlsxwriter: is there a way to open an existing worksheet in my workbook?
I'm attempting to teach myself python skills and took an awesome tutorial from Giraffe Academy and using some of the skills.
I created a file called country.xlsx and installed xlsxwriter to read, validate and create a dropdown box using this tutorial - https://xlsxwriter.readthedocs.io/example_data_validate.html
When I run or debug the code below,
import xlsxwriter
workbook = xlsxwriter.workbook("Countries.xlsx")
worksheet = workbook.sheet_by_name("Sheet1")
workbook = worksheet.set_column("A:A")
workbook = worksheet.set_column("B:B")
workbook = worksheet.set_column("C:C")
workbook = worksheet.set_column("D:D")
workbook - worksheet.set.row(0, 6)
heading1 = "Continent"
heading2 = "Country"
heading3 = "Capital"
heading4 = "Airline"
workbook = worksheet("A1", {heading1})
workbook = worksheet("B1", {heading2})
workbook = worksheet("C1", {heading3})
workbook = worksheet("D1", {heading4})
txt = "Select from the Dropdown List"
workbook = worksheet.data_validation("B15", {"validate": "list", "source" : "=$A$1:$D$7"})
workbook.close()
I receive this error
workbook = xlsxwriter.workbook('Countries.xlsx')
TypeError: 'module' object is not callable
Can someone point me in the right direction??
Looking at the Documentation it seems like you have to use Workbook with a capital W:
workbook = xlsxwriter.Workbook('Countries.xlsx')
This program should take the contents of individual sheets and put them into one excel workbook. It almost does that, but it is overwriting instead of appending new sheets into the final workbook. I read that pandas excel writer is the way to go with this, any ideas as to why its having this behavior?
import xlwt, csv, os
from openpyxl import load_workbook
import pandas as pd
from pandas import ExcelWriter
csv_folder = r'C:\Users\Me\Desktop\Test_Folder\\'
for fil in os.listdir(csv_folder):
if '.xlsx' not in fil:
continue
else:
pass
df = pd.read_excel(csv_folder+fil, encoding = 'utf8')
file_name = fil.replace('.xlsx','')
writer = pd.ExcelWriter('condensed_output.xlsx', engine = 'xlsxwriter')
df.to_excel(writer, sheet_name = file_name)
writer.save()
#writer.close()
Make sure the writer.save() is outside of the loop. Also be aware of the character limit on sheetnames, so if the file names are the same up to a certain point, you run the risk of writing over a sheetname that way as well.
import xlwt, csv, os
from openpyxl import load_workbook
import pandas as pd
from pandas import ExcelWriter
csv_folder = r'C:\Users\Me\Desktop\Test_Folder\\'
writer = pd.ExcelWriter('condensed_output.xlsx', engine = 'xlsxwriter')
for fil in os.listdir(csv_folder):
if '.xlsx' not in fil:
continue
else:
pass
df = pd.read_excel(csv_folder+fil, encoding = 'utf8')
file_name = fil.replace('.xlsx','')
df.to_excel(writer, sheet_name = file_name)
writer.save() #make sure this is outside of the loop.
ETA: establish the writer outside of the loop as well
I am new to Python programming and seeking for some help/guidance in correcting my python code.
Here my query is.
I have one Excel file which has (7 Tabs).
I have one Folder which contains 7 different text file and each text file contains respective Tab SQL Query and each text file name is same as the Tab Name which is available in Excel File.
I have a written a Python code to loop through all the text file one by one and execute that each text file SQL query and whatever data will come in output that output data should dump into existing excel file in that respective sheet/tab. i am using pandas to do this, however, code is working fine but while updating data into excel pandas is removing all existing sheets from the file and updating only current output data into excel file.
Example: if Python code execute a text file(Filename: Data) and after executing this SQL query we got some data and this data should dump into excel file (sheetname: Data).
<pre><code>
import pypyodbc
import pandas as pd
import os
import ctypes
from pandas import ExcelWriter
fpath = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries"
xlfile = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries\Open_Case_Data.xlsx"
cnxn = pypyodbc.connect('Driver={SQL Server};Server=MyServerName;Database=MyDatabaseName;Trusted_Connection=Yes')
cursor = cnxn.cursor()
for subdir, dirs, files in os.walk(fpath):
for file in files:
#print(os.path.join(subdir,file))
filepath = os.path.join(subdir,file)
#print("FilePath: ", filepath)
if filepath.endswith(".txt"):
if file != "ClosedAging_Cont.txt":
txtdata = open(filepath, 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
if file == "ClosedAging.txt":
txtdata = open(os.path.join(subdir,"ClosedAging_Cont.txt"), 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
col = [desc[0] for desc in cursor.description]
data = cursor.fetchall()
df = pd.DataFrame(list(data),columns=col)
#save_xls(df,xlfile)
writer = pd.ExcelWriter(xlfile)
flnm = file.replace('.txt','').strip()
df.to_excel(writer,sheet_name=flnm,index=False)
writer.save()
print(file, " : Successfully Updated.")
else:
print(file, " : Ignoring this File")
else:
print(file, " : Ignoring this File")
ctypes.windll.user32.MessageBoxW(0,"Open Case Reporting Data Successfully Updated","Open Case Reporting",1)
</pre></code>
By looping through the text files, you overwrite the Excel file inside the loop each time. Instead instantiate pd.ExcelWriter(xlfile) and call writer.save() outside the loop.
The following example is adapted from the xlswriter documentation
You can find more information about multiple sheets here: xlswriter documentaion - multiple sheets
import pandas as pd
# Create a Pandas Excel writer using XlsxWriter as the engine outside the loop.
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Sample loop, replace with directory browsing loop
for i in range(7):
# Sample Pandas dataframe. Replace with SQL query and resulting data frame.
df = pd.DataFrame({'DataFromSQLQuery': ['SQL query result {0}'.format(i)]})
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet{0}'.format(i))
# Close the Pandas Excel writer and output the Excel file.
writer.save()
The following code addresses the concrete question but is untested.
import pypyodbc
import pandas as pd
import os
import ctypes
from pandas import ExcelWriter
fpath = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries"
xlfile = r"C:\MNaveed\DataScience\Python Practice New\SQL Queries\Open_Case_Data.xlsx"
cnxn = pypyodbc.connect('Driver={SQL Server};Server=MyServerName;Database=MyDatabaseName;Trusted_Connection=Yes')
cursor = cnxn.cursor()
# Create a Pandas Excel writer using XlsxWriter as the engine outside the loop
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# File loop
for subdir, dirs, files in os.walk(fpath):
for file in files:
filepath = os.path.join(subdir,file)
if filepath.endswith(".txt"):
if file != "ClosedAging_Cont.txt":
txtdata = open(filepath, 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
if file == "ClosedAging.txt":
txtdata = open(os.path.join(subdir,"ClosedAging_Cont.txt"), 'r')
script = txtdata.read().strip()
txtdata.close()
cursor.execute(script)
col = [desc[0] for desc in cursor.description]
data = cursor.fetchall()
# Data frame from original question
df = pd.DataFrame(list(data),columns=col)
# Convert the dataframe to an XlsxWriter Excel object
flnm = file.replace('.txt','').strip()
df.to_excel(writer, sheet_name=flnm, index=False)
print(file, " : Successfully Updated.")
else:
print(file, " : Ignoring this File")
else:
print(file, " : Ignoring this File")
# Close the Pandas Excel writer and output the Excel file
writer.save()
ctypes.windll.user32.MessageBoxW(0,"Open Case Reporting Data Successfully Updated","Open Case Reporting",1)