i made a script that compare datas form diferent sheets, all godd, now i want to add this updates sheet instead of the old one on the entire excel and keeping the other sheets.
import numpy as np
import pandas as pd
from timestampdirectory import createdir
import openpyxl
from openpyxl import workbook
from openpyxl import worksheet
import os
import time
def svnanalysis():
dest = createdir()
dfSvnUsers = pd.read_excel(os.path.join(dest, "SvnUsers.xlsx"))
dfSvnGroupMembership = pd.read_excel(os.path.join(dest, "SvnGroupMembership.xlsx"))
dfSvnRepoGroupAccess = pd.read_excel(os.path.join(dest, "SvnRepoGroupAccess.xlsx"))
dfsvnReposSize = pd.read_excel(os.path.join(dest, "svnReposSize.xlsx"))
dfsvnRepoLastChangeDate = pd.read_excel(os.path.join(dest, "svnRepoLastChangeDate.xlsx"))
dfUserDetails = pd.read_excel(r"D:\GIT-files\Automate-Stats\SVN_sample_files\CM_UsersDetails.xlsx")
timestr = time.strftime("%Y-%m-%d-")
xlwriter = pd.ExcelWriter(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'))
dfUserDetails.to_excel(xlwriter, sheet_name='UserDetails',index = False)
dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index = False )
dfSvnRepoGroupAccess.to_excel(xlwriter, sheet_name='SvnRepoGroupAccess', index = False)
dfsvnReposSize.to_excel(xlwriter, sheet_name='svnReposSize', index = False)
dfsvnRepoLastChangeDate.to_excel(xlwriter, sheet_name='svnRepoLastChangeDate',index= False)
xlwriter.close()
whats above its in the same script where i used some xlsx files an create only 1 xlsx with those files as sheets, now below i make some changes in SvnUser sheet and i want to upload it on the excel instead of old sheet SvnUser, and keep the other sheets
# xlwriter = pd.ExcelWriter(os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx'))
svnUsers = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="SvnUsers")
details = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'), sheet_name="UserDetails")
svnUsers = svnUsers.assign(SVNaccount=svnUsers["accountName"].isin(details["Account"]).astype(bool))
print(svnUsers)
# dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
# xlwriter.close()
The easiest way to achieve that would be to overwrite the entire excel sheet, for example like this:
import pandas as pd
# create dataframe from excel file
df = pd.read_excel(
'2022-06-15-Usage-SvnAnalysis.xlsx',
engine='openpyxl',
sheet_name='UserDetails'
)
res_df = ..... # calculate the df you want to write
# overwrite excel sheet with dataframe
with pd.ExcelWriter(
'2022-06-15-Usage-SvnAnalysis.xlsx',
engine='openpyxl',
mode='a',
if_sheet_exists='replace'
) as writer:
res_df.to_excel(writer, sheet_name='UserDetails')
Related
import xlrd
import pandas as pd
wb = xlrd.open_workbook("excel_1.xlsx")
sheets = wb.sheet_names()
xl = pd.ExcelFile("excel_1.xlsx")
for sheet in sheets:
df = xl.parse(sheet)
df = df.sort_values(by="column2")
writer = pd.ExcelWriter("excel_2.xlsx")
df.to_excel(writer, sheet_name=sheet, index=False)
writer.save()
In excel_2.xlsx i can only find the sorted sheet of the last sheet of excel_1.xlsx.
Please help me in this regard.
Thank You
Create and save the writer outside of the loop.
You're overwriting the file for each sheet now.
import xlrd
import pandas as pd
wb = xlrd.open_workbook("excel_1.xlsx")
xl = pd.ExcelFile("excel_1.xlsx")
writer = pd.ExcelWriter("excel_2.xlsx")
for sheet in wb.sheet_names():
df = xl.parse(sheet).sort_values(by="column2")
df.to_excel(writer, sheet_name=sheet, index=False)
writer.save()
Here is a multi sheet excel file opened and operated on one sheet taken in a dataframe and then copied back. Now, a new sheet (sheet1) is being created while doing this. Objective however is to overwrite the old target sheet. When I am trying deleting the sheet before pasting data from dataframe, it says 'sheet' does not exist.
Here is the code:
import openpyxl as op
import pandas as pd
basePath = filePath
wbk = op.load_workbook(basePath + "file.xlsx")
writer = pd.ExcelWriter(basePath + "file.xlsx", engine = 'openpyxl', mode="a", if_sheet_exists="replace")
writer.book = wbk
df = pd.read_excel(basePath + "file.xlsx", sheet_name="sheet")
df.insert(0,"newCol2","")
#wbk.remove_sheet(wbk.get_sheet_by_name('sheet'))
df.to_excel(writer, sheet_name = 'sheet', index=False)
writer.save()
writer.close()
What am I doing wrong?
I am trying to save a pandas dataframe as an excel table to a sharepoint site. I have two separate blocks of code which achieve the below.(thanks for Stackoverflow community)
A script which can save a pandas df as excel table using ExcelWriter on local storage.
A Script which can save a local file to Sharepoint online.
I am very confused on how to combine these two to save a df to sharepoint online but the excel file should be a table not just a range of data. Kindly help
SCRIPT 1 to save excel range as table on local
##############################################################################
#
# An example of adding a dataframe to an worksheet table in an xlsx file
# using Pandas and XlsxWriter.
#
# Tables in Excel are used to group rows and columns of data into a single
# structure that can be referenced in a formula or formatted collectively.
#
# SPDX-License-Identifier: BSD-2-Clause
# Copyright 2013-2021, John McNamara, jmcnamara#cpan.org
#
import pandas as pd
# Create a Pandas dataframe from some data.
df = pd.DataFrame({
'Country': ['China', 'India', 'United States', 'Indonesia'],
'Population': [1404338840, 1366938189, 330267887, 269603400],
'Rank': [1, 2, 3, 4]})
# Order the columns if necessary.
df = df[['Rank', 'Country', 'Population']]
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('pandas_table.xlsx', engine='xlsxwriter')
# Write the dataframe data to XlsxWriter. Turn off the default header and
# index and skip one row to allow us to insert a user defined header.
df.to_excel(writer, sheet_name='Sheet1', startrow=1, header=False, index=False)
# Get the xlsxwriter workbook and worksheet objects.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Get the dimensions of the dataframe.
(max_row, max_col) = df.shape
# Create a list of column headers, to use in add_table().
column_settings = [{'header': column} for column in df.columns]
# Add the Excel table structure. Pandas will add the data.
worksheet.add_table(0, 0, max_row, max_col - 1, {'columns': column_settings})
# Make the columns wider for clarity.
worksheet.set_column(0, max_col - 1, 12)
# Close the Pandas Excel writer and output the Excel file.
writer.save()
SCRIPT 2 to save any file to sharepoint
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
import os
baseurl = 'https://testsite.sharepoint.com/'
basesite = '/sites/project' # every share point has a home.
siteurl = baseurl + basesite
localpath = "pandas_table.xlsx"
remotepath = "Shared Documents/General/file.xlsx" # existing folder path under sharepoint site.
ctx_auth = AuthenticationContext(siteurl)
ctx_auth.acquire_token_for_user(<username>, <password>)
ctx = ClientContext(siteurl, ctx_auth) # make sure you auth to the siteurl.
with open(localpath, 'rb') as content_file:
file_content = content_file.read()
dir, name = os.path.split(remotepath)
file = ctx.web.get_folder_by_server_relative_url(dir).upload_file(name, file_content).execute_query()
I am not really sure how I can use the writer.save() with the sharepoint connector. Kindly advise
thanks in advance.
Given that you have your data in a df the following code will write to sharepoint using the O365 library.
from io import BytesIO
from tempfile import gettempdir
from O365 import Account, FileSystemTokenBackend
import pandas as pd
O365_CLIENT_ID = "client"
O365_SECRET = "secret"
O365_TENANT_ID = "<name>"
O365_SHAREPOINT = "<name>.sharepoint.com"
O365_SITE = "/sites/..."
def save_file(folder_path, filename, data):
"""save file to O365."""
account = Account(
(O365_CLIENT_ID, O365_SECRET),
auth_flow_type="credentials",
tenant_id=O365_TENANT_ID,
token_backend=FileSystemTokenBackend(
token_path=gettempdir(), token_filename="o365_token.txt"
),
)
if account.authenticate():
drive = (
account.sharepoint()
.get_site(O365_SHAREPOINT, O365_SITE)
.get_default_document_library()
)
subfolders = folder_path.split("/")
if len(subfolders) != 0:
items = drive.get_items()
for subfolder in subfolders:
try:
subfolder_drive = list(filter(lambda x, sf=subfolder: sf in x.name, items))[0]
items = subfolder_drive.get_items()
except Exception as excep: # pylint: disable=broad-except
raise f"Path {folder_path} does not exist." from excep
else:
subfolder_drive = drive.get_root_folder()
subfolder_drive.upload_file(
item=None,
item_name=filename,
stream=data,
stream_size=data.getbuffer().nbytes,
)
with BytesIO() as buf:
df.to_excel(buf, index=False)
buf.seek(0)
save_file('folder/to/upload/to', 'filename.xlsx', buf)
Writing to multiple sheets:
with BytesIO() as buf:
with pd.ExcelWriter( # pylint: disable=abstract-class-instantiated
buf,
engine="xlsxwriter",
) as writer:
df.to_excel(
writer,
sheet_name='sheet1',
index=False,
)
df.to_excel(
writer,
sheet_name='sheet2',
index=False,
)
buf.seek(0)
save_file('folder/to/upload/to', 'filename.xlsx', buf)
my propose:
if excel file not exist, create it and copy data table to it;
if excel file exist, copy to data table to new sheet.
but following code running, only copy to data to new sheet, original sheet in excel file was removed.
import os
import pandas as pd
import openpyxl
f_name = "123.xlsx" #target excel file
if os.path.exists(f_name):
"""if excel file exist, added table to another sheet"""
wb = openpyxl.load_workbook(f_name) #load excel file
writer = pd.ExcelWriter(f_name, engine="openpyxl")
writer.wb = wb
df = pd.DataFrame(pd.read_excel("table_2.xlsx")) #get table to be added excel file
df.to_excel(writer, sheet_name="sheet2",index=False) #write to another sheet
writer.save()
writer.close()
else:
"""if excel file not exit, create it"""
df_1 = pd.DataFrame() # create excel file
df_1.to_excel(f_name)
writer = pd.ExcelWriter(f_name)
df_2 = pd.DataFrame(pd.read_excel("table_1.xlsx")) # get table_1
df_2.to_excel(writer, sheet_name="sheet1",index=False) # write table_1 into excel file
writer.save()
writer.close()
import os
import pandas as pd
import openpyxl
f_name = "123.xlsx" #target excel file
if os.path.exists(f_name):
"""if excel file exist, added table to another sheet"""
wb = openpyxl.load_workbook(f_name) #load excel file
writer = pd.ExcelWriter(f_name, engine="openpyxl") #assign engine
writer.book = wb #overwrite if no this
df = pd.DataFrame(pd.read_excel("table_2.xlsx")) #get table to be added excel file
df.to_excel(writer, sheet_name="table_2",index=False) #write to another sheet
writer.save()
writer.close()
else:
"""if excel file not exit, create it"""
df_1 = pd.DataFrame() # create excel file
df_1.to_excel(f_name)
writer = pd.ExcelWriter(f_name)
df_2 = pd.DataFrame(pd.read_excel("table_1.xlsx")) # get table_1
df_2.to_excel(writer, sheet_name="table_1",index=False) # write table_1 into excel file
writer.save()
writer.close()
I have a script that scrapes data from list of websites using beautifulSoup package and save in an excel file using pandas and xlsxwriter packages.
What i want is to be able to format the excel file as i need like the width of the columns
but when i run the script it crash and display the below error.
AttributeError: 'NoneType' object has no attribute 'write'
code:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import xlsxwriter
def scrap_website():
url_list = ["https://www.bayt.com/en/international/jobs/executive-chef-jobs/",
"https://www.bayt.com/en/international/jobs/head-chef-jobs/",
"https://www.bayt.com/en/international/jobs/executive-sous-chef-jobs/"]
joineddd = []
for url in url_list:
soup = BeautifulSoup(requests.get(url).content,"lxml")
links = []
for a in soup.select("h2.m0.t-regular a"):
if a['href'] not in links:
links.append("https://www.bayt.com"+ a['href'])
for link in links:
s = BeautifulSoup(requests.get(link).content, "lxml")
### update Start ###
alldd = dict()
alldd['link'] = link
dd_div = [i for i in s.select("div[class='card-content is-spaced'] div")
if ('<dd>' in str(i) ) and ( "<dt>" in str(i))]
for div in dd_div:
k = div.select_one('dt').get_text(';', True)
v = div.select_one('dd').get_text(';', True)
alldd[k] = v
### update End ###
joineddd.append(alldd)
# result
df = pd.DataFrame(joineddd)
df_to_excel = df.to_excel(r"F:\\AIenv\web_scrapping\\jobDesc.xlsx", index = False, header=True)
workbook = xlsxwriter.Workbook(df_to_excel)
worksheet = workbook.add_worksheet()
worksheet.set_column(0, 0,50)
workbook.close()
where is the error and how to fix it ?
To access and format the Excel workbook or worksheet created by to_excel() you need to create an ExcelWriter object first. Something like this:
import pandas as pd
# Create a Pandas dataframe from some data.
df = pd.DataFrame({'Data': [10, 20, 30, 20, 15, 30, 45]})
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet1', index=False, header=True)
# Get the xlsxwriter objects from the dataframe writer object.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Set the column width.
worksheet.set_column(0, 0, 50)
# Close the Pandas Excel writer and output the Excel file.
writer.save()
Output:
See Working with Python Pandas and XlsxWriter for more details.
to_excel function returns nothing. It's why you got the error message.
# save excel file
excel_file_name = r"jobDesc.xlsx"
df.to_excel(excel_file_name, index = False, header=True)
# open excel file for change col width or something
workbook = xlsxwriter.Workbook(excel_file_name)
Basically, you can't change existing file with xlsxwriter. There is a way to do so, but it is not recommended. I recommend openpyxl package instead of this. FYI, xlsxwriter: is there a way to open an existing worksheet in my workbook?