Python openpyxl: [Errno 13] Permission denied

I'm making a script that, based on input variables from an Excel file (script.xlsm), connects to a database, processes the data, and should then append the processed data to a new sheet in the same script.xlsm file. Running the code ends with the error shown after the listing:
import pandas as pd
from openpyxl import load_workbook

def matrixf():
    ex = pd.read_excel('C:\\Users\\admin\\Documents\\Project Python\\script.xlsm', sheet_name='Sheet1')
    file_path = ex['Unnamed: 2'][1]
    bank = ex['Unnamed: 2'][2]
    obligations_type = ex['Unnamed: 2'][3]
    date1 = '2020-01-01'
    date2 = '2019-01-01'
    df = pd.read_csv(file_path, sep=';', encoding='cp1251')
    df.columns = ['id', 'date', 'delay_day', 'oblig_type', 'bank']
    df.date = pd.to_datetime(df.date, format='%d/%m/%Y')
    df = df[df.bank == bank]
    df = df[df.oblig_type == obligations_type]

    def delay_category(val):
        if val == 0:
            return 'No delay'
        elif 0 < val <= 30:
            return '0 - 30 days'
        elif 30 < val <= 60:
            return '31-60 days'
        elif 60 < val <= 90:
            return '61-90 days'
        else:
            return '91+ days'

    df['delay_cat'] = df['delay_day'].apply(delay_category)
    df = df.drop(columns=['oblig_type', 'bank', 'delay_day'])
    df1 = df[df['date'] == date1]
    df2 = df[df['date'] == date2]
    suff1 = '_' + date1
    suff2 = '_' + date2
    df_final = df1.merge(df2, on='id', how='outer', suffixes=(suff1, suff2))
    matrix = df_final.pivot_table(index=['date' + suff1, 'delay_cat' + suff1],
                                  columns=['date' + suff2, 'delay_cat' + suff2],
                                  values='id',
                                  aggfunc='count')
    book = load_workbook('script.xlsm')
    writer = pd.ExcelWriter('script.xlsm', mode='a', engine='openpyxl')
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
    matrix.to_excel(writer, sheet_name='matrix')
    writer.save()
[Errno 13] Permission denied: 'script.xlsm'
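In most cases [Errno 13] simply means script.xlsm is still open in Excel (or locked by another process), so close it before running the script. Below is a minimal sketch of the append step, assuming pandas >= 1.3 and the matrix DataFrame built in the function above; mode='a' with if_sheet_exists='replace' makes the manual writer.book and writer.sheets assignments unnecessary. One caveat: appending to an .xlsm through openpyxl can drop the VBA project unless the workbook is loaded with keep_vba=True, so test on a copy first.

import pandas as pd

# Sketch only: close script.xlsm in Excel first, otherwise Windows keeps the
# file locked and any write attempt raises [Errno 13] Permission denied.
path = 'C:\\Users\\admin\\Documents\\Project Python\\script.xlsm'

# pandas >= 1.3: mode='a' appends to the existing workbook and
# if_sheet_exists='replace' overwrites the 'matrix' sheet on re-runs
with pd.ExcelWriter(path, mode='a', engine='openpyxl',
                    if_sheet_exists='replace') as writer:
    matrix.to_excel(writer, sheet_name='matrix')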

Related

How to use one condition to filter information in a column while at the same time using another condition on the same column?

import os
import pandas as pd

def concat_df(file_list):
    df = pd.DataFrame()
    for f in file_list:
        tmp_df = pd.read_csv(f)
        df = pd.concat((df, tmp_df), axis=0)
    return df

root_dir = 'ce survey'
if os.path.exists('total.csv'):
    os.remove('total.csv')
years = list(range(2012, 2022))
for year in years:
    print(year)
    dir_path = os.path.join(root_dir, str(year))
    files = os.listdir(dir_path)
    expd_files = []
    fmld_files = []
    fmli_files = []
    memd_files = []
    for file in files:
        if file.endswith('.csv'):
            fp = os.path.join(dir_path, file)
            if 'expd' in file:
                expd_files.append(fp)
            elif 'fmld' in file:
                fmld_files.append(fp)
            elif 'fmli' in file:
                fmli_files.append(fp)
            elif 'memd' in file:
                memd_files.append(fp)  # the original appended to fmli_files here, which looks like a copy-paste bug
    expd_df = concat_df(expd_files)
    fmld_df = concat_df(fmld_files)
    fmli_df = concat_df(fmli_files)
    memd_df = concat_df(memd_files)
    # the broken attempt -- trying to filter on UCC == 180620 while also keeping
    # UCCs 290420, 320130, 410901 (not valid Python as written):
    # expd_df = expd_df.loc[expd_df['UCC'] == 180620][['NEWID', 'COST', [['UCC']=290420,320130,410901], 'EXPNYR']]
    expd_df = expd_df.loc[expd_df['UCC'] == 180620][['NEWID', 'COST', 'UCC', 'EXPNYR']]
    if year in (2015, 2016, 2017):
        fmld_df = fmld_df[['NEWID', 'REGION', 'FWAGEX', 'INC_RANK', 'HISP_REF', 'HORREF1', 'HORREF2',
                           'RACE2', 'REF_RACE', 'EDUC_REF', 'FAM_SIZE', 'INCLASS', 'FAM_TYPE', 'CHILDAGE']]
    elif year in (2018, 2019, 2020, 2021):
        fmld_df = fmld_df[['NEWID', 'REGION', 'FWAGEX', 'INC_RANK', 'HISP_REF', 'HORREF1', 'HORREF2',
                           'RACE2', 'REF_RACE', 'EDUC_REF', 'FAM_SIZE', 'FAM_TYPE', 'CHILDAGE']]
    else:
        fmld_df = fmld_df[['NEWID', 'REGION', 'FWAGEX', 'INC_RANK', 'HISP_REF', 'HORREF1', 'HORREF2',
                           'RACE2', 'REF_RACE', 'EDUC_REF', 'FAM_SIZE', 'FINCAFTX', 'INCLASS']]
    if year == 2012:
        fmli_df = fmli_df[['NEWID', 'INCLASS2', 'RACE2']]
    else:
        fmli_df = fmli_df[['NEWID', 'INCLASS2', 'RACE2', 'FINATXEM']]
    memd_df = memd_df[['NEWID', 'CU_CODE1', 'EMPLTYPE', 'SEX', 'OCCUEARN', 'OCCULIST', 'AGE', 'WKS_WRKD', 'MARITAL']]
    expd_df = pd.merge(left=expd_df, right=fmld_df, on='NEWID', how='left')
    expd_df = pd.merge(left=expd_df, right=fmli_df, on='NEWID', how='left')
    expd_df = pd.merge(left=expd_df, right=memd_df, on='NEWID', how='left')
    if year == 2012:
        expd_df['FINATXEM'] = ''
    if year in (2015, 2016, 2017):
        expd_df['FINCAFTX'] = ''
    elif year in (2018, 2019, 2020, 2021):
        expd_df['INCLASS'] = ''
        expd_df['FINCAFTX'] = ''
    expd_df['YEAR'] = year
    expd_df = expd_df[['NEWID', 'COST', 'UCC', 'EXPNYR', 'REGION', 'FWAGEX', 'INC_RANK',
                       'HISP_REF', 'HORREF1', 'HORREF2', 'RACE2_x', 'REF_RACE', 'EDUC_REF',
                       'FAM_SIZE', 'FINCAFTX', 'INCLASS', 'INCLASS2', 'RACE2_y', 'FINATXEM']]
    if year == 2012:
        expd_df.to_csv('total.csv', mode='a', index=False)
    else:
        expd_df.to_csv('total.csv', mode='a', index=False, header=None)
I'm trying to see how other factors could influence the cost of UCC 180620. The codes 290420, 320130, and 410901 are in the same column as 180620. I tried to use 180620 as the condition to filter the other information, with 290420, 320130, and 410901 kept as additional columns. In the expd sheet, UCC is an expense code for a NEWID, and different UCC codes can be paired with the same NEWID. I'm trying to find the NEWIDs paired with 180620 first and get the cost of 180620, then look at those NEWIDs' costs for 290420, 320130, and 410901. In the end, I merge all the information from the different files into one CSV sheet.
Thank you for taking the time to look at this. This problem has been bothering me for a long time.
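A minimal sketch of one way to express that filter, assuming the expd_df built in the loop above (columns NEWID, UCC, COST): first collect the NEWIDs that have UCC 180620, then pull those households' rows for all four codes with isin() and pivot so each UCC becomes its own cost column.

# Sketch only -- assumes expd_df with NEWID, UCC and COST columns as above
codes = [180620, 290420, 320130, 410901]

# NEWIDs that actually have an entry for 180620
target_ids = expd_df.loc[expd_df['UCC'] == 180620, 'NEWID'].unique()

# keep only those households, restricted to the four codes of interest
related = expd_df[expd_df['NEWID'].isin(target_ids) & expd_df['UCC'].isin(codes)]

# one row per NEWID, one COST column per UCC code
costs = related.pivot_table(index='NEWID', columns='UCC',
                            values='COST', aggfunc='sum').reset_index()

From there, costs can be merged onto the demographic files on NEWID just like expd_df is merged above.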

How can I overwrite existing Excel sheets with new data for a new company's data?

import pandas as pd
import quandl
from datetime import datetime
from openpyxl import load_workbook

def auto_program(ticker, file_name, your_name):
    ticker_df = ticker.upper()
    company_name = quandl.get_table('SHARADAR/TICKERS', ticker=ticker, table='SF1')['name'][0]
    today = datetime.today().strftime('%Y-%m-%d')
    header = {'label': ['Company Name', 'Ticker Symbol', 'Created By', 'Last Retrieved'],
              'value': [company_name, ticker_df, your_name, today]}
    header_df = pd.DataFrame(header)
    initial_quarterly = quandl.get_table('SHARADAR/SF1', dimension='ARQ', ticker=ticker)
    initial_quarterly['calendardate'] = pd.to_datetime(initial_quarterly['calendardate'])
    ordered_quarterly = initial_quarterly.sort_values(by='calendardate', ascending=True)
    initial_annual = quandl.get_table('SHARADAR/SF1', dimension='ARY', ticker=ticker)
    initial_annual['calendardate'] = pd.to_datetime(initial_annual['calendardate'])
    ordered_annual = initial_annual.sort_values(by='calendardate', ascending=True)
    fixed_quarterly = ordered_quarterly.drop(['assetsavg'], axis=1)\
        .swapaxes('index', 'columns', copy=True)
    idx = [2] + [i for i in range(len(fixed_quarterly)) if i != 2]
    quarterly_pd = fixed_quarterly.iloc[idx]  # take this slice before idx is recomputed below
    fixed_annual = ordered_annual.drop(['assetsavg'], axis=1)\
        .swapaxes('index', 'columns', copy=True)
    idx = [2] + [i for i in range(len(fixed_annual)) if i != 2]
    annual_pd = fixed_annual.iloc[idx]
    # '/Users/user/Downloads/' is a specific local path to the file; change for other machines
    book = load_workbook('/Users/user/Downloads/' + file_name + '.xlsx')
    writer = pd.ExcelWriter(file_name + '.xlsx', engine='openpyxl')
    writer.book = book
    header_df.to_excel(writer, sheet_name='Header', index=False, header=False)
    annual_pd.to_excel(writer, sheet_name='Annual', index=True, header=False)
    quarterly_pd.to_excel(writer, sheet_name='Quarterly', index=True, header=False)
    writer.save()
I would like to be able to run this program more than once with a new ticker symbol as input, and have the Header, Annual, and Quarterly sheets simply be overwritten with the new data. How can this be done?
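Since pandas 1.3, ExcelWriter accepts if_sheet_exists='replace' in append mode, which does exactly this: each run swaps out the named sheets while leaving the rest of the workbook alone. A sketch under that assumption, reusing the DataFrames built in auto_program:

import pandas as pd

# Sketch only, assuming pandas >= 1.3: re-running with a new ticker
# replaces the three sheets instead of duplicating them
path = '/Users/user/Downloads/' + file_name + '.xlsx'
with pd.ExcelWriter(path, engine='openpyxl', mode='a',
                    if_sheet_exists='replace') as writer:
    header_df.to_excel(writer, sheet_name='Header', index=False, header=False)
    annual_pd.to_excel(writer, sheet_name='Annual', index=True, header=False)
    quarterly_pd.to_excel(writer, sheet_name='Quarterly', index=True, header=False)

This also removes the need for load_workbook and the writer.book assignment, which newer pandas versions no longer allow.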

Dynamically iterate through multiple sheets while writing multiple dfs to Excel using ExcelWriter/XlsxWriter

This is sample code that writes 2 dataframes to the sheet Result of a workbook, and it works perfectly fine. I want to write several iterations of such dfs to multiple sheets generated dynamically.
How do I do it?
Index sheet: https://i.stack.imgur.com/lE5UO.png
def write_dataframes_to_excel_sheet(dataframes, dir, name):
    with pd.ExcelWriter(f'{dir}/{name}.xlsx', engine='xlsxwriter') as writer:
        workbook = writer.book
        worksheet = workbook.add_worksheet('Result')
        writer.sheets['Result'] = worksheet
        COLUMN = 0
        row = 0
        for df in dataframes:
            worksheet.write_string(row, COLUMN, df.name)
            row += 1
            df.to_excel(writer, sheet_name='Result',
                        startrow=row, startcol=COLUMN)
            row += df.shape[0] + 2

# Create sample dataframes
df1 = pd.DataFrame([(1, 2, 3), (4, 5, 6)], columns=('A', 'B', 'C'))
df1.name = "Dataframe1"
df2 = pd.DataFrame([(7, 8, 9), (10, 11, 12)], columns=('A', 'B', 'C'))
df2.name = "Dataframe2"
dataframes = [df1, df2]
write_dataframes_to_excel_sheet(dataframes, 'C:/Users/path', 'bar')
I tried it this way, but it didn't work: it prints the 2 dfs one below the other, and only for the last id.
book = xlrd.open_workbook("input1.xlsx")
first_sheet = book.sheet_by_index(0)
writer = pd.ExcelWriter("Output1.xlsx", engine='xlsxwriter')

def write_dataframes_to_excel_sheet(dataframes, dir, name, writer):
    #with pd.ExcelWriter(f'{dir}/{name}.xlsx', engine='xlsxwriter') as writer:
    workbook = writer.book
    worksheet = workbook.add_worksheet(str(id))
    writer.sheets[str(id)] = worksheet
    COLUMN = 0
    row = 0
    for df in dataframes:
        #worksheet.write_string(row, COLUMN, df.name)
        row += 1
        df.to_excel(writer, sheet_name=str(id),
                    startrow=row, startcol=COLUMN)
        row += df.shape[0] + 2

with pd.ExcelWriter('output1.xlsx', engine='xlsxwriter') as writer:
    for i in [x for x in range(first_sheet.nrows) if x != 0]:
        id = int(first_sheet.cell(i, 0).value)
        strt_row_num = 1
        output_array = []
        for remarks_row in range(3, 6):
            remarks = first_sheet.cell(i, remarks_row).value
            sql_query = '''select * from ( some query) '''.format(id)
            output = pd.DataFrame()
            for df in pd.read_sql(sql_query, conn, chunksize=10000):
                output = output.append(df)
            output.columns = output.columns.str.strip()
            output_array.append(output)
            #output_array=tuple(output_array)
            strt_row_num = strt_row_num + len(output) + 3
        write_dataframes_to_excel_sheet(output_array, 'C:/Users/path', 'output1', writer)
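A sketch of the per-id pattern, assuming the first_sheet, conn, and placeholder query from the attempt above: pass the sheet name into the helper instead of reading the global id, and let to_excel create each sheet on first use, so every id gets its own tab.

import pandas as pd

# Sketch only -- first_sheet, conn and the real query come from the question
def write_dfs_to_sheet(dataframes, writer, sheet_name):
    row = 0
    for df in dataframes:
        # to_excel creates the sheet on first use; no add_worksheet needed
        df.to_excel(writer, sheet_name=sheet_name, startrow=row, startcol=0)
        row += df.shape[0] + 2

with pd.ExcelWriter('output1.xlsx', engine='xlsxwriter') as writer:
    for i in range(1, first_sheet.nrows):
        current_id = int(first_sheet.cell(i, 0).value)
        frames = [pd.read_sql('''select * from ( some query) '''.format(current_id), conn)]
        write_dfs_to_sheet(frames, writer, str(current_id))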

Python-generated Excel file only shows one row of data instead of multiple rows

I am trying to write the results from the loop into an Excel file (keys = column names, values = row data). This code generates the file for me, but it only writes one row of data. How can I make it append the other rows to the file?
import json
import requests
import pandas as pd

# BearerAuth is the question author's requests auth helper
p = (('BusinessName', 'CustomerNameToSearch'), ('PageSize', '2'), ('CountryCode', 'CA'))
prepare_link = requests.get('https://api.myapiloopuplink?', auth=BearerAuth('PMay4TY5K577b76154i97yC9DlbPytqd'), params=p)
test = requests.get(prepare_link.url, auth=BearerAuth('PMay4TY5K577b76154i97yC9DlbPytqd'), params=p)
data = json.loads(test.text)
CustomerIdList = []
for customer in data['Data']:
    BusinessID = customer['BusinessId']
    BusinessName = customer['BusinessName']
    CustomerIdList.append(str(customer['BusinessId']))
for i in CustomerIdList:
    links2 = ("https://api.myapiloopuplink/" + i + "/History?count=1")
    test2 = requests.get(links2, auth=BearerAuth('PMay4TY5K577b76154i97yC9DlbPytqd'))
    data2 = json.loads(test2.text)
    start_row = 0
    for extradetails in data2['Data']:
        myDict = {}
        myDict["BusinessId"] = customer['BusinessId']
        myDict["BusinessName"] = customer['BusinessName']
        myDict["Year"] = extradetails['Year']
        myDict["Rate"] = extradetails['Rate']
        print(myDict)
        k = list(myDict.keys())
        v = list(myDict.values())
        #print(k)
        #print(v)
        x = [myDict]
        df = pd.DataFrame(x)
        df.to_excel('locationandnameoffile.xlsx', sheet_name='sheet1', index=False, startrow=start_row)
        start_row = start_row + len(df) + 1
This is the output I currently get (screenshot omitted). This is the output I am trying to get (screenshot omitted). In the loop I get the right results when I print (it shows multiple rows):
print(myDict)
I think the problem is here:
for extradetails in data2['Data']:
    myDict = {}
    myDict["BusinessId"] = customer['BusinessId']
    myDict["BusinessName"] = customer['BusinessName']
    myDict["Year"] = extradetails['Year']
    myDict["Rate"] = extradetails['Rate']
    print(myDict)
    k = list(myDict.keys())
    v = list(myDict.values())
    #print(k)
    #print(v)
    x = [myDict]
    df = pd.DataFrame(x)  # problem
    df.to_excel('locationandnameoffile.xlsx', sheet_name='sheet1', index=False, startrow=start_row)  # problem
    start_row = start_row + len(df) + 1
You are creating an Excel file on every pass through the loop, so each write overwrites the previous one. How about collecting the rows and creating the Excel file once, after the loop completes, like this:
datas = []
for extradetails in data2['Data']:
    myDict = {}
    myDict["BusinessId"] = customer['BusinessId']
    myDict["BusinessName"] = customer['BusinessName']
    myDict["Year"] = extradetails['Year']
    myDict["Rate"] = extradetails['Rate']
    print(myDict)
    datas.append(myDict)  # collect plain dicts; pd.DataFrame turns each one into a row
df = pd.DataFrame(datas)
df.to_excel('locationandnameoffile.xlsx', sheet_name='sheet1', index=False)
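The same reasoning applies one level up: datas is reset for every customer in the outer for i in CustomerIdList loop, so only the last customer survives. A sketch, assuming the requests/BearerAuth setup from the question, that accumulates across both loops and writes once at the very end:

# Sketch only: collect rows across BOTH loops, write a single file at the end
datas = []
for i in CustomerIdList:
    test2 = requests.get("https://api.myapiloopuplink/" + i + "/History?count=1",
                         auth=BearerAuth('PMay4TY5K577b76154i97yC9DlbPytqd'))
    data2 = json.loads(test2.text)
    for extradetails in data2['Data']:
        datas.append({"BusinessId": i,
                      "Year": extradetails['Year'],
                      "Rate": extradetails['Rate']})

pd.DataFrame(datas).to_excel('locationandnameoffile.xlsx',
                             sheet_name='sheet1', index=False)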

Pandas read_csv not reading the file (while-loop)

I am trying to read several (~30) CSV files I have stored on my PC.
i = 2
Liste1 = []
Liste2 = []
x = 0
while i < 32:
    string = str(i)
    if i < 10:
        try:
            Name = 'D:\\FTPDaten\\2020\\Alle\\2020010' + string + '.csv'
            Tabelle = pd.read_csv(Name, sep=';', decimal=",", header=0, usecols=[7, 20])
            Tabelle.columns = ['AC', 'DC']
            neueTabelle1 = Tabelle['AC']
            neueTabelle = Tabelle['DC']
            Schleifenlaenge = len(neueTabelle)
            j = 0
            del(Tabelle)
            while j < Schleifenlaenge:
                Datenwert1 = neueTabelle.iloc[j]
                Datenwert2 = neueTabelle1.iloc[j]
                Liste1.append(Datenwert1)
                Liste2.append(Datenwert2)
                j = j + 1
        except FileNotFoundError:
            i = i + 1
    elif i >= 10 and i < 32:
        try:
            Name = 'D:\\FTPDaten\\2020\\Alle\\202001' + string + '.csv'
            Tabelle = pd.read_csv(Name, sep=';', decimal=",", header=0, usecols=[7, 20])
            Tabelle.columns = ['AC', 'DC']
            neueTabelle1 = Tabelle['AC']
            neueTabelle = Tabelle['DC']
            Schleifenlaenge = len(neueTabelle)
            j = 0
            while j < Schleifenlaenge:
                Datenwert1 = neueTabelle1.iloc[j]
                Datenwert2 = neueTabelle.iloc[j]
                Liste1.append(Datenwert1)
                Liste2.append(Datenwert2)
                j = j + 1
        except FileNotFoundError:
            i = i + 1
    i = i + 1
The first pass of the while-loop works fine, but from the next iteration on, read_csv no longer reads the file the way it did before. I would expect a DataFrame holding the 7th and 20th columns, but I get a DataFrame with no content at all, just the header.
I have tried a lot, but I can't fix it.
The issue is with how the ';' is read in the other files (all except the first one). If you open them in Excel, you might be able to see what I'm talking about. So what you'll need to do is skip those rows at the beginning of each file.
import pandas as pd

Liste1 = []
Liste2 = []
for i in range(2, 32):
    skipRows = 7
    if i != 2:
        skipRows += 1
    try:
        if i < 10:
            Name = 'D:\\FTPDaten\\2020\\Alle\\2020010{string}.csv'.format(string=i)
        else:
            Name = 'D:\\FTPDaten\\2020\\Alle\\202001{string}.csv'.format(string=i)
        Tabelle = pd.read_csv(Name, sep=';', decimal=",", header=0, usecols=[7, 20], skiprows=skipRows)
        Tabelle.columns = ['AC', 'DC']
        if i < 10:
            Datenwert1 = list(Tabelle['DC'])
            Datenwert2 = list(Tabelle['AC'])
        else:
            Datenwert1 = list(Tabelle['AC'])
            Datenwert2 = list(Tabelle['DC'])
        Liste1 += Datenwert1
        Liste2 += Datenwert2
    except FileNotFoundError as e:
        print(e)
df = pd.DataFrame({'col1': Liste1,   # <-- change 'col1', 'col2' to whatever you want to name them
                   'col2': Liste2})  # built from the accumulated lists, not just the last file
Try creating a new dataframe here instead of iterating over the existing one:
cols = ['AC', 'DC']
new_Tabelle = pd.DataFrame(columns=cols)
new_Tabelle['AC'] = Tabelle['AC']
new_Tabelle['DC'] = Tabelle['DC']
I don't have 30 semicolon-delimited files, but this can be simplified to pick up only the files that exist and match a pattern, using glob:
import pandas as pd
from pathlib import Path
import random

# generate sample semicolon-delimited files to demonstrate
for i in range(30):
    with open(f"2020010_os_{i}.csv", "w") as fp:
        fp.write(f"id;val\n{i};{random.randint(10,20)}\n")

df = pd.concat([pd.read_csv(fn, sep=";") for fn in Path.cwd().glob("2020010*.csv")])
