How to fix ValueError in pandas - python

I am moving an application from a classic Tkinter GUI to a Django cloud-based application and am receiving a
ValueError: Invalid file path or buffer object type: <class 'bool'>
when trying to run a function which calls on pandas.
Exception Location: C:\Users\alfor\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\io\common.py in get_filepath_or_buffer, line 232
I have not tried much because I cannot find this same Error in searches.
I do not believe this function even runs AT ALL because my media folder is not getting a new directory where the file would be saved.. but I could be wrong.
The beginning of the function that is having issues looks like this:
def runpayroll():
man_name = 'Jessica Jones'
sar_file = os.path.isfile('media/reports/Stylist_Analysis.xls')
sar_file2 = os.path.isfile('media/reports/Stylist_Analysis.xls')
tips_file = os.path.isfile('media/reports/Tips_By_Employee_Report.xls')
hours_wk1_file = os.path.isfile('media/reports/Employee_Hours1.xls')
hours_wk2_file = os.path.isfile('media/reports/Employee_Hours2.xls')
retention_file = os.path.isfile('media/reports/SC_Client_Retention_Report.xls')
efficiency_file = os.path.isfile('media/reports/Employee_Service_Efficiency.xls')
df_sar = pd.read_excel(sar_file,
sheet_name=0, header=None, skiprows=4)
df_sar2 = pd.read_excel(sar_file2,
sheet_name=0, header=None, skiprows=4)
df_tips = pd.read_excel(tips_file,
sheet_name=0, header=None, skiprows=0)
df_hours1 = pd.read_excel(hours_wk1_file,
header=None, skiprows=5)
df_hours2 = pd.read_excel(hours_wk2_file,
header=None, skiprows=5)
df_retention = pd.read_excel(retention_file, sheet_name=0,
header=None, skiprows=8)
df_efficiency = pd.read_excel(efficiency_file, sheet_name=0,
header=None, skiprows=5)
The only code I have changed from the rest of this function is this which I am assuming does not matter because it is only a file location..
writer = pd.ExcelWriter('/media/payroll.xlsx', engine='xlsxwriter')
and instead of asking the user for a file save location using tkinter I used...
with open(file_path, 'rb') as f:
response = HttpResponse(f, content_type=guess_type(file_path)[0])
response['Content-Length'] = len(response.content)
return response
Expected results are to open a few excel sheets, do some work to the dataframes, and to spit out an excel sheet to the user.

I believe you need to change each file assignment from:
sar_file = os.path.isfile('media/reports/Stylist_Analysis.xls')
to:
sar_file = 'media/reports/Stylist_Analysis.xls'
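because os.path.isfile does not return the path; it returns a boolean, and pd.read_excel cannot open True/False as a file (hence the ValueError mentioning <class 'bool'>). From the documentation of os.path.isfile: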
Return True if path is an existing regular file. This follows symbolic links, so both islink() and isfile() can be true for the same path.

Related

How do I send my output xls files to a specific path in python?

How do I make my df.to_excel call write to an output path? After my script runs, I do not see the files in the output_path directory I have defined.
import pandas as pd
from openpyxl import load_workbook
import os
import datetime
output_path = 'C:/Users/g/Desktop/autotranscribe/python/Processed'
path = 'C:/Users/g/Desktop/autotranscribe/python/Matching'
cols_to_drop = ['PSI ID','PSIvet Region','PSIvet region num','Fax','County']
column_name_update_map = {'Account name': 'Company Name','Billing address':'Address','Billing city':'City','Billing State':'State'}
for file in os.listdir("C:/Users/g/Desktop/autotranscribe/python/Matching"):
if file.startswith("PSI") and "(updated headers)" not in file:
dfs = pd.read_excel(file, sheet_name=None,skiprows=5)
output = dict()
for ws, df in dfs.items():
if ws.startswith("Cancelled Members"): df = df.drop('Active date', axis=1)
if any(ws.startswith(x) for x in ["New Members","PVCC"]):
continue
#if ws in ["New Members 03.22","PVCC"]: #sheetstoavoid
temp = df
dt = pd.to_datetime(os.path.getctime(os.path.join(path,file)),unit="s").replace(nanosecond=0)
output[ws] = temp
writer = pd.ExcelWriter(f'{file.replace(".xlsx","")} (updated headers).xlsx')
for ws, df in output.items():
df.to_excel(writer, index=None, sheet_name=ws)
writer.save()
writer.close()
I tried df.to_excel(writer,output_path, index=None, sheet_name=ws)
But i get an error
File "", line 36, in
df.to_excel(writer,output_path, index=None, sheet_name=ws)
TypeError: to_excel() got multiple values for argument 'sheet_name'.
A few comments:
The function os.listdir() only returns "unqualified" file names, so before using file, we need to prepend path using something like input_file_name = f'{path}/{file}'.
Similarly, pd.ExcelWriter() will need a qualified file name (that is, including the path as well as the "unqualified" file name), which we can get by doing this: output_file_name = f'{output_path}/{file.replace(".xlsx","")} (updated headers).xlsx'.
There are some elements of the code in your question that may not be getting used, but rather than comment on or change those, I provide a working version with minimal changes below.
I created directories named Matching and Processed. I placed a file named PSI 123.xlsx in Matching with a tab named Cancelled Members containing the following:
will skip
will skip
will skip
will skip
will skip
Col1 Col2 Col3 Active date
xx NY 110 789
I then ran the following modification to your code (note the changes to output_path and path for testing purposes in my environment):
import pandas as pd
from openpyxl import load_workbook
import os
import datetime
#output_path = 'C:/Users/g/Desktop/autotranscribe/python/Processed'
#path = 'C:/Users/g/Desktop/autotranscribe/python/Matching'
output_path = './Processed'
path = './Matching'
cols_to_drop = ['PSI ID','PSIvet Region','PSIvet region num','Fax','County']
column_name_update_map = {'Account name': 'Company Name','Billing address':'Address','Billing city':'City','Billing State':'State'}

# ExcelWriter does not create missing directories, so make sure the
# destination exists before any workbook is written.
os.makedirs(output_path, exist_ok=True)

for file in os.listdir(path):
    # Only process source workbooks, never a previously written output file.
    if not file.startswith("PSI") or "(updated headers)" in file:
        continue

    # os.listdir() returns bare file names, so qualify them with the
    # input directory before reading.
    input_file_name = os.path.join(path, file)
    # sheet_name=None loads every sheet into a {sheet name: DataFrame} dict.
    dfs = pd.read_excel(input_file_name, sheet_name=None, skiprows=5)

    output = {}
    for ws, df in dfs.items():
        # Sheets that must not be copied to the output workbook.
        if ws.startswith(("New Members", "PVCC")):
            continue
        if ws.startswith("Cancelled Members") and 'Active date' in df.columns:
            df = df.drop('Active date', axis=1)
        output[ws] = df

    # A workbook needs at least one sheet; skip files where every sheet
    # was filtered out instead of failing on write.
    if not output:
        continue

    output_file_name = os.path.join(
        output_path, f'{file.replace(".xlsx","")} (updated headers).xlsx'
    )
    # The context manager saves and closes the workbook exactly once,
    # even if writing one of the sheets raises.
    with pd.ExcelWriter(output_file_name) as writer:
        for ws, df in output.items():
            df.to_excel(writer, index=False, sheet_name=ws)
After running, the code had created a new file in Processed named PSI 123 (updated headers).xlsx with sheets named as in the input. The sheet Cancelled Members contained the following:
Address State Zip Status Status.1 Date Partner
Col1 Col2 Col3
xx NY 110

Problem in reading mu(μ) character in python

I have an input file in which one row contains multiple mu (μ) characters. My Python code just opens the file, does some manipulation, and saves the result in .csv format. When I save the file as .csv, the output contains some weird and funny characters (�). The attached images show the input file and the output file when opened in Excel.
Input CSV file:
Output CSV file:
from pathlib import Path
import pandas as pd
import time
import argparse
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('path',
help='define the directory to folder/file')
start = time.time()
def main(path_files):
rs_columns = "SourceFile,RowNum,SampleID,Method,Element,Result".split(",")
rs = pd.DataFrame(columns=rs_columns)
if path_files.is_file():
fnames = [path_files]
else:
fnames = list(Path(path_files).glob("*.csv"))
for fn in fnames:
if "csv" in str(fn):
#df = pd.read_csv(str(fn))
df = pd.read_csv(str(fn), header=None, sep='\n')
df = df[0].str.split(',', expand=True)
else:
print("Unknown file", str(fn))
non_null_columns = [col for col in df.columns if df.loc[:, col].notna().any()]
# loop thru each column for the whole file and create a row of results in the output file
for i in range(1,len(non_null_columns)):
SourceFile = Path(fn.name)
Method = "WetScreening"
Element = df.iloc[1,i]
print(Element)
for j in range(2,len(df)):
RowNum = j+1
Result = df.iloc[j,i]
SampleID = df.iloc[j,0]
rs = rs.append(pd.DataFrame({
"SourceFile": [SourceFile],
"RowNum": [RowNum],
"SampleID": [SampleID],
"Method": [Method],
"Element": [Element],
"Result": [Result]
}),ignore_index=True)
rs.to_csv("check.csv",index=False)
print("Output: check.csv")
if __name__== "__main__":
start = time.time()
args = parser.parse_args()
path = Path(args.path)
main(path)
print("Processed time: ", time.time()-start)
Attach files here
Any help????
Try encoding to utf-8:
rs.to_csv("check.csv",index=False, encoding='UTF-8')
See also Pandas df.to_csv("file.csv" encode="utf-8") still gives trash characters for minus sign
That answer mentions the BOM bytes (0xEF, 0xBB, 0xBF) at the start of the file, which act as a UTF-8 signature so that programs such as Excel detect the encoding.
rd.to_csv('file.csv', index=False, encoding='utf-8-sig')

Writing xls from python row wise using pandas

I have successfully created .xlsx files using pandas
df = pd.DataFrame([list of array])
'''
:param data: Data Rows
:param filename: name of the file
:return:
'''
df = pd.DataFrame(data)
# my "Excel" file, which is an in-memory output file (buffer)
# for the new workbook
excel_file = BytesIO()
writer = pd.ExcelWriter(excel_file, engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1_test')
writer.save()
writer.close()
# important step, rewind the buffer or when it is read() you'll get nothing
# but an error message when you try to open your zero length file in Excel
excel_file.seek(0)
# set the mime type so that the browser knows what to do with the file
response = HttpResponse(excel_file.read(),
content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
# set the file name in the Content-Disposition header
response['Content-Disposition'] = 'attachment; filename=' + filename + '.xlsx'
return response
But I have an issue here:
There is an unnecessary SNo. (serial number) that I don't want.
It appears as the first row and the first column. How do I remove it?
According to the documentation here,
to_excel writes the index as an extra column by default.
Pass index=False to leave it out:
df.to_excel(writer, sheet_name='Sheet1_test',index=False)
For further reference, see this Medium post: https://medium.com/better-programming/using-python-pandas-with-excel-d5082102ca27

Try / Except in Python to show which file threw the error

I am writing a Python script to import a large number of files, manipulate them, and then output them to .csv. That is a piece of cake in pandas; however, I have no control over the incoming files, so I am trying to make the script handle files that come in the "wrong" way.
Anyway, I am using a try/except to show the user that there is a KeyError in one of the files (basically there is a " in a cell when the datatype is int).
My question is: is there a way to have the except block report the name of the file that caused the error?
for csv in csvList:
df = pd.read_csv(csv, header=0, skip_blank_lines=True, skipinitialspace=True)\
.dropna(how='all')
try:
df[0] = df[0].astype(int)
df[1] = df[1].astype(int)
df[2] = df[2].astype(int)
df[3] = df[3].astype(int)
report_path = 'UPC_Ready_for_Import'
if not os.path.exists(report_path):
os.makedirs(report_path)
df.to_csv(os.path.join(report_path, csv + '_import.csv'), index=False)
except KeyError:
print('Error within file, please review files')
Assuming csvList contains list of input file paths:
for csv in csvList:
....
try:
...
except KeyError:
print('Error within file {}, please review files'.format(csv))
You could write, something like this, I guess:
# Convert the first four columns of every input file to int and write the
# result to the report directory, naming the file when a KeyError occurs.
report_path = 'UPC_Ready_for_Import'
for csv in csvList:
    # Bind the name before the try block: the except clause reports it, and
    # the conversions below can raise before any later assignment would run.
    file_name = os.path.join(report_path, csv + '_import.csv')
    df = pd.read_csv(
        csv, header=0, skip_blank_lines=True, skipinitialspace=True
    ).dropna(how='all')
    try:
        for col in range(4):
            df[col] = df[col].astype(int)
        # exist_ok avoids the separate "does it exist?" check.
        os.makedirs(report_path, exist_ok=True)
        df.to_csv(file_name, index=False)
    except KeyError:
        print('Error within file', file_name, ', please review files')
The main idea is to store the file name in a variable file_name and use it in the except block.

File naming when working with paths

So I have the following code and I'm trying to export a csv and immediately open it in Python.
# define weekly pull code
def GT_Weekly_Run(keys):
# connect to Google
connector = pyGTrends(google_username, google_password)
# make request
connector.request_report(keys, geo="US")
# wait a random amount of time between requests to avoid bot detection
time.sleep(randint(5, 10))
# download file
connector.save_csv(path, '_' + "GT_Weekly" + '_' + keys)
name = path, '_' + "GT_Weekly" + '_' + keys
with open(name + '.csv', 'rt') as csvfile:
csvReader = csv.reader(csvfile)
data = []
data = [row for row in csvReader if row and row[0].startswith("20")]
week_df = pd.DataFrame(data)
cols = ["Date", "Trend"]
week_df.columns = [cols]
The problem is that I'm not able to match the save as file name with the open file name. Have tried a number of things but keep getting errors regarding
IOError: [Errno 2] No such file or directory: 'GT_Weekly_football.csv'
TypeError: can only concatenate tuple (not "str") to tuple
Is there anything that looks off? I just need to go from saving the file as X to using that same name (X) to import it back in.
Thanks!
I would recommend you create a variable to hold the filename. That way, the same name will be used both for creation and loading back.
import os
# define weekly pull code
def GT_Weekly_Run(keys):
    """Download the weekly Google Trends report for ``keys`` and load it.

    The report is saved under ``path`` with a name derived from ``keys``;
    the same name is then used to read the file back, keeping only rows
    whose first field starts with "20".

    :param keys: search term; concatenated into the file name, so it must
        be a plain string.
    """
    # connect to Google
    connector = pyGTrends(google_username, google_password)
    # make request
    connector.request_report(keys, geo="US")
    # wait a random amount of time between requests to avoid bot detection
    time.sleep(randint(5, 10))
    # download file
    filename = "_GT_Weekly_" + keys
    connector.save_csv(path, filename)
    # NOTE(review): save_csv appears to add the ".csv" extension itself, so
    # it must be added here too when reading the file back. This also assumes
    # save_csv places the file inside `path` the way os.path.join does --
    # confirm against the pyGTrends version in use.
    with open(os.path.join(path, filename + '.csv'), 'rt') as csvfile:
        data = [row for row in csv.reader(csvfile)
                if row and row[0].startswith("20")]
    # Pass the labels as a flat list: a nested list ([cols]) would build a
    # MultiIndex instead of plain column names.
    week_df = pd.DataFrame(data, columns=["Date", "Trend"])
It is safer to make use of Python's os.path.join function to create your full file names.
Also take a look at the keys parameter you are passing to GT_Weekly_Run, it should just be a simple string.

Categories

Resources