Print Unknown Length of Table to a CSV file using Python/Pandas - python

I have a table that I want to write to a CSV file using pandas. The table was extracted from a different Excel file. The problem is that the length of the table is unknown. How should I write this table to the CSV file so that all of it is shown, rather than everything ending up on a single line?
# For every entry x of ABC, read "<x>/File.csv" and report the rows whose
# column D equals 'Fail'.
for x in ABC:
print()
print(f"{x}:")
try:
df = pd.read_csv(x + "/File.csv")
# Keep only the five columns of interest.
df_filter= df[['A','B','C', "D", "E"]]
if df_filter['D'].str.contains('Fail').any():
# NOTE(review): 'DUTId' is not among the columns selected into df_filter
# above, so this lookup presumably fails - confirm whether 'DUTId' should be
# part of the selection.
noSC= df_filter[df_filter.DUTId != 'SC_INFO']
finalTable= noSC[noSC.D == 'Fail']
if finalTable.empty:
print("Did not complete")
# NOTE(review): this assigns the whole DataFrame to the single cell A16 (the
# author states below that this is wrong); it also appears to sit in the
# branch where finalTable is empty - confirm the intended nesting.
sheet1['A16'] = finalTable
else:
# Drop column D (every remaining row is a 'Fail') and print the full table
# as text.
filterTable= finalTable[['A','B','C', "E"]]
fullfinalTable = filterTable.to_string()
print(fullfinalTable)
else:
print("Run Successful")
except FileNotFoundError:
print("File does not exist")
I know that sheet1['A16'] = finalTable is wrong, but I am unsure what I should do instead. It does output the table, but only into cell A16, so it ends up as one long line. Is there any way to write this table, whose length is unknown, into the new Excel file with proper formatting?

Try this instead.
from pathlib import Path
import pandas as pd

# Collect every CSV below dir_path, keep the failing rows and write them to a
# single output CSV. DataFrame.to_csv writes one line per row, so the length
# of the table does not need to be known in advance.
dir_path = Path(r"yourFolderPath")  # must be a Path: plain strings have no .glob()
output_path = dir_path / "finaltable.csv"

# Skip the output file itself so that re-running the script does not feed the
# previous result back in as input.
files_list = [str(p) for p in dir_path.glob("**/*.csv") if p != output_path]

if files_list:
    source_dfs = [pd.read_csv(file_) for file_ in files_list]
    df = pd.concat(source_dfs, ignore_index=True)
    df = df[['A', 'B', 'C', 'D', 'E', 'DUTId']]
    # na=False treats missing values in D as "no match" instead of NaN.
    if df['D'].str.contains('Fail', na=False).any():
        df = df[df.DUTId != 'SC_INFO']
        finalTable = df[df.D == 'Fail']
        if finalTable.empty:
            print("Did not complete. Dataframe is empty.")
        else:
            print("Dataframe written to .csv")
            finalTable = finalTable[['A', 'B', 'C', 'E']]
            finalTable.to_csv(output_path)
else:
    print(f"No .csv files in {dir_path}")

Related

Pandas loop through sheets without sheet name

I have a couple of Excel files with different sheet names.
I was wondering if it's possible to iterate through all of the sheets without supplying a "sheetname" variable.
Right now the sheet is chosen through input selection. I searched before posting but haven't found or figured out how to do this; any help is appreciated.
import pandas as pd

# Look for sheet names
file = file_name
sheet_names = pd.ExcelFile(file).sheet_names

# Filter sheets: skip those whose name starts with "Δ" and the "Log Data"
# sheet, and print a numbered menu of the rest.
counter = 0
sheets = [""]  # index 0 is a placeholder so that menu numbers start at 1
for sheet in sheet_names:
    if sheet.startswith("Δ") or sheet == "Log Data":
        continue
    counter += 1
    sheets.append(sheet)
    print(f"{sheets[counter]} - {counter}")

# Sheet selection
try:
    x = int(input("Select Sheet Number: "))
    # Validate explicitly: assert statements are stripped when Python runs
    # with -O, and an out-of-range number should be logged like any other
    # invalid input.
    if x not in range(1, counter + 1):
        raise ValueError("Select a value from list")
except ValueError as err:
    logger.error(err)
    raise
else:
    df = pd.read_excel(file, sheet_name=sheets[x])
    # Report the selection only once it has succeeded; doing this in a
    # `finally` clause would fail with a NameError whenever x was never bound.
    print(f"{sheets[x]} Selected")

Pandas .to_csv() gives "No engine for filetype: 'csv'"

Basically, the title. The pd.Dataframe.to_csv() method works fine on my machine outside of the app code, but if I try to run it in a pyqt5 gui, I get the following error: No engine for filetype: 'csv'. Below is the function that causes the error. Note: if the user gives an .xlsx file, the .to_excel() function works fine. The file name the function uses is passed through a QFileDialog object. Here is the function causing the problem.
# Append the (company name, file number) pairs in `rows` to `file` and return
# a status message; any exception raised along the way is returned instead of
# being propagated.
def writeToFile(self, file, rows):
try:
new_df = pd.DataFrame(rows.items(), columns = ['Company Name', 'File Number'] )
# When the first page of self.stack is active, the existing file contents are
# read and the new rows are concatenated after them.
if self.stack.currentIndex() == 0:
if file[-4:] == '.csv':
df = pd.read_csv(file)
new_df = pd.concat([df, new_df])
else:
df = pd.read_excel(file)
new_df = pd.concat([df, new_df])
# NOTE(review): file[-4] is a single character, so this comparison with the
# four-character string '.csv' is never true and every path is handed to
# to_excel(); the check above uses the slice file[-4:] instead.
if file[-4] == '.csv':
new_df.to_csv(file, index = False, compression = None)
else:
new_df.to_excel(file)
if self.flag_uids == []:
return "All emails were written to a file"
else:
s = "All emails possible were written to a file. There were " + str(len(self.flag_uids)) + " messages unable to be interpreted"
return s
except Exception as e:
return e
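The code is confusing the extensions: `file[-4]` is a single character, so `file[-4] == '.csv'` is never true and even a .csv path is passed to `to_excel()`, which is what raises "No engine for filetype: 'csv'". Compare the last four characters (`file[-4:]`) or use `file.endswith('.csv')`. It also helps to separate the CSV and Excel handling: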
def writeToFile(self, file, rows):
    """Write the entries of *rows* to *file* as CSV or Excel.

    Args:
        file: Path of the target file as a string. A path ending in ".csv"
            (case-insensitive) is written with ``to_csv``; anything else is
            written with ``to_excel``.
        rows: Mapping of company name to file number.

    Returns:
        A status message on success. If anything raises, the exception
        object itself is returned rather than propagated, so the caller can
        display it.
    """
    try:
        new_df = pd.DataFrame(
            list(rows.items()), columns=['Company Name', 'File Number']
        )
        # Decide the format once. The previous check compared the single
        # character file[-4] with '.csv', which is never true, so CSV paths
        # were handed to to_excel().
        is_csv = file.lower().endswith('.csv')

        # When the first page of self.stack is active, keep the existing
        # file contents and add the new rows after them.
        if self.stack.currentIndex() == 0:
            existing = pd.read_csv(file) if is_csv else pd.read_excel(file)
            new_df = pd.concat([existing, new_df])

        if is_csv:
            new_df.to_csv(file, index=False, compression=None)
        else:
            new_df.to_excel(file)

        if not self.flag_uids:
            return "All emails were written to a file"
        return (
            "All emails possible were written to a file. There were "
            + str(len(self.flag_uids))
            + " messages unable to be interpreted"
        )
    except Exception as e:
        # Deliberate: callers receive the error as the return value.
        return e

Python KeyError when Comparing two DataFrames

I have extracted data from a CSV file and want to use it update values in a spreadsheet. The dataframe has a column 'ticker' of values. I want to check the existing values in the spreadsheet, and if the CSV has a new value add the new value to the spreadsheet.
# Add every ticker from `reduced` that is not yet present in df_xls.
if df_xls.empty:
# NOTE(review): the nested list [[...]] stores a one-element list in the
# cell rather than the ticker string itself - confirm this is intended.
df_xls = df_xls.append(pd.DataFrame({"ticker": [[reduced.ticker[0]]]}), ignore_index=True)
wtw = 1
print(reduced.columns.values)
print(df_xls.columns.values)
# NOTE(review): iterating a DataFrame directly yields its column labels, so
# csv_row and xls_row are labels such as 'ticker', not row positions; looking
# up reduced.ticker['ticker'] is presumably the source of the KeyError.
for csv_row in reduced:
for xls_row in df_xls:
if reduced.ticker[csv_row] == df_xls.ticker[xls_row]:
wtw = 0
break
else:
# NOTE(review): next() expects an iterator, and the for loop already advances
# on its own - this call looks unnecessary.
next(xls_row)
if wtw == 1:
df_xls = df_xls.append(pd.DataFrame({"ticker": [[reduced.ticker[csv_row]]]}), ignore_index=True)
next(csv_row)
I am getting a "KeyError: 'ticker'" that refers to the line "if reduced.ticker[csv_row] == df_xls.ticker[xls_row]:". I don't understand the error, given that the column names are correct. The print statements above output:
['ticker' '2021-02-01 shares' '2021-02-01 value']
['ticker']
Thanks in advance.
Edit --
I do not have the code available at a URL, but here is the entirety of the script:
# Load a holdings report from CSV, reduce it to ticker/shares/value columns
# and add any ticker missing from the spreadsheet 'file.xlsx'.
import numpy as np
import pandas as pd
filename = "2021-02-01-FULLREPORT.csv"
##load new information from CSV into dataframe##
df_csv = pd.read_csv(filename)
# The first ten characters of the file name (the date) prefix the new column
# names.
prefix = filename[0:10]
ticker = df_csv.ticker
shares = df_csv.shares
value = df_csv["market value($)"]
reduced = pd.DataFrame({
"ticker": ticker,
prefix +" shares": shares,
prefix +" value": value
})
##end load new information from CSV into dataframe##
##load excel
from pandas import ExcelWriter
from pandas import ExcelFile
df_xls = pd.read_excel('file.xlsx')
##update ticker list with information saved in reduced##
if df_xls.empty:
# NOTE(review): the nested list [[...]] stores a one-element list in the
# cell rather than the ticker string itself - confirm this is intended.
df_xls = df_xls.append(pd.DataFrame({"ticker": [[reduced.ticker[0]]]}), ignore_index=True)
wtw = 1
print(reduced.columns.values)
print(df_xls.columns.values)
# NOTE(review): iterating a DataFrame directly yields its column labels, so
# csv_row and xls_row are labels such as 'ticker', not row positions; looking
# up reduced.ticker['ticker'] is presumably the source of the KeyError.
for csv_row in reduced:
for xls_row in df_xls:
if reduced.ticker[csv_row] == df_xls.ticker[xls_row]:
wtw = 0
break
else:
# NOTE(review): next() expects an iterator, and the for loop already advances
# on its own - this call looks unnecessary.
next(xls_row)
if wtw == 1:
df_xls = df_xls.append(pd.DataFrame({"ticker": [[reduced.ticker[csv_row]]]}), ignore_index=True)
next(csv_row)
print (df_xls)
This resolved the KeyError:
##load excel
from pandas import ExcelWriter
from pandas import ExcelFile
df_xls = pd.read_excel('file.xlsx')

##update ticker list saved in reduced##
# An empty sheet has no 'ticker' column yet; create it so the lookup below
# works.
if "ticker" not in df_xls.columns:
    df_xls["ticker"] = pd.Series(dtype="object")

# Select the tickers of `reduced` that the spreadsheet does not contain yet.
# isin() replaces the nested row-by-row comparison, and the values are kept
# as plain strings instead of being wrapped in one-element lists.
is_new = ~reduced["ticker"].isin(df_xls["ticker"])
new_tickers = reduced.loc[is_new, ["ticker"]].drop_duplicates()

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
df_xls = pd.concat([df_xls, new_tickers], ignore_index=True)
print(df_xls)

Pandas not updating CSV

Dataset:
https://github.com/Bene939/newsheadlinedatasets
With my program I am labeling my dataset of news headlines. It worked fine until today.
For some reason it won't write the CSV file anymore. As far as I can see, the data frame does get updated, though.
At around row 4469 of my CSV it stopped overwriting the CSV file. Then it worked again. Then it failed again, until it stopped overwriting completely at row 4474. It had worked fine until now, and if I create a new CSV it will overwrite that one.
I am using Jupyter Notebook. Is there some kind of limit to this? The labeled dataset is around 300KB.
!pip install pandas
!pip install pathlib
import pandas as pd
from pathlib import Path
def append_df(df, file_name):
    """Append *df* to the CSV at *file_name*, creating the file if needed.

    When the file already exists, its rows are read, *df* is concatenated
    after them, exact duplicate rows are dropped and the result overwrites
    the file. Otherwise *df* itself is written as a new file.

    Args:
        df: Data frame holding the rows to add.
        file_name: Path of the CSV file, as a string.
    """
    my_file = Path(file_name)
    if my_file.exists():
        print("Appending to existing file named " + file_name)
        # Read with the same encoding that is used for writing below.
        orig_df = pd.read_csv(file_name, encoding='utf-8-sig')
        print("Old Data Frame: ")
        print(orig_df)
        # NOTE: drop_duplicates removes every row that is identical to an
        # earlier one, so a repeated (news, sentiment) pair is stored once.
        new_df = pd.concat([orig_df, df], ignore_index=True).drop_duplicates()
        print("New Data Frame: ")
        print(new_df)
        new_df.to_csv(file_name, index=False, header=True, encoding='utf-8-sig')
    else:
        print("Creating new file named" + file_name)
        # Write the frame that was passed in, not a module-level global.
        df.to_csv(file_name, index=False, header=True, encoding='utf-8-sig')
def update_csv(df, file_name):
    """Replace the contents of the CSV at *file_name* with *df*."""
    message = "Overwriting " + file_name
    print(message)
    # Same output settings as before: no index column, header row, BOM-marked
    # UTF-8.
    csv_options = dict(index=False, header=True, encoding='utf-8-sig')
    df.to_csv(file_name, **csv_options)
# Shows the headlines one by one, labels each according to user input and
# saves it to a new CSV file; labelled headlines are removed from the source
# CSV.
print("WARNING: EDITING CSV FILE WITH EXCEL MAY CORRUPT FILE\n")
file_name = "news_headlines.csv"
new_file = "news_headlines_sentiment.csv"
news_sentiment_df = pd.DataFrame(columns=["news", "sentiment"])
my_file = Path(file_name)
if my_file.exists():
    # on_bad_lines='warn' skips malformed lines with a warning; it replaces
    # error_bad_lines=False, which was removed in pandas 2.0.
    df = pd.read_csv(file_name, encoding='utf-8-sig', on_bad_lines='warn')
    print("Loaded " + file_name)
    valid_labels = (0, 1, 2)  # do not shadow the builtin `range`
    for index, row in df.iterrows():
        user_input = -1
        # Keep asking until one of the three labels has been entered.
        while user_input not in valid_labels:
            print("####################################################################")
            print(row["news"])
            try:
                user_input = int(input("Negative: 0\nNeutral: 1\nPositive: 2\n"))
            except ValueError:
                print("\nPlease enter an Integer!\n")
        # Save the labelled sentence to the new file. user_input is
        # guaranteed to be 0, 1 or 2 here, so it is the label itself.
        news_sentiment_df.loc[len(news_sentiment_df)] = [row["news"], user_input]
        append_df(news_sentiment_df, new_file)
        # Delete the data point from the original data frame. This removes
        # every row that carries the same headline text.
        index_name = df[df["news"] == row["news"]].index
        df.drop(index_name, inplace=True)
        # Update the old CSV file.
        update_csv(df, file_name)
else:
    print("File not Found")
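The cause was that I was trying to add duplicate rows while also calling drop_duplicates, without noticing it: any row identical to an existing one was removed again before the file was written.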

Record exists in Python Dataframe?

I have a Pandas dataframe with multiple columns. I need to check if a record exists but only comparing by certain columns.
Is there a better way to do it than:
#df has the following columns df = pd.DataFrame(columns = ['DIR', 'FILE_NAME', 'A-HASH', 'P-HASH', 'D-HASH', 'W-HASH','SIZE', 'TAGSIZE', 'FILE_CREATE_DATE'])
df = pd.read_csv(mainDFfile, index_col = ['INDEX'])
# Keep only the names that match the case-insensitive pattern, i.e. names
# ending in "jpg" or "jpeg".
# NOTE(review): dirpath and filenames are defined outside this snippet -
# presumably they come from an os.walk loop; confirm.
for base in filter(functools.partial(re.match, "(?i).*jpe?g$"),
filenames):
fn = os.path.join(dirpath, base)
file_size = os.path.getsize(fn)
# A record counts as existing when a single row matches on all three of DIR,
# FILE_NAME and SIZE; the other columns are not compared.
if ((df['DIR'] == dirpath) & (df['FILE_NAME'] == base) & (df['SIZE'] == file_size)).any():
print('FILE EXISTS', fn)
else:
#add file to DB

Categories

Resources