Manipulating csv files with Python - python

I'm trying to output the difference between two CSV files, compared on two columns, and write it to a third CSV file. How can I make the following code compare by columns 0 and 3?
import csv
# Write to Michal_K.csv every row of ted.csv that does not appear (as a whole
# row) in ted2.csv.
#
# Rows are turned into tuples so they can live in sets; comparing with set
# arithmetic avoids scanning a full list for every single row.
with open("ted.csv", newline='') as f1, open("ted2.csv", newline='') as f2:
    old_rows = {tuple(row) for row in csv.reader(f1, delimiter=',')}
    new_rows = {tuple(row) for row in csv.reader(f2, delimiter=',')}

# Rows only in ted.csv. Subtracting the "only in ted2.csv" rows afterwards, as
# the earlier version did, can never remove anything: the two sets are disjoint.
only_in_old = old_rows - new_rows

# newline='' lets the csv module control line endings (avoids blank lines
# between rows on Windows).
with open('Michal_K.csv', 'w', newline='') as csvfile:
    wr = csv.writer(csvfile, delimiter=',')
    wr.writerows(only_in_old)

If you want the rows from ted.csv that do not share the same first and fourth column values (indices 0 and 3) with any row in ted2.csv, create a set of those value pairs from ted2.csv and check each row from ted.csv before writing:
# Copy to foo.csv the rows of ted.csv whose (column 0, column 3) pair does not
# occur in any row of ted2.csv.
with open("ted.csv", newline='') as f1, open("ted2.csv", newline='') as f2, \
        open('foo.csv', 'w', newline='') as out:
    r1, r2 = csv.reader(f1), csv.reader(f2)
    # Key pairs seen in ted2.csv; a set gives constant-time membership tests.
    st = {(row[0], row[3]) for row in r2}
    wr = csv.writer(out)
    # Stream ted.csv and keep only the rows whose key pair is new.
    wr.writerows(row for row in r1 if (row[0], row[3]) not in st)
If you actually want something like the symmetric difference, where you get the unique rows from both files, then make a set of the first and fourth column values (indices 0 and 3) for each file:
from itertools import chain
# Write to foo.csv the rows of either file whose (column 0, column 3) pair
# does not occur in the other file.
with open("ted.csv", newline='') as f1, open("ted2.csv", newline='') as f2, \
        open('foo.csv', 'w', newline='') as out:
    # First pass: collect the key pairs of each file.
    st1 = {(row[0], row[3]) for row in csv.reader(f1)}
    st2 = {(row[0], row[3]) for row in csv.reader(f2)}

    # The readers are exhausted now; rewind both files for the second pass.
    f1.seek(0)
    f2.seek(0)
    r1, r2 = csv.reader(f1), csv.reader(f2)

    wr = csv.writer(out)
    output1 = (row for row in r1 if (row[0], row[3]) not in st2)
    output2 = (row for row in r2 if (row[0], row[3]) not in st1)
    # Rows unique to ted.csv are written first, then rows unique to ted2.csv.
    wr.writerows(chain(output1, output2))

Related

Sum of rows from CSV

I have the following code:
# Print, for every row dated in the selected month (month1), the sum of the
# values in columns 2 through 6.
with open("expenses.csv") as read_exp:
    reader = csv.reader(read_exp, delimiter=',')
    header = next(reader)  # consume the header line before the data rows
    if header is not None:
        for row in reader:
            # Column 0 is parsed as day/month/year; only the month is compared.
            month_dt = datetime.strptime(row[0], '%d/%m/%Y').month
            if month_dt != month1:
                continue
            sum1 = sum(int(cell) for cell in row[2:7])
            print(sum1)
This gives me the sum of each individual row that is from the month I am looking for.
Output:
Enter selected month number: 7
Selected Month is: July
15
26
7
23
21
19
30
Is there a way to combine the individual sums into one total sum?
My csv is as below:
Date,Budget,Groceries,Transport,Food,Bills,Others
12/7/2021,30,1,0,4,2,8
13/7/2021,30,9,3,5,7,2
14/7/2021,30,3,3,0,0,1
15/7/2021,30,1,0,10,7,5
16/7/2021,30,9,9,0,2,1
17/7/2021,30,0,6,4,1,8
18/7/2021,30,0,9,9,8,4
16/8/2021,30,7,10,7,10,1
17/8/2021,30,5,6,10,9,1
18/8/2021,30,6,1,9,10,5
19/8/2021,30,0,8,8,3,5
20/8/2021,30,4,0,6,9,4
21/8/2021,30,6,2,1,1,5
22/8/2021,30,3,3,1,1,10
13/9/2021,30,8,2,9,4,6
14/9/2021,30,10,7,10,5,7
15/9/2021,30,5,5,6,9,6
16/9/2021,30,5,7,4,6,2
17/9/2021,30,3,7,10,5,7
18/9/2021,30,8,9,6,8,1
19/9/2021,30,5,3,1,9,5
I assume you want to print the total for the whole month in your example, correct?
If that is the case, you could keep a variable, total_sum for example, and add the value of sum1 to it for each matching row (I'm assuming sum1 is a number), like this:
# Print the per-row sums for the selected month (month1) and then their total.
with open("expenses.csv") as read_exp:
    reader = csv.reader(read_exp, delimiter=',')
    # next(..., None) returns None for an empty file instead of raising
    # StopIteration, which makes the header check below meaningful.
    header = next(reader, None)
    # The accumulator must exist before the first "+=".
    total_sum = 0
    if header is not None:
        for row in reader:
            month_str = row[0]
            month_dt = datetime.strptime(month_str, '%d/%m/%Y').month
            if month_dt == month1:
                sum1 = sum(map(int, row[2:7]))
                print(sum1)
                total_sum += sum1
    # Printed once, after all rows have been processed.
    print(total_sum)

merge some rows in two conditions

I want to merge rows based on a condition: if a row is less than 20 characters long, combine that row with the previous row. However, I have two columns, and I want to apply the condition to the second column; if any row contains fewer than 20 characters there, that row should be removed from both columns.
I already got help here with merging rows when I had one column, but now I have different requirements. I have two columns and want to apply the operation to the second column: any row that has fewer than 20 characters should be merged with the previous row and then removed from both columns.
This is the old code for merging and removing rows, from when I had one column. Thank you for your help.
I tried this code, but it doesn't give me the result.
import csv
import pandas as pd
# Write each 'Sentence' value to Output.csv, appending the following value to
# it when that following value is shorter than 20 characters.
df = pd.read_csv('Test.csv')
with open('Output.csv', mode='w', newline='', encoding='utf-16') as f:
    writer = csv.writer(f, delimiter=' ')
    rows = []
    for i, data in enumerate(df['Sentence']):
        is_last_row = i + 1 == len(df['Sentence'])
        if is_last_row:
            # Nothing follows the last row, so it is written unchanged.
            writer.writerow([data])
            continue
        following = df['Sentence'][i + 1]
        if len(following) < 20:
            writer.writerow([data + following])
            # The return value is discarded and inplace is not set, so df
            # itself is left unchanged by this call.
            df.drop(df.index[[i + 1]])
        else:
            writer.writerow([data])
I solved this by making the row empty and then removing it from the CSV:
# Write each 'Sentence' value to output.csv, appending the following value to
# it when that following value is shorter than 19 characters, and blank out
# the value that was appended.
df = pd.read_csv('test.csv', encoding='utf-8')
with open('output.csv', mode='w', newline='', encoding='utf-16') as f:
    writer = csv.writer(f, delimiter=' ')
    rows = []
    for i, data in enumerate(df['Sentence']):
        if i + 1 == len(df['Sentence']):
            # Last row: there is no following value to inspect.
            writer.writerow([data])
            continue
        if len(df['Sentence'][i + 1]) < 19:
            writer.writerow([data + df['Sentence'][i + 1]])
            # Blank the value that was just merged.
            # NOTE(review): presumably the resulting empty row is what gets
            # removed from the CSV afterwards - confirm.
            df['Sentence'][i + 1] = ''
        else:
            writer.writerow([data])

Repeating the same formula?

Basically, I'm trying to repeat the same formula but need to store variables every second. What I did was put all my variables in an excel file and have a reader go through the list. When I try to use the new variable, I'm only able to use it one at a time, not the whole list.
What I would like to do is basically print y1 = 1, y2 = 2, y3 = 3
Below is an example :
csv file :
column1, column2, column3
apple, 1 , appleweight
orange, 2, orangeweight
banana, 3, bananaweight
import csv
# Walk the CSV one row per second, looking up a value for each row and
# printing the row's 'column2' entry.
with open(r"C:\Users\Admin\Desktop\Untitled.csv", newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        x, y, z = row['column1'], row['column2'], row['column3']
        # x and z are immediately replaced by the looked-up object and its name.
        x = method.get_value(y)
        z = x.get_name()
        print (y)
        time.sleep(1)
The above code, will print:
1
2
3
I would like to print
y1 = 1
y2 = 2
y3 = 3
You can add a counter to iteration of the rows, and use that to print what you want.
import time

# Print "y<N> = <value>" for the 'column2' entry of each row, one row per
# second, numbering the rows from 1.
with open(r"C:\Users\Admin\Desktop\Untitled.csv", newline='') as csvfile:
    # The sample file has a space after each comma; skipinitialspace keeps the
    # header names free of that leading space so 'column2' can be looked up.
    reader = csv.DictReader(csvfile, skipinitialspace=True)
    for i, row in enumerate(reader, 1):
        # strip() removes the trailing space some sample values carry ("1 ").
        y = row['column2'].strip()
        print(f'y{i} = {y}')
        time.sleep(1)

openpyxl start writing from particular column/cell

I have the following code:
# Append every tab-separated line of out.txt as a row of the workbook's
# second worksheet, then save the workbook as test.xlsx.
ws = wb.worksheets[1]
print(ws)
with open('out.txt', 'r+') as data:
    for row in csv.reader(data, delimiter='\t'):
        print(row)
        ws.append(row)
wb.save('test.xlsx')
By default, it is written to the xlsx file starting from A1.
Is there a more convenient way to start appending data from, let's say, C2?
Or is xxx.cell(row=xx, column=yy).value = zz the only way?
# Write the tab-separated contents of out.txt into the worksheet cell by
# cell, with the first value landing in C2 (row 2, column 3).
start_row, start_column = 2, 3
# The file is only read, so plain read mode is enough.
with open('out.txt', 'r', newline='') as data:
    reader = csv.reader(data, delimiter='\t')
    # enumerate(start=...) replaces the hand-maintained row/column counters.
    for i, row in enumerate(reader, start=start_row):
        for j, element in enumerate(row, start=start_column):
            ws.cell(row=i, column=j).value = element
Just pad the rows with Nones
ws.append([])  # move to row 2
for row in reader:
    # Two leading None cells leave columns A and B empty so the data starts
    # in column C. The padding must be a list: (None)*2 is just None * 2,
    # which raises TypeError.
    row = [None] * 2 + row
    ws.append(row)

Calculate value difference between two different CSV files python

I have two different CSV files:
outputnovember.csv
symbol,name,amount
A,john,2
D,mary,6
E,bob,9
m,liz,-8
p,peter,-2
A total 2,Positive total 17,Negative total -10
outputdecember.csv
symbol,name,amount
A,john,2
D,mary,26
m,liz,-1
p,peter,-2
A total 2,Positive total 26,Negative total -3
How do I calculate the difference between the calculated values of the two files so that the following is appended to outdecember: A total 0, Positive total 9, Negative total -17
here's my code so far:
import csv
# Copy input.csv to outdecember.csv and append three summary rows: the total
# for symbol 'A', the total of non-negative amounts, and the total of
# negative amounts (amounts are read from column 2).
with open('outputnovember.csv', newline='') as f, \
        open('input.csv', 'r', newline='') as f_input, \
        open('outdecember.csv', 'w', newline='') as f_output:
    # Reader over the November file; nothing is read from it yet. It is
    # opened in the same "with" so the handle is closed when the block ends.
    csv_f = csv.reader(f)
    csv_input = csv.reader(f_input)
    csv_output = csv.writer(f_output)

    header = next(csv_input)
    csv_output.writerow(header)

    sum_positive = sum_negative = sum_a = 0
    for cols in csv_input:
        csv_output.writerow(cols)
        value = int(cols[2])
        if cols[0] == 'A':
            sum_a += value
        # The sign split applies to every row, not only to the 'A' rows.
        if value >= 0:
            sum_positive += value
        else:
            sum_negative += value

    # Each summary goes on its own row; the first call previously ended in
    # "]," with no closing parenthesis, which is a syntax error.
    csv_output.writerow(["A total {}".format(sum_a)])
    csv_output.writerow(["Positive total {}".format(sum_positive)])
    csv_output.writerow(["Negative total {}".format(sum_negative)])
... here is where I'm stuck: how to retrieve the values from outputnovember.csv and find the difference from outputdecember.csv
Thanks all
B

Categories

Resources