Iterating over DictReader variable [duplicate] - python

I open a file and read it with csv.DictReader. I iterate over it twice, but the second time nothing is printed. Why is this, and how can I make it work?
with open('MySpreadsheet.csv', 'rU') as wb:
reader = csv.DictReader(wb, dialect=csv.excel)
for row in reader:
print row
for row in reader:
print 'XXXXX'
# XXXXX is not printed

You read the entire file the first time you iterated, so there is nothing left to read the second time. Since you don't appear to be using the csv data the second time, it would be simpler to count the number of rows and just iterate over that range the second time.
import csv
from itertools import count
with open('MySpreadsheet.csv', 'rU') as f:
reader = csv.DictReader(f, dialect=csv.excel)
row_count = count(1)
for row in reader:
next(count)
print(row)
for i in range(row_count):
print('Stack Overflow')
If you need to iterate over the raw csv data again, it's simple to open the file again. Most likely, you should be iterating over some data you stored the first time, rather than reading the file again.
with open('MySpreadsheet.csv', 'rU') as f:
reader = csv.DictReader(f, dialect=csv.excel)
for row in reader:
print(row)
with open('MySpreadsheet.csv', 'rU') as f:
reader = csv.DictReader(f, dialect=csv.excel)
for row in reader:
print('Stack Overflow')
If you don't want to open the file again, you can seek to the beginning, skip the header, and iterate again.
with open('MySpreadsheet.csv', 'rU') as f:
reader = csv.DictReader(f, dialect=csv.excel)
for row in reader:
print(row)
f.seek(0)
next(reader)
for row in reader:
print('Stack Overflow')

You can create a list of dictionaries, each dictionary representing a row in your file, and then count the length of the list, or use list indexing to print each dictionary item.
Something like:
with open('YourCsv.csv') as csvfile:
reader = csv.DictReader(csvfile)
rowslist = list(reader)
for i in range(len(rowslist))
print(rowslist[i])

add a wb.seek(0) (goes back to the start of the file) and next(reader) (skips the header row) before your second loop.

You can try store the dict in list and output
input_csv = []
with open('YourCsv.csv', 'r', encoding='UTF-8') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
input_csv.append(row)
for row in input_csv:
print(row)
for row in input_csv:
print(row)

Related

Compare two CSV files and write difference in the same file as an extra column in python

Hey intelligent community,
I need a little bit of help because i think i don't see the the wood in the trees.
i have to CSV files that look like this:
Name,Number
AAC;2.2.3
AAF;2.4.4
ZCX;3.5.2
Name,Number
AAC;2.2.3
AAF;2.4.4
ZCX;3.5.5
I would like to compare both files and than write any changes like this:
Name,Number,Changes
AAC;2.2.3
AAF;2.4.4
ZCX;5.5.5;change: 3.5.2
So on every line when there is a difference in the number, i want to add this as a new column at the end of the line.
The Files are formated the same but sometimes have a new row so thats why i think i have to map the keys.
I come this far but now iam lost in my thoughts:
Python 3.10.9
import csv
Reading the first csv and set mapping
with open('test1.csv', 'r') as csvfile:
reader= csv.reader(csvfile)
rows = list(reader)
file1_dict = {row[1]: row[0] for row in rows}
Reading the second csv and set mapping
with open('test2.csv', 'r') as csvfile:
reader= csv.reader(csvfile)
rows = list(reader)
file2_dict = {row[1]: row[0] for row in rows}
comparing the keys and find the diff
for k in test1_dict:
if test1_dict[k] != test2:dict[k]
test1_dict[k] = test2_dict[k]
for row in rows:
if row[1] == k:
row.append(test2_dict[k])
#write the csv (not sure how to add the word "change:")
with open('test1.csv', 'w', newline ='') as csvfile:
writer = csv.writer(csvfile)
writer.writerows(rows)
If i try this, i don't get a new column, it just "updates" the csv file with the same columns.
For example this code gives me the diff row but i'am not able to just add it to existing file and row.
with open('test1.csv') as fin1:
with open('test2.csv') as fin2:
read1 = csv.reader(fin1)
read2 = csv.reader(fin2)
diff_rows = (row1 for row1, row2 in zip(read1, read2) if row1 != row2)
with open('test3.csv', 'w') as fout:
writer = csv.writer(fout)
writer.writerows(diff_rows)
Does someone have any tips or help for my problem? I read many answers on here but can't figure it out.
Thanks alot.
#bigkeefer
Thanks for your answer, i tried to change it for the delimiter ; but it gives an "list index out of range error".
with open('test3.csv', 'r') as file1:
reader = csv.reader(file1, delimiter=';')
rows = list(reader)[1:]
file1_dict = {row[0]: row[1] for row in rows}
with open('test4.csv', 'r') as file2:
reader = csv.reader(file2, delimiter=';')
rows = list(reader)[1:]
file2_dict = {row[0]: row[1] for row in rows}
new_file = ["Name;Number;Changes\n"]
with open('output.csv', 'w') as nf:
for key, value in file1_dict.items():
if value != file2_dict[key]:
new_file.append(f"{key};{file2_dict[key]};change: {value}\n")
else:
new_file.append(f"{key};{value}\n")
nf.writelines(new_file)
You will need to adapt this to overwrite your first file etcetera, as you mentioned above, but I've left it like this for your testing purposes. Hopefully this will help you in some way.
I've assumed you've actually got the headers above in each file. If not, remove the slicing on the list creations, and change the new_file variable assignment to an empty list ([]).
with open('f1.csv', 'r') as file1:
reader = csv.reader(file1, delimiter=";")
rows = list(reader)[1:]
file1_dict = {row[0]: row[1] for row in rows if row}
with open('f2.csv', 'r') as file2:
reader = csv.reader(file2, delimiter=";")
rows = list(reader)[1:]
file2_dict = {row[0]: row[1] for row in rows if row}
new_file = ["Name,Number,Changes\n"]
for key, value in file1_dict.items():
if value != file2_dict[key]:
new_file.append(f"{key};{file2_dict[key]};change: {value}\n")
else:
new_file.append(f"{key};{value}\n")
with open('new.csv', 'w') as nf:
nf.writelines(new_file)

get the first row in multiple files in python

I'm trying to iterate over files to get the first line and put it in a database.
The problem that I have is that i can't figure out how to only read 1 row.
Now it runs over all the rows while I only need 1 row per file.
The files looks like this:
batch_name sample_barcode pool_barcode pool_type pooling_volume_ul pooling_comments
NIPT20200304 0101002253 PT2129764 A 2.0
NIPT20200304 0109011474 PT2129764 A 17.66
And my code is currently this:
pools = []
for files in library:
with open(files, 'r') as f:
next(f)
reader = csv.reader(f, delimiter='\t')
for row in reader:
print("row: ", row)
pools.append(row)
print("pools", pools)
with the for row in reader, it shows me all the rows, and with row[0], I only get the first column, but still get all the rows. i tried f.readline.().rstrip() but then I don't know where to put the delimiter as "\t" shows up in the pools variable instead of a space.
I got what I wanted thanks with this:
#read over the pool_report files and get the first line
pools = []
for files in library:
with open(files, 'r') as f:
next(f)
data = f.readline().strip()
values = data.split()
pools.append(values)
print("pools: ", pools)
Try to call next(reader) instead of next(f):
pools = []
for files in library:
with open(files, 'r') as f:
reader = csv.reader(f, delimiter='\t')
next(reader, None)
for row in reader:
print("row: ", row)
pools.append(row)
print("pools", pools)
You dont need teh last for loop, like below
pools = []
for files in library:
with open(files, 'r') as f:
next(f)
reader = csv.reader(f, delimiter='\t')
pools.append(next(reader))
print("pools", pools)

Create list from csv lines in python

I have a csv file with 2 rows and multiple lines.
import csv
with open('data.csv', 'r') as csv_file:
csv_reader = csv.reader(csv_file)
next(csv_reader)
for row in csv_reader:
print(row[0])
The output is:
row0line0
row0line1
row0line2
...
Is there a way i could further separate the rows into a list of individual cells?
Thanks
As I understand your csv file look like this:
row0line0
row1line1
...
If its possible i should reccomand to change it to:
row0 line0
row1 line1
...
(Add a space between the rows and the lines)
Then you can update your code to the code bellow to print only the rows and create two lists - one that contain the rows and another that contain the lines:
import csv
with open('data.csv', 'r') as csv_file:
csv_reader = csv.reader(csv_file)
next(csv_reader)
rows = []
lines = []
for item in csv_reader:
temp = item[0].split(" ")
rows.append(temp[0])
lines.append(temp[1])
print(temp[0])
If you mean that each row becomes an element of the list that goes this way:
with open('data.csv', 'r') as csv_file:
reader = csv.reader(csv_file)
data_list = [row[0] for row in reader]
Otherwise, if you want to create a list of the first elements of each line, you can do this:
with open('data.csv', 'r') as csv_file:
reader = csv.reader(csv_file)
row0_list = []
for row in reader:
row0_list.append(row[0])
I hope the problem is solved with this explanation.
My understanding is that you are asking to output all the data fields. csv_reader is already separating your rows into a list individual cells!
You current script reads the file one line at a time and prints the first item in each row with this line:
for row in csv_reader:
print(row[0])
Instead of printing row[0], which only prints the first field in the csv row, you can just print the row:
for row in csv_reader:
print(row)
That will output the field lists (from my sample csv):
['r0v0', 'r0v1', 'r0v2']
['r1v0', 'r1v1', 'r1v2']
If you want to print in a nicer format, you can use join:
for row in csv_reader:
print(", ".join(row))
Output:
r0v0, r0v1, r0v2
r1v0, r1v1, r1v2
My csv:
r0v0,r0v1,r0v2
r1v0,r1v1,r1v2

reading data from specified location

import CSV
#Get high temperatures from file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
reader = csv.reader(f)
header_row = next(reader)
highs = []
for row in reader:
highs.append(row[1])
print(highs)
I encountered the code above when I am learning about extract and reading data.
I didn’t quite get the usage of next():
header_row = next(reader)
The book explains that because we have already read the header row,the loop will begins at the second line where the actual data begins
What to do if we need to read from the third line? Is the following right?
with open(filename) as f:
reader = csv.reader(f)
header_row = next(reader)
row_1 = next(reader)
highs = []
for row in reader:
highs.append(row[2])
Print(highs)
The question might be frivolous, but I’m very confused
The next function moves the cursor one row ahead, so yes in the code segment
header_row = next(reader)
row_1 = next(reader)
highs = []
for row in reader:
highs.append(row[2])
Print(highs)
The line does start from the third line though it's not the best way to do it.
If you want to access rows directly try this instead
with open(filename) as f:
reader = csv.reader(f)
rows = list(reader)
print rows[2] # this will get you the third row

Finding string in row to overwrite this row of CSV using Python 2.7

I'm following some feedback from another thread, but have gotten stuck. I'm looking to search an existing csv file to locate the row in which a string occurs. I am then looking to update this row with new data.
What I have so far gives me an "TypeError: unhasable type: 'list'":
allLDR = []
with open('myfile.csv', mode='rb') as f:
reader = csv.reader(f)
#allLDR.extend(reader)
for num, row in enumerate(reader):
if myField in row[0]:
rowNum = row
line_to_override = {rowNum:[nMaisonField, entreeField, indiceField, cadastreField]}
with open('myfile.csv', 'wb') as ofile:
writer = csv.writer(ofile, quoting=csv.QUOTE_NONE, delimiter=',')
#for line, row in enumerate(allLDR):
for line, row in enumerate(reader):
data = line_to_override.get(line, row)
writer.writerow(data)
The line allDR.extend(reader) consumes all of the input lines from the csv.reader object. Therefore, the for loop never runs, and rowNum=row is never executed, and {rowNum:blah} generates an exception.
Try commenting out the allDR.extend(reader) line.
As a debugging aid, try adding print statements inside the for loop and inside the conditional.
Here is a program which does what I think you want your program to do: it reads in myfile.csv, modifies rows conditionally based on the content of the first cell, and writes the file back out.
import csv
with open('myfile.csv', mode='rb') as ifile:
allDR = list(csv.reader(ifile))
for row in allDR:
if 'fowl' in row[0]:
row[:] = ['duck', 'duck', 'goose']
with open('myfile.csv', 'wb') as ofile:
csv.writer(ofile).writerows(allDR)

Categories

Resources