Splitting values out of a CSV Reader Python - python

Here is my current code
a_reader = None
a_reader = open('data.csv', 'rU')
a_csv_reader = csv.reader(a_reader)
for row in a_csv_reader:
print row
a_reader.close()
count = 0
sum = 0.0
a_reader = open('data.csv', 'rU')
a_csv_reader = csv.reader(a_reader)
a_csv_reader.next()
for row in a_csv_reader:
if count != 0 and row[0] != '':
sum = sum + float(row[0])
count = count + 1
a_reader.close()
print 'Number of lines is:',count
print 'Sum is:',sum
return listStation
This produces the results below
['1', '476050', '7709929']
['2', '473971', '7707713']
['3', '465676', '7691097']
['4', '515612', '7702192']
['5', '516655', '7704405']
['6', '519788', '7713255']
['7', '538466', '7683341']
Number of lines is: 8
Sum is: 28.0
Ok now what I want to do is to split out the value of the ID, Easting and Northing and append them to a list to create one 2d list. Is it possible to do this? If so can you provide me the code?

rows = []
for row in a_csv_reader:
rows.append(row)
This will yield the following in rows:
[['1', '476050', '7709929']
['2', '473971', '7707713']
['3', '465676', '7691097']
['4', '515612', '7702192']
['5', '516655', '7704405']
['6', '519788', '7713255']
['7', '538466', '7683341']]

Try this:
import csv
# Read data.csv, add up the first column as floats and collect rows converted to ints.
# NOTE(review): indentation was lost in this listing; restore the nesting before running.
def run():
count = 0
sum = 0.0  # NOTE: this local name shadows the built-in sum() inside run()
listStation = []
with open('data.csv', 'rU') as a_reader:
a_csv_reader = csv.reader(a_reader)
for row in a_csv_reader:
# count is still 0 on the first row, so that row (presumably the header) is skipped.
if count != 0:
# Rows whose first column is empty are left out of the total.
if row[0] != '':
sum = sum + float(row[0])
listStation.append(map(int, row))
print 'row =', row
count = count + 1
print 'Number of lines is:',count
print 'Sum is:', sum
print listStation
if __name__ == '__main__':
run()

I don't have your data.csv file, to test with, but here's how I'd rewrite your code and make it produce the 2D list you want:
import csv
# First pass: print every row of the file as read by csv.reader.
with open('test_data.csv', 'rU') as a_reader:
a_csv_reader = csv.reader(a_reader)
for row in a_csv_reader:
print row
# Second pass: skip the first row, then total the first column and build the 2D list.
with open('test_data.csv', 'rU') as a_reader:
a_csv_reader = csv.reader(a_reader)
a_csv_reader.next()  # consume the first (header) row
listStation = []
count = 0
total = 0.0
for row in a_csv_reader:
# NOTE(review): the header was already consumed by next() above, so the
# `count != 0` test appears to also leave the first data row out of the
# total (consistent with 27.0 rather than 28.0 for IDs 1-7) - confirm intent.
if count != 0 and row[0] != '':
total += float(row[0])
count += 1
listStation.append(map(int, row))
print 'Number of lines is:', count
print 'Sum is:', total
print 'listStation:', listStation
Output:
['ID', 'Easting', 'Northing']
['1', '476050', '7709929']
['2', '473971', '7707713']
['3', '465676', '7691097']
['4', '515612', '7702192']
['5', '516655', '7704405']
['6', '519788', '7713255']
['7', '538466', '7683341']
Number of lines is: 7
Sum is: 27.0
listStation: [[1, 476050, 7709929], [2, 473971, 7707713], [3, 465676, 7691097],
[4, 515612, 7702192], [5, 516655, 7704405], [6, 519788, 7713255],
[7, 538466, 7683341]]
Note, I changed the variable you named sum to total to prevent a conflict with the built-in sum() function.

The following might work (depends on the data though - do blank first columns/otherwise invalid numbers exist in the columns etc...):
from itertools import islice
import csv
with open('data.csv') as fin:
csvin = islice(csv.reader(fin), 1, None) # skip header
rows = [map(int, row) for row in csvin]
print 'Rows are:'
print rows
print 'Number of lines is:', len(stuff)
print 'Sum is:', sum(row[0] for row in stuff)

I suppose I would write your code with a DictReader and dict.setdefault:
import csv
data={}
with open('/tmp/sta.txt','r') as fin:
reader=csv.DictReader(fin)
for row in reader:
for k,v in row.items():
data.setdefault(k,[]).append(float(v))
print data
print 'Sum is:',sum(data['Station ID'])
print 'Number of lines is:',len(data['Station ID'])+1
Prints:
{'Station ID': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
'Easting': [476050.0, 473971.0, 465676.0, 515612.0, 516655.0, 519788.0, 538466.0],
'Northing': [7709929.0, 7707713.0, 7691097.0, 7702192.0, 7704405.0, 7713255.0, 7683341.0]}
Sum is: 28.0
Number of lines is: 8

Related

I'm trying to print output to another file

This is my code:
data = [['a', 'b', 'c'],['1', '2', '3']]
col_width = max(len(word) for row in data for word in row) + 2
for row in data:
print("".join(word.ljust(col_width) for word in row))
this is what I tried:
import sys
data = [['a', 'b', 'c'],['1', '2', '3']]
col_width = max(len(word) for row in data for word in row) + 2
for row in data:
print("".join(word.ljust(col_width) for word in row))
original_stdout = sys.stdout
with open('filename.txt', 'w') as f:
sys.stdout = f
print("".join(word.ljust(col_width) for word in row))
sys.stdout = original_stdout
this is the output I want in the file:
a b c
1 2 3
this is the output I get when I run my code:
1 2 3

Python : I am not able to access individual elements inside sublist. The entire sublist is displayed as single element

My Code :
import ast
with open('input.txt', 'r') as file :
filedata = file.read()
filedata = filedata.replace('|', ',')
out = []
buff = []
for c in filedata :
if c == '\n':
out.append(''.join(buff))
buff = []
else:
buff.append(c)
else:
if buff:
out.append(''.join(buff))
list = [[i] for i in out]
print(list)
Input :
10|1|SELL|toaster_1|10.00|20 12|8|BID|toaster_1|7.50
13|5|BID|toaster_1|12.50 15|8|SELL|tv_1|250.00|20 16
17|8|BID|toaster_1|20.00 18|1|BID|tv_1|150.00 19|3|BID|tv_1|200.00
20 21|3|BID|tv_1|300.00
Expected Output
[["10","1","SELL","toaster_1","10.00","20"],
["12","8","BID","toaster_1","7.50"],
["13","5","BID","toaster_1","12.50"],
["15","8","SELL","tv_1","250.00","20"], ["16"],
["17","8","BID","toaster_1","20.00"],
["18","1","BID","tv_1","150.00"], ["19","3","BID","tv_1","200.00"],
["20"], ["21","3","BID","tv_1","300.00"]]
The Output I am getting:
[['10,1,SELL,toaster_1,10.00,20'],
['12,8,BID,toaster_1,7.50'], ['13,5,BID,toaster_1,12.50'],
['15,8,SELL,tv_1,250.00,20'], ['16'], ['17,8,BID,toaster_1,20.00'],
['18,1,BID,tv_1,150.00'], ['19,3,BID,tv_1,200.00'], ['20'],
['21,3,BID,tv_1,300.00']] [Finished in 0.1s]
I want to access individual elements within a sublist, e.g. SELL or
toaster_1, but I am not able to access them. Can someone advise, please?
Use:
# filedata = file.read()
filedata = """10|1|SELL|toaster_1|10.00|20 12|8|BID|toaster_1|7.50
13|5|BID|toaster_1|12.50 15|8|SELL|tv_1|250.00|20 16
17|8|BID|toaster_1|20.00 18|1|BID|tv_1|150.00 19|3|BID|tv_1|200.00
20 21|3|BID|tv_1|300.00 """
result = []
for i in filedata.split(): #split by space
result.append(i.split("|")) #split by `|` and append to result
print(result)
Or a list comprehension
Ex:
result = [i.split("|") for i in filedata.split()]
Output:
[['10', '1', 'SELL', 'toaster_1', '10.00', '20'],
['12', '8', 'BID', 'toaster_1', '7.50'],
['13', '5', 'BID', 'toaster_1', '12.50'],
['15', '8', 'SELL', 'tv_1', '250.00', '20'],
['16'],
['17', '8', 'BID', 'toaster_1', '20.00'],
['18', '1', 'BID', 'tv_1', '150.00'],
['19', '3', 'BID', 'tv_1', '200.00'],
['20'],
['21', '3', 'BID', 'tv_1', '300.00']]
Well your code never handles splitting the line into comma separated values. You just read the line character by character, join all those characters together into a string, and append it to the out list.
The following code should work (I minimally changed your own code. I would instead use a more clean solution like the one by Rakesh):
import ast
# Read the whole file and normalise the '|' field separator to ','.
with open('input.txt', 'r') as file :
    filedata = file.read()
filedata = filedata.replace('|', ',')
out = []
buff = []
for c in filedata :
    if c == '\n':
        # End of a record: split it into fields and keep them together as
        # one sublist, so individual fields can be indexed (out[0][2] etc.).
        out.append(''.join(buff).split(","))
        buff = []
    else:
        buff.append(c)
else:
    # Flush the final record when the file does not end with a newline.
    if buff:
        out.append(''.join(buff).split(","))
print(out)
By the way, it is recommended not to use list as a variable name.

Split a string which is inside a list of a list in two elements which stay in the same list

This is a chain of number inside a text file which I import and want to convert into a specific list.
3 04,24
4 04,75
4 05,11
4 05,47
4 05,78
4 06,80
3 07,25
3 07,92
3 08,23
2 09,76
Actually with my code I reach this point :
[['3 04,24'], ['4 04,75'], ['4 05,11'], ['4 05,47'], ['4 05,78'], ['4 06,80'], ['3 07,25'], ['3 07,92'], ['3 08,23'], ['2 09,76']]
But I want to split each element inside the sublists in two, to get something like this:
[['3','04,24'], ['4','04,75']] etc...
But after much research I can't find the solution. Also, if you could tell me how to convert these elements from string to int, that would be very helpful!
Here's my code :
with open("myfile.txt") as f:
mylist = [line.rstrip('\n') for line in f]
mylist = [mylist[x:x+1] for x in range(0, len(mylist), 1)]
print(mylist)
Thanks.
This is one solution using csv module from the standard library:
import csv
with open('myfile.txt', 'r') as f:
reader = csv.reader(f, delimiter=' ')
res = list(reader)
Example with your data:-
from io import StringIO
import csv
mystr = StringIO("""3 04,24
4 04,75
4 05,11
4 05,47
4 05,78
4 06,80
3 07,25
3 07,92
3 08,23
2 09,76""")
with mystr as f:
reader = csv.reader(f, delimiter=' ')
res = list(reader)
print(res)
# [['3', '04,24'],
# ['4', '04,75'],
# ['4', '05,11'],
# ['4', '05,47'],
# ['4', '05,78'],
# ['4', '06,80'],
# ['3', '07,25'],
# ['3', '07,92'],
# ['3', '08,23'],
# ['2', '09,76']]
Or if you need to convert data to numeric:
with mystr as f:
reader = csv.reader(f, delimiter=' ')
res = [[int(i), float(j.replace(',', '.'))] for i, j in reader]
print(res)
[[3, 4.24],
[4, 4.75],
[4, 5.11],
...
Use a list-comprehension:
>>> lst = [['3 04.24'], ['4 04.75'], ['4 05.11'], ['4 05.47'], ['4 05.78'], ['4 06.80'], ['3 07.25'], ['3 07.92'], ['3 08.23'], ['2 09.76']]
>>> [x[0].split() for x in lst]
Outputs:
[['3', '04.24'],
['4', '04.75'],
['4', '05.11'],
['4', '05.47'],
['4', '05.78'],
['4', '06.80'],
['3', '07.25'],
['3', '07.92'],
['3', '08.23'],
['2', '09.76']]
To convert string into integer:
[[int(i) if not '.' in i else float(i) for i in x[0].split()] for x in lst]
Use the str.split() method:
with open("myfile.txt") as f:
mylist = [line.rstrip('\n') for line in f]
my_structured_list = [line.split(" ") for line in mylist]
print(my_structured_list)
For the second part of your question about converting the elements to int, you can use str.split() again and convert the resulting elements to int:
with open("myfile.txt") as f:
mylist = [line.rstrip('\n') for line in f]
my_structured_list = [line.split(" ") for line in mylist]
my_structured_int_list = []
for line_tuple in my_structured_list:
input_first_element = line_tuple[0]
input_second_element, input_third_element = line_tuple[1].split(",")
output_first_half = int(input_first_element)
output_second_half = int(input_second_element), int(input_third_element)
my_structured_int_list.append((output_first_half, output_second_half))
print(my_structured_int_list)
simple solution is as follows
with open(file,'r') as f:
print([each.split() for each in f])

How to split file into smaller by first number in second column

So my data looks like:
1 3456542 5 may 2014
2 1245678 4 may 2014
3 4256876 2 may 2014
4 5643156 6 may 2014
.....
The goal is to sort it by the 2nd column then separate the rows based on the first number in the 2nd column (i.e. 3456542 goes to subs_3.txt, 1245678 goes to subs_1.txt...). The output is totally wrong and gives me 6 files with what appears to be random rows in them. Any suggestions?
import csv
from operator import itemgetter
file_lines = []
with open("subs.txt", "r") as csv_file:
reader = csv.reader(csv_file, delimiter=" ")
for row in reader:
file_lines.append(row)
file_lines.sort(key=itemgetter(1))
with open("sorted_subs.txt", "w") as csv_file:
writer = csv.writer(csv_file, delimiter=" ")
for row in file_lines:
writer.writerow(row)
for row in file_lines:
file_num = row[1[1]
with open("file_{0}.txt".format(file_num), "w") as f:
writer = csv.writer(f, delimiter=" ")
writer.writerow(row)
You could use itertools.groupby to group the lines that go to same file together and then just loop over the groups in order to write the files:
from itertools import groupby
for k, g in groupby(file_lines, key=lambda x: x[1][0]):
with open("file_{0}.txt".format(k), "w") as f:
csv.writer(f, delimiter=" ").writerows(g)
Update: groupby will group the lines based on the first number in second column. It will return the key used for grouping and iterator containing the grouped items. Since file_lines is already sorted we know that all items belonging to same group will be returned within one group. Here's a short example how it works, note that test data is different than in original question in order to demonstrate grouping:
from itertools import groupby
lst = [
['2', '1245678', '', '4', 'may', '2014'],
['1', '3456542', '', '5', 'may', '2014'],
['3', '3256876', '', '2', 'may', '2014'],
['4', '5643156', '', '6', 'may', '2014']
]
for k, g in groupby(lst, key=lambda x: x[1][0]):
print('key: {0}, items: {1}'.format(k, list(g)))
Output:
key: 1, items: [['2', '1245678', '', '4', 'may', '2014']]
key: 3, items: [['1', '3456542', '', '5', 'may', '2014'], ['3', '3256876', '', '2', 'may', '2014']]
key: 5, items: [['4', '5643156', '', '6', 'may', '2014']]

When calling a recursive function to order values, it misses one. How do I fix this?

I have a recursive function that reads a list of scout records from a file and adds them, in order of their IDs, to a list box. The function is called with addScouts(1). The function is below:
def addScouts(self,I):
i = I
with open(fileName,"r") as f:
lines = f.readlines()
for line in lines:
if str(line.split(",")[3])[:-1] == str(i):
self.scoutList.insert(END,line[:-1])
i += 1
return self.addScouts(i)
return
My issue is that my file ID's are ordered 1,2,4,5 as at some point I removed the scout with ID of 3. However, when I run the function to re-order the scouts in the list box (the function above), it only lists the scouts up to and including ID 3. This is because when i = 3, none of the items in the file are equal to 3, so the function reaches the end and returns before it gets a chance to check the remaining records.
File contents:
Kris,Rice,17,1
Olly,Fallows,17,2
Olivia,Bird,17,4
Louis,Martin,18,5
Any ideas on how to fix this?
Just sort on the last column:
sorted(f,key=lambda x: int(x.split(",")[-1]))
You can use bisect to find where to put the new data to keep the data ordered after it is sorted once:
from bisect import bisect
import csv
with open("foo.txt") as f:
r = list(csv.reader(f))
keys = [int(row[-1]) for row in r]
new = ["foo","bar","12","3"]
ind = bisect(keys, int(new[-1]))
r.insert(ind,new)
print(r)
Output:
[['Kris', 'Rice', '17', '1'], ['Olly', 'Fallows', '17', '2'], ['foo', 'bar', '12', '3'], ['Olivia', 'Bird', '17', '4'], ['Louis', 'Martin', '18', '5']]
A simpler way is to check for the first row that has a higher id, if none are higher just append to the end:
import csv
with open("foo.txt") as f:
r = list(csv.reader(f))
new = ["foo","bar","12","3"]
key = int(new[-1])
ind = None
for i, row in enumerate(r):
if int(row[-1]) >= key:
ind = i
break
r.insert(ind, new) if ind is not None else r.append(new)
print(r)
Output:
[['Kris', 'Rice', '17', '1'], ['Olly', 'Fallows', '17', '2'], ['foo', 'bar', '12', '3'], ['Olivia', 'Bird', '17', '4'], ['Louis', 'Martin', '18', '5']]
To always keep that file in order when adding a new value we just need to write to a temp file, writing the line in the correct place and then replace the original with the updated file:
import csv
from tempfile import NamedTemporaryFile
from shutil import move
with open("foo.csv") as f, NamedTemporaryFile(dir=".", delete=False) as temp:
r = csv.reader(f)
wr = csv.writer(temp)
new = ["foo", "bar", "12", "3"]
key, ind = int(new[-1]), None
for i, row in enumerate(r):
if int(row[-1]) >= key:
wr.writerow(new)
wr.writerow(row)
wr.writerows(r)
break
wr.writerow(row)
else:
wr.writerow(new)
move(temp.name, "foo.csv")
foo.csv after will have the data in order:
Kris,Rice,17,1
Olly,Fallows,17,2
foo,bar,12,3
Olivia,Bird,17,4
Louis,Martin,18,5
You can check if your list has the same length as your file and if not, you run addScouts again, and if true, you end. Like this:
def addScouts(self,I):
    """Insert the scout records from ``fileName`` into the list box in ID order.

    Looks for the record whose fourth comma-separated field equals ``I``,
    inserts it at the end of ``self.scoutList`` and recurses with the next
    ID. When no record has the current ID, the search continues with the
    next ID for as long as the list box holds fewer entries than the file
    has lines, so gaps in the ID sequence (e.g. 1, 2, 4, 5) are skipped.

    NOTE(review): assumes ``self.scoutList`` is a tkinter Listbox that is
    empty before the first call and that every line of the file is a valid
    record - otherwise the recursion may not terminate; confirm with callers.
    """
    i = I
    with open(fileName,"r") as f:
        lines = f.readlines()
    for line in lines:
        # strip()/rstrip() rather than [:-1]: the last line of the file may
        # have no trailing newline, and slicing would cut off its ID.
        if line.split(",")[3].strip() == str(i):
            self.scoutList.insert(END, line.rstrip("\n"))
            return self.addScouts(i + 1)
    # No record carries this ID: move on to the next one while some records
    # are still missing from the list box.
    if self.scoutList.size() < len(lines):
        return self.addScouts(i + 1)
    return

Categories

Resources