Python csv list index out of range - python

I'm trying to write values from JSON to CSV so that each value (pH) ends up in a different row, but I keep getting:
Traceback (most recent call last):
File "C:/Users/User/PycharmProjects/Meslosana/getValues.py", line 22, in
lines[i][1] = pH
IndexError: list index out of range
I will also add different values in the same rows but different columns.
My csv file looks like this, it does not have empty lines.
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
and each time I run my code it creates an empty line at the bottom.
Here is my code
import json
import csv
file = 'LaukiGeojson/Zemdegas.geojson'
cord = []
with open(file) as f:
data = json.load(f)
i = 1
for feature in data['features']:
cord = feature['geometry']['coordinates'][0]
pH = feature['properties']['pH']
print(pH)
print(feature)
print(data)
# saglabaa
r = csv.reader(open('LaukiAnalizes/Zemdegas.csv'))
lines = list(r)
print(len(lines))
#lines[i][0] = 1
lines[i][1] = pH
#lines[i][2] = 1
#lines[i][3] = 1
#lines[i][4] = 1
i += 1
writer = csv.writer(open('LaukiAnalizes/Zemdegas.csv', 'w', newline=''))
writer.writerows(lines)
# saglabaa
r = csv.reader(open('LaukiAnalizes/Zemdegas.csv'))
lines = list(r)
lines[0][0] = 'ID'
lines[0][1] = 'pH'
lines[0][2] = 'P'
lines[0][3] = 'K'
lines[0][4] = 'Mg'
writer = csv.writer(open('LaukiAnalizes/Zemdegas.csv', 'w', newline=''))
writer.writerows(lines)
open('LaukiAnalizes/Zemdegas.csv').close()

I would avoid code that blindly assumes that a CSV file has a given number of lines. This is better:
Check if the current line even exists in the CSV, if it does update the pH value in-place, otherwise append a new line.
import json
import csv
GEOJSON_FILE = 'LaukiGeojson/Zemdegas.geojson'
CSV_FILE = 'LaukiAnalizes/Zemdegas.csv'
with open(GEOJSON_FILE, encoding='utf8') as f:
geo_data = json.load(f)
with open(CSV_FILE, encoding='utf8', newline='') as f:
reader = csv.reader(f, delimiter=',')
lines = list(reader)
for i, feature in enumerate(geo_data['features']):
pH = feature['properties']['pH']
if i < len(lines):
lines[i][1] = pH
else:
lines.append([1, pH, 1, 1, 1])
with open(CSV_FILE, 'w', encoding='utf8', newline='') as f:
writer = csv.writer(f, delimiter=',')
writer.writerows(lines)
The number of times you are opening and closing the CSV file seemed rather confusing to me. Read it once, write it once. Also, use a context manager (with open(...) as ...) for all your file interactions.

Python is 0-indexed. This means the index of the first line is 0, and the index of the final element is length-1. This is where you get the error: your CSV has 9 rows (indices 0-8), so once i reaches 9 the code tries to access a row that doesn't exist. Fix this by setting i=0 at the start instead of i=1.

Related

removing a column from a CSV file in python

I am attempting to remove a duplicate column from a CSV file in Python 3. I am able to get the code to run without error; however, when I attempt to print the number of rows processed, I receive a blank response in Codio.
The response to running comes back as 1001codio#sigma-portal:~/workspace$
Can anyone point out how I can fix this? Ideally I would like it to print how many columns were deleted.
import csv
import re
data = []
import csv
input_file = 'customerdata.csv'
output_file = 'outputcustomerdata.csv'
cols_to_remove = [11] # Column indexes to be removed (starts at 0)
cols_to_remove = sorted(cols_to_remove, reverse=True) # Reverse so we remove from the end first
row_count = 0 # Current amount of rows processed
with open(input_file, "r") as source:
reader = csv.reader(source)
with open(output_file, "wt") as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count), end='') # Print rows processed
for col_index in cols_to_remove:
del row[col_index]
writer.writerow(row)
Try pandas
import pandas as pd

# Load the CSV; by default the first row is used as the column names.
df = pd.read_csv('customerdata.csv')
# Drop the unwanted column by name ('column_name' is a placeholder).
df = df.drop('column_name', axis=1)
# save your file
# index=False keeps pandas from writing its row index as an extra first
# column, which would otherwise add a column while we are removing one.
df.to_csv('outputcustomerdata.csv', encoding='utf-8', index=False)
print(df)

Getting List Index out of range for Reading a CSV

I'm writing code to read a CSV file, find a value from the list, and then update it with "ABCD".
I have tried reading the CSV using codecs, but no luck.
import csv
import codecs
Testnumber = ['xxxxxxxxx','yyyyyyy']
i =0
is_in_file = "BLACK LISTED NUMBER NOT FOUND IN THIS BASE"
with open('D1_SD_ROI.csv', 'rb') as csvfile:
my_content = csv.reader(csvfile, delimiter=' ')
for row in my_content:
for A in row:
i=i+1 #couting number of loops eventually row number
for B in Testnumber:
if A == B:
print B
print i
r = csv.reader(csvfile)
lines = list(r)
lines[i-1][0] = 'ABCD'
writer = csv.writer(open('D1_SD_ROI.csv', 'w'))
writer.writerows(lines)
print ("D1_SD_ROI")
print is_in_file
I'm getting this error:
Traceback (most recent call last):
File "sun.py", line 53, in
lines[i-1][0] = 'ABCD'
IndexError: list index out of range

Create single CSV from two CSV files with X and Y values in python

I have two CSV files; one containing X(longitude) and the other Y(latitude) values (they are 'float' data type)
I am trying to create a single CSV with all possible combinations (e.g. X1,Y1; X1, Y2; X1,Y3; X2,Y1; X2,Y2; X2,Y3... etc)
I have written the following, which partly works. However, the CSV file created has blank lines in between values, and the values are also stored with their list brackets, like this: ['20.7599'] ['135.9028']. What I need is 20.7599, 135.9028
import csv
inLatCSV = r"C:\data\Lat.csv"
inLongCSV = r"C:\data\Long.csv"
outCSV = r"C:\data\LatLong.csv"
with open(inLatCSV, 'r') as f:
reader = csv.reader(f)
list_Lat = list(reader)
with open(inLongCSV, 'r') as f:
reader = csv.reader(f)
list_Long = list(reader)
with open(outCSV, 'w') as myfile:
for y in list_Lat:
for x in list_Long:
combVal = (y,x)
#print (combVal)
wr = csv.writer(myfile)
wr.writerow(combVal)
Adding an argument to the open function made the difference:
# newline="" stops the "\r\n" row terminator written by the csv module from
# being translated a second time, which is what produced the blank lines
# between rows on Windows.
with open(my_csv, 'w', newline="") as myfile:
    # Every y and x is itself a row (a list) produced by csv.reader, so
    # concatenate them into one flat row; nesting them as [y, x] would write
    # the list representation (e.g. ['20.7599']) into the file.
    combinations = [y + x for y in list_Lat for x in list_Long]
    wr = csv.writer(myfile)
    wr.writerows(combinations)
Any time you're doing something with CSV files, pandas is a great tool:
import pandas as pd

# Raw strings keep the Windows backslashes literal; in a normal string
# "\d" and "\L" are invalid escape sequences that newer Python versions
# warn about.
lats = pd.read_csv(r"C:\data\Lat.csv", header=None)
lons = pd.read_csv(r"C:\data\Long.csv", header=None)
# Give every row in both frames the same constant key so that merging on it
# pairs each latitude row with each longitude row (a cartesian product).
lats['_tmp'] = 1
lons['_tmp'] = 1
df = pd.merge(lats, lons, on='_tmp').drop('_tmp', axis=1)
# Write only the value pairs: no header row and no index column.
df.to_csv(r'C:\data\LatLong.csv', header=False, index=False)
We create a dataframe for each file, and merge them on a temporary column, which produces the cartesian product. https://pandas.pydata.org/pandas-docs/version/0.20/merging.html

How to get data with python in certain rows and columns

I have data, example :
2017/06/07 10:42:35,THREAT,url,192.168.1.100,52.25.xxx.xxx,Rule-VWIRE-03,13423523,,web-browsing,80,tcp,block-url
2017/06/07 10:43:35,THREAT,url,192.168.1.101,52.25.xxx.xxx,Rule-VWIRE-03,13423047,,web-browsing,80,tcp,allow
2017/06/07 10:43:36,THREAT,end,192.168.1.100,52.25.xxx.xxx,Rule-VWIRE-03,13423047,,web-browsing,80,tcp,block-url
2017/06/07 10:44:09,TRAFFIC,end,192.168.1.101,52.25.xxx.xxx,Rule-VWIRE-03,13423111,,web-browsing,80,tcp,allow
2017/06/07 10:44:09,TRAFFIC,end,192.168.1.103,52.25.xxx.xxx,Rule-VWIRE-03,13423111,,web-browsing,80,tcp,block-url
How can I parse this so that I only get columns 4, 5, 7, and 12 from every row?
This is my code :
import csv
file=open('filename.log', 'r')
f=open('fileoutput', 'w')
lines = file.readlines()
for line in lines:
result.append(line.split(' ')[4,5,7,12])
f.write (line)
f.close()
file.close()
The right way with csv.reader and csv.writer objects:
import csv
with open('filename.log', 'r') as fr, open('filoutput.csv', 'w', newline='') as fw:
reader = csv.reader(fr)
writer = csv.writer(fw)
for l in reader:
writer.writerow(v for k,v in enumerate(l, 1) if k in (4,5,7,12))
filoutput.csv contents:
192.168.1.100,52.25.xxx.xxx,13423523,block-url
192.168.1.101,52.25.xxx.xxx,13423047,allow
192.168.1.100,52.25.xxx.xxx,13423047,block-url
192.168.1.101,52.25.xxx.xxx,13423111,allow
192.168.1.103,52.25.xxx.xxx,13423111,block-url
This is wrong:
line.split(' ')[4,5,7,12]
You want this:
# The log is comma-separated, so split on ',' (splitting on ' ' would only
# separate the date from the rest of the line).
fields = line.split(',')
# Columns 4, 5, 7 and 12 counted from 1 are list indices 3, 4, 6 and 11.
fields[3], fields[4], fields[6], fields[11]
a solution using pandas
import pandas as pd
df = pd.read_csv('filename.log', sep=',', header=None, index_col=False)
df[[3, 4, 6, 11]].to_csv('fileoutput.csv', header=False, index=False)
Note the use of [3, 4, 6, 11] instead of [4, 5, 7, 12] to account for 0-indexing in the dataframe's columns.
Content of fileoutput.csv:
192.168.1.100,52.25.xxx.xxx,13423523,block-url
192.168.1.101,52.25.xxx.xxx,13423047,allow
192.168.1.100,52.25.xxx.xxx,13423047,block-url
192.168.1.101,52.25.xxx.xxx,13423111,allow
192.168.1.103,52.25.xxx.xxx,13423111,block-url
You're on the right path, but your syntax is off. Here's an example using csv module:
import csv

# Context managers close both files even if a row fails to process.
# newline='' is what the csv module documentation recommends for files
# handed to csv.reader / csv.writer; it prevents the extra blank line
# between rows when writing on Windows.
with open('filename.log', newline='') as log, \
        open('fileoutput', 'w', newline='') as log_write:
    csv_log = csv.reader(log, delimiter=',')
    csv_writer = csv.writer(log_write, delimiter=',')
    for line in csv_log:
        # output first 4 columns; slicing tolerates rows shorter than 4 fields
        csv_writer.writerow(line[:4])
Using a comprehension (here a generator expression), you could do something like this without necessarily using the csv module:
file=open('filename.log','r')
f=open('fileoutput', 'w')
lines = file.readlines()
for line in lines:
f.write(','.join(line.split(',')[i] for i in [3,4,6,11]))
f.close()
file.close()
Notice the indices are 3, 4, 6, 11 because list indices are zero-based.
output
cat fileoutput
192.168.1.100,52.25.xxx.xxx,13423523,block-url
192.168.1.101,52.25.xxx.xxx,13423047,allow
192.168.1.100,52.25.xxx.xxx,13423047,block-url
192.168.1.101,52.25.xxx.xxx,13423111,allow
192.168.1.103,52.25.xxx.xxx,13423111,block-url

Python - merging of csv files with one axis in common

I need to merge two CSV files, A.csv and B.csv, that have one axis in common. Extracts of each:
9.358,3.0
9.388,2.0
and
8.551,2.0
8.638,2.0
I want the final file C.csv to have the following pattern:
8.551,0.0,2.0
8.638,0.0,2.0
9.358,3.0,0.0
9.388,2.0,0.0
How do you suggest doing it? Should I go for a for loop?
Just read from each file, writing out to the output file and adding in the 'missing' column:
import csv
with open('c.csv', 'wb') as outcsv:
# Python 3: use open('c.csv', 'w', newline='') instead
writer = csv.writer(outcsv)
# copy a.csv across, adding a 3rd column
with open('a.csv', 'rb') as incsv:
# Python 3: use open('a.csv', newline='') instead
reader = csv.reader(incsv)
writer.writerows(row + [0.0] for row in reader)
# copy b.csv across, inserting a 2nd column
with open('b.csv', 'rb') as incsv:
# Python 3: use open('b.csv', newline='') instead
reader = csv.reader(incsv)
writer.writerows(row[:1] + [0.0] + row[1:] for row in reader)
The writer.writerows() lines do all the work; a generator expression loops over the rows in each reader, either appending a column or inserting a column in the middle.
This works with whatever size of input CSVs you have, as only some read and write buffers are held in memory. Rows are processed in iterative fashion without ever needing to hold all of the input or output files in memory.
import numpy as np
dat1 = np.genfromtxt('dat1.txt', delimiter=',')
dat2 = np.genfromtxt('dat2.txt', delimiter=',')
dat1 = np.insert(dat1, 2, 0, axis=1)
dat2 = np.insert(dat2, 1, 0, axis=1)
dat = np.vstack((dat1, dat2))
np.savetxt('dat.txt', dat, delimiter=',', fmt='%.3f')
Here's a simple solution using a dictionary, which will work for any number of files:
from __future__ import print_function
def process(*filenames):
    """Merge two-column CSV files on their first column and print the result.

    Each input line is expected to look like ``key,value``. One output line
    is printed per distinct key, holding the key followed by one value per
    input file (in argument order); ``0.0`` is printed where a file has no
    entry for that key. Keys are kept as text, so output lines are ordered
    by string comparison of the keys.

    Args:
        *filenames: Paths of the CSV files to merge.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            comma-separated fields.
    """
    lines = {}
    for index, filename in enumerate(filenames):
        # The 'U' mode flag was removed in Python 3.11; plain text mode
        # already handles universal newlines in Python 3.
        with open(filename) as f:
            for line in f:
                # Strip both terminator characters so the result is the same
                # whether or not the newline was already translated.
                line = line.rstrip('\r\n')
                if not line:
                    # Skip blank lines (e.g. a trailing empty line) instead of
                    # failing on the two-field unpacking below.
                    continue
                v1, v2 = line.split(',')
                lines.setdefault(v1, {})[index] = v2
    file_count = len(filenames)
    for key in sorted(lines):
        columns = lines[key]
        row = [key] + [str(columns.get(i, 0.0)) for i in range(file_count)]
        print(','.join(row))
process('A.csv','B.csv')
prints
8.551,0.0,2.0
8.638,0.0,2.0
9.358,3.0,0.0
9.388,2.0,0.0

Categories

Resources