Unable to access the values from the .csv file using Python3? - python

Using the following Python3 code, I am able to access the first column values but unable to access subsequent columns. The error is:
IndexError: list index out of range
# Read the whole file into a list of lines (each still ends with its newline).
with open('smallSample.txt', 'r') as file:
listOfLines = file.readlines()
# Echo every line without surrounding whitespace.
for line in listOfLines:
print(line.strip())
header = listOfLines[0] #with all the labels
print(header.strip().split(','))
# Walk over everything after the header line.
for row in listOfLines[1:]:
values = row.strip().split(',')
print(values[0]) #Able to access 1st row elements
# NOTE(review): a line that contains no comma (for example a blank line)
# splits into a one-element list, so index 1 raises IndexError -- confirm
# whether the input file contains such a line.
print(values[1]) #ERROR Unable to access the Second Column Values
'''IndexError: list index out of range'''
The smallSample.txt data stored is:
Date,SP500,Dividend,Earnings,Consumer Price Index,Long Interest Rate,Real Price,Real Dividend,Real Earnings,PE10
1/1/2016,1918.6,43.55,86.5,236.92,2.09,2023.23,45.93,91.22,24.21
2/1/2016,1904.42,43.72,86.47,237.11,1.78,2006.62,46.06,91.11,24
3/1/2016,2021.95,43.88,86.44,238.13,1.89,2121.32,46.04,90.69,25.37

Actually, your values variable does not accumulate the rows: it is re-assigned on every iteration of the for loop, so it only ever holds the current row. Collect the rows in a list instead:
# Read the CSV-style text file, keeping only non-empty lines.  A blank line
# (typically the trailing newline at the end of the file) would otherwise
# split into [''] and make any access to a second column raise IndexError.
with open('data.txt', 'r') as file:
    listOfLines = [line.strip() for line in file if line.strip()]

for line in listOfLines:
    print(line)

header = listOfLines[0]  # with all the labels
print(header.split(','))

values = []  # <= look at here
for row in listOfLines[1:]:
    values.append(row.split(','))  # <= look at here

print(values[0])  # <= outside for loop: the first data row
print(values[1])  # the second data row

# Compute the mean of column 1 and the maximum of column 5 over the slice of
# rows 6..17 of SP500.txt.
# NOTE(review): presumably column 1 is the SP500 value and column 5 the long
# interest rate -- confirm against the file's header line.
with open('SP500.txt', 'r') as file:
    lines = file.readlines()

sp_values = []
listOfData = []
for line in lines[6:18]:
    values = line.strip().split(',')
    sp_values.append(float(values[1]))
    listOfData.append(float(values[5]))

if not listOfData:
    # The slice is empty when the file has fewer than seven lines; fail with
    # a clear message instead of an IndexError / ZeroDivisionError below.
    raise ValueError('SP500.txt has no rows in the range 6..17')

totalSP = sum(sp_values)
# Divide by the number of rows actually read rather than a hard-coded 12, so
# the mean stays correct if the file holds fewer rows than expected.
mean_SP = totalSP / len(sp_values)
max_interest = max(listOfData)

Related

Python csv list index out of range

I'm trying to write from json to csv, so each value(pH) is in different row, but I keep getting
Traceback (most recent call last):
File "C:/Users/User/PycharmProjects/Meslosana/getValues.py", line 22, in
lines[i][1] = pH
IndexError: list index out of range
I will also add different values in the same rows but different columns.
My csv file looks like this, it does not have empty lines.
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
0,0,0,0,0
and each time I run my code it creates an empty line at the bottom.
Here is my code
import json
import csv
file = 'LaukiGeojson/Zemdegas.geojson'
cord = []
# Load the whole GeoJSON document.
with open(file) as f:
data = json.load(f)
# Row index into the CSV; it starts at 1 and only grows, so the loop below
# never touches row 0.
i = 1
for feature in data['features']:
cord = feature['geometry']['coordinates'][0]
pH = feature['properties']['pH']
print(pH)
print(feature)
print(data)
# save
# The file object passed to csv.reader is not bound to a name, so it is never
# explicitly closed.
r = csv.reader(open('LaukiAnalizes/Zemdegas.csv'))
lines = list(r)
print(len(lines))
#lines[i][0] = 1
# NOTE(review): raises IndexError as soon as i reaches len(lines), i.e. when
# there are more features than CSV rows after row 0 -- this matches the
# traceback quoted above.
lines[i][1] = pH
#lines[i][2] = 1
#lines[i][3] = 1
#lines[i][4] = 1
i += 1
# Rewrites the whole CSV; this file object is not explicitly closed either.
writer = csv.writer(open('LaukiAnalizes/Zemdegas.csv', 'w', newline=''))
writer.writerows(lines)
# save
r = csv.reader(open('LaukiAnalizes/Zemdegas.csv'))
lines = list(r)
# Overwrite the first row with the column labels.
lines[0][0] = 'ID'
lines[0][1] = 'pH'
lines[0][2] = 'P'
lines[0][3] = 'K'
lines[0][4] = 'Mg'
writer = csv.writer(open('LaukiAnalizes/Zemdegas.csv', 'w', newline=''))
writer.writerows(lines)
# Opens a fresh handle and closes it immediately; the handles opened above
# are not affected by this call.
open('LaukiAnalizes/Zemdegas.csv').close()
I would avoid code that blindly assumes that a CSV file has a given number of lines. This is better:
Check if the current line even exists in the CSV, if it does update the pH value in-place, otherwise append a new line.
import json
import csv

GEOJSON_FILE = 'LaukiGeojson/Zemdegas.geojson'
CSV_FILE = 'LaukiAnalizes/Zemdegas.csv'

# Read both inputs completely before changing anything.
with open(GEOJSON_FILE, encoding='utf8') as geo_handle:
    geo_data = json.load(geo_handle)

with open(CSV_FILE, encoding='utf8', newline='') as csv_in:
    lines = list(csv.reader(csv_in, delimiter=','))

# Feature number i maps to CSV row number i: overwrite the second column of
# an existing row, or add a fresh row once the CSV runs out of rows.
for i, feature in enumerate(geo_data['features']):
    pH = feature['properties']['pH']
    if i >= len(lines):
        lines.append([1, pH, 1, 1, 1])
        continue
    lines[i][1] = pH

# Write the updated table back in one go.
with open(CSV_FILE, 'w', encoding='utf8', newline='') as csv_out:
    csv.writer(csv_out, delimiter=',').writerows(lines)
The number of times you are opening and closing the CSV file seemed rather confusing to me. Read it once, write it once. Also, use a context manager (with open(...) as ...) for all your file interactions.
Python is 0-indexed. This means the index of the first line is 0, and the index of the final element is length-1. This is where you get the error, as when your i=9, this will be attempting to access a row that doesn't exist. Fix this by setting i=0 at the start instead of i=1.

How to open a csv file and go through onto next line in a loop

I need to find the profit/loss from two different lines in a csv file. I can't find a way to hold a variable while on one row and then, once I move on to another line, still have the same variable available to make a comparison.
I have already tried the next() function but have had no luck.
import csv
# Ask the user which symbol to extract.
symbolCode = input("Please enter a symbol code: ")
with open("prices.csv", "r") as f:
reader = csv.reader(f, delimiter=",")
# The output file is named after the symbol code.
with open(symbolCode + ".csv", "w") as d:
writer = csv.writer(d)
for row in reader:
# item and item2 are assigned here but never read in this snippet.
item = 0
item2 = 0
# Copy every row whose second column equals the requested symbol code.
if symbolCode == row[1]:
print(row)
writer.writerow(row)
# d was opened by a with statement above.
d.close()
I expect the output to be a single number, obtained by subtracting the value on one row from the value on the other.
Are you looking for something like this?
import csv  # <-- needed for csv.reader / csv.writer

# Copy all rows for one symbol into "<symbol>.csv" and compare each matching
# row with the previous matching row.
symbolCode = input("Please enter a symbol code: ")
with open("prices.csv", "r") as f:
    reader = csv.reader(f, delimiter=",")
    # newline="" lets the csv module control the line endings itself
    with open(symbolCode + ".csv", "w", newline="") as d:
        writer = csv.writer(d)
        previous_row = None  # <--- initialize with special (empty/none) value
        for row in reader:
            item = 0
            item2 = 0
            if symbolCode == row[1]:
                print(row)
                writer.writerow(row)
                if previous_row is not None:  # <-- if we're not processing the very first row.
                    # csv fields are strings; convert them so the comparison
                    # is numeric rather than alphabetical
                    if float(previous_row[7]) < float(row[7]):  # <-- do your comparison with previous row
                        print("7th value is bigger now")  # <-- do something
                # Only rows of the requested symbol are remembered, so the
                # comparison above is always between two rows of that symbol.
                previous_row = row  # <-- store this row to be the previous row in the next loop iteration
Note that I've left out the d.close() line. It's not needed when you open a file in a with statement. Other than that, I only added lines to your example, and marked these lines with # <-- comments.

Split a row into multiple cells and keep the maximum value of second value for each gene

I am new to Python and I prepared a script that will modify the following csv file
accordingly:
1) Each row that contains multiple Gene entries separated by the /// such as:
C16orf52 /// LOC102725138 1.00551
should be transformed to:
C16orf52 1.00551
LOC102725138 1.00551
2) The same gene may have different ratio values
AASDHPPT 0.860705
AASDHPPT 0.983691
and we want to keep only the pair with the highest ratio value (delete the pair AASDHPPT 0.860705)
Here is the script I wrote but it does not assign the correct ratio values to the genes:
import csv
import pandas as pd
# Load every row of the CSV into a list of lists.
with open('2column.csv','rb') as f:
reader = csv.reader(f)
a = list(reader)
# gene and ratio are parallel lists: ratio[k] belongs to gene[k].
gene = []
ratio = []
for t in range(len(a)):
# A cell with '///' holds several names; each one gets the row's ratio.
if '///' in a[t][0]:
s = a[t][0].split('///')
gene.append(s[0])
gene.append(s[1])
ratio.append(a[t][1])
ratio.append(a[t][1])
else:
gene.append(a[t][0])
ratio.append(a[t][1])
# NOTE(review): t is the input row number, but gene grows by two entries for
# every '///' row, so gene[t] is not necessarily the entry just appended.
gene[t] = gene[t].strip()
# Second pass: keep one entry per gene name.
newgene = []
newratio = []
for i in range(len(gene)):
g = gene[i]
r = ratio[i]
if g not in newgene:
newgene.append(g)
for j in range(i+1,len(gene)):
if g==gene[j]:
# NOTE(review): the ratios come straight from csv.reader and are never
# converted to numbers, so this compares them as text -- confirm intent.
if ratio[j]>r:
r = ratio[j]
newratio.append(r)
for i in range(len(newgene)):
print newgene[i] + '\t' + newratio[i]
# Sanity check: newgene should not contain any duplicate name.
if len(newgene) > len(set(newgene)):
print 'missionfailed'
Thank you very much for any help or suggestion.
Try this:
import csv

# Load the input as plain lines without their line terminators.
with open('2column.csv') as f:
    lines = f.read().splitlines()

# Map every gene name to the ratio text whose numeric value is the largest
# seen so far for that name.
new_lines = {}
for line in lines:
    cols = line.split(',')
    for raw_name in cols[0].split('///'):
        name = raw_name.strip()
        if name not in new_lines or float(cols[1]) > float(new_lines[name]):
            new_lines[name] = cols[1]

# NOTE(review): binary mode 'wb' is the Python 2 convention for the csv
# module; on Python 3 this would need mode 'w' with newline='' -- confirm the
# target interpreter.
with open('clean_2column.csv', 'wb') as csvfile:
    out = csv.writer(csvfile, delimiter=' ',
                     quotechar='|', quoting=csv.QUOTE_MINIMAL)
    out.writerows([k, v] for k, v in new_lines.items())
First of all, if you're importing Pandas, know that you have I/O Tools to read CSV files.
So first, let's import it that way :
df = pd.read_csv('2column.csv')
Then, you can extract the indexes where you have your '///' pattern:
l = list(df[df['Gene Symbol'].str.contains('///')].index)
Then, you can create your new rows :
# Build one new row per gene name found in a '///'-separated cell, copying the
# ratio of the row it came from.  Names are stripped because the separator is
# surrounded by spaces in the data ("C16orf52 /// LOC102725138").
new_rows = [
    [sub.strip(), df['Ratio(ifna vs. ctrl)'][i]]
    for i in l
    for sub in df['Gene Symbol'][i].split('///')
]
# A single pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# ignore_index=True gives the added rows fresh labels after the existing ones,
# so they cannot share a label with one of the original rows that is dropped
# afterwards (assumes df still has the default 0..n-1 index from read_csv).
df = pd.concat([df, pd.DataFrame(new_rows, columns=df.columns)], ignore_index=True)
Then, drop the old ones :
df=df.drop(df.index[l])
Then, I'll do a little trick to remove your lowest duplicate values. First, I'll sort them by 'Ratio(ifna vs. ctrl)' in descending order, then I'll drop all the duplicates but the first one, which is the one with the highest ratio:
# sort_values replaces DataFrame.sort, which no longer exists in current pandas.
# After the descending sort the first occurrence of each gene is its highest ratio.
df = df.sort_values('Ratio(ifna vs. ctrl)', ascending=False).drop_duplicates('Gene Symbol', keep='first')
If you want to keep your sorting by Gene Symbol and reset indexes to have simpler ones, simply do :
# sort_values replaces DataFrame.sort, which no longer exists in current pandas.
df = df.sort_values('Gene Symbol').reset_index(drop=True)
If you want to re-export your modified data to your csv, do :
# index=False keeps the row labels out of the file; otherwise they are written
# as an extra unnamed first column, and the file would no longer have the
# two-column layout it was read with.
df.to_csv('2column.csv', index=False)
EDIT : I edited my answer to correct syntax errors, I've tested this solution with your csv and it worked perfectly :)
This should work.
It uses the dictionary suggestion of Peter.
import csv

with open('2column.csv', 'r') as f:
    rows = list(csv.reader(f))

# Map each gene name to the text of its highest ratio.
genes_ratio = {}
# rows[0] is the header, so start at the second row
for row in rows[1:]:
    if len(row) < 2:
        # skip blank or incomplete rows instead of failing on row[1]
        continue
    gene_ratio = row[1]
    # split() also covers names without '///': it then returns a one-element
    # list, so no separate branch is needed
    for gene in row[0].split('///'):
        # the separator is written with spaces around it, so strip them;
        # otherwise 'X ' and 'X' would be treated as different genes
        gene = gene.strip()
        # csv values are strings: compare them as numbers, not alphabetically
        if gene not in genes_ratio or float(genes_ratio[gene]) < float(gene_ratio):
            genes_ratio[gene] = gene_ratio

# loop over dictionary and print gene names and their ratio values
for key in genes_ratio:
    # %-formatting prints "name ratio" identically on Python 2 and Python 3
    print('%s %s' % (key, genes_ratio[key]))

python - list index out of range, working with CSV?

I have a CSV that looks something like this:
F02303521,"Smith,Andy",GHI,"Smith,Andy",GHI,,,
F04300621,"Parker,Helen",CERT,"Yu,Betty",IOUS,,,
I want to delete all the lines where the 2nd column equal the 4th column (ex. when Smith,Andy = Smith,Andy). I tried to do this in python by using " as the delimiter and splitting the columns into:
F02303521, Smith,Andy ,GHI, Smith,Andy ,GHI,,,
I tried this python code:
testCSV = 'test.csv'
deletionText = 'linestodelete.txt'
correct = 'correctone.csv'
i = 0
j = 0 #where i & j keep track of line number
# Pass 1: record the numbers of the lines whose two quoted fields are equal.
with open(deletionText,'w') as outfile:
with open(testCSV, 'r') as csv:
for line in csv:
i = i + 1 #on the first line, i will equal 1.
# NOTE(review): split('"') returns a single-element list for a line that
# contains no double quote, so index 1 raises IndexError on such a line --
# this matches the traceback quoted below.
PI = line.split('"')[1]
investigator = line.split('"')[3]
#if they equal each other, write that line number into the text file
as to be deleted.
if PI == investigator:
# NOTE(review): i is an int here, while write() on a text file expects a str.
outfile.write(i)
#From the TXT, create a list of line numbers you do not want to include in output
with open(deletionText, 'r') as txt:
lines_to_be_removed_list = []
# for each line number in the TXT
# remove the return character at the end of line
# and add the line number to list domains-to-be-removed list
for lineNum in txt:
lineNum = lineNum.rstrip()
lines_to_be_removed_list.append(lineNum)
# Pass 2: copy the lines that were not marked for deletion.
with open(correct, 'w') as outfile:
# NOTE(review): this re-opens deletionText, although the comments below talk
# about the csv -- presumably testCSV was intended; confirm.
with open(deletionText, 'r') as csv:
# for each line in csv
# extract the line number
for line in csv:
j = j + 1 # so for the first line, the line number will be 1
# if csv line number is not in lines-to-be-removed list,
# then write that to outfile
# NOTE(review): j is an int while the list holds strings read from the text
# file, so this membership test can never find a match.
if (j not in lines_to_be_removed_list):
outfile.write(line)
but for this line:
PI = line.split('"')[1]
I get:
Traceback (most recent call last):
File "C:/Users/sskadamb/PycharmProjects/vastDeleteLine/manipulation.py", line 11, in
PI = line.split('"')[1]
IndexError: list index out of range
and I thought it would do PI = Smith,Andy investigator = Smith,Andy... why does that not happen?
Any help would be greatly appreciated, thanks!
When you think csv, think pandas, which is a great data analysis library for Python. Here's how to accomplish what you want:
import pandas as pd

# The sample rows have eight comma-separated columns and no header line, so
# name the columns field0 .. field7.
fields = ['field{}'.format(i) for i in range(8)]
df = pd.read_csv("data.csv", header=None, names=fields)
# Keep only the rows whose 2nd and 4th columns differ.
df = df[df['field1'] != df['field3']]
# print() with a single argument behaves the same on Python 2 and Python 3.
print(df)
This prints:
field0 field1 field2 field3 field4 field5 field6 field7
1 F04300621 Parker,Helen CERT Yu,Betty IOUS NaN NaN NaN
Try splitting on comma, not quote.
x.split(",")

Transforming a text file into column vectors

I have a text file that I would like to break up into column vectors:
dtstamp ozone ozone_8hr_avg
06/18/2015 14:00:00 0.071 0.059
06/18/2015 13:00:00 0.071 0.053
How do I produce output in the following format?
dtstamp = [06/18/2015 14:00:00, 06/18/2015]
ozone = [0.071, 0.071]
etc.
import datetime

dtstamp = []  # parsed timestamps, one per data row
ozone = []    # ozone readings, one per data row

with open('file.txt', 'r') as f:
    next(f)  # skip the title line
    for line in f:
        # A blank line still contains its newline and is therefore truthy,
        # so strip it before testing.
        if not line.strip():
            continue
        # Each data row is: date, time, ozone, ozone_8hr_avg.
        day, time, value, _ = line.split()
        dtstamp.append(
            datetime.datetime.strptime(' '.join((day, time)),
                                       '%m/%d/%Y %H:%M:%S')
        )
        ozone.append(float(value))
You can then combine these lists with zip to work with corresponding dates/values:
for date, value in zip(dtstamp, ozone):
print(date, value) # just an example
Few of the other answers seem to give errors on running them.
Try this, it should work like a charm!
dtstmp = []
ozone = []
ozone_8hr_avg = []

with open('file.txt', 'r') as infile:
    next(infile)  # skip the header line
    for line in infile:
        words = line.split()  # date, time, ozone, ozone_8hr_avg
        if not words:  # blank (or whitespace-only) line
            continue
        # Only the first two words form the timestamp; joining the whole list
        # would append the measurements to it as well.
        dtstmp.append(' '.join(words[0:2]))
        ozone.append(words[2])          # third word: ozone
        ozone_8hr_avg.append(words[3])  # fourth word: 8-hour average

# %-formatting gives the same output on Python 2 and Python 3.
print("dtstmp = %s" % (dtstmp,))
print("ozone = %s" % (ozone,))
print("ozone_8hr_avg = %s" % (ozone_8hr_avg,))
I would check out pandas (http://pandas.pydata.org) or the csv module. With csv you'll have to build the columns yourself, since it gives you rows.
rows = [row for row in csv.reader(file, delimiter='\t') ] #get the rows
col0 = [ row[0] for row in rows ] # construct a colonm from element 0 of each row.
Try this my friend:
# -*- coding: utf8 -*-
# Split a whitespace-separated text file into one list per column.
data = []           # [date, time] pairs
data_hour = []      # "date time" strings
ozone = []
ozone_8hr_avg = []

# The with statement closes the file even if a row is malformed.
with open("./file.txt") as infile:
    # The first line is the header; it has only three words, so reading a
    # fourth field from it would raise IndexError.
    next(infile, None)
    for i_line in infile:
        parts = i_line.split()  # split once and reuse the result
        if not parts:  # skip blank lines
            continue
        data.append(parts[0:2])
        data_hour.append(' '.join(parts[0:2]))
        ozone.append(parts[2])
        ozone_8hr_avg.append(parts[3])

print(data_hour)
print(ozone)
print(ozone_8hr_avg)
If that helps you, remember to accept the answer.

Categories

Resources