list index out of range CSV for twitter - python

I'm trying to create a data structure for each tweet and I have troubles with the following code:
with open('tweets.csv', 'rb') as csvfile:
reader = csv.reader('tweets.csv', delimiter=',')
next(reader)
for row in reader:
tweet= dict()
tweet['ID'] = row[0]
tweet['Tweet'] = row[1]
tweet['Tweet cleaned'] = row[2]
tweet['Ticker'] = row[3]
tweet['date'] = int(float(row[4]))
tweet['Return'] = int(float(row[5]))
It returns an error on the line tweet['Tweet'] = row[1] : list index out of range.
I'm new to python and after googling the solution couldn't really find anything that works. Thank you!

f=open('tweets.csv','r')
reader=csv.reader(f, delimiter=',')
l1=list(reader)
for row in l1:
tweet=dict()
tweet['id']=row[0]
tweet['Tweet']=row[1]
tweet['tweet cleaned']=row[2]

Related

Checking a CSV for the existence of a similar value in Python

Consider the following CSV:
date,description,amount
14/02/2020,march contract,-99.00
15/02/2020,april contract,340.00
16/02/2020,march contract,150.00
17/02/2020,april contract,-100.00
What I'd like to do is:
Iterate through all of the rows
Total the amounts of lines which have the same description
Return the last line which has that newly-calculated amount
Applied to the above example, the CSV would look like this:
16/02/2020,march contract,51.00
17/02/2020,april contract,240.00
So far, I've tried nesting csv.reader()s inside of each other and I'm not getting the result I am wanting.
I'd like to achieve this without any libraries and/or modules.
Here is the code I have so far, where first_row is each row in the CSV and second_row is the iteration of looking for matching descriptions:
csv_reader = csv.reader(report_file)
for first_row in csv_reader:
description_index = 5
amount_index = 13
print(first_row)
for second_row in csv_reader:
if second_row is not first_row:
print(first_row[description_index] == second_row[description_index])
if first_row[description_index] == second_row[description_index]:
first_row[amount_index] = float(first_row[amount_index]) + float(second_row[amount_index])
This will work:
import csv
uniques = {} # dictionary to store key/value pairs
with open(report_file, newline='') as f:
reader = csv.reader(f, delimiter=',')
next(reader, None) # skip header row
for data in reader:
date = data[0]
description = data[1]
if description in uniques:
cumulative_total = uniques[description][0]
uniques[description] = [cumulative_total+float(data[2]), date]
else:
uniques[description] = [float(data[2]), date]
# print output
for desc, val in uniques.items():
print(f'{val[0]}, {desc}, {val[1]}')
I know that you've asked for a solution without pandas, but you'll save yourself a lot of time if you use it:
df = pd.read_csv(report_file)
totals = df.groupby(df['description']).sum()
print(totals)
I suggest you should use pandas, it'll be efficient.
or if you still want to go with your way then this will help.
import csv
with open('mycsv.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
value_dict = {}
line_no = 0
for row in csv_reader:
if line_no == 0:
line_no += 1
continue
cur_date = row[0]
cur_mon = row[1]
cur_val = float(row[2])
if row[1] not in value_dict.keys():
value_dict[cur_mon] = [cur_date, cur_val]
else:
old_date, old_val = value_dict[cur_mon]
value_dict[cur_mon] = [cur_date, (old_val + cur_val)]
line_no += 1
for key, val_list in value_dict.items():
print(f"{val_list[0]},{key},{val_list[1]}")
Output:
16/02/2020,march contract,51.0
17/02/2020,april contract,240.0
Mark this as answer if it helps you.
working with dictionary makes it easy to access values
import csv
from datetime import datetime
_dict = {}
with open("test.csv", "r") as f:
reader = csv.reader(f, delimiter=",")
for i, line in enumerate(reader):
if i==0:
headings = [line]
else:
if _dict.get(line[1],None) is None:
_dict[line[1]] = {
'date':line[0],
'amount':float(line[2])
}
else:
if datetime.strptime(_dict.get(line[1]).get('date'),'%d/%m/%Y') < datetime.strptime(line[0],'%d/%m/%Y'):
_dict[line[1]]['date'] = line[0]
_dict[line[1]]['amount'] = _dict[line[1]]['amount'] + float(line[2])
Here your _dict will contain unique description and values
>>> print(_dict)
{'march contract': {'date': '16/02/2020', 'amount': 51.0},
'april contract': {'date': '17/02/2020', 'amount': 240.0}}
convert to list and add headings
headings.extend([[value['date'],key,value['amount']] for key,value in _dict.items()])
>>>print(headings)
[['date', 'description', 'amount'],['16/02/2020', 'march contract', 51.0], ['17/02/2020', 'april contract', 240.0]]
save list to csv
with open("out.csv", "w", newline="") as f:
writer = csv.writer(f)
writer.writerows(headings)
You can also use itertools.groupby and sum() for this if you don't mind outputting in sorted form.
from datetime import datetime
from itertools import groupby
import csv
with open(report_file, 'r') as f:
reader = csv.reader(f)
lst = list(reader)[1:]
sorted_input = sorted(lst, key=lambda x : (x[1], datetime.strptime(x[0],'%d/%m/%Y'))) #sort by description and date
groups = groupby(sorted_input, key=lambda x : x[1])
for k,g in groups:
rows = list(g)
total = sum(float(row[2]) for row in rows)
print(f'{rows[-1][0]},{k},{total}') #print last date, description, total
Output:
17/02/2020,april contract,240.0
16/02/2020,march contract,51.0

Python CSV Row Loop

I am very new to Python programming and decided on a small project to learn the language.
Basically I am trying to:
Read the first cell of a CSV file.
Ask if that cell value is "liked".
If liked, write to the column next to the cell on 1., "1".
Else, write "0".
Repeat on next row until end of list.
My code right now:
import csv
reader = csv.reader(open("mylist.csv"), delimiter=',')
data = []
for row in reader:
data.append(row)
ask = (data[0][0])
ans = input("Do you like {}? ".format(ask))
if ans == ("y"):
f = open('mylist.csv', 'r')
reader = csv.reader(f)
data = list(reader)
f.close()
data[0][1] = '1'
my_new_list = open('mylist.csv', 'w', newline='')
csv_writer = csv.writer(my_new_list)
csv_writer.writerows(data)
my_new_list.close()
else:
f = open('mylist.csv', 'r')
reader = csv.reader(f)
data = list(reader)
f.close()
data[0][1] = '0'
my_new_list = open('mylist.csv', 'w', newline='')
csv_writer = csv.writer(my_new_list)
csv_writer.writerows(data)
my_new_list.close()
So basically, I am stuck trying to get the content of the next row.
FYI, I am looking to implement machine learning to this process.
First learning how to do this in a basic manner.
Any help is welcome.
Thank you!
You shouldn't read from and write to the same file/list/dict at the same time. If you do, references to data may change. You can start with something like this for your task. However, note that as the file grows you code becomes slower.
import csv
reader = csv.reader(open("test.csv", 'r'), delimiter=',')
content = []
for row in reader:
item = row[0]
ans = raw_input("Do you like {}? ".format(item))
if ans == 'y':
content.append([item, 1])
else:
content.append([item, 0])
writer = csv.writer(open('test.csv', 'w'))
writer.writerows(content)
In my last work with csv I opened the file so:
import csv
with open(name) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
data.append(row)
If you want the resultant csv file to contain all of the data from the input file but with the question results added in, you could use something like this.
It will insert you answer (0 or 1) after the first item in each record.
import csv
reader = csv.reader(open("mylist.csv", 'r'), delimiter=',')
data = []
for row in reader:
data.append(row)
for row in data:
ans = raw_input("Do you like {}? ".format(row[0]))
if ans == 'y':
row[1:1] = "1"
else:
row[1:1] = "0"
writer = csv.writer(open('myresult.csv', 'w'))
writer.writerows(data)

Find duplicates in a column, then add values in adjacent column

I have a csv file that has a one word title and a description that is always a number.
My current code extracts just the title an description to another csv file and then converts the csv into an excel file.
import csv
import output
f = open("Johnny_Test-punch_list.csv")
csv_f = csv.reader(f)
m = open('data.csv', "w")
for row in csv_f:
m.write(row[1])
m.write(",")
m.write(row[3])
m.write("\n")
m.close()
output.toxlsx()
How can I look for matching Titles and then add the descriptions of the titles?
import csv
import output
f = open("Johnny_Test-punch_list.csv")
csv_f = csv.reader(f)
m = open('data.csv', "w")
dict_out = {}
for row in csv_f:
if row[1] in dict_out:
dict_out[row[1]] += row[3]
else:
dict_out[row[1]] = row[3]
for title, value in dict_out.iteritems():
m.write('{},{}\n'.format(title, value))
If I understood you correctly, you need to write in a single line as a string.
can you try with below code:
for row in csv_f:
m.write(row[1] + "," + str(row[3]) + "\n")

(Simple Python) CSV input to usernames

I have a CSV file names.csv
First_name, Last_name
Mike, Hughes
James, Tango
, Stoke
Jack,
....etc
What I want is to be able to take the first letter of the First_name and the full Last_name and output it on screen as usernames but not include the people with First_name and Last_name property's empty. I'm completely stuck any help would be greatly appreciated
import csv
ifile = open('names.csv', "rb")
reader = csv.reader(ifile)
rownum = 0
for row in reader:
if rownum == 0:
header = row
else:
colnum = 0
for col in row:
print '%-8s: %s' % (header[colnum], col)
colnum += 1
rownum += 1
ifile.close()
Attempt #2
import csv
dataFile = open('names.csv','rb')
reader = csv.reader(dataFile)
next(reader, None)
for row in reader:
if (row in reader )
print (row[0])
I haven't saved many attempts because none of them have worked :S
import csv
dataFile = open('names.csv','rb')
reader = csv.reader(dataFile, delimiter=',', quoting=csv.QUOTE_NONE)
for row in reader:
if not row[0] or not row[1]:
continue
print (row[0][0] + row[1]).lower()
Or
import csv
dataFile = open('names.csv','rb')
reader = csv.reader(dataFile, delimiter=',', quoting=csv.QUOTE_NONE)
[(row[0][0] + row[1]).lower() for row in reader if
row[0] and row[1]]
Once you get the text from the .csv you can use the split() function to break up the text by the new lines. Your sample text is a little inconsistent, but if I understand you question correctly you can say
import csv
dataFile = open('names.csv','rb')
reader = csv.reader(dataFile)
reader = reader.split('\n')
for x in reader
print(reader[x])
Or if you want to break it up by commas just replace the '\n' with ','
Maybe like this
from csv import DictReader
with open('names.csv') as f:
dw = DictReader(f, skipinitialspace=True)
fullnames = filter(lambda n: n['First_name'] and n['Last_name'], dw)
for f in fullnames:
print('{}{}'.format(f['First_name'][0], f['Last_name']))
You have headings in your csv so use a DictReader and just filter out those whose with empty first or last names and display the remaining names.

How to read multiple records from a CSV file?

I have a csv file, l__cyc.csv, that contains this:
trip_id, time, O_lat, O_lng, D_lat, D_lng
130041910101,1300,51.5841153671,0.134444590094,51.5718053872,0.134878021928
130041910102,1335,51.5718053872,0.134878021928,51.5786920389,0.180940040247
130041910103,1600,51.5786920389,0.180940040247,51.5841153671,0.134444590094
130043110201,1500,51.5712712038,0.138532882664,51.5334949484,0.130489470325
130043110202,1730,51.5334949484,0.130489470325,51.5712712038,0.138532882664
And I am trying to pull out separate values, using:
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
origincoords = ['{O_lat},{O_lng}'.format(**row) for row in reader]
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
trip_id = ['{trip_id}'.format(**row) for row in reader]
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
destinationcoords = ['{D_lat},{D_lng}'.format(**row) for row in reader]
Where origincoords should be 51.5841153671, 0.134444590094,
trip_id should be 130041910101, and destinationcoords should be
51.5718053872, 0.134878021928.
However, I get a KeyError:
KeyError: 'O_lat'
Is this something simple and there's something fundamental I'm misunderstanding?
You just avoid the space between headers
trip_id,time,O_lat,O_lng,D_lat,D_lng
OR
reader = csv.DictReader(csvfile, skipinitialspace=True)
First things first, you get the key error, because the key does not exist in your dictionary.
Next, I would advise against running through the file 3 times, when you can do it a single time!
For me it worked, when I added the fieldnames to the reader.
import csv
from cStringIO import StringIO
src = """trip_id, time, O_lat, O_lng, D_lat, D_lng
130041910101,1300,51.5841153671,0.134444590094,51.5718053872,0.134878021928
130041910102,1335,51.5718053872,0.134878021928,51.5786920389,0.180940040247
130041910103,1600,51.5786920389,0.180940040247,51.5841153671,0.134444590094
130043110201,1500,51.5712712038,0.138532882664,51.5334949484,0.130489470325
130043110202,1730,51.5334949484,0.130489470325,51.5712712038,0.138532882664
"""
f = StringIO(src)
# determine the fieldnames
fieldnames= "trip_id,time,O_lat,O_lng,D_lat,D_lng".split(",")
# read the file
reader = csv.DictReader(f, fieldnames=fieldnames)
# storage
origincoords = []
trip_id = []
destinationcoords = []
# iterate the rows
for row in reader:
origincoords.append('{O_lat},{O_lng}'.format(**row))
trip_id.append('{trip_id}'.format(**row))
destinationcoords.append('{D_lat},{D_lng}'.format(**row))
# pop the header off the list
origincoords.pop(0)
trip_id.pop(0)
destinationcoords.pop(0)
# show the result
print origincoords
print trip_id
print destinationcoords
I don't really know what you are trying to achieve there, but I'm sure there is a better way of doing it!

Categories

Resources