I am reading in a csv file with ten lines to transfer to JSON, console output as below. Python code attached. First step is to print csv data to console and error below occurring.
Syntax errors were occurring, but after fixing them this error began.
# Build a dict of CSV rows keyed by the value of each row's "hmid" column.
data = {}
with open(csvFilePath) as csvFile:
# DictReader yields one mapping per data row, keyed by the header names.
csvReader = csv.DictReader(csvFile)
for csvRow in csvReader:
# This lookup is the line reported in the traceback: it raises KeyError
# when the row has no key spelled exactly "hmid".
hmid = csvRow["hmid"]
data[hmid] = csvRow
Console output:
python csvjson.py
Traceback (most recent call last):
File "csvjson.py", line 12, in <module>
hmid = csvRow["hmid"]
KeyError: 'hmid'
Expected Output:
Prints out the CSV data to the console.
The KeyError exception means that the key you are requesting does not exist in that dictionary.
If that column "hmid" does not exist in every row of the csv, consider using the dict.get() method. This will return None if the key does not exist in the dictionary instead of the KeyError.
Alternatively you can catch that KeyError and skip the row. That would look something like this.
# Key each CSV row by its "hmid" column; rows lacking that column are skipped.
data = {}
with open(csvFilePath) as csvFile:
    for record in csv.DictReader(csvFile):
        try:
            key = record["hmid"]
        except KeyError:
            # No "hmid" column in this row: leave it out of the result.
            continue
        data[key] = record
Or check if the key is in the dictionary before proceeding.
# Key each CSV row by its "hmid" column, ignoring rows that have no such column.
data = {}
with open(csvFilePath) as csvFile:
    for record in csv.DictReader(csvFile):
        if "hmid" in record:
            data[record["hmid"]] = record
I made the following file 'test.csv' :
hmid,first_name,last_name,email,gender,passport_number,departure_city,arrival_city,aircraft_type
1,Lotstring,Duobam,anatwick0#samsung.com,Female,7043833787,Changtang,Tours,B737
2,Rover,Red,rr#nowhere.com,Female,7043833787,Changtang,Tours,B737
pasted your code into Python 2.7, and it worked fine. data has two rows.
Maybe your file had an issue with terminators.
Some CSV files (for example, ones saved by Excel as "CSV UTF-8") start with a BOM (byte order mark). The BOM becomes part of the first column name, so the key "hmid" is never found. To strip it, specify encoding='utf-8-sig' when you open the file. Here is your code, corrected:
# Key each CSV row by its "hmid" column.
# The utf-8-sig codec drops a leading byte order mark, if there is one,
# so it cannot end up glued to the first header name.
data = {}
with open(csvFilePath, encoding='utf-8-sig') as csvFile:
    for record in csv.DictReader(csvFile):
        data[record["hmid"]] = record
Related
I'm trying to edit a single row in a csv file. I've got a CSV file that looks like the one below:
TYPE|FOOD TYPE|FEED TIME|WASH TIME
LION|MEAT|4H|1D
FOX|MEAT|5H|3D
HEN|SEED|6H|6D
FISH|PLANTS|7H|99D
I want to edit the row based on its TYPE. If the user wants to edit the FOX row they only need to type FOX when prompted. The issue I'm facing is that I can't edit the file for some reason.
My code is below. I open the existing db, find the row in question, change it, then write it, along with the other rows, into a temp file that I can overwrite the original with.
def edit_animal_entry(type):
# Copy animal_csv to temp row by row, replacing the row whose first field
# equals `type` with freshly entered values, then move temp over animal_csv.
# NOTE(review): indentation was lost in this listing. According to the answer
# that follows, the first `with` block ends before the loop runs, so the
# reader's file is already closed when rows are read.
with open(animal_csv, 'r') as file_read:
reader = csv.reader(file_read, delimiter="|")
with open(temp, 'w') as file_write:
# NOTE(review): the writer is created without delimiter="|", unlike the reader.
writer = csv.writer(file_write)
for row in reader:
# Echo every row to the console, fields separated by " | ".
print(f"{' | '.join(row)}")
if row[0] == type:
# Matching row: ask for replacement values and write those instead.
animal_type, animal_food, animal_feed, animal_wash = animal_inputs()
writer.writerow([animal_type, animal_food, (animal_feed+"H"), (animal_wash+"D")])
else:
# Any other row is copied through unchanged.
writer.writerow(row)
shutil.move(temp, animal_csv)
You've 'closed' the read file by stopping the with block before reading anything out of it. Therefore you aren't looping over your input file. A solution would be to open the input and the output file in the same with statement:
def edit_animal_entry(type):
    """Replace the row of ``animal_csv`` whose first field equals ``type``.

    Every row is echoed to the console and copied into the ``temp`` file.
    The matching row is replaced by the values returned from
    ``animal_inputs()``, with "H" and "D" appended to the feed and wash
    values. Once both files are closed, ``temp`` is moved over
    ``animal_csv``.

    Args:
        type: Value compared against the first column of each row.
    """
    # Both files are opened in a single ``with`` statement so the input is
    # still open while the loop reads from it. newline='' is the mode the
    # csv module expects for both reading and writing.
    with open(animal_csv, 'r', newline='') as file_read, \
            open(temp, 'w', newline='') as file_write:
        reader = csv.reader(file_read, delimiter="|")
        # Write with the same "|" delimiter the file is read with; the
        # default would turn the rewritten file into a comma-separated one.
        writer = csv.writer(file_write, delimiter="|")
        for row in reader:
            print(f"{' | '.join(row)}")
            # ``row`` is an empty list for a blank line, so guard the index.
            if row and row[0] == type:
                animal_type, animal_food, animal_feed, animal_wash = animal_inputs()
                writer.writerow([animal_type, animal_food, (animal_feed+"H"), (animal_wash+"D")])
            else:
                writer.writerow(row)
    # Replace the original only after both handles have been closed.
    shutil.move(temp, animal_csv)
I'm creating a Django app and I need to import several *.csv files.
One of these files has this structure:
id|value (header)
12|¤this is the
value¤
34|¤this is another
value¤
I use this code to parse the file:
# Open the file as UTF-16 text; return False if it cannot be opened.
try:
csvfile = open(path, "r", encoding='utf-16')
except IOError:
return False
# Load the file into tblname, using '|' as the column separator.
# NOTE(review): presumably copy_from treats every physical line as one row,
# so a value that spans two lines would be seen as a row with a missing
# column — TODO confirm against the psycopg2 documentation.
cursor.copy_from(csvfile , tblname, columns=['id', 'value'], sep='|')
But when I try to parse this file, it gave me this error:
psycopg2.DataError: ERROR: missing data for the column "value"
Is there a way to parse this file while keeping the line breaks inside the text qualifier ('¤')?
You could use Python's csv module for reading that.
import csv
try:
csvfile = open(path, newline='')
except IOError:
return False
csvreader = csv.reader(csvfile, delimiter='|', quotechar='¤')
for row in csvreader:
print(', '.join(row)) # or do something else with the row of data.
One approach would be to build up the entries yourself as follows:
# Group physical lines into logical records: a line containing '|' starts a
# new record, and any following lines without '|' continue that record.
blocks = []
block = []

with open('input.csv') as f_input:
    for row in f_input:
        # A new record begins here: flush the one collected so far, if any.
        if '|' in row and block:
            blocks.append(''.join(block).strip('\n').split('|'))
            block = []
        block.append(row)

# Flush the final record, which no later line terminated.
if block:
    blocks.append(''.join(block).strip('\n').split('|'))

print(blocks)
This would produce a list of blocks as follows:
[['id', 'value (header)'], ['12', '¤this is the\nvalue¤'], ['34', '¤this is another\nvalue¤']]
{"a":"1","b":"1","c":"1"}
{"a":"2","b":"2","c":"2"}
{"a":"3","b":"3","c":"3"}
{"a":"4","b":"4","c":"4"}
I have tried the following code, but it gives an error:
from nltk.twitter import Twitter
from nltk.twitter.util import json2csv
# Collect lines from the raw tweets file and pass each collected block to json2csv.
with open('C:/Users/Archit/Desktop/raw_tweets.json', 'r') as infile:
# Variable for building our JSON block
json_block = []
for line in infile:
# Add the line to our JSON block
json_block.append(line)
# Check whether we closed our JSON block
# NOTE(review): this tests for an opening '{' at the start of the line,
# not for a closing brace.
if line.startswith('{'):
# Do something with the JSON dictionary
# NOTE(review): json_block is a list of raw text lines at this point; what
# json2csv expects as its first argument is not visible here — TODO confirm.
json2csv(json_block, 'tweets.csv', ['id','text','created_at','in_reply_to_user_id','in_reply_to_screen_name','in_reply_to_status_id','user.id','user.screen_name','user.name','user.location','user.friends_count','user.followers_count','source'])
# Start a new block
json_block = []
Error:
File "C:\Python34\lib\json\decoder.py", line 361, in raw_decode
raise ValueError(errmsg("Expecting value", s, err.value)) from None
ValueError: Expecting value: line 1 column 1 (char 0)
import csv
import json

# Read one JSON object per line.
data = []
# Raw string: in an ordinary literal, "\U" starts a unicode escape on
# Python 3 and the path would be a syntax error.
with open(r'C:\Users\Shahriar\Desktop\T.txt') as data_file:
    for line in data_file:
        if line.strip():  # json.loads rejects blank lines, so skip them
            data.append(json.loads(line))

# The CSV columns are the keys of the first record, in that record's order.
keys = list(data[0])

# On Python 3 the csv module writes text, so the file is opened in 'w' mode
# with newline='' rather than in binary mode.
with open('data.csv', 'w', newline='') as csvF:
    csvWriter = csv.DictWriter(csvF, fieldnames=keys)
    csvWriter.writeheader()
    csvWriter.writerows(data)
Output:
a,c,b
1,1,1
2,2,2
3,3,3
4,4,4
This is way too late but I also stumbled upon some errors today. I figured that you actually have to import from nltk.twitter.common instead of util. Hope this helps others who stumbled upon this thread
# Read json
filename = 'C:/Users/Archit/Desktop/raw_tweets.json'
lines = [line.replace("{", "").replace("}", "").replace(":", ",") for line in open(filename)]
# Write csv
with open('out.csv', 'w') as csv_file:
for line in lines:
csv_file.write("%s\n" % line)
I have a csv file, l__cyc.csv, that contains this:
trip_id, time, O_lat, O_lng, D_lat, D_lng
130041910101,1300,51.5841153671,0.134444590094,51.5718053872,0.134878021928
130041910102,1335,51.5718053872,0.134878021928,51.5786920389,0.180940040247
130041910103,1600,51.5786920389,0.180940040247,51.5841153671,0.134444590094
130043110201,1500,51.5712712038,0.138532882664,51.5334949484,0.130489470325
130043110202,1730,51.5334949484,0.130489470325,51.5712712038,0.138532882664
And I am trying to pull out separate values, using:
# Read the same CSV three times, building one list of formatted strings per pass.
# NOTE(review): the header row shown above has a space after each comma, so
# the DictReader keys presumably keep that leading space (' O_lat' rather
# than 'O_lat'), which would explain the KeyError reported below.
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
# "O_lat,O_lng" for every row.
origincoords = ['{O_lat},{O_lng}'.format(**row) for row in reader]
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
# The trip_id of every row, as a string.
trip_id = ['{trip_id}'.format(**row) for row in reader]
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
# "D_lat,D_lng" for every row.
destinationcoords = ['{D_lat},{D_lng}'.format(**row) for row in reader]
Where origincoords should be 51.5841153671, 0.134444590094,
trip_id should be 130041910101, and destinationcoords should be
51.5718053872, 0.134878021928.
However, I get a KeyError:
KeyError: 'O_lat'
Is this something simple and there's something fundamental I'm misunderstanding?
Just remove the spaces after the commas in the header row:
trip_id,time,O_lat,O_lng,D_lat,D_lng
OR
reader = csv.DictReader(csvfile, skipinitialspace=True)
First things first, you get the key error, because the key does not exist in your dictionary.
Next, I would advise against running through the file 3 times, when you can do it a single time!
For me it worked, when I added the fieldnames to the reader.
import csv
from io import StringIO

src = """trip_id, time, O_lat, O_lng, D_lat, D_lng
130041910101,1300,51.5841153671,0.134444590094,51.5718053872,0.134878021928
130041910102,1335,51.5718053872,0.134878021928,51.5786920389,0.180940040247
130041910103,1600,51.5786920389,0.180940040247,51.5841153671,0.134444590094
130043110201,1500,51.5712712038,0.138532882664,51.5334949484,0.130489470325
130043110202,1730,51.5334949484,0.130489470325,51.5712712038,0.138532882664
"""
f = StringIO(src)

# determine the fieldnames (without the spaces the file's header row contains)
fieldnames = "trip_id,time,O_lat,O_lng,D_lat,D_lng".split(",")

# read the file; because fieldnames are given explicitly, DictReader treats
# the header line as an ordinary data row
reader = csv.DictReader(f, fieldnames=fieldnames)

# skip that header row up front instead of popping it off each list afterwards
next(reader, None)

# storage
origincoords = []
trip_id = []
destinationcoords = []

# iterate the rows once, filling all three lists in the same pass
for row in reader:
    origincoords.append('{O_lat},{O_lng}'.format(**row))
    trip_id.append('{trip_id}'.format(**row))
    destinationcoords.append('{D_lat},{D_lng}'.format(**row))

# show the result
print(origincoords)
print(trip_id)
print(destinationcoords)
I don't really know what you are trying to achieve there, but I'm sure there is a better way of doing it!
I keep getting "NameError: name '[file name]' is not defined" after entering "process_csv(b.csv, b2.csv)" in IDLE. I'm not sure what to fix. Any ideas?
import csv
def process_csv(file_name, new_file_name):
    """Copy a CSV file, lower-casing every purely alphabetic field.

    Each field of each row is checked with ``str.isalpha()``. Fields that
    pass are written in lowercase; all other fields (numbers, mixed text,
    empty fields) are written unchanged. Prints 'File processed.' when done.

    Args:
        file_name: Path of the CSV file to read.
        new_file_name: Path of the CSV file to write; it is overwritten.
    """
    # ``with`` closes both files even if reading or writing fails part-way.
    # newline='' is the mode the csv module expects for input and output.
    with open(file_name, 'r', newline='') as file1, \
            open(new_file_name, 'w', newline='') as file2:
        new_data = csv.writer(file2)
        for line in csv.reader(file1):
            new_data.writerow(
                [item.lower() if item.isalpha() else item for item in line]
            )
    print('File processed.')
You need quotation marks around the file names, so that they are strings rather than undefined variable names:
process_csv("b.csv", "b2.csv")