I am trying to store some data in a CSV file. I am able to store the data, but it looks like this:
1901,1909,1911,1913,1917
5,5,5,4,6
The first row is the years and the second row is the values.
My code is as follows:
import os
import csv
from collections import Counter
from csv import reader

YEAR = 3  # column index of the year field

def read_data(filename):
    with open(filename) as f:
        next(f, None)  # discard header
        year2rel = Counter(int(line[YEAR]) for line in reader(f))
    return year2rel

file_exists = os.path.isfile('mycsvfile.csv')

def store_data(value):
    my_dict = read_data(filename)
    print 'my_dict: ', my_dict
    with open('mycsvfile.csv', 'wb') as f:
        w = csv.DictWriter(f, my_dict.keys())
        if not file_exists:
            w.writeheader()
        print 'w: ', w
        w.writerow(my_dict)

if __name__ == '__main__':
    filename = '/home/rob/songs_detail.csv'
    a = read_data(filename)
    b = store_data(a)
But I want to include headers so that it looks like this:
year values
1901 5
1909 5
1911 5
1913 4
1917 6
I tried to change my code, but it doesn't work well for me. I tried something like the below, but was unsuccessful. Thanks for any tip or help.
def store_data(value):
    file_exists = os.path.isfile('mycsvfile.csv')
    my_dict = read_data(filename)
    print 'my_dict: ', my_dict
    with open('mycsvfile.csv', 'wb') as csvfile:
        headers = ['year', 'values']
        writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=headers)
        if not file_exists:
            writer.writeheader()  # file doesn't exist yet, write a header
        writer.writerow({'year': my_dict[0], 'values': my_dict[1]})
This gives me the following error:
ValueError: dict contains fields not in fieldnames: 'values', 'year'
I solved my question, and the code is as follows; it might be helpful for someone.
def store_data(value):
    file_exists = os.path.isfile('mycsvfile.csv')
    my_dict = read_data(filename)
    print 'my_dict: ', my_dict
    print 'type of: ', type(my_dict)
    with open('mycsvfile.csv', 'wb') as csvfile:
        headers = ['year', 'values']
        writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=headers)
        if not file_exists:
            writer.writeheader()
        for k, v in my_dict.items():
            print(k, v)
            writer.writerow({'year': k, 'values': v})
Actually, I was failing to write the rows in a loop, so the CSV file always ended up without the rows under the headers. Now it works perfectly fine for my requirements.
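For what it's worth, the same output can also be produced with a plain csv.writer instead of DictWriter; a minimal sketch in Python 3, reusing the Counter returned by read_data() above:

import csv

def store_data(year2count, out_path='mycsvfile.csv'):
    # year2count is the Counter returned by read_data()
    with open(out_path, 'w', newline='') as f:
        w = csv.writer(f)
        w.writerow(['year', 'values'])        # header row
        for year, count in sorted(year2count.items()):
            w.writerow([year, count])         # one year per line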
Related
I am trying to make a function that takes a threshold and determines which entries in a CSV file of song names and their lyrics contain human names. The function should create a CSV file named outputFile that contains the number of distinct names, the name of the song, and the artist.
import csv

def findName(thresh, outputFile):
    dictNames = {}
    with open('allNames.csv') as csvfile:
        reader = csv.DictReader(csvfile, delimiter="\t")
        for row in reader:
            if row["name"] in dictNames:
                dictNames[row["name"]] += 1
            else:
                dictNames[row["name"]] = 1
    with open(outputFile, "w", newline='') as outfile:
        headers = ["song", "artist", "year"]
        writer = csv.DictWriter(outfile, fieldnames=headers)
        writer.writeheader()
        for key, val in dictNames.items():
            if val >= thresh:
                writer.writerow({key: val})
    csvfile.close()
    outfile.close()
What's the rationale for not using Pandas here?
Not sure I fully understand your question, but I'm thinking something like:
import pandas as pd

df = pd.read_csv('allNames.csv')

# partition df after threshold
df['index'] = df.index

def partition_return(threshold, df):
    df = df.loc[df['index'] >= threshold].reset_index(drop=True)
    df = df[['song', 'artist', 'year']]
    df['count_names_dist'] = len(df['artist'].unique())
    df.to_csv('outfile.csv', index=False)
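A hypothetical call, using the df loaded above and an example threshold of 3:

partition_return(3, df)  # writes outfile.csv with the rows at index >= 3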
I have a CSV file with headers on row 0. The headers are usually unique, but sometimes they are not; in this example, each of several comment columns has the header "Comment".
The problem with my function that makes dicts from CSVs is that it only keeps the last Comment column, since later duplicate keys overwrite earlier ones in each dict.
def csv_to_list_with_dicts(csvfile):
    with open(csvfile) as f:
        list_of_issues = [{k: v for k, v in row.items()}
                          for row in csv.DictReader(f, skipinitialspace=True)]
    return list_of_issues
My CSV file columns are like this:
User;ID;Comment;Comment;Comment
If one of the headers repeats, I need to add an index in the dict to make it unique (like Comment1;Comment2, without changing the CSV); otherwise all comments end up under just Comment.
This returned just what I wanted. I just tweaked yours a small bit, Happy Ahmad! HUGE THANKS!!! <3
import csv

def csv_to_list_with_dicts(csvfile):
    with open(csvfile, "r") as file:
        keys = file.readline().strip().split(",")  # strip the trailing newline off the header
        alteredKeys = []
        for eachKey in keys:
            counter = 0
            while eachKey in alteredKeys:
                counter += 1
                eachKey = eachKey[:len(eachKey) - (0 if counter == 1 else 1)] + str(counter)
            alteredKeys.append(eachKey)
        list_of_issues = []
        reader = csv.reader(file, delimiter=',', skipinitialspace=True)
        for eachLine in reader:
            eachIssue = dict()
            columnIndex = 0
            for eachColumn in eachLine:
                if columnIndex < len(alteredKeys):
                    eachIssue[alteredKeys[columnIndex]] = eachColumn
                columnIndex += 1
            list_of_issues.append(eachIssue)
    return list_of_issues
In this solution, I use an alteredKeys list that renames any repeated key in the header by appending an index to its end. Then I iterate over the remaining lines of the CSV file and build a dictionary from each one.
def csv_to_list_with_dicts(csvfile):
    with open(csvfile, "r") as file:
        keys = file.readline().strip().split(";")  # strip the trailing newline off the header
        alteredKeys = []
        for eachKey in keys:
            counter = 0
            while eachKey in alteredKeys:
                counter += 1
                eachKey = eachKey[:len(eachKey) - (0 if counter == 1 else 1)] + str(counter)
            alteredKeys.append(eachKey)
        list_of_issues = []
        for eachLine in file:
            eachIssue = dict()
            columnIndex = 0
            for eachColumn in eachLine.strip().split(";"):
                if columnIndex < len(alteredKeys):
                    eachIssue[alteredKeys[columnIndex]] = eachColumn
                columnIndex += 1
            list_of_issues.append(eachIssue)
    return list_of_issues
It would be fairly easy to write code that automatically generates unique keys for you by simply keeping track of those already seen and generating a unique name for any key encountered that conflicts with one before it. Checking for that is quick if the names already seen are kept in a set, which features fast membership testing.
For example, assume this was in a CSV file named non-unique.csv:
User;ID;Comment;Comment;Comment
Jose;1138;something1;something2;something3
Gene;2907;abc;def;ghi
Guido;6450;jkl;mno;pqr
Code:
import csv
def csv_to_list_with_dicts(csv_filename):
# Read the first row of the csv file.
with open(csv_filename, encoding='utf-8', newline='') as csv_file:
reader = csv.reader(csv_file, delimiter=';', skipinitialspace=True)
names = next(reader) # Header row.
# Create list of unique fieldnames for the namee in the header row.
seen = set()
fieldnames = []
for i, name in enumerate(names):
if name in seen:
name = f'_{i}'
else:
seen.add(name)
fieldnames.append(name)
# Read entire file and make each row a dictionary with keys based on the fieldnames.
with open(csv_filename, encoding='utf-8', newline='') as csv_file:
reader = csv.DictReader(csv_file, fieldnames=fieldnames, delimiter=';',
skipinitialspace=True)
next(reader) # Ignore header row.
return list(reader)
results = csv_to_list_with_dicts('non-unique.csv')
from pprint import pprint
pprint(results, sort_dicts=False, width=120)
Results:
[{'User': 'Jose', 'ID': '1138', 'Comment': 'something1', '_3': 'something2', '_4': 'something3'},
{'User': 'Gene', 'ID': '2907', 'Comment': 'abc', '_3': 'def', '_4': 'ghi'},
{'User': 'Guido', 'ID': '6450', 'Comment': 'jkl', '_3': 'mno', '_4': 'pqr'}]
Consider the following CSV:
date,description,amount
14/02/2020,march contract,-99.00
15/02/2020,april contract,340.00
16/02/2020,march contract,150.00
17/02/2020,april contract,-100.00
What I'd like to do is:
Iterate through all of the rows
Total the amounts of lines which have the same description
Return the last line which has that newly-calculated amount
Applied to the above example, the CSV would look like this:
16/02/2020,march contract,51.00
17/02/2020,april contract,240.00
So far, I've tried nesting csv.reader()s inside each other, and I'm not getting the result I want.
I'd like to achieve this without any libraries and/or modules.
Here is the code I have so far, where first_row is each row in the CSV and second_row is the inner iteration that looks for matching descriptions:
csv_reader = csv.reader(report_file)

for first_row in csv_reader:
    description_index = 5
    amount_index = 13
    print(first_row)
    for second_row in csv_reader:
        if second_row is not first_row:
            print(first_row[description_index] == second_row[description_index])
            if first_row[description_index] == second_row[description_index]:
                first_row[amount_index] = float(first_row[amount_index]) + float(second_row[amount_index])
This will work:
import csv

uniques = {}  # dictionary to store key/value pairs

with open(report_file, newline='') as f:
    reader = csv.reader(f, delimiter=',')
    next(reader, None)  # skip header row
    for data in reader:
        date = data[0]
        description = data[1]
        if description in uniques:
            cumulative_total = uniques[description][0]
            uniques[description] = [cumulative_total + float(data[2]), date]
        else:
            uniques[description] = [float(data[2]), date]

# print output
for desc, val in uniques.items():
    print(f'{val[0]}, {desc}, {val[1]}')
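If you need the aggregates written back out as a CSV rather than printed, matching the question's expected output, a small follow-on sketch (the output filename is a placeholder):

# Write date, description, amount rows from the uniques dict built above.
with open('aggregated.csv', 'w', newline='') as out:
    writer = csv.writer(out)
    for desc, (total, date) in uniques.items():
        writer.writerow([date, desc, f'{total:.2f}'])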
I know that you've asked for a solution without pandas, but you'll save yourself a lot of time if you use it:
import pandas as pd

df = pd.read_csv(report_file)
totals = df.groupby(df['description']).sum()
print(totals)
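The plain sum drops the dates, though. If you also want the last date per description, as in the expected output, something like this sketch should work (assuming the dates parse with dayfirst=True):

import pandas as pd

df = pd.read_csv(report_file)
df['date'] = pd.to_datetime(df['date'], dayfirst=True)
result = (df.sort_values('date')
            .groupby('description')
            .agg(date=('date', 'last'), amount=('amount', 'sum'))
            .reset_index())
print(result)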
I suggest you use pandas; it'll be efficient. But if you still want to go your own way, then this will help.
import csv

with open('mycsv.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    value_dict = {}
    line_no = 0
    for row in csv_reader:
        if line_no == 0:
            line_no += 1
            continue
        cur_date = row[0]
        cur_mon = row[1]
        cur_val = float(row[2])
        if row[1] not in value_dict.keys():
            value_dict[cur_mon] = [cur_date, cur_val]
        else:
            old_date, old_val = value_dict[cur_mon]
            value_dict[cur_mon] = [cur_date, (old_val + cur_val)]
        line_no += 1

for key, val_list in value_dict.items():
    print(f"{val_list[0]},{key},{val_list[1]}")
Output:
16/02/2020,march contract,51.0
17/02/2020,april contract,240.0
Mark this as answer if it helps you.
Working with a dictionary makes it easy to access the values:
import csv
from datetime import datetime

_dict = {}

with open("test.csv", "r") as f:
    reader = csv.reader(f, delimiter=",")
    for i, line in enumerate(reader):
        if i == 0:
            headings = [line]
        else:
            if _dict.get(line[1], None) is None:
                _dict[line[1]] = {
                    'date': line[0],
                    'amount': float(line[2])
                }
            else:
                if datetime.strptime(_dict.get(line[1]).get('date'), '%d/%m/%Y') < datetime.strptime(line[0], '%d/%m/%Y'):
                    _dict[line[1]]['date'] = line[0]
                _dict[line[1]]['amount'] = _dict[line[1]]['amount'] + float(line[2])
Here your _dict will contain each unique description with its values:
>>> print(_dict)
{'march contract': {'date': '16/02/2020', 'amount': 51.0},
'april contract': {'date': '17/02/2020', 'amount': 240.0}}
Convert to a list and add the headings:
headings.extend([[value['date'], key, value['amount']] for key, value in _dict.items()])
>>> print(headings)
[['date', 'description', 'amount'], ['16/02/2020', 'march contract', 51.0], ['17/02/2020', 'april contract', 240.0]]
Save the list to CSV:
with open("out.csv", "w", newline="") as f:
writer = csv.writer(f)
writer.writerows(headings)
You can also use itertools.groupby and sum() for this if you don't mind outputting in sorted form.
from datetime import datetime
from itertools import groupby
import csv

with open(report_file, 'r') as f:
    reader = csv.reader(f)
    lst = list(reader)[1:]

sorted_input = sorted(lst, key=lambda x: (x[1], datetime.strptime(x[0], '%d/%m/%Y')))  # sort by description and date
groups = groupby(sorted_input, key=lambda x: x[1])

for k, g in groups:
    rows = list(g)
    total = sum(float(row[2]) for row in rows)
    print(f'{rows[-1][0]},{k},{total}')  # print last date, description, total
Output:
17/02/2020,april contract,240.0
16/02/2020,march contract,51.0
I want to write some data into a CSV file, but nothing is written except the header, which is words and number. What's wrong with my code?
Here is my code.
import os
import csv
csvFile = open('Trail.csv','w+')
fieldname = ['words', 'number']
trialDict = {'apple':1, 'banana':4, 'cat':6}
writer = csv.DictWriter(csvFile, fieldnames=fieldname, extrasaction='ignore')
writer.writeheader()
writer.writerow(trialDict)
csvFile.close()
I get what you mean and I modified my code, but there is still a problem.
import os
import csv

csvFile = open('Trail.csv', 'w+')
fieldname = ['words', 'number']
trialDict = {'words':'apple', 'number':5, 'words':'banana','number':6, 'words':'cat', 'number':5}
writer = csv.DictWriter(csvFile, fieldnames=fieldname, extrasaction='ignore')
writer.writeheader()
writer.writerow(trialDict)
csvFile.close()
The output becomes:
word numbers
cat 5
How can I write everything into the file? I tried the writerows function, but it returned the error:
'str' object has no attribute 'get'
The real issue here is that you are overwriting your keys in trialDict, so it only keeps the last instance of words and number.
>>> trialDict = {'words':'apple', 'number':5, 'words':'banana','number':6, 'words':'cat', 'number':5}
>>> trialDict
{'words': 'cat', 'number': 5}
What you want is a single dictionary of counts by word, and then to just write the key/value pairs:
fieldnames = ['words', 'number']
trialDict = {'apple': 5, 'banana': 6, 'cat': 5}

with open("Trial.csv", "w+") as f:
    f.write(','.join(fieldnames) + '\n')
    for word, number in trialDict.items():
        f.write(word + ',' + str(number) + '\n')
Field names are headers; you should include keys in your dict corresponding to your headers. If this is your requirement, this code should work:
import os
import csv
csvFile = open('Trail.csv','w+')
fieldname = ['apple', 'banana', 'cat']
trialDict = {'apple':1, 'banana':4, 'cat':6}
writer = csv.DictWriter(csvFile, fieldnames=fieldname, extrasaction='ignore')
writer.writeheader()
writer.writerow(trialDict)
csvFile.close()
You don't need DictWriter for this. The standard writer works. Also note the newline='' parameter when opening the file as per csv documentation:
import csv

fieldname = ['words', 'number']
trialDict = {'apple': 1, 'banana': 4, 'cat': 6}

with open('Trail.csv', 'w', newline='') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow(fieldname)
    for key, value in trialDict.items():
        writer.writerow([key, value])
Output:
words,number
apple,1
banana,4
cat,6
To use DictWriter you need a list of dictionaries that have keys that are the header values:
import csv

fieldname = ['words', 'number']
trialList = [{'words': 'apple', 'number': 1},
             {'words': 'banana', 'number': 4},
             {'words': 'cat', 'number': 6}]

with open('Trail.csv', 'w', newline='') as csvFile:
    writer = csv.DictWriter(csvFile, fieldnames=fieldname)
    writer.writeheader()
    writer.writerows(trialList)
Here's my code, really simple stuff...
import csv
import json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
out = json.dumps( [ row for row in reader ] )
jsonfile.write(out)
Declare some field names; the reader uses the CSV module to read the file, and the field names to dump the file to JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on its own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not a valid JSON document; it's a stream of JSON documents!
That's okay if it's what you need, but it means that for each document you want in your output, you'll have to call json.dump.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
reader = csv.DictReader(csvfile, fieldnames)
for row in reader:
    json.dump(row, jsonfile)
    jsonfile.write('\n')
You can use Pandas DataFrame to achieve this, with the following Example:
import pandas as pd
csv_file = pd.DataFrame(pd.read_csv("path/to/file.csv", sep = ",", header = 0, index_col = False))
csv_file.to_json("/path/to/new/file.json", orient = "records", date_format = "epoch", double_precision = 10, force_ascii = True, date_unit = "ms", default_handler = None)
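If the goal is one record per line, as in the question, pandas can also emit newline-delimited JSON directly via lines=True (a sketch; the paths are placeholders):

import pandas as pd

df = pd.read_csv("path/to/file.csv")
# orient="records" plus lines=True writes one JSON object per line.
df.to_json("path/to/new/file.json", orient="records", lines=True)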
import csv
import json

file = 'csv_file_name.csv'
json_file = 'output_file_name.json'

# Read CSV file
def read_CSV(file, json_file):
    csv_rows = []
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile)
        field = reader.fieldnames
        for row in reader:
            csv_rows.extend([{field[i]: row[field[i]] for i in range(len(field))}])
        convert_write_json(csv_rows, json_file)

# Convert csv data into json
def convert_write_json(data, json_file):
    with open(json_file, "w") as f:
        f.write(json.dumps(data, sort_keys=False, indent=4, separators=(',', ': ')))  # for pretty
        # f.write(json.dumps(data))  # or compact, without indentation

read_CSV(file, json_file)
Documentation of json.dumps()
I took #SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys

for row in csv.DictReader(sys.stdin):
    json.dump(row, sys.stdout)
    sys.stdout.write('\n')
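Saved as, say, csv2json.py (a hypothetical name), it drops straight into a shell pipeline: python csv2json.py < file.csv > file.json.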
You can try this
import csvmapper

# how does the object look
mapper = csvmapper.DictMapper([
    [
        {'name': 'FirstName'},
        {'name': 'LastName'},
        {'name': 'IDNumber', 'type': 'int'},
        {'name': 'Messages'}
    ]
])

# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)
# conversion service
converter = csvmapper.JSONConverter(parser)

print converter.doConvert(pretty=True)
Edit:
Simpler approach
import csvmapper

fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
parser = csvmapper.CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)

print converter.doConvert(pretty=True)
I see this is old, but I needed the code from SingleNegationElimination; however, I had issues with data containing non-UTF-8 characters. These appeared in fields I was not overly concerned with, so I chose to ignore them. However, that took some effort. I am new to Python, so with some trial and error I got it to work. The code is a copy of SingleNegationElimination's with the extra handling of UTF-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The code below worked.
import csv, json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("Scope", "Comment", "OOS Code", "In RMF", "Code", "Status", "Name", "Sub Code", "CAT", "LOB", "Description", "Owner", "Manager", "Platform Owner")
reader = csv.DictReader(csvfile, fieldnames)

code = ''
for row in reader:
    try:
        print('+' + row['Code'])
        for key in row:
            row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
        json.dump(row, jsonfile)
        jsonfile.write('\n')
    except:
        print('-' + row['Code'])
        raise
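Worth noting: the decode/encode dance above is Python 2 specific. In Python 3, the usual equivalent is to handle the bad bytes when opening the file instead, something like this sketch:

# Python 3: silently drop undecodable bytes while reading, no per-field decode needed.
csvfile = open('file.csv', 'r', encoding='utf-8', errors='ignore')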
Add the indent parameter to json.dumps
import json

data = {'this': ['has', 'some', 'things'],
        'in': {'it': 'with', 'some': 'more'}}

print(json.dumps(data, indent=4))
Also note that you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json

filepath = "inputfile.csv"
output_path = "outputfile.json"

df = pd.read_csv(filepath)

# Create a multiline json
json_list = json.loads(df.to_json(orient="records"))

with open(output_path, 'w') as f:
    for item in json_list:
        f.write(json.dumps(item) + "\n")  # json.dumps keeps each record valid JSON
How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.
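A minimal sketch of that round trip (the file names and the dropped column are placeholders):

import pandas as pd

df = pd.read_csv("inputfile.csv")                 # CSV -> DataFrame
df = df.drop(columns=["Message"])                 # hypothetical column manipulation
df.to_json("outputfile.json", orient="records")   # DataFrame -> JSON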
As a slight improvement to #MONTYHS' answer, iterating through a tuple of fieldnames:
import csv
import json

csvfilename = 'filename.csv'
jsonfilename = csvfilename.split('.')[0] + '.json'

csvfile = open(csvfilename, 'r')
jsonfile = open(jsonfilename, 'w')
reader = csv.DictReader(csvfile)

fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')

output = []
for each in reader:
    row = {}
    for field in fieldnames:
        row[field] = each[field]
    output.append(row)

json.dump(output, jsonfile, indent=2, sort_keys=True)
import csv
import json

def read():
    noOfElem = 200  # no of data you want to import
    csv_file_name = "hashtag_donaldtrump.csv"  # csv file name
    json_file_name = "hashtag_donaldtrump.json"  # json file name

    with open(csv_file_name, mode='r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        with open(json_file_name, 'w') as json_file:
            i = 0
            json_file.write("[")
            for row in csv_reader:
                i = i + 1
                if i == noOfElem:
                    json_file.write("]")
                    return
                json_file.write(json.dumps(row))
                if i != noOfElem - 1:
                    json_file.write(",")
Change the above three parameters, and everything will be done.
import csv
import json

csvfile = csv.DictReader(open('filename.csv', 'r'))

output = []
for each in csvfile:
    row = {}
    row['FirstName'] = each['FirstName']
    row['LastName'] = each['LastName']
    row['IDNumber'] = each['IDNumber']
    row['Message'] = each['Message']
    output.append(row)

json.dump(output, open('filename.json', 'w'), indent=4, sort_keys=False)