How to make this dictionary/key python code work - python

I am trying to make a function that takes a threshold and determines which names from a csv file of song names and their lyrics that contain human names and the function
should create a csv file named outputfile that contains the number of distinct names, the name of
the song and the artist.
import csv
def findName(thresh, outputFile):
dictNames={}
with open('allNames.csv') as csvfile:
reader = csv.DictReader(csvfile, delimiter="\t")
for row in reader:
if row["name"] in dictNames:
dictNames[row["name"]] +=1
else:
dictNames[row["name"]]=1
with open(outputFile, "w", newline='') as outfile:
headers= ["song", "artist", "year"]
writer=csv.DictWriter(outfile, fieldnames=headers)
writer.writeheader()
for key, val in dictNames.items():
if val>= thresh:
writer.writerow({key: val})
csvfile.close()
outfile.close()

What's the rationale for not using Pandas here?
Not sure I fully understand your question, but I'm thinking something like:
df = pd.read_csv('allNames.csv')
#partition df after threshold
df['index'] = df.index
def partition_return(threshold, df):
df = df.loc[df['index'] >= threshold].reset_index(drop=true)
df = df[['song', 'artist', 'year]]
df['count_names_dist'] = len(df['artist'].unique())
df.to_csv('outfile.csv', index=False)

Related

How to loop arrays into csv with Python?

I am trying to loop multiple arrays and write them into csv file with under right filednames.
Here is the working code but is is bad approach and not professional:
# zip arrays
rows = zip(en,pl,tr,de)
# Write to CSV
with open('translations.csv', mode='w', newline='', encoding="utf-8") as csv_file:
fieldnames = ['English', 'Polish', 'Turkish', 'German']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for row in rows:
info = inspect.getmembers(row)
english = row[0]
polish = row[1]
turkish = row[2]
german = row[3]
writer.writerow({'English': english, 'Polish':polish, 'Turkish':turkish, 'German':german})
I tried to change the code and it is really messed up:
# zip arrays
rows = zip(en,pl,tr,de)
# Write to CSV
with open('translations.csv', mode='w', newline='', encoding="utf-8") as csv_file:
fieldnames = ['English', 'Polish', 'Turkish', 'German']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for row in rows:
for i in range(len(fieldnames)):
writer.writerow({fieldnames[i]:row[i]})
So every translation should under right filedname. Here the result of this code:
English,Polish,Turkish,German
Test Name,,,
,Test Name,,
,,Test Name,
,,,Test Name
Nazwa testu,,,
,Nazwa testu,,
,,Nazwa testu,
,,,Nazwa testu
test Adı,,,
,test Adı,,
,,test Adı,
,,,test Adı
Testname,,,
,Testname,,
,,Testname,
,,,Testname
It is really messed up. Please help me to solve this problem. Thank you!
You probably need this. Instead of
for i in range(len(fieldnames)):
writer.writerow({fieldnames[i]:row[i]})
Use:
writer.writerow(dict(zip(fieldnames, row)))

Continues columns in a csv-file with python

I have a Problem with continues writing my datas in a csv-file. I want a program that detects, if there is a csv-file for my measurements-data. If not it would be generated. When the csv-file is new generated the datas are written in the csv-file on the column after the header with the variable cycle = 0.
If the csv-file exists, the datas should be written continuously after the last line of the csv. Also the variable cycle should continue.
I have written a program that can detect if there is a file or not but with the continuously lines I have problems.
I hope someone can help me.
# mes = Array with 20 spaces filled with the Numbers 0-19
date = time.strftime("%d/%m/%Y")
def write(cycle, mes):
if os.path.exists('/home/pi/Documents/Ventilatorprüfstand_Programm/out.csv') is True: #does the out.csv existate?
print("Do something")
out = open('out.csv', 'w')
data = [[cycle, mes[0],mes[1],mes[2],mes[3],mes[4],mes[5],mes[6],mes[7],mes[8],mes[9],mes[10],mes[11],mes[12],mes[13],mes[14],mes[15],mes[16],mes[17],mes[18],mes[19], date]]
line = cycle+1
for row in data:
for line in row:
out.write('%s;' % line)
out.write('\n')
out.close()
else:
print("Do another something")
header = lookuptable.names()
out = open('out.csv', 'w')
for row in header:
for column in row:
out.write('%s' % column)
out.write('\t')
out.write('\n')
data = [[cycle, mes[0],mes[1],mes[2],mes[3],mes[4],mes[5],mes[6],mes[7],mes[8],mes[9],mes[10],mes[11],mes[12],mes[13],mes[14],mes[15],mes[16],mes[17],mes[18],mes[19], date]]
for row in data:
for column in row:
out.write('%s;' % column)
out.write('\n')
out.close()`
When opening the file with open() there is the option 'a' to append the new lines to the end:
'a' open for writing, appending to the end of the file if it exists
Here is an example using the csv Python standard library:
import csv
import os
import random
headers = ['cycle', 'date', 'speed', 'temp', 'power']
new_data = [[random.randint(0, 100) for _ in range(3)] for _ in range(2)]
date = '00/01/02'
cycle = 1
# Copy the data and include the date and the cycle number:
full_rows = [ [cycle, date, *row] for row in new_data ]
filename = 'example.csv'
# Check if the file exist, if not create the file with header
if not os.path.exists(filename):
print('creating a new file')
with open(filename, 'w') as csvfile:
csvwriter = csv.writer(csvfile, delimiter=',')
csvwriter.writerow(headers) # add the header
# Append the data to the file
with open(filename, 'a', newline='') as csvfile: # note the 'a' option
csvwriter = csv.writer(csvfile, delimiter=',')
csvwriter.writerows(full_rows)

storing data with header in csv in python

I am trying to store some data in csv file. I am able to store data but it looks like this
1901,1909,1911,1913,1917
5,5,5,4,6
the first row is year names and second row is values.
my code is as follow
import os
import csv
from collections import Counter
from csv import reader
def read_data(filename):
year = 3
with open(filename) as f:
next(f, None) # discard header
year2rel = Counter(int(line[YEAR]) for line in reader(f))
return year2rel
file_exists = os.path.isfile('mycsvfile.csv')
def store_data(value):
my_dict = read_data(filename)
print 'my_dict: ', my_dict
with open('mycsvfile.csv', 'wb') as f:
w = csv.DictWriter(f, my_dict.keys())
if not file_exists:
w.writeheader()
print 'w: ', w
w.writerow(my_dict)
if __name__ == '__main__':
filename = '/home/rob/songs_detail.csv'
a = read_songs(filename)
b = store_data(a)
but I want to include headers so that it looks like
year values
1901 5
1909 5
1911 5
1913 4
1917 6
I tried to change my code but it doesn't work for me well. I tried something like below but unsuccessful. thanks for any tip or help.
def store_data(value):
file_exists = os.path.isfile('mycsvfile.csv')
my_dict = read_data(filename)
print 'my_dict: ', my_dict
with open('mycsvfile.csv', 'wb') as csvfile:
headers = ['year', 'values']
writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n',fieldnames=headers)
if not file_exists:
writer.writeheader() # file doesn't exist yet, write a header
writer.writerow({'year': my_dict[0], 'values': my_dict[1]})
this give me the following error.
'ValueError: dict contains fields not in fieldnames: 'values', 'year'
'
I solve my question and the code is as follow, might be helpful for someone.
def store_data(value):
file_exists = os.path.isfile('mycsvfile.csv')
my_dict = read_data(filename)
print 'my_dict: ', my_dict
print 'type of: ', type(my_dict)
with open('mycsvfile.csv', 'wb') as csvfile:
headers = ['year', 'values']
writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n',fieldnames=headers)
if not file_exists:
writer.writeheader()
for k, v in my_dict.items():
print(k, v)
writer.writerow({'year': k, 'values': v})
Actually I was missing to add loop in the proper way, so it results always in the csv file without headers. now it works perfectly fine according to my requirements.

How to read multiple records from a CSV file?

I have a csv file, l__cyc.csv, that contains this:
trip_id, time, O_lat, O_lng, D_lat, D_lng
130041910101,1300,51.5841153671,0.134444590094,51.5718053872,0.134878021928
130041910102,1335,51.5718053872,0.134878021928,51.5786920389,0.180940040247
130041910103,1600,51.5786920389,0.180940040247,51.5841153671,0.134444590094
130043110201,1500,51.5712712038,0.138532882664,51.5334949484,0.130489470325
130043110202,1730,51.5334949484,0.130489470325,51.5712712038,0.138532882664
And I am trying to pull out separate values, using:
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
origincoords = ['{O_lat},{O_lng}'.format(**row) for row in reader]
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
trip_id = ['{trip_id}'.format(**row) for row in reader]
with open('./l__cyc.csv', 'rU') as csvfile:
reader = csv.DictReader(csvfile)
destinationcoords = ['{D_lat},{D_lng}'.format(**row) for row in reader]
Where origincoords should be 51.5841153671, 0.134444590094,
trip_id should be 130041910101, and destinationcoords should be
51.5718053872, 0.134878021928.
However, I get a KeyError:
KeyError: 'O_lat'
Is this something simple and there's something fundamental I'm misunderstanding?
You just avoid the space between headers
trip_id,time,O_lat,O_lng,D_lat,D_lng
OR
reader = csv.DictReader(csvfile, skipinitialspace=True)
First things first, you get the key error, because the key does not exist in your dictionary.
Next, I would advise against running through the file 3 times, when you can do it a single time!
For me it worked, when I added the fieldnames to the reader.
import csv
from cStringIO import StringIO
src = """trip_id, time, O_lat, O_lng, D_lat, D_lng
130041910101,1300,51.5841153671,0.134444590094,51.5718053872,0.134878021928
130041910102,1335,51.5718053872,0.134878021928,51.5786920389,0.180940040247
130041910103,1600,51.5786920389,0.180940040247,51.5841153671,0.134444590094
130043110201,1500,51.5712712038,0.138532882664,51.5334949484,0.130489470325
130043110202,1730,51.5334949484,0.130489470325,51.5712712038,0.138532882664
"""
f = StringIO(src)
# determine the fieldnames
fieldnames= "trip_id,time,O_lat,O_lng,D_lat,D_lng".split(",")
# read the file
reader = csv.DictReader(f, fieldnames=fieldnames)
# storage
origincoords = []
trip_id = []
destinationcoords = []
# iterate the rows
for row in reader:
origincoords.append('{O_lat},{O_lng}'.format(**row))
trip_id.append('{trip_id}'.format(**row))
destinationcoords.append('{D_lat},{D_lng}'.format(**row))
# pop the header off the list
origincoords.pop(0)
trip_id.pop(0)
destinationcoords.pop(0)
# show the result
print origincoords
print trip_id
print destinationcoords
I don't really know what you are trying to achieve there, but I'm sure there is a better way of doing it!

How to convert CSV file to multiline JSON?

Here's my code, really simple stuff...
import csv
import json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
out = json.dumps( [ row for row in reader ] )
jsonfile.write(out)
Declare some field names, the reader uses CSV to read the file, and the filed names to dump the file to a JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on it's own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not valid json document,; it's a stream of json documents!
That's okay, if its what you need, but that means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
for row in reader:
json.dump(row, jsonfile)
jsonfile.write('\n')
You can use Pandas DataFrame to achieve this, with the following Example:
import pandas as pd
csv_file = pd.DataFrame(pd.read_csv("path/to/file.csv", sep = ",", header = 0, index_col = False))
csv_file.to_json("/path/to/new/file.json", orient = "records", date_format = "epoch", double_precision = 10, force_ascii = True, date_unit = "ms", default_handler = None)
import csv
import json
file = 'csv_file_name.csv'
json_file = 'output_file_name.json'
#Read CSV File
def read_CSV(file, json_file):
csv_rows = []
with open(file) as csvfile:
reader = csv.DictReader(csvfile)
field = reader.fieldnames
for row in reader:
csv_rows.extend([{field[i]:row[field[i]] for i in range(len(field))}])
convert_write_json(csv_rows, json_file)
#Convert csv data into json
def convert_write_json(data, json_file):
with open(json_file, "w") as f:
f.write(json.dumps(data, sort_keys=False, indent=4, separators=(',', ': '))) #for pretty
f.write(json.dumps(data))
read_CSV(file,json_file)
Documentation of json.dumps()
I took #SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys
for row in csv.DictReader(sys.stdin):
json.dump(row, sys.stdout)
sys.stdout.write('\n')
You can try this
import csvmapper
# how does the object look
mapper = csvmapper.DictMapper([
[
{ 'name' : 'FirstName'},
{ 'name' : 'LastName' },
{ 'name' : 'IDNumber', 'type':'int' },
{ 'name' : 'Messages' }
]
])
# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)
# conversion service
converter = csvmapper.JSONConverter(parser)
print converter.doConvert(pretty=True)
Edit:
Simpler approach
import csvmapper
fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
parser = CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)
print converter.doConvert(pretty=True)
I see this is old but I needed the code from SingleNegationElimination however I had issue with the data containing non utf-8 characters. These appeared in fields I was not overly concerned with so I chose to ignore them. However that took some effort. I am new to python so with some trial and error I got it to work. The code is a copy of SingleNegationElimination with the extra handling of utf-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The below code worked.
import csv, json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("Scope","Comment","OOS Code","In RMF","Code","Status","Name","Sub Code","CAT","LOB","Description","Owner","Manager","Platform Owner")
reader = csv.DictReader(csvfile , fieldnames)
code = ''
for row in reader:
try:
print('+' + row['Code'])
for key in row:
row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
json.dump(row, jsonfile)
jsonfile.write('\n')
except:
print('-' + row['Code'])
raise
Add the indent parameter to json.dumps
data = {'this': ['has', 'some', 'things'],
'in': {'it': 'with', 'some': 'more'}}
print(json.dumps(data, indent=4))
Also note that, you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json
filepath = "inputfile.csv"
output_path = "outputfile.json"
df = pd.read_csv(filepath)
# Create a multiline json
json_list = json.loads(df.to_json(orient = "records"))
with open(output_path, 'w') as f:
for item in json_list:
f.write("%s\n" % item)
How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.
As slight improvement to #MONTYHS answer, iterating through a tup of fieldnames:
import csv
import json
csvfilename = 'filename.csv'
jsonfilename = csvfilename.split('.')[0] + '.json'
csvfile = open(csvfilename, 'r')
jsonfile = open(jsonfilename, 'w')
reader = csv.DictReader(csvfile)
fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')
output = []
for each in reader:
row = {}
for field in fieldnames:
row[field] = each[field]
output.append(row)
json.dump(output, jsonfile, indent=2, sort_keys=True)
def read():
noOfElem = 200 # no of data you want to import
csv_file_name = "hashtag_donaldtrump.csv" # csv file name
json_file_name = "hashtag_donaldtrump.json" # json file name
with open(csv_file_name, mode='r') as csv_file:
csv_reader = csv.DictReader(csv_file)
with open(json_file_name, 'w') as json_file:
i = 0
json_file.write("[")
for row in csv_reader:
i = i + 1
if i == noOfElem:
json_file.write("]")
return
json_file.write(json.dumps(row))
if i != noOfElem - 1:
json_file.write(",")
Change the above three parameter, everything will be done.
import csv
import json
csvfile = csv.DictReader('filename.csv', 'r'))
output =[]
for each in csvfile:
row ={}
row['FirstName'] = each['FirstName']
row['LastName'] = each['LastName']
row['IDNumber'] = each ['IDNumber']
row['Message'] = each['Message']
output.append(row)
json.dump(output,open('filename.json','w'),indent=4,sort_keys=False)

Categories

Resources