I am transforming JSON like data to CSV and having a few issues.
The code is here:
import json
import csv
# NOTE(review): indentation was lost in this listing; the statements are kept
# exactly as posted, so the nesting described below is an assumption.
def parse_file(inputed_file):
# NOTE(review): opens `input_file`, which is not the parameter name
# (`inputed_file`) -- confirm which name was intended.
with open(input_file, 'r') as inputed_file:
content = inputed_file.readlines()
split_file = open('test.csv', 'w')
for line in content:
# The text before the first tab on each line is parsed as JSON.
lines = line.split('\t')
data = json.loads(lines[0])
# Presumably inside the loop: a new DictWriter is built and the header is
# written once per input line, which is the repeated-header symptom.
writer = csv.DictWriter(split_file, fieldnames = ["title", "firstname"], delimiter = ',')
writer.writeheader()
The problem is that this adds a header before every data row; I want the header to appear only once. The data rows should then follow below the header, written with:
writer.writerow(data)
I have looked at this and tried it but failed: How can I convert JSON to CSV?.
Create the DictWriter outside the loop, and just call writer.writeheader() there. Then call writer.writerow() inside the loop.
def parse_file(inputed_file):
    """Convert a file of tab-separated JSON records into ``test.csv``.

    Each non-blank line of the input is split on tabs and its first field is
    parsed as a JSON object, which is written as one CSV row.  The header
    row is written exactly once, before any data rows.

    Args:
        inputed_file: Path of the input file to read.

    Raises:
        ValueError: If the first field of a line is not valid JSON, or if a
            record contains keys other than "title" and "firstname".
    """
    # newline='' lets the csv module control the line endings it writes.
    with open(inputed_file, 'r') as source, \
            open('test.csv', 'w', newline='') as split_file:
        writer = csv.DictWriter(
            split_file, fieldnames=["title", "firstname"], delimiter=',')
        writer.writeheader()
        for line in source:
            if not line.strip():
                continue  # a blank line carries no record to parse
            data = json.loads(line.split('\t')[0])
            writer.writerow(data)
Hi I'm trying to finish this small piece of code for modifying csv files, I've got this far with some help:
edit... some more info.
Basically what I’m looking to do is make some small changes to the csv file depending on the project and parent issue in JIRA. Python will then make the changes to the csv file before it is then read into JIRA - that’s the second part of the program I’ve not even really looked at yet.
I’m only looking to change the BOX-123 type cells and leave the blank ones blank.
But the idea of the program is that I can use it to make some small changes to a template which will then automatically create some issues in JIRA.
import os
import csv
# NOTE(review): indentation was lost in this listing; the statements are kept
# exactly as posted, so any nesting mentioned below is an assumption.
# Values to be written into the CSV template.
project = 'Dudgeon'
parent = 'BOX-111'
rows = (1,1007)
current = os.getcwd()
filename = 'test.csv'
# The input file is looked up in the current working directory.
filepath = os.path.join(os.getcwd(), filename)
#print(current)
#print(filename)
print(filepath)
# First pass: skip the header line, then print column 16 of each row.
with open(filepath, 'r') as csvfile:
readCSV = csv.reader(csvfile)
next(readCSV, None)
for row in readCSV:
print(row[16])
# NOTE(review): if this runs after the loop above has exhausted readCSV the
# count is 0 -- confirm where this statement was meant to sit.
row_count =sum(1 for row in readCSV)
print(row_count)
# Second pass: copy the header line, then overwrite column 16 with `project`.
with open(filepath, 'r') as infile, open('out.csv', 'w') as outfile:
outfile.write(infile.readline()) # write out the 1st line
for line in infile:
cols = line.strip().split(',')
cols[16] = project
outfile.write(','.join(cols) + '\n')
# Third pass (the part the question is about).
# NOTE(review): `row` is a str here, so `row % 2` is string formatting rather
# than a parity test; `cols` is left over from the previous pass and
# `outfile.write()` is called without an argument.
with open('out.csv', 'r') as infile, open('out1.csv', 'w') as outfile:
for row in infile:
if row % 2 != 0:
cols [15] = parent
outfile.write()
Any help really appreciated.
You want to use the row's index when comparing to 0. Use enumerate():
# Copy out.csv to out1.csv, replacing column 15 with ``parent`` on every
# odd-indexed line (index 0 is the first line of the file).
with open('out.csv', 'r') as infile, open('out1.csv', 'w') as outfile:
    for rowidx, row in enumerate(infile):
        cols = row.strip().split(',')
        if rowidx % 2 != 0:
            cols[15] = parent
        # file.write() needs a str, so the columns are joined back into a line.
        outfile.write(','.join(cols) + '\n')
You really should be using the csv module here, though. Untested but should get you started.
# Same transformation using the csv module, so quoted fields that contain
# commas are parsed and written correctly.
# newline='' is what the csv module expects for files it reads or writes.
with open('out.csv', 'r', newline='') as infile, \
        open('out1.csv', 'w', newline='') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)
    for rowidx, row in enumerate(reader):
        if rowidx % 2 != 0:
            row[15] = parent
        # The writer method is writerow(); there is no write_row().
        writer.writerow(row)
A friend helped me last night and this is what they came up with:
# Pass 1: copy the header line unchanged, then set column 16 of every
# following line to ``project``.
with open(filepath, 'r') as src, open('out.csv', 'w') as dst:
    dst.write(src.readline())
    for raw in src:
        fields = raw.strip().split(',')
        fields[16] = project
        dst.write('%s\n' % ','.join(fields))

# Pass 2: copy the header line unchanged, then set column 15 to ``parent``
# on the 1st, 3rd, 5th, ... line after the header.
with open('out.csv', 'r') as src, open('out1.csv', 'w') as dst:
    dst.write(src.readline())
    for number, raw in enumerate(src, start=1):
        fields = raw.strip().split(',')
        if number % 2 == 1:
            fields[15] = parent
        dst.write('%s\n' % ','.join(fields))
I am working on a simple program that opens a CSV file, reads certain rows, and writes them to a new file. I also want to cut those rows, i.e. remove them from the original CSV. How do I do that? This is what I have tried:
import csv
# Collect every row whose third column contains 'yepme' and write those rows
# to output.csv; 1.csv itself is only read here, never modified.
# NOTE(review): indentation was lost in this listing.
f = open('1.csv')
csv_f = csv.reader(f)
content_value = []
for row in csv_f:
if 'yepme' in row[2]:
content_value.append(row)
# NOTE(review): binary mode 'wb' is the Python 2 convention for csv output;
# on Python 3 csv.writer needs a text-mode file -- confirm the target version.
g = open('output.csv', 'wb')
wr = csv.writer(g, dialect='excel')
wr.writerows(content_value)
I am editing and found the answer:
import csv
# Split 1.csv in two: rows whose third column contains 'yepme' go to
# output.csv, every other row goes to 2.csv.
content_value = []
old_value = []
# newline='' is what the csv module expects for files it reads or writes.
with open('1.csv', newline='') as f:
    for row in csv.reader(f):
        if 'yepme' in row[2]:
            content_value.append(row)
        else:
            old_value.append(row)

# ``with`` guarantees that both outputs are flushed and closed.
with open('output.csv', 'w', newline='') as g:
    csv.writer(g, dialect='excel').writerows(content_value)
with open('2.csv', 'w', newline='') as h:
    csv.writer(h, dialect='excel').writerows(old_value)
A similar problem is mentioned in this question.
Short solution: Write two files: One with the extracted lines, one with the leftovers.
Coded solution:
import csv
# Move every row whose third column contains 'yepme' out of 1.csv and into
# output.csv; 1.csv is then rewritten with only the remaining rows.
new_content = []
old_content = []
# newline='' is what the csv module expects for files it reads or writes.
with open('1.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        if 'yepme' in row[2]:
            new_content.append(row)
        else:
            old_content.append(row)

with open('output.csv', 'w', newline='') as f:
    wr = csv.writer(f, dialect='excel')
    wr.writerows(new_content)

# 1.csv is reopened for writing only after it has been read in full above.
with open('1.csv', 'w', newline='') as f:
    wr = csv.writer(f, dialect='excel')
    # writerows() belongs to the csv writer, not to the file object.
    wr.writerows(old_content)
I never used csv, but you should get the idea. If your csv-file is very huge, you should probably read and write line-by-line to avoid memory issues.
Here's my code, really simple stuff...
import csv
import json
# Read file.csv with fixed column names and write all records to file.json.
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
# Because fieldnames are passed explicitly, the first CSV line is read as data.
fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
# A single dumps() call over the whole list produces one JSON array with no
# line breaks between records.
out = json.dumps( [ row for row in reader ] )
jsonfile.write(out)
I declare some field names, the reader uses the csv module to read the file, and the field names are used to dump the records in JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on its own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not a valid JSON document; it's a stream of JSON documents!
That's okay, if it's what you need, but it means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json
# Convert file.csv to a stream of JSON documents: one object per line.
# Because fieldnames are passed explicitly, the first CSV line is read as data.
fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
# ``with`` closes both files, so the output is flushed even if a row fails.
with open('file.csv', 'r', newline='') as csvfile, \
        open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        json.dump(row, jsonfile)
        # The separator is not part of any document, so it is written by hand.
        jsonfile.write('\n')
You can use Pandas DataFrame to achieve this, with the following Example:
import pandas as pd
# read_csv already returns a DataFrame, so no pd.DataFrame(...) wrapper is needed.
csv_file = pd.read_csv("path/to/file.csv", sep=",", header=0, index_col=False)
# lines=True (allowed with orient="records") writes one JSON object per line
# instead of a single array on one long line.
csv_file.to_json(
    "/path/to/new/file.json",
    orient="records",
    lines=True,
    date_format="epoch",
    double_precision=10,
    force_ascii=True,
    date_unit="ms",
    default_handler=None,
)
import csv
import json
file = 'csv_file_name.csv'
json_file = 'output_file_name.json'
#Read CSV File
def read_CSV(file, json_file):
    """Load every record of the CSV at *file* and pass them to convert_write_json.

    The first CSV line supplies the column names.  Each record becomes a dict
    keyed by those names, and the collected list is written to *json_file*.
    """
    records = []
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile)
        columns = reader.fieldnames
        for row in reader:
            records.append({name: row[name] for name in columns})
        convert_write_json(records, json_file)
#Convert csv data into json
def convert_write_json(data, json_file, pretty=True):
    """Serialise *data* as JSON and write it to the path *json_file*.

    Args:
        data: JSON-serialisable object (here, the list of row dicts).
        json_file: Path of the output file; it is overwritten if it exists.
        pretty: When true (the default) indent the output by four spaces;
            when false write the compact single-line form.
    """
    with open(json_file, "w") as f:
        # Write exactly one document: emitting the pretty and the compact
        # form back to back leaves a file that is not valid JSON.
        if pretty:
            json.dump(data, f, sort_keys=False, indent=4, separators=(',', ': '))
        else:
            json.dump(data, f)
read_CSV(file,json_file)
Documentation of json.dumps()
I took #SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys
# Emit one JSON object per CSV record read from standard input; the first
# input line supplies the keys.
for record in csv.DictReader(sys.stdin):
    sys.stdout.write(json.dumps(record))
    sys.stdout.write('\n')
You can try this
import csvmapper
# how the output object looks: one entry per CSV column
mapper = csvmapper.DictMapper([
    [
        {'name': 'FirstName'},
        {'name': 'LastName'},
        {'name': 'IDNumber', 'type': 'int'},
        {'name': 'Messages'},
    ]
])
# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)
# conversion service
converter = csvmapper.JSONConverter(parser)
# Parenthesised so the statement is valid on Python 3 as well as Python 2.
print(converter.doConvert(pretty=True))
Edit:
Simpler approach
import csvmapper
fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
# Only the csvmapper module is imported, so CSVParser must be qualified.
parser = csvmapper.CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)
# Parenthesised so the statement is valid on Python 3 as well as Python 2.
print(converter.doConvert(pretty=True))
I see this is old but I needed the code from SingleNegationElimination however I had issue with the data containing non utf-8 characters. These appeared in fields I was not overly concerned with so I chose to ignore them. However that took some effort. I am new to python so with some trial and error I got it to work. The code is a copy of SingleNegationElimination with the extra handling of utf-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The below code worked.
import csv, json
# Convert file.csv to one JSON object per line in file.json, dropping from
# every field any bytes that are not valid UTF-8.
# NOTE(review): indentation was lost in this listing; the nesting is assumed.
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("Scope","Comment","OOS Code","In RMF","Code","Status","Name","Sub Code","CAT","LOB","Description","Owner","Manager","Platform Owner")
reader = csv.DictReader(csvfile , fieldnames)
code = ''
for row in reader:
try:
# '+' marks a row that is about to be processed.
print('+' + row['Code'])
for key in row:
# NOTE(review): str.decode exists only on Python 2 byte strings, so this
# line assumes Python 2 -- confirm before running on Python 3.
row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
json.dump(row, jsonfile)
jsonfile.write('\n')
except:
# '-' marks the row that failed; the exception is then re-raised.
print('-' + row['Code'])
raise
Add the indent parameter to json.dumps
# Sample nested structure used to demonstrate pretty-printing.
data = {
    'this': ['has', 'some', 'things'],
    'in': {'it': 'with', 'some': 'more'},
}
# indent=4 puts each element on its own line, four spaces per nesting level.
print(json.dumps(data, indent=4))
Also note that, you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json
filepath = "inputfile.csv"
output_path = "outputfile.json"
df = pd.read_csv(filepath)
# Create a multiline json: serialise with pandas, then re-parse the result
# into a list holding one dict per record.
json_list = json.loads(df.to_json(orient="records"))
with open(output_path, 'w') as f:
    for item in json_list:
        # json.dumps, not "%s" % item: the latter writes the Python dict repr
        # (single quotes, None/True), which is not valid JSON.
        f.write(json.dumps(item) + "\n")
How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.
As slight improvement to #MONTYHS answer, iterating through a tup of fieldnames:
import csv
import json
# Convert the listed columns of a CSV file into a JSON array written next to
# it under the same base name.
csvfilename = 'filename.csv'
# rsplit drops only the final extension, so "a.b.csv" becomes "a.b.json".
jsonfilename = csvfilename.rsplit('.', 1)[0] + '.json'
fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')
# newline='' is what the csv module expects for files it reads.
with open(csvfilename, 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    # Keep only the wanted columns of each record, in fieldnames order.
    output = [{field: each[field] for field in fieldnames} for each in reader]
# ``with`` closes the output so the JSON is fully flushed to disk.
with open(jsonfilename, 'w') as jsonfile:
    json.dump(output, jsonfile, indent=2, sort_keys=True)
def read(csv_file_name="hashtag_donaldtrump.csv",
         json_file_name="hashtag_donaldtrump.json",
         noOfElem=200):
    """Write the first *noOfElem* records of a CSV file as one JSON array.

    Args:
        csv_file_name: Path of the CSV file; its first line supplies the keys.
        json_file_name: Path of the JSON file to create or overwrite.
        noOfElem: Maximum number of records to export.

    The array is always closed with "]" and never has a trailing comma, even
    when the CSV holds fewer than *noOfElem* records.
    """
    with open(csv_file_name, mode='r', newline='') as csv_file, \
            open(json_file_name, 'w') as json_file:
        csv_reader = csv.DictReader(csv_file)
        json_file.write("[")
        for index, row in enumerate(csv_reader):
            if index >= noOfElem:
                break
            # The separator goes before every record except the first, so the
            # final record is never followed by a comma.
            if index:
                json_file.write(",")
            json_file.write(json.dumps(row))
        json_file.write("]")
Change the three values at the top of `read` (the row limit, the CSV file name and the JSON file name) to suit your data; nothing else needs editing.
import csv
import json
# Copy four named columns of filename.csv into a JSON array in filename.json.
# DictReader needs an open file (an iterable of lines), not a filename string.
with open('filename.csv', 'r', newline='') as source:
    csvfile = csv.DictReader(source)
    output = [
        {
            'FirstName': each['FirstName'],
            'LastName': each['LastName'],
            'IDNumber': each['IDNumber'],
            'Message': each['Message'],
        }
        for each in csvfile
    ]
# ``with`` closes the output so the JSON is fully flushed to disk.
with open('filename.json', 'w') as target:
    json.dump(output, target, indent=4, sort_keys=False)
I must be missing something, but I don't get it. I have a csv, it has 1200 fields. I'm only interested in 30. How do you get that to work? I can read/write the whole shebang, which is ok, but i'd really like to just write out the 30. I have a list of the fieldnames and I'm kinda hacking the header.
How would I translate below to use DictWriter/Reader?
# For each file matching P12*.csv under raw_path, copy its rows to a
# same-named file under out_path with one extra leading column.
# NOTE(review): indentation was lost in this listing; the nesting is assumed.
for file in glob.glob( os.path.join(raw_path, 'P12*.csv') ):
# NOTE(review): binary mode 'rb' is the Python 2 convention for csv input.
fileReader = csv.reader(open(file, 'rb'))
fileLength = len(file)
# The three characters that sit just before the trailing '.csv' of the path.
fileGeom = file[fileLength-7:fileLength-4]
# NOTE(review): TableValues is defined outside this listing; presumably it
# maps that three-character code to a table name -- verify.
table = TableValues[fileGeom]
filename = file.split(os.sep)[-1]
with open(out_path + filename, "w") as fileout:
for line in fileReader:
writer = csv.writer(fileout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
# list.insert mutates `line` in place and returns None, so `outline` is
# always None; the row that gets written is the mutated `line`.
if 'ID' in line:
outline = line.insert(0,"geometryTable")
else:
outline = line.insert(0,table) #"%s,%s\n" % (line, table)
writer.writerow(line)
Here's an example of using DictWriter to write out only fields you care about. I'll leave the porting work to you:
import csv
# Columns to keep, in output order.
headers = ['a', 'b', 'd', 'g']
# Text mode with newline='' is what the csv module expects on Python 3;
# binary 'rb'/'wb' handles make the reader and writer raise.
with open('in.csv', newline='') as _in, \
        open('out.csv', 'w', newline='') as out:
    reader = csv.DictReader(_in)
    # extrasaction='ignore' silently drops every key that is not in headers.
    writer = csv.DictWriter(out, headers, extrasaction='ignore')
    writer.writeheader()
    for line in reader:
        writer.writerow(line)
in.csv
a,b,c,d,e,f,g,h
1,2,3,4,5,6,7,8
2,3,4,5,6,7,8,9
Result (out.csv)
a,b,d,g
1,2,4,7
2,3,5,8