Related
I have a txt data. it looks as follows
time pos
0.02 1
0.1 2
...
and so on, so each line is separated with a space. I need to convert it into a CSV file, like
time,pos
0.02,1
0.1,2
0.15,3
How can I do it with Python? This is what I have tried:
import csv

time = []
pos = []


def get_data(filename):
    """Read a space-delimited data file into the module-level time/pos lists.

    The first line is assumed to be a header and is skipped.
    """
    with open(filename, 'r', newline='') as csvfile:
        # The file is space-delimited, so tell csv.reader so; with the
        # default comma delimiter each line comes back as a single field
        # and row[1] raises IndexError.
        csvFileReader = csv.reader(csvfile, delimiter=' ')
        next(csvFileReader)  # skip the header row
        for row in csvFileReader:
            time.append(row[0])
            pos.append(row[1])
    return
# Copy the file line by line, turning every space into a comma.
with open(filename) as infile, open('outfile.csv', 'w') as outfile:
    for record in infile:
        outfile.write(record.replace(' ', ','))
From here:
import csv

# Parse the space-delimited file and show each parsed row as a list.
with open(filename, newline='') as f:
    space_reader = csv.reader(f, delimiter=' ')
    for parsed_row in space_reader:
        print(parsed_row)
For writing just use default options and it would save file with comma as a delimiter.
try:
import pandas as pd

# Read the whitespace-delimited text file; the first line supplies the
# column headings.  The original had an unbalanced ')' (SyntaxError) and
# used to_excel() to write Excel bytes into a file named *.csv.
data = pd.read_csv(filename, sep=r'\s+')
data.to_csv('csv_file.csv', index=False)
You can use this:
import csv

# Unused by the function below; kept so callers that import them still work.
time = []
pos = []


def get_data(filename):
    """Convert a space-delimited .txt file to a comma-delimited .csv.

    The output path is *filename* with its '.txt' suffix replaced by '.csv'.
    """
    out_name = filename.replace('.txt', '.csv')
    # Distinct names for the two file objects: the original reused
    # 'csvfile' for both, shadowing the input handle.  newline='' is the
    # documented way to open files for the csv module (avoids blank lines
    # on Windows).
    with open(filename, 'r', newline='') as src, \
            open(out_name, 'w', newline='') as dst:
        writer = csv.writer(dst, delimiter=',')
        for row in csv.reader(src, delimiter=' '):
            writer.writerow(row)
I have to clean up some files before doing the import into Django's DB. I get an error, as per the title, saying that the index is out of range. My files have many rows; some of them have 10,000 rows or more.
Traceback (most recent call last):
File "/Users/cohen/my-python-project/venv/ofac/ofac_project/ofac_sdn/import_save/cleanup_sdn.py", line 9, in <module>
replaced7 = row[7].replace('-0-', newinteger)
IndexError: list index out of range
How can I fix this error? Although the error pops up, the first file is cleaned up, but the rest of them are not.
Thank you in advance!
Please find below my code:
import csv, os, sys

# Replacement values for the '-0-' placeholder used by the DB export.
newstring = "null"
newinteger = str(0)
newstring1 = "null"
newstring2 = "null"
newstring3 = "null"

with open('sdn.csv', 'r') as file1, open('new_sdn.csv', 'w', newline='') as file2:
    reader = csv.reader(file1, delimiter=',')
    writer = csv.writer(file2, delimiter=',')
    for row in reader:
        # The export appends a short garbage row (['\x1a']); indexing it
        # raised "IndexError: list index out of range".  Skip any row
        # lacking the full 12 columns.
        if len(row) < 12:
            continue
        # Columns 7 and 8 hold integers, the others hold strings.
        for col in (7, 8):
            row[col] = row[col].replace('-0-', newinteger)
        for col in (2, 4, 5, 6, 9, 10, 11):
            row[col] = row[col].replace('-0-', newstring)
        writer.writerow(row)
with open('add.csv', 'r') as file3, open('new_add.csv', 'w', newline='') as file4:
    reader1 = csv.reader(file3, delimiter=',')
    writer1 = csv.writer(file4, delimiter=',')
    for line in reader1:
        # Skip short/garbage rows (e.g. the trailing '\x1a' export marker)
        # instead of raising IndexError on line[5].
        if len(line) < 6:
            continue
        for col in (2, 3, 4, 5):
            line[col] = line[col].replace('-0-', newstring1)
        writer1.writerow(line)
with open('alt.csv', 'r') as file5, open('new_alt.csv', 'w', newline='') as file6:
    reader2 = csv.reader(file5, delimiter=',')
    writer2 = csv.writer(file6, delimiter=',')
    for rand in reader2:
        # Guard against short/garbage rows so rand[4] cannot raise IndexError.
        if len(rand) < 5:
            continue
        for col in (2, 3, 4):
            rand[col] = rand[col].replace('-0-', newstring2)
        writer2.writerow(rand)
with open('sdn_comments.csv', 'r') as file7, open('new_sdn_comments.csv', 'w', newline='') as file8:
    reader3 = csv.reader(file7, delimiter=',')
    writer3 = csv.writer(file8, delimiter=',')
    for linie in reader3:
        # Guard against short/garbage rows so linie[1] cannot raise IndexError.
        if len(linie) < 2:
            continue
        linie[1] = linie[1].replace('-0-', newstring3)
        writer3.writerow(linie)
I have split the cleanup code into 4 parts, and in all 4 I get this error on different columns.
I found my issue. On my last row I have a code from the export of the database. That code looks like this: ['\x1a']. Therefore I wrote the code below in order to delete the last row.
def _drop_last_line(path):
    """Rewrite *path* without its final line (the stray '\\x1a' export marker)."""
    # 'with' guarantees the handles are closed even if a write fails; the
    # original leaked three pairs of open files on error.
    with open(path) as src:
        lines = src.readlines()
    with open(path, 'w') as dst:
        dst.writelines(lines[:-1])


# The database export appends a bogus last row to each file; strip it.
for _csv_name in ("sdn.csv", "add.csv", "alt.csv"):
    _drop_last_line(_csv_name)
I have the following list of numbers: ['Number', 1,2,3,4]
If I have the following CSV file:
`Name`
`First`
`Second`
`Third`
`Fourth`
How do I add my list of numbers to it and make it look like this:
`Name Number`
`First 1`
`Second 2`
`Third 3`
`Fourth 4`
You can use fileinput.input with inplace=True to modify the original file:
import fileinput
import sys

l = ['Number', 1, 2, 3, 4]

# inplace=True redirects stdout into the file, so writing here rewrites
# in.csv line by line with the extra column appended.
for row_no, row in enumerate(fileinput.input("in.csv", inplace=True)):
    sys.stdout.write("{} {}\n".format(row.rstrip(), l[row_no]))
Input:
Name
First
Second
Third
Fourth
Output:
Name Number
First 1
Second 2
Third 3
Fourth 4
Or write to a tempfile and move with shutil.move to replace the original file:
import csv  # the original snippet used csv.reader/writer without importing csv
from shutil import move
from tempfile import NamedTemporaryFile

l = ['Number', 1, 2, 3, 4]

# Write the augmented rows to a temp file, then atomically replace the
# original with shutil.move.
with open('in.csv', newline='') as csvfile, \
        NamedTemporaryFile("w", dir=".", delete=False, newline='') as temp:
    r = csv.reader(csvfile)
    wr = csv.writer(temp, delimiter=" ")
    for row, new in zip(r, l):
        wr.writerow(row + [new])
move(temp.name, "in.csv")
Not an elegant way but It works:
#!/usr/bin/python
import csv
import sys
def csv_to_dict(csv_file_path):
    """Read *csv_file_path* and return its records as a list of dicts.

    The delimiter (tab, comma or semicolon) is auto-detected with
    csv.Sniffer on the first 10000 characters.
    """
    # Text mode with newline='' replaces the Python-2 'rb' mode, which
    # breaks the csv module on Python 3; 'with' replaces the manual close.
    with open(csv_file_path, 'r', newline='') as csv_file:
        sniffdialect = csv.Sniffer().sniff(csv_file.read(10000), delimiters='\t,;')
        csv_file.seek(0)
        # DictReader reads the header itself; the extra seek(0) calls in
        # the original were redundant.
        return list(csv.DictReader(csv_file, dialect=sniffdialect))
def dict_to_csv(csv_file_path, dict_data):
    """Write *dict_data* (a non-empty list of dicts) to *csv_file_path*.

    The header row comes from the keys of the first record; every record
    is written with its values in header order.
    """
    headers = list(dict_data[0].keys())
    # 'w' + newline='' replaces the Python-2 'wb' mode, which raises
    # TypeError with the csv module on Python 3.
    with open(csv_file_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, dialect='excel')
        writer.writerow(headers)
        for dat in dict_data:
            writer.writerow([dat[field] for field in headers])
if __name__ == '__main__':
    org_path = sys.argv[1]
    new_path = sys.argv[2]
    your_array = ['Number', 1, 2, 3, 4]

    org_csv = csv_to_dict(org_path)
    new_data = []
    # enumerate() replaces org_csv.index(line), which was O(n) per row and
    # returns the wrong position whenever two rows are identical.
    for i, line in enumerate(org_csv):
        new_data.append({'Name': line['Name'],
                         your_array[0]: your_array[i + 1]})
    if new_data:
        dict_to_csv(new_path, new_data)
Hope that will help!
import csv

# Text mode with newline='' replaces the Python-2 'rb'/'wb' modes, which
# break the csv module on Python 3.
with open('existing_file.csv', newline='') as infile:
    your_list = list(csv.reader(infile))

list2 = ['Number', 1, 2, 3, 4]
# Append each new value to its row.  Zipping the raw row lists with the
# values would make csv write the row's repr (e.g. "['Name']") as one cell
# instead of the row's actual fields.
zipped = [row + [extra] for row, extra in zip(your_list, list2)]

with open("test.csv", "w", newline='') as outfile:
    csv.writer(outfile).writerows(zipped)
I am working on a simple program to open a file, read certain rows, and print them into another new file, but I want to cut them and remove them from the earlier CSV. How do I do that? This is what I have tried.
import csv

# Collect the rows whose third column mentions 'yepme'.  'with' closes the
# file; the original left both handles open, and its 'wb' mode raises
# TypeError with csv.writer on Python 3.
with open('1.csv', newline='') as f:
    content_value = [row for row in csv.reader(f) if 'yepme' in row[2]]

with open('output.csv', 'w', newline='') as g:
    csv.writer(g, dialect='excel').writerows(content_value)
I am editing and found the answer:
import csv

# Split the rows into matches ('yepme' in the third column) and leftovers.
content_value = []
old_value = []
with open('1.csv', newline='') as f:
    for row in csv.reader(f):
        if 'yepme' in row[2]:
            content_value.append(row)
        else:
            old_value.append(row)

# 'w' + newline='' replaces the Python-2 'wb' mode; 'with' closes the
# handles the original leaked.
with open('output.csv', 'w', newline='') as g:
    csv.writer(g, dialect='excel').writerows(content_value)

with open('2.csv', 'w', newline='') as h:
    csv.writer(h, dialect='excel').writerows(old_value)
A similar problem is mentioned in this question.
Short solution: Write two files: One with the extracted lines, one with the leftovers.
Coded solution:
import csv

# Read once, partitioning into extracted rows and leftovers.
with open('1.csv', 'r', newline='') as f:
    new_content = []
    old_content = []
    for row in csv.reader(f):
        if 'yepme' in row[2]:
            new_content.append(row)
        else:
            old_content.append(row)

# 'w' + newline='' replaces 'wb', which breaks csv.writer on Python 3.
with open('output.csv', 'w', newline='') as f:
    csv.writer(f, dialect='excel').writerows(new_content)

# Rewrite the source file with only the leftover rows ("cut" semantics).
# The original called f.writerows(...) on the raw file object, which has
# no writerows method and raised AttributeError.
with open('1.csv', 'w', newline='') as f:
    csv.writer(f, dialect='excel').writerows(old_content)
I never used csv, but you should get the idea. If your csv-file is very huge, you should probably read and write line-by-line to avoid memory issues.
Here's my code, really simple stuff...
import csv
import json

fieldnames = ("FirstName", "LastName", "IDNumber", "Message")

# 'with' guarantees both files are closed/flushed; the original left the
# handles open for the life of the process.
with open('file.csv', 'r', newline='') as csvfile, \
        open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile, fieldnames)
    jsonfile.write(json.dumps([row for row in reader]))
Declare some field names; the reader uses csv to read the file, and the field names to dump the file to a JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on its own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not a valid JSON document; it's a stream of JSON documents!
That's okay, if it's what you need, but that means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json

fieldnames = ("FirstName", "LastName", "IDNumber", "Message")

# One JSON document per line: dump each row, then write the separating
# newline ourselves.  'with' closes both files; the original leaked them.
with open('file.csv', 'r', newline='') as csvfile, \
        open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        json.dump(row, jsonfile)
        jsonfile.write('\n')
You can use Pandas DataFrame to achieve this, with the following Example:
import pandas as pd

# read_csv already returns a DataFrame, so wrapping it in pd.DataFrame(...)
# was redundant.
csv_file = pd.read_csv("path/to/file.csv", sep=",", header=0, index_col=False)
csv_file.to_json("/path/to/new/file.json", orient="records",
                 date_format="epoch", double_precision=10, force_ascii=True,
                 date_unit="ms", default_handler=None)
import csv
import json
# Input CSV path and output JSON path for the conversion below.
file = 'csv_file_name.csv'
json_file = 'output_file_name.json'
# Read CSV File
def read_CSV(file, json_file):
    """Read *file* as CSV and hand its rows to convert_write_json."""
    with open(file, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # Each DictReader row already maps fieldname -> value; the original
        # rebuilt that same mapping by hand with an index loop.
        csv_rows = [dict(row) for row in reader]
    convert_write_json(csv_rows, json_file)
# Convert csv data into json
def convert_write_json(data, json_file):
    """Serialize *data* to *json_file* as one pretty-printed JSON document.

    The original wrote the document twice (pretty-printed, then compact),
    concatenating two JSON documents and producing an invalid file.
    """
    with open(json_file, "w") as f:
        json.dump(data, f, sort_keys=False, indent=4, separators=(',', ': '))
read_CSV(file,json_file)
Documentation of json.dumps()
I took #SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys

# Stream: read CSV records from stdin, emit one JSON document per line to
# stdout, so the script composes in a shell pipeline.
for record in csv.DictReader(sys.stdin):
    sys.stdout.write(json.dumps(record) + '\n')
You can try this
import csvmapper  # third-party: pip install csvmapper

# how does the object look
mapper = csvmapper.DictMapper([
    [
        {'name': 'FirstName'},
        {'name': 'LastName'},
        {'name': 'IDNumber', 'type': 'int'},
        {'name': 'Messages'},
    ]
])

# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)
# conversion service
converter = csvmapper.JSONConverter(parser)
# print(...) replaces the Python-2-only 'print x' statement, which is a
# SyntaxError on Python 3.
print(converter.doConvert(pretty=True))
Edit:
Simpler approach
import csvmapper

fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
# CSVParser lives in the csvmapper module; the bare name in the original
# would raise NameError.
parser = csvmapper.CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)
# print(...) replaces the Python-2-only print statement.
print(converter.doConvert(pretty=True))
I see this is old but I needed the code from SingleNegationElimination however I had issue with the data containing non utf-8 characters. These appeared in fields I was not overly concerned with so I chose to ignore them. However that took some effort. I am new to python so with some trial and error I got it to work. The code is a copy of SingleNegationElimination with the extra handling of utf-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The below code worked.
import csv, json

# NOTE(review): this is Python 2 code -- row[key].decode(...) below assumes
# byte strings; under Python 3 the csv module yields text, which has no
# .decode method.  Port with care.
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("Scope","Comment","OOS Code","In RMF","Code","Status","Name","Sub Code","CAT","LOB","Description","Owner","Manager","Platform Owner")
reader = csv.DictReader(csvfile , fieldnames)
code = ''  # presumably a leftover; never read below -- TODO confirm
for row in reader:
    try:
        # '+' marks a record that is about to be processed.
        print('+' + row['Code'])
        for key in row:
            # Drop any bytes that are not valid UTF-8, keep the rest.
            row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
        json.dump(row, jsonfile)
        jsonfile.write('\n')
    except:
        # '-' marks the record that failed; re-raise so the failure surfaces.
        print('-' + row['Code'])
        raise
Add the indent parameter to json.dumps
# Passing indent= to json.dumps pretty-prints the document, one key per line.
data = {
    'this': ['has', 'some', 'things'],
    'in': {'it': 'with', 'some': 'more'},
}
pretty = json.dumps(data, indent=4)
print(pretty)
Also note that, you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json

filepath = "inputfile.csv"
output_path = "outputfile.json"

df = pd.read_csv(filepath)

# Create a multiline json: one JSON document per line (NDJSON).
json_list = json.loads(df.to_json(orient="records"))
with open(output_path, 'w') as f:
    for item in json_list:
        # json.dumps, not "%s": interpolating a dict writes its Python repr
        # (single quotes), which is not valid JSON.
        f.write(json.dumps(item) + "\n")
How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.
As slight improvement to #MONTYHS answer, iterating through a tup of fieldnames:
import csv
import json

csvfilename = 'filename.csv'
jsonfilename = csvfilename.split('.')[0] + '.json'

fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')

# 'with' closes the handles; the original left both files open.
with open(csvfilename, 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    # Keep only the whitelisted fields from each record.
    output = [{field: each[field] for field in fieldnames} for each in reader]

with open(jsonfilename, 'w') as jsonfile:
    json.dump(output, jsonfile, indent=2, sort_keys=True)
def read():
    """Copy the first noOfElem CSV records into a JSON array file.

    Bug fixes vs. the original: the closing ']' was only written when the
    CSV held at least noOfElem rows, and a ',' was emitted after the last
    element, so the output was almost always invalid JSON.
    """
    noOfElem = 200  # no of data you want to import
    csv_file_name = "hashtag_donaldtrump.csv"  # csv file name
    json_file_name = "hashtag_donaldtrump.json"  # json file name
    with open(csv_file_name, mode='r') as csv_file, \
            open(json_file_name, 'w') as json_file:
        csv_reader = csv.DictReader(csv_file)
        json_file.write("[")
        for i, row in enumerate(csv_reader):
            if i >= noOfElem:
                break
            if i:
                # Separator goes *before* each element after the first, so
                # no trailing comma can appear before the closing bracket.
                json_file.write(",")
            json_file.write(json.dumps(row))
        json_file.write("]")
Change the above three parameter, everything will be done.
import csv
import json

# DictReader needs an open file object; the original passed the filename
# string (iterating the characters of the name, not the rows of the file)
# and also had an unbalanced ')' -- a SyntaxError.
with open('filename.csv', newline='') as f:
    reader = csv.DictReader(f)
    output = []
    for each in reader:
        row = {}
        row['FirstName'] = each['FirstName']
        row['LastName'] = each['LastName']
        row['IDNumber'] = each['IDNumber']
        row['Message'] = each['Message']
        output.append(row)

with open('filename.json', 'w') as out:
    json.dump(output, out, indent=4, sort_keys=False)