How to update value and combine columns in csv file using Python - python

I have a csv file which contains columns like below. I want to change the date format to 2014-1-10 and combine it with time in a new column. I would like to do this without pandas ...
Date |Time
1/10/2014|0:09:31
1/10/2014|0:10:29
The result should look like this:
Date |Time |DateTime
1/10/2014|0:09:31|2014-1-10 0:09:31
1/10/2014|0:10:29|2014-1-10 0:10:29
I tried replace, matrix [][], etc. but somehow nothing works well so far. Will appreciate your help!!

Easiest way is to use PETL:
import datetime

def add_datetime(in_path='my.csv', out_path='mynew.csv'):
    """Add a DateTime column (``YYYY-mm-dd H:M:S``) built from the Date and
    Time columns of *in_path*, writing the result to *out_path* with petl.
    """
    import petl as etl  # third-party; local import keeps the module loadable without it
    t = etl.fromcsv(in_path)
    t = etl.addfield(
        t, 'DateTime',
        # csv fields are strings, so parse them; the original called
        # datetime.combine (doesn't exist — it's datetime.datetime.combine)
        # and passed raw strings, which combine() rejects anyway.
        lambda row: datetime.datetime.strptime(
            row[0] + ' ' + row[1], '%m/%d/%Y %H:%M:%S'))
    etl.tocsv(t, out_path)

Using only Python built-in modules:
import csv
import datetime
import os

def add_datetime_column(inFilePath="C:\\Temp\\SO\\test.csv", outFilePath=None):
    """Rewrite a '|'-delimited csv in place, appending a combined
    ``YYYY-mm-dd H:M:S`` DateTime column built from the Date and Time columns.

    The header row (containing "Date ") is passed through unchanged.
    """
    if outFilePath is None:
        outFilePath = inFilePath + ".tmp"
    # Text mode with newline='' is the Python 3 csv idiom; the original's
    # "wb" mode raises TypeError under Python 3.
    with open(inFilePath, "r", newline='') as inFile, \
         open(outFilePath, "w", newline='') as outFile:
        reader = csv.reader(inFile, delimiter='|')
        writer = csv.writer(outFile, delimiter='|', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        for row in reader:
            if "Date " in row:
                writer.writerow(row)
                continue
            # Sample 1/10/2014 -> 2014-1-10 means the input is month/day/year;
            # the original's '%d/%m/%Y' parsed it day-first.
            newDate = datetime.datetime.strptime(row[0], '%m/%d/%Y').strftime('%Y-%m-%d')
            row.append(newDate + " " + row[1])
            writer.writerow(row)
    os.replace(outFilePath, inFilePath)  # atomic swap instead of remove+rename

if __name__ == "__main__":
    add_datetime_column()

Related

How to make this dictionary/key python code work

I am trying to make a function that takes a threshold and determines which names from a csv file of song names and their lyrics that contain human names and the function
should create a csv file named outputfile that contains the number of distinct names, the name of
the song and the artist.
import csv

def findName(thresh, outputFile):
    """Write song/artist/year for every name occurring at least *thresh*
    times in the tab-delimited 'allNames.csv'.

    Counts occurrences per "name" column and remembers the first row seen
    for each name so the song/artist/year fields can actually be emitted.
    """
    dictNames = {}
    firstRow = {}
    with open('allNames.csv', newline='') as csvfile:
        reader = csv.DictReader(csvfile, delimiter="\t")
        for row in reader:
            dictNames[row["name"]] = dictNames.get(row["name"], 0) + 1
            firstRow.setdefault(row["name"], row)
    with open(outputFile, "w", newline='') as outfile:
        headers = ["song", "artist", "year"]
        # extrasaction='ignore' drops columns (e.g. "name") that are not in
        # the header; the original wrote {key: val}, which DictWriter rejects
        # with a ValueError because the key is not a fieldname.
        writer = csv.DictWriter(outfile, fieldnames=headers, extrasaction='ignore')
        writer.writeheader()
        for key, val in dictNames.items():
            if val >= thresh:
                writer.writerow(firstRow[key])
    # No explicit close() needed: the with-blocks already closed both files.
What's the rationale for not using Pandas here?
Not sure I fully understand your question, but I'm thinking something like:
import pandas as pd

def partition_return(threshold, df):
    """Keep rows whose positional index >= *threshold*, retain the
    song/artist/year columns, add the distinct-artist count, and write the
    result to 'outfile.csv'. Returns the resulting frame.
    """
    df = df.copy()  # don't mutate the caller's frame
    df['index'] = df.index
    # Original had `drop=true` (NameError — Python spells it True) and a
    # missing closing quote in 'year]].
    df = df.loc[df['index'] >= threshold].reset_index(drop=True)
    df = df[['song', 'artist', 'year']]
    df['count_names_dist'] = len(df['artist'].unique())
    df.to_csv('outfile.csv', index=False)
    return df

if __name__ == "__main__":
    partition_return(0, pd.read_csv('allNames.csv'))

Python CSV get particular row data

This is csv file
name,country,code
Georgina,Saint Helena,ET
Brooks,Austria,LR
Rosaline,Peru,DZ
How to get a particular row data without looping the whole csv file?
Looking for following syntax:
If searchName exist in csv, extract the data
import csv

def find_country(searchName='Brooks', path='name.csv'):
    """Print 'name >> country' for the first row matching *searchName* and
    return that row; return None when no row matches.

    The original script kept scanning after a hit and ran at import time.
    """
    with open(path, 'r', newline='') as file:
        for row in csv.DictReader(file):
            if row['name'] == searchName:
                print(row['name'] + ' >> ' + row['country'])
                return row
    return None

if __name__ == "__main__":
    find_country()
Thanks
Update panda solution for those who interested
import pandas as pd

def print_record(name='Brooks', path='a.csv'):
    """Look *name* up in the csv's 'name' column.

    Prints the matching country (or 'No records') and returns the matching
    sub-frame, or None when there is no match — the original script printed
    but returned nothing and ran on import.
    """
    df = pd.read_csv(path)
    select_row = df.loc[df['name'] == name]
    if select_row.empty:
        print('No records')
        return None
    print('Print Record')
    print(select_row.country)
    return select_row

if __name__ == "__main__":
    print_record()
Get first instance
import re

def get_first(search_name='Brooks', path='name.csv'):
    """Return the first csv line starting with *search_name*, split on commas,
    or None when absent.

    The original fragment crashed with AttributeError on a miss (calling
    .group() on None) and interpolated the raw name into the pattern.
    """
    with open(path, 'r') as file:
        # re.escape prevents regex metacharacters in the name from changing
        # the pattern's meaning.
        output = re.search(f'{re.escape(search_name)}.*', file.read())
    if output is None:
        return None
    row = output.group().split(',')
    print(row[0], '>>', row[1])
    return row

if __name__ == "__main__":
    get_first()
Get all instances
import re

def get_all(search_name='Brooks', path='name.csv'):
    """Print and return every comma-split csv line that starts with
    *search_name* (the original printed but returned nothing and did not
    escape the name before building the pattern).
    """
    with open(path, 'r') as file:
        output = re.findall(f'{re.escape(search_name)}.*', file.read())
    rows = []
    for row in output:
        items = row.split(',')
        print(items[0], '>>', items[1])
        rows.append(items)
    return rows

if __name__ == "__main__":
    get_all()
Using DataFrames
import pandas as pd

def lookup(search_name='Brooks', path='name.csv'):
    """Return the first matching row (as a Series) for *search_name* in the
    'name' column, printing 'name >> country'; None when absent.

    The original called .iloc[0] unconditionally, which raises IndexError
    when nothing matches.
    """
    df = pd.read_csv(path)
    matches = df[df.name == search_name]
    if matches.empty:
        return None
    output = matches.iloc[0]
    print(output['name'], '>>', output['country'])
    return output

if __name__ == "__main__":
    lookup()
You could try using pandas and make your life easier, try something like this :
import pandas as pd

def check_cell(df, row_idx, col_idx):
    """Return the truthiness of cell (row_idx, col_idx), or None when the
    position is out of range.

    The original outline had `else` without a colon (syntax error) and
    hard-coded `df.iloc[5, 6]`, which is out of bounds for the 3-column
    sample csv.
    """
    if row_idx >= len(df.index) or col_idx >= len(df.columns):
        return None
    if df.iloc[row_idx, col_idx]:
        # execute condition
        return True
    else:
        # execute another condition
        return False
I have given you an outline; you can try to use this and come up with a solution for your issue.
Although dataframe seems to be the best option, if you treat the csv as a simple text file, This should help you:
import re

def find_rows(searchName='Brooks', path='name.csv'):
    """Return (and print) every full csv line beginning with *searchName*,
    treating the csv as plain text with a MULTILINE regex.
    """
    with open(path, 'r') as f:
        text = f.read()
    # re.escape keeps metacharacters in the name from altering the pattern.
    items = re.findall(f"{re.escape(searchName)}.*$", text, re.MULTILINE)
    print(items)
    return items

if __name__ == "__main__":
    find_rows()
Output:
['Brooks,Austria,LR']

Find duplicates in a column, then add values in adjacent column

I have a csv file that has a one word title and a description that is always a number.
My current code extracts just the title and description to another csv file and then converts the csv into an excel file.
import csv

def extract_columns(in_path="Johnny_Test-punch_list.csv", out_path='data.csv'):
    """Copy columns 1 (title) and 3 (description) of *in_path* into
    *out_path* as 'title,description' lines.

    The original script never closed either file and ran at import time.
    """
    with open(in_path, newline='') as f, open(out_path, "w") as m:
        for row in csv.reader(f):
            m.write(row[1] + "," + row[3] + "\n")

if __name__ == "__main__":
    import output  # project-local helper that converts data.csv to xlsx
    extract_columns()
    output.toxlsx()
How can I look for matching Titles and then add the descriptions of the titles?
import csv

def sum_by_title(in_path="Johnny_Test-punch_list.csv", out_path='data.csv'):
    """Sum the numeric description (column 3) per title (column 1) and write
    'title,total' lines to *out_path*; returns the totals dict.

    Fixes in the original: dict.iteritems() is Python-2-only; `+=` on the
    raw csv strings concatenated text ("10"+"20" -> "1020") instead of
    adding the numbers; neither file was ever closed.
    """
    totals = {}
    with open(in_path, newline='') as f:
        for row in csv.reader(f):
            totals[row[1]] = totals.get(row[1], 0.0) + float(row[3])
    with open(out_path, "w") as m:
        for title, value in totals.items():
            m.write('{},{}\n'.format(title, value))
    return totals

if __name__ == "__main__":
    sum_by_title()
If I understood you correctly, you need to write in a single line as a string.
can you try with below code:
# Emit one output line per input row: column 1, a comma, then column 3.
for row in csv_f:
    m.write(f"{row[1]},{row[3]}\n")

format the different types of date in CSV using python 2.6

I have formatted my csv file and now it looks like this:
100|1000|newyork|2015/10/04|2015/10/04 16:23:37.040000|
101|1001|london|2015/10/04|2015/10/04 16:23:37.040000|
102|1002|california|2015/10/04|2015/10/04 16:23:37.041000|
103|1003|Delhi|2015/10/04|2015/10/04 16:23:37.041000|
104|1004|Mumbai|2015/10/04|2015/10/04 16:23:37.041000|
105|1005|Islamabad|2015/10/04|2015/10/04 16:23:37.041000|
106|1006|karachi|2015/10/04|2015/10/04 16:23:37.041000|
Now I have two different format of dates which I want to convert it into 'YYmmdd' format.
Can any one suggest best way to achieve this.
Note: The file name should not change, and for your reference this is how I am achieving the formatted file shown above:
import csv
import os

def reformat(inputfile=r'c:\Working\HK.txt', outputfile=None):
    """Rewrite *inputfile* in place: re-emit each comma-csv row with '|' as
    the delimiter, replacing any literal '|' inside fields with a space, and
    write a blank separator row after each record (as the original did).

    Fixes: contextlib.nested was removed in Python 3; 'rb'/'wb' modes break
    the csv module under Python 3 (use text mode + newline=''); os.replace
    is an atomic swap instead of remove-then-rename.
    """
    if outputfile is None:
        outputfile = inputfile + '.tmp'
    with open(inputfile, newline='') as inf, \
         open(outputfile, 'w', newline='') as outf:
        reader = csv.reader(inf)
        writer = csv.writer(outf, delimiter='|')
        for row in reader:
            writer.writerow([col.replace('|', ' ') for col in row])
            writer.writerow([])  # blank separator row, as in the original
    os.replace(outputfile, inputfile)

if __name__ == "__main__":
    reformat()
I think this should work. You can tweak the date format anyway you like by changing the strftime.
#!/usr/bin/env python3
import datetime

def reformat_parts(line):
    """Return *line* ('|'-separated) with fields 3 and 4 rewritten as
    YYYYmmdd. Field 3 is a bare Y/m/d date; field 4 carries a time with
    microseconds, which is dropped.

    Uses datetime.strptime instead of the original's third-party
    dateutil.parser, and Python 3 print() instead of the py2 statement.
    """
    parts = line.split("|")
    tmp_date = datetime.datetime.strptime(parts[3], '%Y/%m/%d')
    parts[3] = tmp_date.strftime('%Y%m%d')
    tmp_date = datetime.datetime.strptime(parts[4], '%Y/%m/%d %H:%M:%S.%f')
    parts[4] = tmp_date.strftime('%Y%m%d')
    return "|".join(parts)

if __name__ == "__main__":
    lines = ['100|1000|newyork|2015/10/04|2015/10/04 16:23:37.040000|',
             '101|1001|london|2015/10/04|2015/10/04 16:23:37.040000|',
             '102|1002|california|2015/10/04|2015/10/04 16:23:37.041000|',
             '103|1003|Delhi|2015/10/04|2015/10/04 16:23:37.041000|',
             '104|1004|Mumbai|2015/10/04|2015/10/04 16:23:37.041000|',
             '105|1005|Islamabad|2015/10/04|2015/10/04 16:23:37.041000|',
             '106|1006|karachi|2015/10/04|2015/10/04 16:23:37.041000|']
    for line in lines:
        print(reformat_parts(line))
if you have Python 2.6+ you could do it just in python
import re

def strip_times(in_path='data', out_path='data_out'):
    """Rewrite Y/m/d dates as Ymd and drop the trailing time portion of each
    '|'-separated line, writing the result to *out_path*.

    Fixes in the original: the pattern '(|\\d{4})' had a stray empty
    alternative; the replacement r'\\1\\3\\2' produced 20150410 for
    2015/10/04, not the claimed 20151004; the '.' before the microseconds
    was unescaped; and print() added a second newline because the line read
    from the file still carried its own.
    """
    with open(in_path, 'r') as f, open(out_path, 'w') as f_out:
        for line in f:
            line = re.sub(r'(\d{4})/(\d{2})/(\d{2})', r'\1\2\3', line)
            line = re.sub(r'\s+\d{2}:\d{2}:\d{2}\.\d+', '', line)
            print(line.rstrip('\n'), file=f_out)

if __name__ == "__main__":
    strip_times()
this is what i got in my data_out
100|1000|newyork|20151004|20151004|
101|1001|london|20151004|20151004|
102|1002|california|20151004|20151004|
103|1003|Delhi|20151004|20151004|
104|1004|Mumbai|20151004|20151004|
105|1005|Islamabad|20151004|20151004|
106|1006|karachi|20151004|20151004|

How to convert CSV file to multiline JSON?

Here's my code, really simple stuff...
import csv
import json

def csv_to_json_array(csv_path='file.csv', json_path='file.json'):
    """Dump the whole csv as a single JSON array (one line) into *json_path*
    and return the JSON string. Context managers close both files — the
    original leaked both handles.
    """
    fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
    with open(csv_path, 'r', newline='') as csvfile, \
         open(json_path, 'w') as jsonfile:
        reader = csv.DictReader(csvfile, fieldnames)
        out = json.dumps([row for row in reader])
        jsonfile.write(out)
    return out

if __name__ == "__main__":
    csv_to_json_array()
Declare some field names; the reader uses csv to read the file, and the field names to dump the file to a JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on its own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not a valid JSON document; it's a stream of JSON documents!
That's okay, if its what you need, but that means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json

def csv_to_json_lines(csv_path='file.csv', json_path='file.json'):
    """Write one JSON document per csv record (JSON Lines) to *json_path*.
    Same behavior as the original, but with the files closed via `with`
    and the logic reusable instead of running at import time.
    """
    fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
    with open(csv_path, 'r', newline='') as csvfile, \
         open(json_path, 'w') as jsonfile:
        for row in csv.DictReader(csvfile, fieldnames):
            json.dump(row, jsonfile)
            jsonfile.write('\n')

if __name__ == "__main__":
    csv_to_json_lines()
You can use Pandas DataFrame to achieve this, with the following Example:
import pandas as pd

def csv_to_json_records(csv_path="path/to/file.csv",
                        json_path="/path/to/new/file.json"):
    """Convert a csv to a JSON array of records via pandas.

    pd.read_csv already returns a DataFrame, so the original's extra
    pd.DataFrame(...) wrapper around it was redundant.
    """
    csv_file = pd.read_csv(csv_path, sep=",", header=0, index_col=False)
    csv_file.to_json(json_path, orient="records", date_format="epoch",
                     double_precision=10, force_ascii=True, date_unit="ms",
                     default_handler=None)

if __name__ == "__main__":
    csv_to_json_records()
import csv
import json

def read_CSV(file, json_file):
    """Read every record of csv *file* as a dict and write them as one
    pretty-printed JSON array to *json_file*.
    """
    with open(file, newline='') as csvfile:
        # DictReader rows already map fieldname -> value; the original's
        # extend([{field[i]: row[field[i]] ...}]) rebuilt that by hand.
        csv_rows = [dict(row) for row in csv.DictReader(csvfile)]
    convert_write_json(csv_rows, json_file)

def convert_write_json(data, json_file):
    """Serialize *data* once, pretty-printed. The original wrote the pretty
    JSON and then appended a second compact dump, producing a file that was
    not valid JSON.
    """
    with open(json_file, "w") as f:
        json.dump(data, f, sort_keys=False, indent=4, separators=(',', ': '))

if __name__ == "__main__":
    read_CSV('csv_file_name.csv', 'output_file_name.json')
Documentation of json.dumps()
I took #SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys

def csv_to_jsonlines(infile=None, outfile=None):
    """Stream csv records from *infile* (default stdin) to *outfile*
    (default stdout) as JSON Lines — one document per row.

    Parameterizing the streams keeps pipeline use (`python x.py < in.csv`)
    working while avoiding the original's read of sys.stdin at import time.
    """
    infile = sys.stdin if infile is None else infile
    outfile = sys.stdout if outfile is None else outfile
    for row in csv.DictReader(infile):
        json.dump(row, outfile)
        outfile.write('\n')

if __name__ == "__main__":
    csv_to_jsonlines()
You can try this
def convert_with_mapper(path='sample.csv'):
    """Convert *path* to pretty-printed JSON with the third-party csvmapper
    package, describing the object shape column by column.
    """
    import csvmapper  # third-party; local import keeps the module importable without it
    # how does the object look
    mapper = csvmapper.DictMapper([
        [
            {'name': 'FirstName'},
            {'name': 'LastName'},
            {'name': 'IDNumber', 'type': 'int'},
            {'name': 'Messages'},
        ]
    ])
    # parser instance
    parser = csvmapper.CSVParser(path, mapper)
    # conversion service
    converter = csvmapper.JSONConverter(parser)
    # `print converter.doConvert(...)` was Python 2 statement syntax.
    print(converter.doConvert(pretty=True))

if __name__ == "__main__":
    convert_with_mapper()
Edit:
Simpler approach
def convert_with_field_mapper(path='sample.csv'):
    """Simpler csvmapper variant: map columns by name only."""
    import csvmapper  # third-party; local import keeps the module importable without it
    fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
    # The original called bare CSVParser (NameError) — it lives in the
    # csvmapper module; print was also Python 2 statement syntax.
    parser = csvmapper.CSVParser(path, csvmapper.FieldMapper(fields))
    converter = csvmapper.JSONConverter(parser)
    print(converter.doConvert(pretty=True))

if __name__ == "__main__":
    convert_with_field_mapper()
I see this is old but I needed the code from SingleNegationElimination however I had issue with the data containing non utf-8 characters. These appeared in fields I was not overly concerned with so I chose to ignore them. However that took some effort. I am new to python so with some trial and error I got it to work. The code is a copy of SingleNegationElimination with the extra handling of utf-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The below code worked.
import csv
import json

def convert_utf8(csv_path='file.csv', json_path='file.json'):
    """Write each csv record as one JSON document per line, tolerating
    malformed UTF-8 in the input.

    Fixes in the original: str.decode() is Python-2-only — under Python 3
    the lenient decode belongs on the open() call (errors='ignore'); the
    unused `code` variable is gone; files are closed via `with`; and only
    Exception (not a bare except) is caught before re-raising.
    """
    fieldnames = ("Scope","Comment","OOS Code","In RMF","Code","Status","Name","Sub Code","CAT","LOB","Description","Owner","Manager","Platform Owner")
    with open(csv_path, 'r', encoding='utf-8', errors='ignore', newline='') as csvfile, \
         open(json_path, 'w') as jsonfile:
        reader = csv.DictReader(csvfile, fieldnames)
        for row in reader:
            try:
                print('+' + row['Code'])
                json.dump(row, jsonfile)
                jsonfile.write('\n')
            except Exception:
                print('-' + row['Code'])
                raise

if __name__ == "__main__":
    convert_utf8()
Add the indent parameter to json.dumps
# Sample nested structure to demonstrate pretty-printing.
data = {
    'this': ['has', 'some', 'things'],
    'in': {'it': 'with', 'some': 'more'},
}
# indent=4 makes json.dumps spread the document over multiple lines.
print(json.dumps(data, indent=4))
Also note that, you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json

def df_to_json_lines(filepath="inputfile.csv", output_path="outputfile.json"):
    """Write each csv row as one JSON object per line (JSON Lines)."""
    df = pd.read_csv(filepath)
    # Create a multiline json
    json_list = json.loads(df.to_json(orient="records"))
    with open(output_path, 'w') as f:
        for item in json_list:
            # json.dumps, not '"%s" % item': str(dict) uses single quotes
            # and Python literals, which is not valid JSON.
            f.write(json.dumps(item) + "\n")

if __name__ == "__main__":
    df_to_json_lines()
How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.
As a slight improvement to #MONTYHS's answer, iterating through a tuple of fieldnames:
import csv
import json

def csv_to_json(csvfilename='filename.csv'):
    """Convert *csvfilename* (header row + the four known fields) into a
    pretty-printed JSON array next to it; returns the json filename.

    Fixes: both files are closed via `with` (the original leaked them) and
    the json name uses rsplit so a dotted path like 'a.b/c.csv' still maps
    to 'a.b/c.json' rather than 'a.json'.
    """
    jsonfilename = csvfilename.rsplit('.', 1)[0] + '.json'
    fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')
    with open(csvfilename, 'r', newline='') as csvfile, \
         open(jsonfilename, 'w') as jsonfile:
        reader = csv.DictReader(csvfile)
        # Keep only the known fields from each record, in tuple order.
        output = [{field: each[field] for field in fieldnames}
                  for each in reader]
        json.dump(output, jsonfile, indent=2, sort_keys=True)
    return jsonfilename

if __name__ == "__main__":
    csv_to_json()
import csv
import json

def read(noOfElem=200, csv_file_name="hashtag_donaldtrump.csv",
         json_file_name="hashtag_donaldtrump.json"):
    """Convert at most *noOfElem* csv records into a JSON array file.

    Fixes in the original: when the csv held fewer than noOfElem rows the
    closing ']' was never written (invalid JSON) and the comma bookkeeping
    could leave a trailing comma; the manual counter is replaced by
    enumerate and commas are written *before* every element but the first,
    which cannot dangle.
    """
    with open(csv_file_name, newline='') as csv_file, \
         open(json_file_name, 'w') as json_file:
        reader = csv.DictReader(csv_file)
        json_file.write("[")
        for i, row in enumerate(reader):
            if i == noOfElem:
                break
            if i:
                json_file.write(",")
            json_file.write(json.dumps(row))
        json_file.write("]")
import csv
import json

def convert_csv_to_json(csv_path='filename.csv', json_path='filename.json'):
    """Read the four known fields from each csv record and dump them all as
    a pretty-printed JSON array; returns the list of row dicts.

    The original line `csv.DictReader('filename.csv', 'r'))` had an extra
    closing parenthesis (syntax error) and handed DictReader a filename
    string — it needs an open file object, and 'r' landed in the fieldnames
    parameter.
    """
    output = []
    with open(csv_path, newline='') as f:
        for each in csv.DictReader(f):
            row = {
                'FirstName': each['FirstName'],
                'LastName': each['LastName'],
                'IDNumber': each['IDNumber'],
                'Message': each['Message'],
            }
            output.append(row)
    with open(json_path, 'w') as out:
        json.dump(output, out, indent=4, sort_keys=False)
    return output

if __name__ == "__main__":
    convert_csv_to_json()

Categories

Resources