Edit CSV file in Python which reads values from another JSON file

I want to edit a CSV file so that it picks up a value from another of my JSON files, in Python 2.7.
My CSV file is a.csv:
a,b,c,d
,10,12,14
,11,14,15
My JSON file is a.json:
{"a":20}
I want the column 'a' to be matched against the JSON file: if there is a match, the value should be copied from the JSON into my CSV file, so that the final output of my CSV file looks like this:
a,b,c,d
20,10,12,14
20,11,14,15
What I have tried so far:
fileCSV = open('a.csv', 'a')
fileJSON = open('a.json', 'r')
jsonData = fileJSON.json()
for k in range(jsonData):
    for i in csvRow:
        for j in jsonData.keys():
            if i == j:
                if self.count == 0:
                    self.data = jsonData[j]
                    self.count = 1
                else:
                    self.data = self.data + "," + jsonData[j]
                    self.count = 0
        fileCSV.write(self.data)
        fileCSV.write("\n")
    k += 1
fileCSV.close()
print("File created successfully")
I will be really thankful if anyone can help me with this.
Please ignore any syntax and indentation errors.
Thank you.

Some basic string parsing will get you there. I wrote a script which works for the simple scenario you describe.
Check whether this solves your problem:
import json
from collections import OrderedDict

def list_to_csv(listdat):
    csv = ""
    for val in listdat:
        csv = csv + "," + str(val)
    return csv[1:]

lines = []
csvfile = "csvfile.csv"
outcsvfile = "outcsvfile.csv"
jsonfile = "jsonfile.json"

with open(csvfile, encoding='UTF-8') as a_file:
    for line in a_file:
        lines.append(line.strip())

columns = lines[0].split(",")
data = lines[1:]

whole_data = []
for row in data:
    fields = row.split(",")
    i = 0
    rowData = OrderedDict()
    for column in columns:
        rowData[columns[i]] = fields[i]
        i += 1
    whole_data.append(rowData)

with open(jsonfile) as json_file:
    jsondata = json.load(json_file)

keys = list(jsondata.keys())
for key in keys:
    value = jsondata[key]
    for each_row in whole_data:
        each_row[key] = value

with open(outcsvfile, mode='w', encoding='UTF-8') as b_file:
    b_file.write(list_to_csv(columns) + '\n')
    for row_data in whole_data:
        row_list = []
        for ecolumn in columns:
            row_list.append(row_data.get(ecolumn))
        b_file.write(list_to_csv(row_list) + '\n')
CSV output is not written to the source file but to a different file.
The output file is also always truncated and written, hence the 'w' mode.

I would recommend using the csv.DictReader and csv.DictWriter classes, which read and write rows as Python dicts. This makes it easy to overwrite the dict values with the ones you read in from the JSON file.
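A minimal sketch of that approach (written to run on the asker's Python 2.7; the output file name a_out.csv is a placeholder, and on Python 3 you would also pass newline='' to both open() calls):
import csv
import json

# Load the JSON once; its keys are column names, e.g. {"a": 20}.
with open('a.json') as json_file:
    json_data = json.load(json_file)

with open('a.csv') as src, open('a_out.csv', 'w') as dst:
    reader = csv.DictReader(src)
    writer = csv.DictWriter(dst, fieldnames=reader.fieldnames)
    writer.writeheader()
    for row in reader:
        for key, value in json_data.items():
            if key in row:        # a CSV column name matches a JSON key
                row[key] = value  # overwrite the cell with the JSON value
        writer.writerow(row)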

Related

Pick line in csv file to pull data

I have code that works for me in a Cinema 4D project. It is able to read four different data points and send outputs to the main project. Currently it reads all of the lines of the CSV file, and I would like to pick one line and pull the data from that line only.
import c4d
import csv

def main():
    path = Spreadsheet  # Spreadsheet is an input filename path
    with open(path, 'rb') as csv_file:
        readed = csv.DictReader(csv_file, delimiter=',')
        for i, row in enumerate(readed):
            try:
                Xcord = float(row["hc_x"])
                Ycord = float(row["hc_y"])
                Langle = float(row["launch_angle"])
                Lspeed = float(row["launch_speed"])
            except:
                print "Error while reading - {}".format(row)
                continue
            global Output1
            global Output2
            global Output3
            global Output4
            Output1 = Xcord
            Output2 = Ycord
            Output3 = Langle
            Output4 = Lspeed
This is about the first thing I have tried to code. So thanks.
In Python 3, csv.DictReader requires that you open the file in text mode with newline="" in order for it to parse the file correctly (the 'rb' mode in your code is Python 2 style):
with open(path, 'r', newline="") as csv_file:
    readed = csv.DictReader(csv_file, delimiter=',')
You also don't have any condition to stop reading the file.
row_to_stop = 5
for i, row in enumerate(readed):
    if i == row_to_stop:
        Xcord = float(row["hc_x"])
        Ycord = float(row["hc_y"])
        Langle = float(row["launch_angle"])
        Lspeed = float(row["launch_speed"])
        break
If you only care about one line, don't look up and type cast values until you reach the line you care about.
I would like to pick one line and pull the data from that line only
The code below will return a specific line (by index). You will then have to split it and grab the data, as shown after the function.
def get_interesting_line(file_name: str, idx: int):
    cnt = 0
    with open(file_name) as f:
        while cnt != idx:
            f.readline()
            cnt += 1
        return f.readline().strip()

# usage example below
print(get_interesting_line('data.txt', 7))
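Splitting the returned line and grabbing the data could then look like this (a sketch: it assumes the four values are the first four comma-separated fields, which you would adjust to your actual column layout):
# Hypothetical follow-up: split the raw line and cast the fields.
fields = get_interesting_line('data.txt', 7).split(',')
xcord, ycord, langle, lspeed = (float(v) for v in fields[:4])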

Txt file to excel conversion in python

I'm trying to convert a text file to an Excel sheet in Python. The txt file contains data in the format specified below.
Column names: reg no, zip code, loc id, emp id, lastname, first name. Each record has one or more error numbers, and each record has these column names listed above its values. I would like to create an Excel sheet containing reg no, firstname, lastname, and the errors listed in separate rows for each record.
How can I put the records into an Excel sheet? Should I be using regular expressions? And how can I insert the error numbers in different rows for the corresponding record?
Here is the link to the input file:
https://github.com/trEaSRE124/Text_Excel_python/blob/master/new.txt
Any code snippets or suggestions are kindly appreciated.
Here is a draft of the code. Let me know if any changes are needed:
from collections import OrderedDict
from datetime import date
import csv

with open('in.txt') as f:
    with open('out.csv', 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        # Remove initial clutter
        while "INPUT DATA" not in f.readline():
            continue
        header = ["REG NO", "ZIP CODE", "LOC ID", "EMP ID", "LASTNAME", "FIRSTNAME", "ERROR"]
        data = list()
        errors = list()
        spamwriter.writerow(header)
        print header
        while True:
            line = f.readline()
            errors = list()
            if "END" in line:
                exit()
            try:
                int(line.split()[0])
                data = line.strip().split()
                f.readline()  # get rid of \n
                line = f.readline()
                while "ERROR" in line:
                    errors.append(line.strip())
                    line = f.readline()
                spamwriter.writerow(data + errors)
                csvfile.flush()  # csv.writer objects have no flush(); flush the underlying file instead
            except:
                continue
Run it with Python 2. The errors are appended as subsequent columns; getting them into separate rows the way you want is slightly more complicated. I can fix it if still needed; a rough sketch of that reshaping follows.
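For what it's worth, a hedged sketch of that per-row layout (reusing spamwriter, data and errors from the draft above) could write the person's fields once and then one row per remaining error:
# Hypothetical reshaping: the first error shares the person's row, and every
# further error gets its own row with the other columns left blank.
def write_person(spamwriter, data, errors):
    if errors:
        spamwriter.writerow(data + [errors[0]])
        for err in errors[1:]:
            spamwriter.writerow([''] * len(data) + [err])
    else:
        spamwriter.writerow(data)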
You can do this using the openpyxl library which is capable of depositing items directly into a spreadsheet. This code shows how to do that for your particular situation.
from openpyxl import Workbook

NEW_PERSON, ERROR_LINE = 1, 2

def Line_items():
    with open('katherine.txt') as katherine:
        for line in katherine:
            line = line.strip()
            if not line:
                continue
            items = line.split()
            if items[0].isnumeric():
                yield NEW_PERSON, items
            elif items[:2] == ['ERROR', 'NUM']:
                yield ERROR_LINE, line
            else:
                continue

wb = Workbook()
ws = wb.active
ws['A2'] = 'REG NO'
ws['B2'] = 'LASTNAME'
ws['C2'] = 'FIRSTNAME'
ws['D2'] = 'ERROR'

row = 2
for kind, data in Line_items():
    if kind == NEW_PERSON:
        row += 2
        ws['A{:d}'.format(row)] = int(data[0])
        ws['B{:d}'.format(row)] = data[-2]
        ws['C{:d}'.format(row)] = data[-1]
        first = True
    else:
        if first:
            first = False
        else:
            row += 1
        ws['D{:d}'.format(row)] = data

wb.save(filename='katherine.xlsx')

Read CSV file and filter results

I'm writing a script where one of its functions is to read a CSV file that contains URLs in one of its columns. Unfortunately the system that creates those CSVs doesn't put double quotes around values in the URL column, so when a URL contains commas it breaks all my CSV parsing.
This is the code I'm using:
with open(accesslog, 'r') as csvfile, open('results.csv', 'w') as enhancedcsv:
    reader = csv.DictReader(csvfile)
    for row in reader:
        self.uri = (row['URL'])
        self.OriCat = (row['Category'])
        self.query(self.uri)
        print self.URL + "," + self.ServerIP + "," + self.OriCat + "," + self.NewCat
This is a sample URL that breaks the parsing; it comes in the column named "URL" (note the commas at the end):
ams1-ib.adnxs.com/ww=1238&wh=705&ft=2&sv=43&tv=view5-1&ua=chrome&pl=mac&x=1468251839064740641,439999,v,mac,webkit_chrome,view5-1,0,,2,
The field following the URL always comes with a numeric value between parentheses, e.g. (9999), so this could be used to determine where a URL with commas ends.
How can I deal with a situation like this using the csv module?
You will have to do it a little more manually. The idea: count the columns before the URL from the start of each row and the columns after it from the end; whatever is left in the middle, commas included, must be the URL. Try this:
def process(lines, delimiter=','):
    header = None
    url_index_from_start = None
    url_index_from_end = None
    for line in lines:
        if not header:
            header = [l.strip() for l in line.split(delimiter)]
            url_index_from_start = header.index('URL')
            url_index_from_end = len(header) - url_index_from_start
        else:
            data = [l.strip() for l in line.split(delimiter)]
            url_from_start = url_index_from_start
            url_from_end = len(data) - url_index_from_end
            values = data[:url_from_start] + data[url_from_end+1:] + [delimiter.join(data[url_from_start:url_from_end+1])]
            keys = header[:url_index_from_start] + header[url_index_from_end+1:] + [header[url_index_from_start]]
            yield dict(zip(keys, values))
Usage:
lines = ['Header1, Header2, URL, Header3',
         'Content1, "Content2", abc,abc,,abc, Content3']
result = list(process(lines))
assert result[0]['Header1'] == 'Content1'
assert result[0]['Header2'] == '"Content2"'
assert result[0]['Header3'] == 'Content3'
assert result[0]['URL'] == 'abc,abc,,abc'
print(result)
Result:
>>> [{'URL': 'abc,abc,,abc', 'Header2': '"Content2"', 'Header3': 'Content3', 'Header1': 'Content1'}]
Have you considered using Pandas to read your data in?
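Pandas won't parse the ragged rows out of the box either, but newer versions can hand bad rows to you for repair. A sketch of that idea, assuming pandas >= 1.4 (whose python engine accepts a callable for on_bad_lines) and assuming the URL is the third of five columns; both counts are assumptions to adjust:
import pandas as pd

EXPECTED_COLS = 5  # assumed total column count of the access log
URL_INDEX = 2      # assumed zero-based position of the URL column

def rejoin_url(bad_row):
    # Called only for rows that split into too many fields: glue the
    # surplus middle fields back together into a single URL value.
    extra = len(bad_row) - EXPECTED_COLS
    url = ','.join(bad_row[URL_INDEX:URL_INDEX + extra + 1])
    return bad_row[:URL_INDEX] + [url] + bad_row[URL_INDEX + extra + 1:]

df = pd.read_csv(accesslog, engine='python', on_bad_lines=rejoin_url)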
Another possible solution would be to use regular expressions to pre-process the data...
import re

# read the file in first
f = open(filein, 'r')
filedata = f.read()
f.close()

# make a list of everything you want to change
# (regex is left for you to define; it should match the unquoted URLs)
old = re.findall(regex, filedata)

# append quotes and create a new list
new = []
for url in old:
    url2 = "\"" + url + "\""
    new.append(url2)

# combine the lists
old_new = list(zip(old, new))

# then use the list to update the file contents
for old, new in old_new:
    filedata = filedata.replace(old, new)

f = open(filein, 'w')
f.write(filedata)
f.close()
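The regex itself is the hard part. A hedged sketch of one way to build it, based on the asker's observation that the field right after the URL is always a parenthesized number like (9999); the URL's column position is an assumption here:
import re

URL_COL = 2  # assumed zero-based index of the URL column
pattern = re.compile(r'^((?:[^,]*,){%d})(.*?),(\(\d+\))' % URL_COL)

def quote_url(line):
    # Wrap the comma-laden URL field in double quotes, leaving
    # lines that don't contain the (9999)-style marker untouched.
    m = pattern.match(line)
    if not m:
        return line
    before, url, marker = m.groups()
    return '%s"%s",%s%s' % (before, url, marker, line[m.end():])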

extracting data from CSV file using a reference

I have a CSV file with several hundred organism IDs and a second CSV file with several thousand organism IDs and additional characteristics (taxonomic information, abundances per sample, etc.).
I am trying to write code that will extract the information from the larger CSV using the smaller CSV file as a reference. Meaning it will look at both files, and if an ID is in both, it will extract all the information from the larger file and write it to a new file (basically writing the entire row for that ID).
So far I have written the following, and while the code does not error out on me, I get a blank file in the end and I don't exactly know why. I am a graduate student who knows some simple coding, but I'm still very much a novice.
Thank you.
import sys
import csv
import os.path

SparCCnames = open(sys.argv[1], "rU")
OTU_table = open(sys.argv[2], "rU")
new_file = open(sys.argv[3], "w")

Sparcc_OTUs = csv.writer(new_file)

d = csv.DictReader(SparCCnames)
ids = csv.DictReader(OTU_table)

for record in ids:
    idstopull = record["OTUid"]
    if idstopull[0] == "OTUid":
        continue
    if idstopull[0] in d:
        new_id.writerow[idstopull[0]]

SparCCnames.close()
OTU_table.close()
new_file.close()
I'm not sure what you're trying to do in your code but you can try this:
import csv

def csv_to_dict(csv_file_path):
    csv_file = open(csv_file_path, 'rb')
    csv_file.seek(0)
    sniffdialect = csv.Sniffer().sniff(csv_file.read(10000), delimiters='\t,;')
    csv_file.seek(0)
    dict_reader = csv.DictReader(csv_file, dialect=sniffdialect)
    csv_file.seek(0)
    dict_data = []
    for record in dict_reader:
        dict_data.append(record)
    csv_file.close()
    return dict_data

def dict_to_csv(csv_file_path, dict_data):
    csv_file = open(csv_file_path, 'wb')
    writer = csv.writer(csv_file, dialect='excel')
    headers = dict_data[0].keys()
    writer.writerow(headers)
    # headers must be the same as dat.keys()
    for dat in dict_data:
        line = []
        for field in headers:
            line.append(dat[field])
        writer.writerow(line)
    csv_file.close()

if __name__ == "__main__":
    big_csv = csv_to_dict('/path/to/big_csv_file.csv')
    small_csv = csv_to_dict('/path/to/small_csv_file.csv')
    output = []
    for s in small_csv:
        for b in big_csv:
            if s['id'] == b['id']:
                output.append(b)
    if output:
        dict_to_csv('/path/to/output.csv', output)
    else:
        print "Nothing."
Hope that will help.
You need to read the data into a data structure first. Assuming OTUid is unique, you can store the rows in a dictionary for fast lookup:
with open(sys.argv[1], "rU") as SparCCnames:
    d = csv.DictReader(SparCCnames)
    fieldnames = d.fieldnames
    data = {i['OTUid']: i for i in d}

with open(sys.argv[2], "rU") as OTU_table, open(sys.argv[3], "w") as new_file:
    Sparcc_OTUs = csv.DictWriter(new_file, fieldnames)
    Sparcc_OTUs.writeheader()  # write the header row so the output keeps the same columns
    ids = csv.DictReader(OTU_table)
    for record in ids:
        if record['OTUid'] in data:
            Sparcc_OTUs.writerow(data[record['OTUid']])
Thank you everyone for your help. I played with things and consulted with an advisor, and finally got a working script. I am posting it in case it helps someone else in the future.
Thanks!
import sys
import csv

input_file = csv.DictReader(open(sys.argv[1], "rU"))  # has all info
ref_list = csv.DictReader(open(sys.argv[2], "rU"))    # reference list
output_file = csv.DictWriter(
    open(sys.argv[3], "w"), input_file.fieldnames)    # to write output file with headers
output_file.writeheader()                             # write headers in output file

white_list = {}                              # create empty dictionary
for record in ref_list:                      # for every line in my reference list
    white_list[record["Sample_ID"]] = None   # store the IDs into the dictionary as keys

for record in input_file:                    # for every line in my input file
    record_id = record["Sample_ID"]          # store the ID into variable record_id
    if record_id in white_list:              # if the ID is in the reference list
        output_file.writerow(record)         # write the entire row into the new file
    else:                                    # if it is not in my reference list
        continue                             # ignore it and continue iterating through the file

Want to read multiple CSV files one by one, with the file paths stored in a text file, using Python

Here is my code for reading individual cells of one CSV file, but I want to read multiple CSV files one by one from a .txt file where the CSV file paths are listed.
import csv

ifile = open("C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv", "rb")
data = list(csv.reader(ifile, delimiter=';'))

REQ = []
RES = []
n = len(data)
for i in range(n):
    x = data[i][1]
    y = data[i][2]
    REQ.append(x)
    RES.append(y)

for j in range(2, n):
    try:
        if REQ[j] != '' and RES[j] != '':  # ignore blank cell
            print REQ[j], ' ', RES[j]
    except:
        pass
And the CSV file paths are stored in a .txt file like:
C:\Desktop\Test_Specification\RDBI.csv
C:\Desktop\Test_Specification\ECUreset.csv
C:\Desktop\Test_Specification\RDTC.csv
and so on..
You can read stuff stored in files into variables. And you can use variables with strings in them anywhere you can use a literal string. So...
with open('mytxtfile.txt', 'r') as txt_file:
    for line in txt_file:
        file_name = line.strip()  # or was it trim()? I keep mixing them up
        ifile = open(file_name, 'rb')
        # ... the rest of your code goes here
Maybe we can fix this up a little...
import csv

with open('mytxtfile.txt', 'r') as txt_file:
    for line in txt_file:
        file_name = line.strip()
        csv_file = csv.reader(open(file_name, 'rb'), delimiter=';')
        next(csv_file)  # skip the header row (reader objects can't be sliced)
        for record in csv_file:
            req = record[1]
            res = record[2]
            if len(req + res):
                print req, ' ', res
You just need to add an outer loop which reads the file containing your list of file paths, wrapped around your first open statement, for example:
from __future__ import with_statement

with open("myfile_which_contains_file_path.txt") as f:
    for line in f:
        ifile = open(line.strip(), 'rb')  # strip the trailing newline from the path
        # here the rest of your code
You need to use a raw string, since your path contains backslashes (\):
import csv

# file_list should point at the txt file that lists your CSV paths;
# the name below is a placeholder
file_list = r"C:\Users\BKA4ABT\Desktop\Test_Specification\file_list.txt"
with open(file_list) as f:
    for line in f:
        with open(line.strip(), 'rb') as the_file:
            reader = csv.reader(the_file, delimiter=';')
            for row in reader:
                req, res = row[1:3]
                if req and res:
                    print('{0} {1}'.format(req, res))
