trying to read xlrd, extract data, and write csv

trying to read xlrd, extract data, and write csv - python

I am trying to read an excel file, extract some data, and write it out as a csv. This is pretty new to me and I'm messing up somewhere: I keep getting an empty csv. I'm sure I'm missing something very basic, but darned if I can see it. Here is the code:
```
import xlrd
import os
import csv
from zipfile import ZipFile
import datetime
datafile = "./2013_ERCOT_Hourly_Load_Data.xls"
outfile = "./2013_Max_Loads.csv"
def parse_file(datafile):
workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)
data = None
outputlist = []
for col in range(1, sheet.ncols):
cv = sheet.col_values(col, start_rowx=1, end_rowx=None)
header = sheet.cell_value(0,col)
maxval = max(cv)
maxpos = cv.index(maxval) + 1
maxtime = sheet.cell_value(maxpos, 0)
realtime = xlrd.xldate_as_tuple(maxtime, 0)
year = realtime[0]
month = realtime[1]
day = realtime[2]
hour = realtime[3]
data = [
'Region:', header,
'Year:', year,
'Month:', month,
'Day:', day,
'Hour:', hour,
maxpos,
maxtime,
realtime,
maxval,
]
path = "./2013_Max_Loads.csv"
return outputlist
def save_file(data, filename):
with open(filename, "wb") as f:
writer = csv.writer(f, delimiter='|')
for line in data:
writer.writerow(line)
parse_file(datafile)
save_file(parse_file(datafile),"2013_Max_Loads.csv")

You declare outfile but you don't use it
You aren't passing a directory (path) for the file to be saved in.
I also think that calling parse_file twice might be messing you up. Just pass the filename and call it from within the save_file function.
I also found that you were returning output list as a blank list.
So here, try this. I will assume your xlrd commands are correct, because I have not personally used the module.
import csv
import xlrd
def parse_file(datafile):
workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)
outputlist = []
outputlist_append = outputlist.append
for col in range(1, sheet.ncols):
cv = sheet.col_values(col, start_rowx=1, end_rowx=None)
header = sheet.cell_value(0,col)
maxval = max(cv)
maxpos = cv.index(maxval) + 1
maxtime = sheet.cell_value(maxpos, 0)
realtime = xlrd.xldate_as_tuple(maxtime, 0)
year = realtime[0]
month = realtime[1]
day = realtime[2]
hour = realtime[3]
data = [
'Region:', header,
'Year:', year,
'Month:', month,
'Day:', day,
'Hour:', hour,
maxpos,
maxtime,
realtime,
maxval,
]
outputlist_append(data)
return outputlist
def save_file(data, filename):
parse_file(data)
with open(filename, 'wb') as f:
writer = csv.writer(f, delimiter='|')
for line in data:
writer.writerow(line)
return
datafile = "./2013_ERCOT_Hourly_Load_Data.xls"
outfile = "./2013_Max_Loads.csv"
save_file(datafile, outfile)
UPDATE: Edit in code in function save_file() to implement #wwii's suggestion.
Try substituting the new save_file() below:
def save_file(data, filename):
parse_file(data)
with open(filename, 'wb') as f:
wr = csv.writer(f, delimiter='|')
wr.writerows(data)
return
Also, change the variable (you used writer) to something like wr. You really want to avoid any possible conflicts with having a variable with the same name as a method, a function, or class you are calling.

Related

Saving Multiple files in Python

I am trying to create a new file each time the following runs. At the moment it creates 1 file and just overwrites it. Is there a to make it not overwrite and create a new file for each loop?
import xml.etree.ElementTree as ET
import time
import csv
with open('OrderCSV.csv', newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
orders_data = ET.Element('orders_data')
orders = ET.SubElement(orders_data, 'orders')
##Order Details
order_reference = ET.SubElement(orders, 'order reference')
order_reference.set('',"12345")
order_date = ET.SubElement(order_reference, 'order_date')
order_priority = ET.SubElement(order_reference, 'order_priority')
order_category = ET.SubElement(order_reference, 'order_category')
delivery_service = ET.SubElement(order_reference, 'delivery_service')
delivery_service.text = row['delivery_service']
timestr = time.strftime("%Y%m%d%H%M%S")
mydata = ET.tostring(orders_data)
myfile = open(timestr, "wb")
myfile.write(mydata)

You could see if the file already exists and wait a bit
while True:
timestr = time.strftime("%Y%m%d%H%M%S")
if not os.path.exists(timestr):
break
time.sleep(.1)
with open(timestr, "wb") as myfile:
mydata = ET.tostring(orders_data)
myfile.write(mydata)
Instead of waiting you could just add seconds. This will cause the file names to drift forward in time if you process a lot of them per second.
mytime = time.time()
while True:
timestr = time.strftime("%Y%m%d%H%M%S", time.localtime(mytime))
if not os.path.exists(timestr):
break
time.sleep(.1)
with open(timestr, "wb") as myfile:
mydata = ET.tostring(orders_data)
myfile.write(mydata)
Another option is to get a single timestamp before the loop and update it as you go.
mytime = time.strftime("%Y%m%d%H%M%S")
for index, row in enumerate(reader):
....
mytime = f"mytime-{index}"
....

change the variable name each time you run the loop and I would suggest using with statement for opening file as you also have to close it after you open it
with open(timestr, 'wb') as myfile:
myfile.write(mydata)
edit: only flaw I can imagine in your code is not closing the file after opening it

Continues columns in a csv-file with python

I have a Problem with continues writing my datas in a csv-file. I want a program that detects, if there is a csv-file for my measurements-data. If not it would be generated. When the csv-file is new generated the datas are written in the csv-file on the column after the header with the variable cycle = 0.
If the csv-file exists, the datas should be written continuously after the last line of the csv. Also the variable cycle should continue.
I have written a program that can detect if there is a file or not but with the continuously lines I have problems.
I hope someone can help me.
# mes = Array with 20 spaces filled with the Numbers 0-19
date = time.strftime("%d/%m/%Y")
def write(cycle, mes):
if os.path.exists('/home/pi/Documents/Ventilatorprüfstand_Programm/out.csv') is True: #does the out.csv existate?
print("Do something")
out = open('out.csv', 'w')
data = [[cycle, mes[0],mes[1],mes[2],mes[3],mes[4],mes[5],mes[6],mes[7],mes[8],mes[9],mes[10],mes[11],mes[12],mes[13],mes[14],mes[15],mes[16],mes[17],mes[18],mes[19], date]]
line = cycle+1
for row in data:
for line in row:
out.write('%s;' % line)
out.write('\n')
out.close()
else:
print("Do another something")
header = lookuptable.names()
out = open('out.csv', 'w')
for row in header:
for column in row:
out.write('%s' % column)
out.write('\t')
out.write('\n')
data = [[cycle, mes[0],mes[1],mes[2],mes[3],mes[4],mes[5],mes[6],mes[7],mes[8],mes[9],mes[10],mes[11],mes[12],mes[13],mes[14],mes[15],mes[16],mes[17],mes[18],mes[19], date]]
for row in data:
for column in row:
out.write('%s;' % column)
out.write('\n')
out.close()`

When opening the file with open() there is the option 'a' to append the new lines to the end:
'a' open for writing, appending to the end of the file if it exists
Here is an example using the csv Python standard library:
import csv
import os
import random
headers = ['cycle', 'date', 'speed', 'temp', 'power']
new_data = [[random.randint(0, 100) for _ in range(3)] for _ in range(2)]
date = '00/01/02'
cycle = 1
# Copy the data and include the date and the cycle number:
full_rows = [ [cycle, date, *row] for row in new_data ]
filename = 'example.csv'
# Check if the file exist, if not create the file with header
if not os.path.exists(filename):
print('creating a new file')
with open(filename, 'w') as csvfile:
csvwriter = csv.writer(csvfile, delimiter=',')
csvwriter.writerow(headers) # add the header
# Append the data to the file
with open(filename, 'a', newline='') as csvfile: # note the 'a' option
csvwriter = csv.writer(csvfile, delimiter=',')
csvwriter.writerows(full_rows)

CSV writing cannot be done

CSV writing cannot be done.The list of "li_result" has data result and I want to write this data in csv file.
This is the code
fp = open('dataResult.csv', 'w')
w = csv.writer(fp, delimiter=',')
csvwrite = unicode(li_result)
csvwrite_result = csvwrite.encode('sjis')
w.writerow(csvwrite_result)
But dataResult.csv is empty. Nothing error happen so I do not know what is wrong.
And I want to write the data in sjis code in csv file.(Now I use python2.7 so unicode is used to write letters,right?) I deleted these codes
csvwrite = unicode(li_result)
csvwrite_result = csvwrite.encode('sjis')
Still nothing is written.
What should I do to fix this?
Sample codes
fp = open(CSV_FILE_NAME_ACCOUNT, 'aw')
w = csv.writer(fp, delimiter=',')
title = 'abc'
name = 'hoge'
time = '2010-04-20 0:0:0'
u_title = unicode(title)
u_name = unicode(name)
u_time = unicode(time)
s_title = u_title.encode('sjis')
s_name = u_name.encode('sjis')
s_time = u_time.encode('sjis')
list = [s_title, s_name, s_time]
w.writerow(list)

import csv
fp = open('system path to your file on which data to read', 'w')
w = csv.writer(fp, delimiter=',')
title = 'abc'
name = 'hoge'
time = '2010-04-20 0:0:0'
list = [title, name, time]
w.writerow(list)

How to split code into smaller functions

I have an application that works. But in the interest of attempting to understand functions and python better. I am trying to split it out into various functions.
I"m stuck on the file_IO function. I'm sure the reason it does not work is because the main part of the application does not understand reader or writer. To better explain. Here is a full copy of the application.
Also I'm curious about using csv.DictReader and csv.DictWriter. Do either provide any advantages/disadvantages to the current code?
I suppose another way of doing this is via classes which honestly I would like to know how to do it that way as well.
#!/usr/bin/python
""" Description This script will take a csv file and parse it looking for specific criteria.
A new file is then created based of the original file name containing only the desired parsed criteria.
"""
import csv
import re
import sys
searched = ['aircheck', 'linkrunner at', 'onetouch at']
def find_group(row):
"""Return the group index of a row
0 if the row contains searched[0]
1 if the row contains searched[1]
etc
-1 if not found
"""
for col in row:
col = col.lower()
for j, s in enumerate(searched):
if s in col:
return j
return -1
#Prompt for File Name
def file_IO():
print "Please Enter a File Name, (Without .csv extension): ",
base_Name = raw_input()
print "You entered: ",base_Name
in_Name = base_Name + ".csv"
out_Name = base_Name + ".parsed.csv"
print "Input File: ", in_Name
print "OutPut Files: ", out_Name
#Opens Input file for read and output file to write.
in_File = open(in_Name, "rU")
reader = csv.reader(in_File)
out_File = open(out_Name, "wb")
writer = csv.writer(out_File, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
return (reader, writer)
file_IO()
# Read header
header = reader.next()
stored = []
writer.writerow([header[0], header[3]])
for i, row in enumerate(reader):
g = find_group(row)
if g >= 0:
stored.append((g, i, row))
stored.sort()
for g, i, row in stored:
writer.writerow([row[0], row[3]])
# Closing Input and Output files.
in_File.close()
out_File.close()

If I were you, I'd only separate find_group.
import csv
def find_group(row):
GROUPS = ['aircheck', 'linkrunner at', 'onetouch at']
for idx, group in enumerate(GROUPS):
if group in map(str.lower, row):
return idx
return -1
def get_filenames():
# this might be the only other thing you'd want to factor
# into a function, and frankly I don't really like getting
# user input this way anyway....
basename = raw_input("Enter a base filename (no extension): ")
infilename = basename + ".csv"
outfilename = basename + ".parsed.csv"
return infilename, outfilename
# notice that I don't open the files yet -- let main handle that
infilename, outfilename = get_filenames()
with open(infilename, 'rU') as inf, open(outfilename, 'wb') as outf:
reader = csv.reader(inf)
writer = csv.writer(outf, delimiter=',',
quotechar='"', quoting=csv.QUOTE_ALL)
header = next(reader)
writer.writerow([[header[0], header[3]])
stored = sorted([(find_group(row),idx,row) for idx,row in
enumerate(reader)) if find_group(row) >= 0])
for _, _, row in stored:
writer.writerow([row[0], row[3]])

How to convert CSV file to multiline JSON?

Here's my code, really simple stuff...
import csv
import json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
out = json.dumps( [ row for row in reader ] )
jsonfile.write(out)
Declare some field names, the reader uses CSV to read the file, and the filed names to dump the file to a JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader( line, fieldnames) which loops through each line, but it does the entire file on one line, then loops through the entire file on another line... continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on it's own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"

The problem with your desired output is that it is not valid json document,; it's a stream of json documents!
That's okay, if its what you need, but that means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
for row in reader:
json.dump(row, jsonfile)
jsonfile.write('\n')

You can use Pandas DataFrame to achieve this, with the following Example:
import pandas as pd
csv_file = pd.DataFrame(pd.read_csv("path/to/file.csv", sep = ",", header = 0, index_col = False))
csv_file.to_json("/path/to/new/file.json", orient = "records", date_format = "epoch", double_precision = 10, force_ascii = True, date_unit = "ms", default_handler = None)

import csv
import json
file = 'csv_file_name.csv'
json_file = 'output_file_name.json'
#Read CSV File
def read_CSV(file, json_file):
csv_rows = []
with open(file) as csvfile:
reader = csv.DictReader(csvfile)
field = reader.fieldnames
for row in reader:
csv_rows.extend([{field[i]:row[field[i]] for i in range(len(field))}])
convert_write_json(csv_rows, json_file)
#Convert csv data into json
def convert_write_json(data, json_file):
with open(json_file, "w") as f:
f.write(json.dumps(data, sort_keys=False, indent=4, separators=(',', ': '))) #for pretty
f.write(json.dumps(data))
read_CSV(file,json_file)
Documentation of json.dumps()

I took #SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys
for row in csv.DictReader(sys.stdin):
json.dump(row, sys.stdout)
sys.stdout.write('\n')

You can try this
import csvmapper
# how does the object look
mapper = csvmapper.DictMapper([
[
{ 'name' : 'FirstName'},
{ 'name' : 'LastName' },
{ 'name' : 'IDNumber', 'type':'int' },
{ 'name' : 'Messages' }
]
])
# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)
# conversion service
converter = csvmapper.JSONConverter(parser)
print converter.doConvert(pretty=True)
Edit:
Simpler approach
import csvmapper
fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
parser = CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)
print converter.doConvert(pretty=True)

I see this is old but I needed the code from SingleNegationElimination however I had issue with the data containing non utf-8 characters. These appeared in fields I was not overly concerned with so I chose to ignore them. However that took some effort. I am new to python so with some trial and error I got it to work. The code is a copy of SingleNegationElimination with the extra handling of utf-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The below code worked.
import csv, json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')
fieldnames = ("Scope","Comment","OOS Code","In RMF","Code","Status","Name","Sub Code","CAT","LOB","Description","Owner","Manager","Platform Owner")
reader = csv.DictReader(csvfile , fieldnames)
code = ''
for row in reader:
try:
print('+' + row['Code'])
for key in row:
row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
json.dump(row, jsonfile)
jsonfile.write('\n')
except:
print('-' + row['Code'])
raise

Add the indent parameter to json.dumps
data = {'this': ['has', 'some', 'things'],
'in': {'it': 'with', 'some': 'more'}}
print(json.dumps(data, indent=4))
Also note that, you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)

Use pandas and the json library:
import pandas as pd
import json
filepath = "inputfile.csv"
output_path = "outputfile.json"
df = pd.read_csv(filepath)
# Create a multiline json
json_list = json.loads(df.to_json(orient = "records"))
with open(output_path, 'w') as f:
for item in json_list:
f.write("%s\n" % item)

How about using Pandas to read the csv file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values) and finally converting the DataFrame back to JSON (pd.DataFrame.to_json).
Note: I haven't checked how efficient this will be but this is definitely one of the easiest ways to manipulate and convert a large csv to json.

As slight improvement to #MONTYHS answer, iterating through a tup of fieldnames:
import csv
import json
csvfilename = 'filename.csv'
jsonfilename = csvfilename.split('.')[0] + '.json'
csvfile = open(csvfilename, 'r')
jsonfile = open(jsonfilename, 'w')
reader = csv.DictReader(csvfile)
fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')
output = []
for each in reader:
row = {}
for field in fieldnames:
row[field] = each[field]
output.append(row)
json.dump(output, jsonfile, indent=2, sort_keys=True)

def read():
noOfElem = 200 # no of data you want to import
csv_file_name = "hashtag_donaldtrump.csv" # csv file name
json_file_name = "hashtag_donaldtrump.json" # json file name
with open(csv_file_name, mode='r') as csv_file:
csv_reader = csv.DictReader(csv_file)
with open(json_file_name, 'w') as json_file:
i = 0
json_file.write("[")
for row in csv_reader:
i = i + 1
if i == noOfElem:
json_file.write("]")
return
json_file.write(json.dumps(row))
if i != noOfElem - 1:
json_file.write(",")
Change the above three parameter, everything will be done.

import csv
import json
csvfile = csv.DictReader('filename.csv', 'r'))
output =[]
for each in csvfile:
row ={}
row['FirstName'] = each['FirstName']
row['LastName'] = each['LastName']
row['IDNumber'] = each ['IDNumber']
row['Message'] = each['Message']
output.append(row)
json.dump(output,open('filename.json','w'),indent=4,sort_keys=False)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

trying to read xlrd, extract data, and write csv - python

Related

Saving Multiple files in Python

Continues columns in a csv-file with python

CSV writing cannot be done

How to split code into smaller functions

How to convert CSV file to multiline JSON?

Categories

Resources