I am using a DictWriter to write a single row to a CSV file; however, only two of the columns are being populated. Here are some code snippets of what I am working on, with the focus being on the function Stat2():
def scheduledPerformanceAvailability(FILENAME, CATEGORY):
    # get the category column for the current entry
    entry = retrieveEntries(FILENAME, CATEGORY)
    # grab the count of the most frequent entry from the collections.Counter
    mostFrequent = entry.most_common(1)[0][1]
    print "\n", mostFrequent
    # calculate the total number of values in the file
    totalNumber = sum(entry.values())
    print "\n", totalNumber
    # calculate the percentage (cast before dividing to avoid integer division)
    percentage = float(mostFrequent) / totalNumber * 100
    print "\n", percentage, "%\n"
    return percentage
def Stat2(FILENAME2, itemValue, percentage):
    # store the values into a list
    entry = []
    percent = str(percentage)
    displayPercentage = percent + ' %'
    entry.append({'DEPARTURES_SCHEDULED': displayPercentage, 'UNIQUE_CARRIER_NAME': itemValue})
    fieldnames = ['DEPARTURES_SCHEDULED', 'DEPARTURES_PERFORMED', 'SEATS', 'UNIQUE_CARRIER_NAME']
    # open the file for appending
    outfile = open(FILENAME2, 'a')
    # create the csv writer object
    csvwriter = csv.DictWriter(outfile, delimiter=',', fieldnames=fieldnames)
    # check the file before appending any unnecessary headers
    ckfile = open(FILENAME2, 'r').read()
    if ckfile == '':
        csvwriter.writerow(dict((fn, fn) for fn in fieldnames))
    for row in entry:
        csvwriter.writerow(row)
    # close the file
    outfile.close()
def most_commonCatgoryO_(FILENAME1, FILENAME2, CATEGORY1, CATEGORY2):
    # create the field names for each category
    fieldnames = ['DEPARTURES_SCHEDULED', 'DEPARTURES_PERFORMED', 'SEATS', CATEGORY1]
    # open a file for writing
    outfile = open(FILENAME1, 'wb')
    # create the csv writer object
    csvwriter = csv.DictWriter(outfile, delimiter=',', fieldnames=fieldnames, extrasaction='ignore')
    csvwriter.writerow(dict((fn, fn) for fn in fieldnames))
    entry = retrieveEntries('input/NC_SC Proj Data_2012 {Intermediate-File}.csv', CATEGORY1)
    # grab the item value associated with the most frequent number
    itemValue = entry.most_common(1)[0][0]
    # print "\n", itemValue
    # reopen the intermediate file for reading
    infile = open('input/NC_SC Proj Data_2012 {Intermediate-File}.csv', 'rb')
    reader = csv.DictReader(infile)
    # populate the outfile using the pre-defined condition
    for row in reader:
        if row[CATEGORY1] == itemValue:
            csvwriter.writerow(row)
    outfile.close()
    # open the outfile for reading (infile itself is unused here;
    # scheduledPerformanceAvailability() opens the file by name)
    with open(FILENAME1, 'rb') as infile:
        # calculate the percentage
        percentage = scheduledPerformanceAvailability(FILENAME1, CATEGORY2)
        Stat2(FILENAME2, itemValue, percentage)
For example, my csv file:
DEPARTURES_SCHEDULED,DEPARTURES_PERFORMED,SEATS,UNIQUE_CARRIER_NAME
3.90977443609 %,,,US Airways Inc.
4.21052631579 %,,,US Airways Inc.
1.8045112782 %,,,US Airways Inc.
My desired output should be:
DEPARTURES_SCHEDULED DEPARTURES_PERFORMED SEATS UNIQUE_CARRIER_NAME
3 % 4% 1% US Airways Inc.
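For what it's worth, only two columns end up populated because the dict handed to writerow() in Stat2() carries just two of the four fieldnames, and each call to Stat2() appends its own row. Below is a minimal sketch of one way to get the desired single row, assuming the three percentages have already been computed (e.g. by calling scheduledPerformanceAvailability() once per category); write_summary_row is a hypothetical helper, and the %d formatting truncates to whole percentages to match the desired output:

import csv

def write_summary_row(FILENAME2, itemValue, scheduled, performed, seats):
    # one dict covering all four fieldnames -> one fully populated row
    fieldnames = ['DEPARTURES_SCHEDULED', 'DEPARTURES_PERFORMED', 'SEATS', 'UNIQUE_CARRIER_NAME']
    row = {'DEPARTURES_SCHEDULED': '%d %%' % scheduled,
           'DEPARTURES_PERFORMED': '%d %%' % performed,
           'SEATS': '%d %%' % seats,
           'UNIQUE_CARRIER_NAME': itemValue}
    outfile = open(FILENAME2, 'a')
    csvwriter = csv.DictWriter(outfile, fieldnames=fieldnames)
    if open(FILENAME2, 'r').read() == '':  # same empty-file header check as Stat2()
        csvwriter.writerow(dict((fn, fn) for fn in fieldnames))
    csvwriter.writerow(row)
    outfile.close()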
Related
I have the following text file, products.txt:
Product;Amount;Price
Apple;3;10.00
Banana;1;5.00
Lemon;2;3.00
Orange;4;20.00
Apple;4;8.00
I want to read this file and make a new text file, newfile.txt, which contains the value of each row (Amount x Price):
30.00
5.00
6.00
80.00
32.00
Finally, I want to find the total sum of newfile.txt (which is 30+5+6+80+32 = 153).
Note that the price of the same product can vary, and we are not interested in the total sum per product.
I started by creating a class.
class DATA:
    product = ""
    amount = 0
    price = 0

def read(name):
    items = []
    file = open(name, 'r', encoding="UTF-8")
    file.readline()  # skip the header line
    while True:
        row = file.readline()
        if row == '':
            break
        columns = row[:-1].split(';')
        info = DATA()
        info.amount = int(columns[1])
        info.price = float(columns[2])  # prices like 10.00 need float, not int
        info.total = info.amount * info.price
        items.append(info)  # append to the list, not the file
    file.close()
    return items
This should work:
def read(name):
    total = 0
    ori = open(name, 'r', encoding="UTF-8")
    ori.readline()  # skip the header line
    dest = open("newfile.txt", 'w', encoding="UTF-8")
    row = ori.readline()
    while row != "":
        row = row[:-1].split(';')
        res = int(row[1]) * float(row[2])
        total += res
        dest.write("{:.2f}\n".format(res))  # writes 30.00 rather than 30.0
        row = ori.readline()
    ori.close()
    dest.close()
    print(total)

read("products.txt")
A possibility would be to use csv from the standard library.
import csv

# the files' paths (names taken from the question)
path1 = "products.txt"  # file to read
path2 = "newfile.txt"   # file to write

# read the data and perform the computations
rows_tot = []
with open(path1, 'r', newline='', encoding="utf-8") as fd:
    reader = csv.DictReader(fd, delimiter=";")
    for row in reader:
        rows_tot.append(float(row['Amount']) * float(row['Price']))

# total sum
print("Total sum:", int(sum(rows_tot)))

# save the new data to file
with open(path2, 'w', newline='') as fd:
    fieldnames = ("AmountXPrice",)
    writer = csv.DictWriter(fd, fieldnames=fieldnames)
    writer.writeheader()
    for value in rows_tot:
        writer.writerow({fieldnames[0]: f"{value:.2f}"})
Remark: the exact type of each field is not clear from the question; if needed, just swap int for float or the other way around.
I have a problem with continuously writing my data to a CSV file. I want a program that detects whether a CSV file for my measurement data already exists; if not, one should be generated. When the CSV file is newly generated, the data is written to the line after the header, with the variable cycle = 0.
If the CSV file already exists, the data should be appended after its last line, and the variable cycle should carry on counting.
I have written a program that can detect whether the file exists, but I am having problems with appending the lines continuously.
I hope someone can help me.
# mes = array with 20 entries, filled with the numbers 0-19
date = time.strftime("%d/%m/%Y")

def write(cycle, mes):
    if os.path.exists('/home/pi/Documents/Ventilatorprüfstand_Programm/out.csv') is True:  # does out.csv exist?
        print("Do something")
        out = open('out.csv', 'w')
        data = [[cycle, mes[0], mes[1], mes[2], mes[3], mes[4], mes[5], mes[6], mes[7], mes[8], mes[9],
                 mes[10], mes[11], mes[12], mes[13], mes[14], mes[15], mes[16], mes[17], mes[18], mes[19], date]]
        line = cycle + 1
        for row in data:
            for line in row:
                out.write('%s;' % line)
            out.write('\n')
        out.close()
    else:
        print("Do another something")
        header = lookuptable.names()
        out = open('out.csv', 'w')
        for row in header:
            for column in row:
                out.write('%s' % column)
            out.write('\t')
        out.write('\n')
        data = [[cycle, mes[0], mes[1], mes[2], mes[3], mes[4], mes[5], mes[6], mes[7], mes[8], mes[9],
                 mes[10], mes[11], mes[12], mes[13], mes[14], mes[15], mes[16], mes[17], mes[18], mes[19], date]]
        for row in data:
            for column in row:
                out.write('%s;' % column)
            out.write('\n')
        out.close()
When opening the file with open() there is the option 'a' to append the new lines to the end:
'a' open for writing, appending to the end of the file if it exists
Here is an example using the csv Python standard library:
import csv
import os
import random

headers = ['cycle', 'date', 'speed', 'temp', 'power']
new_data = [[random.randint(0, 100) for _ in range(3)] for _ in range(2)]
date = '00/01/02'
cycle = 1

# Copy the data and include the date and the cycle number:
full_rows = [[cycle, date, *row] for row in new_data]

filename = 'example.csv'

# Check if the file exists; if not, create the file with the header
if not os.path.exists(filename):
    print('creating a new file')
    with open(filename, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        csvwriter.writerow(headers)  # add the header

# Append the data to the file
with open(filename, 'a', newline='') as csvfile:  # note the 'a' option
    csvwriter = csv.writer(csvfile, delimiter=',')
    csvwriter.writerows(full_rows)
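The question also asks for the cycle variable to carry on counting across runs, which the example above does not cover. A rough sketch of one way to do it, assuming the cycle number is the first field of each data line (the question's code uses ';' as the separator; with the comma-separated example above you would split on ',' instead) and next_cycle is a hypothetical helper:

import os

def next_cycle(filename):
    # 0 for a missing or header-only file, otherwise
    # 1 + the cycle number found on the last data line
    if not os.path.exists(filename):
        return 0
    with open(filename) as f:
        lines = [line for line in f if line.strip()]
    if len(lines) < 2:  # header only, no data rows yet
        return 0
    return int(lines[-1].split(';')[0]) + 1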
I am trying to get my function to work out the average, write it to a file, and sort it using Python. This is my code:
def average_score(filename):
    with open(filename) as Class:
        reader = c.reader(Class, delimiter=",")
        for row in reader:
            people = []
            people.append(row[0])
            user, *scores = row
            average = sum([int(score) for score in scores]) / len(scores)
            a = open(filename, "a").writer(Class)
            data = [[average]]
            a.writerows(data)
            people.append(score)
            count = count + 1
            list11.insert(count, people)
    sort = sorted(list11, key=o.itemgetter(4), reverse=False)
    for eachline in sort:
        print(eachline)
csv file:
kieran,3,10,7
ben,4,8,5
ethan,9,1,4
oliver,7,2,3
Something like this should work:

import csv as c
import operator as o  # aliases matching the question's code

def average_score(filename):
    averages = {}
    with open(filename) as fd:
        reader = c.reader(fd, delimiter=",")
        for row in reader:
            user, *scores = row
            # the user has no scores
            if len(scores) == 0:
                continue
            averages[user] = sum([int(score) for score in scores]) / len(scores)
    sorted_averages = sorted(averages.items(), key=o.itemgetter(1), reverse=False)
    # This writes the averages to the file; remove if necessary
    with open(filename, 'a') as fd:
        for item in sorted_averages:
            fd.write("{}: {}\n".format(item[0], item[1]))
    # This prints to the screen; remove if necessary
    for item in sorted_averages:
        print("{}: {}".format(item[0], item[1]))
I have an application that works, but in the interest of understanding functions and Python better, I am trying to split it out into various functions.
I'm stuck on the file_IO function. I'm sure the reason it does not work is that the main part of the application does not have access to reader or writer. To explain better, here is a full copy of the application.
Also, I'm curious about using csv.DictReader and csv.DictWriter. Does either provide any advantages or disadvantages over the current code?
I suppose another way of doing this is via classes, which honestly I would also like to know how to do.
#!/usr/bin/python
"""Description: This script will take a csv file and parse it looking for specific criteria.
A new file is then created, based on the original file name, containing only the desired parsed criteria.
"""
import csv
import re
import sys

searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row
    0 if the row contains searched[0]
    1 if the row contains searched[1]
    etc
    -1 if not found
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    return -1

# Prompt for file name
def file_IO():
    print "Please enter a file name (without .csv extension): ",
    base_Name = raw_input()
    print "You entered: ", base_Name
    in_Name = base_Name + ".csv"
    out_Name = base_Name + ".parsed.csv"
    print "Input file: ", in_Name
    print "Output file: ", out_Name
    # Open the input file for reading and the output file for writing.
    in_File = open(in_Name, "rU")
    reader = csv.reader(in_File)
    out_File = open(out_Name, "wb")
    writer = csv.writer(out_File, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    return (reader, writer)

file_IO()

# Read header
header = reader.next()
stored = []
writer.writerow([header[0], header[3]])
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()
for g, i, row in stored:
    writer.writerow([row[0], row[3]])

# Closing input and output files.
in_File.close()
out_File.close()
If I were you, I'd only separate find_group.
import csv

def find_group(row):
    GROUPS = ['aircheck', 'linkrunner at', 'onetouch at']
    for idx, group in enumerate(GROUPS):
        if group in map(str.lower, row):
            return idx
    return -1

def get_filenames():
    # this might be the only other thing you'd want to factor
    # into a function, and frankly I don't really like getting
    # user input this way anyway....
    basename = raw_input("Enter a base filename (no extension): ")
    infilename = basename + ".csv"
    outfilename = basename + ".parsed.csv"
    return infilename, outfilename

# notice that I don't open the files yet -- let main handle that
infilename, outfilename = get_filenames()

with open(infilename, 'rU') as inf, open(outfilename, 'wb') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter=',',
                        quotechar='"', quoting=csv.QUOTE_ALL)
    header = next(reader)
    writer.writerow([header[0], header[3]])
    stored = sorted([(find_group(row), idx, row) for idx, row in
                     enumerate(reader) if find_group(row) >= 0])
    for _, _, row in stored:
        writer.writerow([row[0], row[3]])
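As for the csv.DictReader / csv.DictWriter part of the question: they let you refer to columns by header name instead of by index, at the cost of a little setup. Here is a sketch of the same filtering step written with them, assuming Python 2 (as in the rest of the post), the find_group defined above, and an input file with at least four columns; extrasaction='ignore' tells the DictWriter to silently drop the columns that are not in its fieldnames:

import csv

with open(infilename, 'rU') as inf, open(outfilename, 'wb') as outf:
    reader = csv.DictReader(inf)
    # keep only the first and fourth columns, referred to by header name
    keep = [reader.fieldnames[0], reader.fieldnames[3]]
    writer = csv.DictWriter(outf, fieldnames=keep, extrasaction='ignore',
                            quotechar='"', quoting=csv.QUOTE_ALL)
    writer.writeheader()
    stored = sorted((find_group(row.values()), idx, row)
                    for idx, row in enumerate(reader)
                    if find_group(row.values()) >= 0)
    for _, _, row in stored:
        writer.writerow(row)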
#!/usr/bin/python
import csv
import re

string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')
#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

print "hello Python!"

#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)

inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

for row in reader:
    found = False
    for col in row:
        if col in [string_1, string_2, string_3] and not found:
            writer.writerow(row)
            found = True

#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2])  # write only the first 2 columns

inFile.close()
outFile.close()
I'm trying to figure out how to search a CSV file for 3 items. If those items exist, print the row. Ideally I would like only columns 1 and 3 to print to a new file.
Sample Data File
LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300
I'm trying to figure out how to search a CSV file for 3 items. If those items exist, print the row. Ideally I would like only columns 1 and 3 to print to a new file.
Try this:
import csv

search_for = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

with open('in.csv') as inf, open('out.csv', 'w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        if row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)
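And if, as the question mentions, only columns 1 and 3 should go to the new file, one small change should do it (assuming they map to indexes 0 and 2), replacing the last line above with:

            writer.writerow([row[0], row[2]])  # keep only columns 1 and 3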
#!/usr/bin/python
import csv
import numpy as np

class search_csv(object):
    def __init__(self, infile, outfile):
        infile = open(infile, 'rb')
        read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        self.writer_.writerows(matched_values)
        print ' Matched Values Written '
        return True

    def searcher(self, items, return_cols=[0, 2]):
        # items should be passed as a list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
        find_these = np.array(items, dtype=None)
        matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
        matching_data = self.data[matching_y][:, return_cols]
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=[0, 2]):
        lst = []
        for i in self.non_numpy_data:
            for ii in items:
                if ii in i:
                    z = []
                    for idx in return_cols:
                        z.append(i[idx])
                    lst.append(z)
                    break
        self.write_to(lst)
        return True

### now use the class ###
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)
By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in, use your data file as the IN_FILE value and the file name you want to write to as the OUT_FILE value, place the values you want to search for in the SEARCHING_FOR list, and you're done.
Things to note:
SEARCHING_FOR should be a list.
The values in SEARCHING_FOR are matched EXACTLY, so 'A' will not match 'a'. If you want to use a regex or something more complex, let me know.
The function non_numpy_search has a return_cols parameter. It defaults to the first and third columns.
If you don't have numpy, let me know.
#!/usr/bin/python
import csv
import re
import sys
import gdata.docs.service

#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')
searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row
    0 if the row contains searched[0]
    1 if the row contains searched[1]
    etc
    -1 if not found
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    return -1

def does_match(string):
    stringl = string.lower()
    return any(s in stringl for s in searched)

# Open the input file for reading and the output file for writing.
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

#for row in reader:
#    found = False
#    for col in row:
#        if col in [string_1, string_2, string_3] and not found:
#            writer.writerow(row)
#            found = True

"""Build a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
one stores a triple (1, 12, row).
When the triples are sorted later, all rows in group 0 will come first, then
all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()

for g, i, row in stored:
    writer.writerow(tuple(row[k] for k in (0, 2)))  # output columns 1 & 3

#for row in reader:
#    if any(does_match(col) for col in row):
#        writer.writerow(row[:2])  # write only the first 2 columns

# Closing input and output files.
inFile.close()
outFile.close()
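For reference, assuming data.csv holds exactly the five sample rows shown earlier (and no header), data2.csv should come out grouped by search term, tab-delimited, with every field quoted:

"AirCheck"	"300"
"LinkRunner AT Video"	"20"
"LinkRunner AT"	"300"
"Wireless Performance Video OneTouch AT"	"2"
"Wired OneTouch AT"	"300"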