import csv
import output
# Group the 4th column of the input CSV by the value in its 2nd column and
# write one "<title>,<values> \n" line per group to data.csv.
fill = input("Enter File name:")
dict_out = {}
# Read the whole input first; the with-block closes the file even if a row
# has fewer than four columns and raises IndexError.
with open(fill) as f:
    csv_f = csv.reader(f)
    for row in csv_f:
        # Values are concatenated as plain strings, so the separator between
        # them is whatever whitespace the input fields already carry.
        dict_out[row[1]] = dict_out.get(row[1], '') + row[3]
# The output line format is kept exactly as before: title, comma, the
# concatenated values, a trailing blank and a newline.
with open('data.csv', "w") as m:
    for title, value in dict_out.items():
        m.write('{},'.format(title))
        m.write('{} \n'.format(value))
Prints my csv as
Title,Detail
Siding, 50 63 22 68 138 47 123 107 107 93 117
Asphalt, 49 8 72 19 125 95 33 83 123 144
Rail, 82 98 89 62 58 66 24 77 120 93
Grinding, 127 47 20 66 29 137 33 145 3 98
Concrete, 130 75 12 88 22 137 114 88 143 16
I would like to put a comma between the numbers. I have tried adding m.write(',') after m.write('{} \n'.format(value)), but that only adds a comma after the last number. How can I format it so that it outputs the following?
Title,Detail
Siding, 50,63,22,68,138,47,123,107,107,93,117
Asphalt, 49,8,72,191,25,95,33,83,123,144
Rail, 82,98,89,62,58,66,24,77,120,93
Grinding, 127,47,20,66,29,137,33,145,3,98
Concrete, 130,75,12,88,22,137,114,88,143,16
not the best way but you can:
for title, value in dict_out.items():
    m.write('{},'.format(title))
    # split() without arguments drops the leading blank and collapses runs of
    # spaces, so no empty field (",,") is produced the way a plain
    # replace(' ', ',') would for a value that starts with a space.
    m.write('{} \n'.format(','.join(value.split())))
But you should definitely use csv.writer instead:
import csv
import output
# Group the 4th column of the input CSV by the value in its 2nd column and
# write one CSV row per group: the title followed by all of its values.
fill = input("Enter File name:")
dict_out = {}
# newline='' is what the csv module expects for files it reads or writes; it
# lets the module handle line endings itself.
with open(fill, newline='') as f:
    csv_f = csv.reader(f)
    for row in csv_f:
        # Collect the values in a list so the writer can emit them as
        # separate fields.
        dict_out.setdefault(row[1], []).append(row[3])
with open('data.csv', "w", newline='') as c:
    m = csv.writer(c)
    for title, value in dict_out.items():
        m.writerow([title] + value)
If value is a string then you need to use value.split(). If it is already a list then you don't need to use the split method.
# Write one "<title>,<v1>,<v2>,..." line per entry; value is expected to be a
# whitespace-separated string here (see the note about split() above).
with open('data.csv', "w") as m:
    for title, value in dict_out.items():
        m.write(title + "," + ",".join(value.split()) + "\n")
Related
I have a file of which the first column has repeated pattern as belows,
1999.2222 50 100
1999.2222 42 15
1999.2222 24 35
1999.2644 10 25
1999.2644 10 26
1999.3564 65 98
1999.3564 45 685
1999.3564 54 78
1999.3564 78 98
and I want this file into three files as
file1:
1999.2222 50 100
1999.2222 42 15
1999.2222 24 35
file2:
1999.2644 10 25
1999.2644 10 26
file3:
1999.3564 65 98
1999.3564 45 685
1999.3564 54 78
1999.3564 78 98
How could I split like this? Thanks:)
itertools.groupby is probably the most suitable choice for what you're after.
import itertools
with open('file.txt', 'r') as fin:
    # Skip blank lines: the key function indexes the first token and would
    # raise IndexError on a line with no tokens.
    non_blank = (l for l in fin if l.strip())
    # group each line in input file by first part of split; groupby only
    # merges *consecutive* lines with the same key, which matches the input
    # layout where equal keys are adjacent
    for i, (_, g) in enumerate(itertools.groupby(non_blank, lambda l: l.split()[0]), 1):
        # create file to write to suffixed with group number - start = 1
        with open('file{0}.txt'.format(i), 'w') as fout:
            # for each line in group write it to file
            for line in g:
                fout.write(line.strip() + '\n')
Any idea why is this always writing the same line in output csv?
21 files = glob.glob(path)
22 csv_file_complete = open("graph_complete_reddit.csv", "wb")
23 stat_csv_file = open("test_stat.csv", "r")
24 csv_reader = csv.reader(stat_csv_file)
25 lemmatizer = WordNetLemmatizer()
26 for file1, file2 in itertools.combinations(files, 2):
27 with open(file1) as f1:
28 print(file1)
29 f1_text = f1.read()
30 f1_words = re.sub("[^a-zA-Z]", ' ', f1_text).lower().split()
31 f1_words = [str(lemmatizer.lemmatize(w, wordnet.VERB)) for w in f1_words if w not in stopwords]
32 print(f1_words)
33 f1.close()
34 with open(file2) as f2:
35 print(file2)
36 f2_text = f2.read()
37 f2_words = re.sub("[^a-zA-Z]", ' ', f2_text).lower().split()
38 f2_words = [str(lemmatizer.lemmatize(w, wordnet.VERB)) for w in f2_words if w not in stopwords]
39 print(f2_words)
40 f2.close()
41
42 a_complete = csv.writer(csv_file_complete, delimiter=',')
43 print("*****")
44 print(file1)
45 print(file2)
46 print("************************************")
47
48 f1_head, f1_tail = os.path.split(file1)
49 print("************")
50 print(f1_tail)
51 print("**************")
52 f2_head, f2_tail = os.path.split(file2)
53 print(f2_tail)
54 print("********************************")
55 for row in csv_reader:
56 if f1_tail in row:
57 file1_file_number = row[0]
58 file1_category_number = row[2]
59 if f2_tail in row:
60 file2_file_number = row[0]
61 file2_category_number = row[2]
62
63 row_complete = [file1_file_number, file2_file_number, file1_category_number, file2_category_number ]
64 a_complete.writerow(row_complete)
65
66 csv_file_complete.close()
Those prints show different filenames!
This is test_stat.csv file which the code uses as input:
1 1,1bmmoc.txt,1
2 2,2b3u1a.txt,1
3 3,2mf64u.txt,2
4 4,4x74k3.txt,5
5 5,lsspe.txt,3
6 6,qbimg.txt,4
7 7,w95fm.txt,2
And here's what the code outputs:
1 7,4,2,5
2 7,4,2,5
3 7,4,2,5
4 7,4,2,5
5 7,4,2,5
6 7,4,2,5
7 7,4,2,5
8 7,4,2,5
9 7,4,2,5
10 7,4,2,5
11 7,4,2,5
12 7,4,2,5
13 7,4,2,5
14 7,4,2,5
15 7,4,2,5
16 7,4,2,5
17 7,4,2,5
18 7,4,2,5
19 7,4,2,5
20 7,4,2,5
21 7,4,2,5
please comment or suggest fixes.
You're never rewinding stat_csv_file, so eventually, your loop over csv_reader (which is a wrapper around stat_csv_file) isn't looping at all, and you write whatever you found on the last loop. Basically, the logic is:
On first loop, look through all of csv_reader, find hit (though you keep looking even when you find it, exhausting the file), write hit
On all subsequent loops, the file is exhausted, so the inner search loop doesn't even execute, and you end up writing the same values as last time
The slow, but direct way to fix this is to add stat_csv_file.seek(0) before you search it:
53 print(f2_tail)
54 print("********************************")
stat_csv_file.seek(0) # Rewind to rescan input from beginning
55 for row in csv_reader:
56 if f1_tail in row:
57 file1_file_number = row[0]
58 file1_category_number = row[2]
59 if f2_tail in row:
60 file2_file_number = row[0]
61 file2_category_number = row[2]
A likely better approach would be to load the input CSV into a dict once, then perform lookup there as needed, avoiding repeated (slow) I/O in favor of fast dict lookup. The cost would be higher memory use; if the input CSV is small enough, that's not an issue, if it's huge, you may need to use a proper database to get the rapid lookup without blowing memory.
It's a little unclear what the logic should be here, since your inputs and outputs don't align (your output should start with a repeated digit, but it doesn't for some reason?). But if the intent is that the input contains file_number, file_tail, category_number, then you could begin your code (above the top level loop) with:
# Create mapping from second field to associated first and third fields.
# The three-name unpacking requires every row yielded by csv_reader to have
# exactly three fields (otherwise ValueError); if the same second field
# appears on several rows, the last row wins. Building the dict consumes
# csv_reader, so it must be done once, before the main loop.
tail_to_numbers = {ftail: (fnum, cnum) for fnum, ftail, cnum in csv_reader}
Then replace:
for row in csv_reader:
if f1_tail in row:
file1_file_number = row[0]
file1_category_number = row[2]
if f2_tail in row:
file2_file_number = row[0]
file2_category_number = row[2]
row_complete = [file1_file_number, file2_file_number, file1_category_number, file2_category_number ]
a_complete.writerow(row_complete)
with the simpler and much faster:
try:
file1_file_number, file1_category_number = tail_to_numbers[f1_tail]
file2_file_number, file2_category_number = tail_to_numbers[f2_tail]
except KeyError:
# One of the tails wasn't found in the lookup dict, so don't output
# (variables would be stale or unset); optionally emit some error to stderr
continue
else:
# Found both tails, output associated values
row_complete = [file1_file_number, file2_file_number, file1_category_number, file2_category_number]
a_complete.writerow(row_complete)
88 90 94 98 100 110 120
75 77 80 86 94 103 113
80 83 85 94 111 111 121
68 71 76 85 96 122 125
77 84 91 102 105 112 119
81 85 90 96 102 109 134
Hi, I am very new to computer programming in general and I need some help with my current project. I need to read numbers from a text file into a table and calculate the averages and the maximum values. This is what I currently have.
def main():
    """Print a table of noise readings per car with averages and maxima.

    Prompts for a data file in which every non-blank line holds at least
    seven whitespace-separated integers: one line per car, one value per
    speed column of the table header (30 to 90). Prints one row per car,
    numbered from 1 and ending with that car's average reading, followed by
    the per-column averages and maxima.

    Raises:
        IndexError: if a non-blank line has fewer than seven values.
        ValueError: if a value is not an integer, or the file has no data.
    """
    # NOTE(review): intro() is not defined in the visible code; it is assumed
    # to be provided elsewhere in the program.
    intro()
    filename = input("Enter the name of the data file: ")
    print() #turnin

    # One list of seven ints per car. The with-block closes the file as soon
    # as it has been read.
    readings = []
    with open(filename,"r") as infile:
        for line in infile:
            data = line.split()
            if not data:
                continue  # tolerate blank lines
            readings.append([int(data[k]) for k in range(7)])
    if not readings:
        raise ValueError("no readings found in {!r}".format(filename))

    # Transpose the rows (cars) into columns (speeds) for the column stats.
    columns = list(zip(*readings))
    #calculates the average speeds
    averages = [sum(col) / len(col) for col in columns]
    #calculates the max speeds
    maxima = [max(col) for col in columns]
    #Calculates the average of the average speeds
    Avgav = sum(averages) / len(averages)
    #Calculates the average of the average max
    Avmax = sum(maxima) / len(maxima)

    #creates table
    # NOTE(review): aver_speed is not defined in the visible code; presumably
    # a title defined elsewhere. Confirm, or this line raises NameError.
    print(aver_speed)
    print()
    print(" "* 27, "Speed (MPH)")
    print(" "*3,"Car :", *("{:6}".format(speed) for speed in range(30, 100, 10)),
          " :","{:14}".format ("Average Noise"))
    print("-"*77)
    # One row per car: iterate the rows themselves so the car number and the
    # row count follow the data instead of a hard-coded list length.
    for car, row in enumerate(readings, 1):
        noise = sum(row) / len(row)
        print("{:6}".format(car)," "*2,":", *("{:6}".format(value) for value in row),
              " :", "{:6.1f}".format(noise))
    print("-"*77)
    print(" ","Average","{:1}".format(":"), "{:8.1f}".format(averages[0]),
          *("{:6.1f}".format(avg) for avg in averages[1:]),
          "{:9.1f}".format(Avgav))
    print()
    print(" ","Maximum","{:1}".format(":"), *("{:6}".format(top) for top in maxima),
          "{:11.1f}".format(Avmax))
Any help would be appreciated.
Now that i have updated my code my table looks like this:
Car : 30 40 50 60 70 80 90 : Average Noise
2 : 88 90 94 98 100 110 120 :
2 : 75 77 80 86 94 103 113 :
2 : 80 83 85 94 111 111 121 :
2 : 68 71 76 85 96 122 125 :
2 : 77 84 91 102 105 112 119 :
2 : 81 85 90 96 102 109 134 :
Average : 78.2 81.7 86.0 93.5 101.3 111.2 122.0 96.3
Maximum : 88 90 94 102 111 122 134 105.9
I've been trying to figure out the calculations for average noise and how to list the cars 1 through 6. I was unable to fi
You have a lot of code now; this can be done more simply. If you want to calculate per line (row) of the file:
with open(filename, 'r') as f:
    for line in f:
        # Build a real list: in Python 3 map() returns a one-shot iterator,
        # so max() would exhaust it and sum()/len() could not be computed.
        list_of_speed = [int(value) for value in line.split()]
        if not list_of_speed:
            continue  # blank line: nothing to measure
        max_speed = max(list_of_speed)
        aver_speed = float(sum(list_of_speed))/len(list_of_speed)
If by column:
with open(filename, 'r') as f:
    # Nested lists instead of nested map(): map objects cannot be indexed or
    # measured with len() in Python 3. Blank lines are skipped.
    l = [[int(value) for value in x.split()] for x in f if x.strip()]
# zip(*l) yields one tuple per column; it also copes with an empty file,
# where indexing l[0] would raise IndexError.
for column in zip(*l):
    list_of_speed = list(column)
    max_speed = max(list_of_speed)
    aver_speed = float(sum(list_of_speed))/len(list_of_speed)
You can use sum() function on a list and len() function gives the number of elements in the list. So for average calculation you can simply do sum(n1)/float(len(n1)).
Try to use some dynamic way of keeping track of read data or calculate sum and avg on the fly and keep track of that data. Not to discourage you but using six lists doesn't look so elegant. Hope something similar to this might work:
from pprint import pprint
def main():
    """Read columns of integers from a file and print per-column sum and mean.

    Prompts for a file name, collects the whitespace-separated integers of
    every line into a dict keyed by 1-based column position, pretty-prints
    that dict and then prints, for each column, a separator, the column
    number, the sum and the average.
    """
    # intro()
    filename = input("Enter the name of the data file:")
    n = {} # a dictionary
    # The with-block closes the file even if a token is not an integer.
    with open(filename,"r") as infile:
        for line in infile:
            # apply typecasting on each element and add speeds into to a
            # dictionary of lists; supports any number of data sets
            for i, d in enumerate(map(int, line.split()), 1):
                n.setdefault(i, []).append(d)
    pprint (n)
    # do whatever you want with the dictionary
    for d in n:
        print ("-" * 10)
        print (d)
        print (sum(n[d]))
        print (sum(n[d])/float(len(n[d])))
main()
For printing purposes you may want to use some thing like https://pypi.python.org/pypi/PTable
In this program, I am trying to write the index out to a text file named "index.txt", as well as printing it. However, whenever I run the program, I get an error saying "word" is not defined, and my index.txt file contains only "Word/tLine Numbers".
Code:
from string import punctuation
def makeIndex(filename):
    """Build a word -> line-number index for a text file.

    Each line is lower-cased and split on whitespace; punctuation is removed
    from every token and only purely alphabetic results are indexed.

    Args:
        filename: path of the text file to index.

    Returns:
        dict mapping each word to the ascending list of 1-based line numbers
        on which it occurs (each line listed once per word).
    """
    # One translation table removes all punctuation in a single pass.
    strip_punctuation = str.maketrans('', '', punctuation)
    wordIndex = {}
    with open(filename) as f:
        # enumerate ties the counter to the line loop, so it advances once
        # per line regardless of how many words the line holds.
        for lineNum, line in enumerate(f, 1):
            for token in line.lower().split():
                word = token.translate(strip_punctuation)
                if not word.isalpha():
                    continue
                lineNums = wordIndex.setdefault(word, [])
                # Line numbers only grow, so a repeat on the same line can
                # only be the last entry.
                if not lineNums or lineNums[-1] != lineNum:
                    lineNums.append(lineNum)
    return wordIndex
def output(wordIndex):
    """Print the index to stdout, one word per line in alphabetical order.

    Args:
        wordIndex: dict mapping each word to a list of line numbers.
    """
    print("Word\tLine Numbers")
    for key in sorted(wordIndex):
        # The word, a tab and the numbers are separated by single spaces and
        # the line ends with a blank before the newline.
        print(key, '\t', *wordIndex[key], end=" \n")
def main():
    """Index a file chosen by the user, print the index and save it.

    Prompts for the file name, builds the index with makeIndex(), prints it
    with output() and writes the same information to index.txt as
    tab-separated "word<TAB>line numbers" lines under a header line.
    """
    filename = input("What is the file name to be indexed?")
    index = makeIndex(filename)
    output(index)
    with open('index.txt', 'w') as writefile:
        # '\t' and '\n' are the tab and newline escapes; '/t' and '/n' would
        # be written literally.
        writefile.write("Word\tLine Numbers\n")
        # Iterate the index itself: there is no separate `word` sequence.
        for word in sorted(index):
            lineNums = " ".join(str(lineNum) for lineNum in index[word])
            writefile.write("{}\t{}\n".format(word, lineNums))
main()
Output:
What is the file name to be indexed?test.txt
Word Line Numbers
a 8 12 38 70 78
all 85 101
also 91
an 34 96
anagrams 93 104
as 84
ask 28
blocks 4
called 61
create 69
different 59
difficulties 47
each 74
employed 65
figure 32
file 9
find 100
finds 92
following 22
for 18 73
given 37
has 80
have 56
here 66
in 7 48
interesting 19
is 52 67
it 103
its 42 87
jumble 25
large 3
letters 43
long 54
many 58
new 14
of 5 16 41 45 86 102
one 44
opens 10
out 33
permutations 62 88
possibilities 17
problem 51
program 23 90
programs 20
puzzles 26
range 15
reorderings 60
same 82
scrambled 39
set 40
signature 72 83
since 94
so 57 76
solver 30
solves 24
solving 49
strategy 64
text 6
that 53 77
the 21 29 46 63 81
this 50 89
to 31 68
typing 95
unique 71
unknown 35
unscrambled 97
up 11
which 27
whole 13
will 99
with 2
word 36 75 79 98
words 55
working 1
tTraceback (most recent call last):
File "C:\Users\jp19p_000\Desktop\wordIndex(1).py", line 46, in <module>
main()
File "C:\Users\jp19p_000\Desktop\wordIndex(1).py", line 41, in main
for index in range(len(word)):
NameError: name 'word' is not defined
This is the index.txt file:
Word/tLine Numbers
from collections import defaultdict
import string
import sys
# convert to lowercase, remove all digits and punctuation
# (only ASCII letters are lower-cased; other characters pass through as-is)
trans = str.maketrans(string.ascii_uppercase, string.ascii_lowercase, string.digits + string.punctuation)


def get_unique_words(s, trans=trans):
    """Return the set of distinct whitespace-separated words in *s*.

    Args:
        s: text to split.
        trans: str.translate() table applied before splitting; the default
            lower-cases ASCII letters and deletes digits and punctuation.
    """
    return set(s.translate(trans).split())
def make_index(seq, start=1):
    """Map every word to the positions of the strings in *seq* containing it.

    Args:
        seq: iterable of strings (for example an open text file).
        start: number assigned to the first string.

    Returns:
        defaultdict(list) mapping word -> ascending list of positions; a
        position appears once per word because words are de-duplicated per
        string.
    """
    index = defaultdict(list)
    for i, s in enumerate(seq, start):
        for word in get_unique_words(s):
            index[word].append(i)
    return index
def write_index(index, file=sys.stdout):
    """Write the index as tab-separated lines, words in sorted order.

    Args:
        index: mapping of word -> iterable of line numbers.
        file: writable text stream; defaults to standard output.
    """
    print("Word\tLines", file=file)
    for word in sorted(index):
        lines = " ".join(map(str, index[word]))
        print("{}\t{}".format(word, lines), file=file)
def main():
    """Prompt for a file name, index that file and save the result.

    The index is built by make_index() from the lines of the chosen file and
    written to index.txt by write_index().
    """
    fname = input("What is the name of the file to be indexed? ")
    with open(fname) as inf:
        index = make_index(inf)
    with open("index.txt", "w") as outf:
        write_index(index, outf)


if __name__=="__main__":
    main()
I am trying to read a file that looks like this:
83 124 125
83 125 126
83 126 127
83 127 128
83 128 128
154 120 120
154 120 121
154 121 122
154 122 123
154 123 124
122 92 93
122 93 94
122 94 95
122 95 96
122 96 97
And write to different files every set of values. The first value (83 / 154 / 122) needs to be the text file's name. The rest of the values should be written into their correspondent file like so: 124 128
So file "83.txt" contains 124 128. The file "154.txt" contains 120 124. And the file "122.txt should contain 92 97.
How do i iterate over the above mentioned input and remove duplicate values from the first column? Then proceed into taking the minimum and maximum values that each of the first values share? and finally write each separate line into a new text file inside a certain folder?
I have tried to use:
from itertools import chain
from collections import defaultdict
from operator import itemgetter
from itertools import groupby
# NOTE(review): the asker describes this as an incomplete draft. The notes
# below record only what is visibly wrong; the code is left as posted.
def final(f):
# Opens "<f>.txt" for reading.
with open (f+'.txt', 'r') as fin:
# NOTE(review): readlines() returns a list, which has no .split() method.
lines = fin.readlines().split().strip('\n')
v1, v2, v3 = lines[0], lines[1], lines[2]
# NOTE(review): `diffs` and `v1x` are not defined in the visible code, and
# `lambda (i, x): ...` is tuple-parameter syntax that Python 3 rejects.
for v1, g in groupby(enumerate(diffs[v1x]), lambda (i, x): i - x):
group = map(itemgetter(1), g)
# NOTE(review): this is an attribute lookup on `lines`, not a call into the
# itertools module.
lines.itertools.chain()
# Rebinds `lines` to an empty defaultdict, discarding what was read above.
lines = defaultdict(list)
print (lines),
This is an incomplete version of what I had earlier, but I can't seem to make it work like it did before. Anyway, my code ended up very long and not very readable. If more details are required to answer this, I'll provide them. I'd like to see different takes on this rather than simply posting my crude code and having someone tweak it.
Assuming the filenames are "in order", and the other items are in order, then:
from itertools import groupby
from operator import itemgetter
from collections import deque
with open('/home/jon/testdata.txt') as fin:
    # Tokenise lazily and skip blank lines: itemgetter(0) would raise
    # IndexError on an empty token list.
    lines = (line.split() for line in fin if line.strip())
    # groupby merges consecutive rows with the same first field, which is why
    # the input must already be ordered by that field.
    for k, g in groupby(lines, itemgetter(0)):
        fst = next(g)
        # deque(g, 1) drains the rest of the group keeping only its last row;
        # a single-row group leaves it empty, so fall back to the first row.
        lst = next(iter(deque(g, 1)), fst)
        with open(k + '.txt', 'w') as fout:
            fout.write(fst[1] + ' ' + lst[2])
long_string = """ 83 124 125
83 125 126
83 126 127
83 127 128
83 128 128
154 120 120
154 120 121
154 121 122
154 122 123
154 123 124
122 92 93
122 93 94
122 94 95
122 95 96
122 96 97
"""
# Maps the first field of each row to (smallest 2nd field, largest 3rd field).
files = {}
for line in long_string.split("\n"):
    fields = line.split()
    if not fields:
        continue  # blank line, e.g. the one after the final newline
    try:
        filenum, minvalue, maxvalue = fields
    except ValueError:
        print("There are no 3 values as excpected")
        continue
    # Compare as integers: as strings, "99" would sort above "100".
    minvalue, maxvalue = int(minvalue), int(maxvalue)
    if filenum in files:
        lowest, highest = files[filenum]
        files[filenum] = (min(lowest, minvalue), max(highest, maxvalue))
    else:
        files[filenum] = (minvalue, maxvalue)
# Text mode and items() keep this runnable on Python 3.
for filename, values in files.items():
    with open(filename + ".txt", "w") as writer:
        writer.write("{} {}".format(values[0], values[1]))