import hashlib
import csv
import glob
def hash(text):
return hashlib.sha256(text.encode('UTF-8')).hexdigest()
def hash_file(input_file_name,output_file_name):
with open(input_file_name, newline='') as f_input, open(output_file_name, 'w', newline='') as f_output:
csv_input = csv.reader(f_input)
csv_output = csv.writer(f_output)
csv_output.writerow(next(csv_input)) # Copy the header row to the output
count = 0
print(count)
for customer_email in csv_input:
csv_output.writerow([hash(customer_email[0])])
count = count + 1
print(str(count) + " - " + customer_email[0])
f_input.close()
f_output.close
mylist = [f for f in glob.glob("*.csv")]
for file in mylist:
i_file_name = file
o_file_name = "hashed-" + file
hash_file(i_file_name,o_file_name)
I'm trying the above code and I keep getting a list index out of range. I have about 15 csv files that I would like to hash the email address on. It gets the first csv file and keeps iterating through it until I get the error message. Any help would be appreciated.
There was a blank line in my input that was causing the error
list index out of range mean that you try to acces a value of list by it's index, which is the length - 1 of the list value is not reach that index.
for example :
my_list = ['a', 'b']
lenght of my_list is 2. lenth - 1 = 1
mean that you only can acces the value of list until 1. like these:
my_list[0]
my_list[1]
if you try to access my_list[2], it will raise error list index out of range as your case.
to avoid the error you can:
index_to_access = 2
len(my_list) - index_to_access >= and my_list[index_to_access]
in your case maybe at here:
csv_output.writerow([hash(customer_email[0])])
count = count + 1
print(str(count) + " - " + customer_email[0])
change to :
customer_email = len(customer_email) - 1 >= 0 and customer_email[0] or ''
csv_output.writerow([hash(customer_email)])
count = count + 1
print(str(count) + " - " + customer_email)
actually there are many ways to handle that error.
Related
This is the atomic coordinates
XYZ
A 5.98974561 7.80124465 15.1032457
B 5.60245671 7.50214457 16.0012454
I started coding as
with open(filename) as f:
while True:
line = f.readlines()
if not line:
break
print((line[1]))
print((line[2]))
And the results came as:
A 5.98974561 7.80124465 15.1032457
B 5.60245671 7.50214457 16.0012454
But I wanted it to be
A = [5.98974561 7.80124465 15.1032457]
B = [5.60245671 7.50214457 16.0012454]
You could do it like this with Abuday's suggestion:
with open(filename) as f:
lines = f.readlines()[1:]
dictionary = {}
for i in lines:
dictionary[i.split()[0]] = i.split()[1:]
This will give you a dictionary with indices (A, B, ...) as keys and lists as values.
Something like this could suite you
with open(filename) as f:
line = f.readline() # to skip first line
col_0 = [] # so store the first column
col_1 = [] # so store the second column
col_2 = [] # so store the third column
col_3 = [] # so store the fourth column
while True:
line = f.readline() # read line one by one
if not line:
break
line = line.replace('\n','') # replace line return with nothing
splitline = line.split(' ')
col_0.append(splitline[0]) # append value of the first column + convert to float
col_1.append(float(splitline[1])) # append value of the second column + convert to float
col_2.append(float(splitline[2])) # append value of the third column + convert to float
col_3.append(float(splitline[3])) # append value of the fourth column + convert to float
print(splitline[0] + ' = ' + str(splitline[1]) + ' ' + str(splitline[2]) + ' ' + str(splitline[3]) )
print(col_0)
print(col_1)
print(col_2)
print(col_3)
But I think there is more efficient ways to read 3D coordinate files. For example using numpy:
import numpy as np
filename = 't.txt'
Name = np.loadtxt(filename, skiprows=1, usecols=0,dtype = 'S1' )
Coordianates = np.loadtxt(filename, skiprows=1, usecols=(1,2,3) )
print( Name,Coordianates)
In order to produce the exact output as shown in the question then:
FILENAME = 'foo.txt'
with open(FILENAME) as data:
for line in data.readlines()[1:]:
t = line.split()
print(f'{t[0]} = [{" ".join(t[1:])}]')
Output:
A = [5.98974561 7.80124465 15.1032457]
B = [5.60245671 7.50214457 16.0012454]
def list():
list_name = []
list_name_second = []
with open('CoinCount.txt', 'r', encoding='utf-8') as csvfile:
num_lines = 0
for line in csvfile:
num_lines = num_lines + 1
i = 0
while i < num_lines:
for x in volunteers[i].name:
if x not in list_name: # l
f = 0
while f < num_lines:
addition = []
if volunteers[f].true_count == "Y":
addition.append(1)
else:
addition.append(0)
f = f + 1
if f == num_lines:
decimal = sum(addition) / len(addition)
d = decimal * 100
percentage = float("{0:.2f}".format(d))
list_name_second.append({'Name': x , 'percentage': str(percentage)})
list_name.append(x)
i = i + 1
if i == num_lines:
def sort_percentages(list_name_second):
return list_name_second.get('percentage')
print(list_name_second, end='\n\n')
above is a segment of my code, it essentially means:
If the string in nth line of names hasn't been listed already, find the percentage of accurate coins counted and then add that all to a list, then print that list.
the issue is that when I output this, the program is stuck on a while loop continuously on addition.append(1), I'm not sure why so please can you (using the code displayed) let me know how to update the code to make it run as intended, also if it helps, the first two lines of code within the txt file read:
Abena,5p,325.00,Y
Malcolm,1p,3356.00,N
this doesn't matter much but just incase you need it, I suspect that the reason it is stuck looping addition.append(1) is because the first line has a "Y" as its true_count
Beginner here:
I'm trying to sort a list of nicknames to the corresponding countries in the same line.
They come in this format:
FODORGBR + HU-Szombathely-2
ZSOLDPTE + HU-Debrecen-3
THAUSKTR + DE-Herzogenaurach-1
WRIGHNIL + UK-SuttonColdfield-2
KUROTADR + SK-KysuckeNoveMesto-1
KLERNMTT + DE-Herzogenaurach-1
BIRKNJHA + DE-Erlangen-111
CANECVAD + SK-KysuckeNoveMesto-1
MALDESND + DE-Herzogenaurach-1
I want to sort it by the country initials (so HU, DE etc.) with a caption.
So something like:
DE:
THAUSKTR
KLERNMTT
BIRKNJHA
MALDESND
HU:
FODORGBR
ZSOLDPTE
This is what I came up with do define the countries but I can't figure out how to sort all lines with it.
fw = open("NameList.txt")
for line_fw in fw:
if not line_fw.strip():
continue
cross = line_fw.find("+")
country = line_fw[cross+2:cross+4]
First split the list on " " and use operator.itemgetter to iterate over last element of list.
Or replace -1 in itemgetter by 3 if country code is always 3rd element in the list.
from operator import itemgetter
x = ["FODORGBR + HU-Szombathely-2","ZSOLDPTE + HU-Debrecen-3","THAUSKTR + DE-Herzogenaurach-1",
"WRIGHNIL + UK-SuttonColdfield-2","KUROTADR + SK-KysuckeNoveMesto-1","KLERNMTT + DE-Herzogenaurach-1",
"BIRKNJHA + DE-Erlangen-111","CANECVAD + SK-KysuckeNoveMesto-1","MALDESND + DE-Herzogenaurach-1"]
new_list = [i.split() for i in x]
new_list.sort(key=itemgetter(-1))
print([" ".join(i) for i in new_list])
Output:
['BIRKNJHA + DE-Erlangen-111', 'THAUSKTR + DE-Herzogenaurach-1', 'KLERNMTT + DE-Herzogenaurach-1', 'MALDESND + DE-Herzogenaurach-1', '
ZSOLDPTE + HU-Debrecen-3', 'FODORGBR + HU-Szombathely-2', 'KUROTADR + SK-KysuckeNoveMesto-1', 'CANECVAD + SK-KysuckeNoveMesto-1', 'WRI
GHNIL + UK-SuttonColdfield-2']
Using re.search and collections.defaultdict:
import re
from collections import defaultdict
d = defaultdict(list)
with open('NameList.txt') as fw:
for line in fw:
code = re.search(' (\w{2})-', line).group(1)
nick = re.search('(\w{8}) +', line).group(1)
d[code].append(nick)
Output:
defaultdict(list,
{'DE': ['THAUSKTR', 'KLERNMTT', 'BIRKNJHA', 'MALDESND'],
'HU': ['FODORGBR', 'ZSOLDPTE'],
'SK': ['KUROTADR', 'CANECVAD'],
'UK': ['WRIGHNIL']})
Your code for finding the country names looks just fine. One Piece of advice when working with files: use the with- statement instead of open and close. When using open, and an error occurs sometime before close is called, it's possible that the file is not properly closed, which can mess up all kinds of things. with closes the file no matter what happens inside the corresponding code block (It works similar to try - finally, see the above link for more info). So, like this:
with open('NameList.txt', 'r') as fw:
for line_fw in fw:
...
it is ensured that the file will always close down. By the way, instead of using line.find('+'), you can just use line.split('+'), which takes away the whole string slicing part.
Now, to your question: there are a few possibilities to use here. The simplest would be defining a list for every country, and appending the corresponding names to the right list:
de = []
hu = []
uk = []
sk = []
with open('NameList.txt', 'r') as fw:
for line_fw in fw:
if not line_fw.strip():
continue
country = line_fw.split('+')[1].split('-')[0].strip()
nickname = line_fw.split('+')[0]
if country == 'DE':
de.append(nickname)
elif country == 'HU':
hu.append(nickname)
elif country == 'UK':
uk.append(nickname)
else:
sk.append(nickname)
this will return a list for every country, containing the corresponding nicknames. As you see, however, this is very clunky and long. A more elegant solution is using a dictionary with the countries as keys and a list of the names as values:
d = {}
with open('NameList.txt', 'r') as fw:
for line_fw in fw:
if not line_fw.strip():
continue
country = line_fw.split('+')[1].split('-')[0].strip()
nickname = line_fw.split('+')[0].strip()
try:
d[country].append(nickname) # if country already exists in d, append the nickname
except KeyError:
d[country] = [nickname] # if country doesn't exist in d, make a new entry
which will create a dictionary looking like this (i just took the first few lines to illustrate it):
{'HU': ['FODORGBR', 'ZSOLDPTE'], 'DE': ['THAUSKTR'], 'UK': ['WRIGHNIL']}
Now, there are more elegant solutions for extracting the countries and nicknames, but some of those have been pointed out in other answers.
Finally, if i got that right, you want to write your results to a new file, or at least print them. Let's say you have a dictionary of the above form. Simply iterate over it's keys via for k in d:, add some newlines ('\n') inbetween and use join to convert the listsinto one string with newlines between all items:
for k in d:
print(k + ':\n' + '\n'.join(d[k]) + '\n')
which will print:
HU:
FODORGBR
ZSOLDPTE
DE:
THAUSKTR
UK:
WRIGHNIL
by adding with open(outputfile, 'w') as f: and replacing print with f.write, you can easily write this to a new file.
Here below is the Snippet which would help you :
sample = '''
FODORGBR + HU-Szombathely-2
ZSOLDPTE + HU-Debrecen-3
THAUSKTR + DE-Herzogenaurach-1
WRIGHNIL + UK-SuttonColdfield-2
KUROTADR + SK-KysuckeNoveMesto-1
KLERNMTT + DE-Herzogenaurach-1
BIRKNJHA + DE-Erlangen-111
CANECVAD + SK-KysuckeNoveMesto-1
MALDESND + DE-Herzogenaurach-1
'''
def find_between( s, first, last ):
try:
start = s.index( first ) + len( first )
end = s.index( last, start )
return s[start:end]
except ValueError:
return ""
data = sample.splitlines()
elements = {}
for indv in data:
code = find_between(indv,"+","-").strip()
value = find_between(indv,"","+").strip()
if code != '' and code in elements:
values = []
values.append(value)
values.extend(elements[code])
values = list(filter(None, values))
values.sort()
elements[code] = values
elif code != '':
values = []
values.append(value)
elements[code] = values
print(elements)
Output :
{'HU': ['FODORGBR', 'ZSOLDPTE'], 'DE': ['BIRKNJHA', 'KLERNMTT', 'MALDESND', 'THAUSKTR'], 'UK': ['WRIGHNIL'], 'SK': ['CANECVAD', 'KUROTADR']}
I am having problem with a list for an averaging program, it says:
'line 20, in
nameletter = (letter[int(num)])
IndexError: list index out of range'
This is the part of the code:
f2 = open("Classes" + "/Sorted/" + "Alphabetical.txt", "w")
letter = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
print(letter[num])
#Loop
while num <= 26:
nameletter = (letter[int(num)])
if os.path.exists("Classes" + "/" + nameletter + ".txt"):
#Opening the students file
f = open("Classes" + "/" + nameletter + ".txt")
List = f.read().splitlines()
f.close()
#Writing data to the file
f2.write(List[5] + " - score = " + List[6])
f2.write("\n")
else:
pass
num = int(num) + 1
f2.close()
Usually you are using a loop like
for one_letter in letter:
# your code
to access all items of a list.
Looping with the help of an index is OK as well, but you have to stop at the right point. In your case you have 26 item and since the index is 0-based, the highest number is 25 not 26.
Lists are indexed from 0. So the last index of your letters is 25 not 26.
But better use a for-loop, then you don't have to count by hand:
import string
f2 = open("Classes" + "/Sorted/" + "Alphabetical.txt", "w")
for lettername in string.uppercase:
try:
with open('Classes/%s.txt' % lettername) as inp:
lines = inp.read().splitlines()
except IOError:
pass
else:
f2.write('%s - score = %s' % (lines[5], lines[6]))
f2.close()
If you want to make it cleaner and succinct and do it like a ninja
wf = os.path.join("Classes", "Sorted", "Alphabetical.txt")
rf_ = os.path.join("Classes","{letter}.txt")
line = "{} - score = {}"
# separate the output strs from logic
alphabet = [chr(i) for i in range(65, 91)] # ['A', 'B', ..., 'Z']
with open(wf, "wt") as fw:
for letter in alphabet:
rf = rf_.format(letter=letter)
if os.path.exists(rf):
with open(rf) as fr:
ls = fr.read().splitlines()
print(line.format(ls[5], ls[6]), file=fw) # this will write to fw
Always use os.path.join instead of str acrobatics to be robust(Beazly). If the code didn't work please comment.
I have to input a text file that contains comma seperated and line seperated data in the following format:
A002,R051,02-00-00,05-21-11,00:00:00,REGULAR,003169391,001097585,05-21-11,04:00:00,REGULAR,003169415,001097588,05-21-11,08:00:00,REGULAR,003169431,001097607
Multiple sets of such data is present in the text file
I need to print all this in new lines with the condition:
1st 3 elements of every set followed by 5 parameters in a new line. So solution of the above set would be:
A002,R051,02-00-00,05-21-11,00:00:00,REGULAR,003169391,001097585
A002,R051,02-00-00,05-21-11,04:00:00,REGULAR,003169415,001097588
A002,R051,02-00-00,05-21-11,08:00:00,REGULAR,003169431,001097607
My function to achieve it is given below:
def fix_turnstile_data(filenames):
for name in filenames:
f_in = open(name, 'r')
reader_in = csv.reader(f_in, delimiter = ',')
f_out = open('updated_' + name, 'w')
writer_out = csv.writer(f_out, delimiter = ',')
array=[]
for line in reader_in:
i = 0
j = -1
while i < len(line):
if i % 8 == 0:
i+=2
j+=1
del array[:]
array.append(line[0])
array.append(line[1])
array.append(line[2])
elif (i+1) % 8 == 0:
array.append(line[i-3*j])
writer_out.writerow(array)
else:
array.append(line[i-3*j])
i+=1
f_in.close()
f_out.close()
The output is wrong and there is a space of 3 lines at the end of those lines whose length is 8. I suspect it might be the writer_out.writerow(array) which is to blame.
Can anyone please help me out?
Hmm, the logic you use ends up being fairly confusing. I'd do it more along these lines (this replaces your for loop), and this is more Pythonic:
for line in reader_in:
header = line[:3]
for i in xrange(3, len(line), 5):
writer_out.writerow(header + line[i:i+5])