I have a text file which has the following contents such
ABC
AAD
ABE
Where A=5, B=1, C=31, D=101 and E=4.
Expected output should be
5,1,31
5,5,101
5,1,4
The problem is only the last line of the text file is being converted to number.
Here is what I have tried so far;
def replace_all(text, dic):
for i, j in dic.iteritems():
text = text.replace(i, j)
return text
with open('input.txt') as f:
content = f.readlines()
for i,j in enumerate(content):
my_text = content[i]
new_text = ','.join([my_text[i] for i in range(1, len(my_text), 1)])
reps = {'A':'5','B':'1','C':'31','D':'101','E':'4'}
txt = replace_all(new_text, reps)
with open('results.txt', 'a') as my_new_file:
my_new_file.write(txt)
What am I doing wrong?
You can write the whole thing much more simply as
reps = {'A':'5','B':'1','C':'31','D':'101','E':'4'}
with open('input.txt') as f, open('results.txt', 'w') as new_file:
for line in f:
new_text = ",".join(reps.get(char, char) for char in line)
new_file.write(new_text)
Your code only takes into account the value of new_text in the last iteration, which is the last line.
You should move all the logic inside the for loop.
def replace_all(text, dic):
for i, j in dic.iteritems():
text = text.replace(i, j)
return text
with open('input.txt') as f:
content = f.readlines()
reps = {'A':'5','B':'1','C':'31','D':'101','E':'4'}
with open('results.txt', 'a') as my_new_file:
for i,j in enumerate(content):
my_text = content[i]
new_text = ','.join([my_text[i] for i in range(1, len(my_text), 1)])
txt = replace_all(new_text, reps)
my_new_file.write(txt)
You can try this using list comprehension:
f = open('input.txt').readlines()
f = [i.strip('\n') for i in f]
reps = {'A':'5','B':'1','C':'31','D':'101','E':'4'}
new_list = [[reps[b] for b in i] for i in f]
new_file = open('results.txt', 'a')
for i in new_list:
new_file.write(','.join(i))
new_file.write('\n')
new_file.close()
Couldn't help myself:
# create a lookup table
lookup = {
'A': '5',
'B': '1',
'C': '31',
'D': '101',
'E': '4'
}
# open the input file and the output file
with open('input.txt', 'r') as f, open('results.txt', 'w') as f_new:
# for each line...
for line in f.readlines():
# ...construct a new line...
nums = [lookup[character] for character in line.strip()]
newline = ','.join(nums) + '\n'
# ... and save the new line to the result file
f_new.write(newline)
Related
I am working on a text file right now that is called "dracula.txt", and I have to do the following in python:
Save words that occur no more than 3 times in descending order in a file called less_common_words.txt. Each word with its count should be saved on a separate line.
I would appreciate any help! I've been working on this for too long.
I have already tokenized my file and counted the words. This is my code so far:
file = open("C:/Users/17733/Downloads/dracula.txt", 'r', encoding = 'utf-8-sig')
data = file.read()
data
data_list = data.split('\n')
data_list
new_list = []
for i in data_list:
if i !='':
ans_here = i.split(' ')
new_list.extend(ans_here)
new_list
import string
import re
puncs = list(string.punctuation)
puncs.append('"')
puncs.append('[')
puncs.append('.')
puncs.append('-')
puncs.append('_')
#append each seperately
new_2 = []
for i in new_list:
for p in puncs:
if p in i:
i_new = i.replace(p, ' ')
new_2.append(i_new)
new_2
new_2 = [i.replace(' ', ' ').strip().lower() for i in new_2]
new_2
from pathlib import Path
from collections import Counter
import string
filepath = Path('test.txt')
output_filepath = Path('outfile.txt')
# print(filepath.exists())
with open(filepath) as f:
content = f.readlines()
word_list = sum((
(s.lower().strip('\n').translate(str.maketrans('', '', string.punctuation))).split(' ')
for s in content
), [])
less_common_words = sorted([
key for key, value in Counter(word_list).items() if value <= 3
],reverse=True)
with open(output_filepath, mode='wt', encoding='utf-8') as myfile:
myfile.write('\n'.join(less_common_words))
This should exactly be what you need- I fixed my previous error by flattening the entire txt into a 2d list:
book_open = open('frankenstein.txt', 'r').readlines()
beauty_book = [i.split() for i in book_open]
flatten = []
for sublist in beauty_book:
for val in sublist:
flatten.append(val)
foo = 0
for i in flatten:
list_open = open('less_common_words.txt', 'r').readlines()
beauty_list = [i.replace('\n', '') for i in list_open]
count = flatten.count(flatten[foo])
compile = str((flatten[foo], count))
if count <= 3:
if compile not in beauty_list:
file = open('less_common_words.txt', 'a+')
file.write('\n'+compile)
file.close()
foo += 1
Here's my text file:
['hello', 'sffr', '18/08/2019 21:36:43', '1']
['bye', 'asadf', '19/08/2019 18:23:15', '1']
['bye', 'asadf', '19/08/2019 19:53:15', '2']
I am trying to retrieve the number in the final column by inputting a string that matches the word in first column. If there are any entries with the same name, I want to return the highest number out of all those entries.
Here is my main script:
import csv
word = input('\nAdd a word: ')
sorted_dates = []
with open('history.txt', 'r') as readFile:
csvreader = csv.reader(readFile, delimiter=',', quotechar="'")
for i, line in enumerate(csvreader, 1):
if not line:
continue
if word in line[0]:
sorted_dates.extend(line[3][-3])
highestCount = max(sorted_dates)
print(sorted_dates)
When I type the word "bye", it returns:
['1']
['1','2']
Instead, I would like to return the maximum value out of those:
2
You're printing sorted_dates in the loop.
One option is to just print(max(sorted_dates) outside the loop (highestCount isn't needed for this example).
If you enter bye when prompted, your result will be the desired 2.
history.txt:
['hello', 'sffr', '18/08/2019 21:36:43', '1']
['bye', 'asadf', '19/08/2019 18:23:15', '1']
['bye', 'asadf', '19/08/2019 19:53:15', '2']
Code:
import csv
word = input('\nAdd a word: ')
sorted_dates = []
with open('history.txt', 'r') as readFile:
csvreader = csv.reader(readFile, delimiter=',', quotechar="'")
for i, line in enumerate(csvreader, 1):
if not line:
continue
if word in line[0]:
sorted_dates.extend(line[3][-3])
# highestCount = max(sorted_dates)
print(max(sorted_dates))
Output:
2
Another solution is json.loads:
#!/usr/bin/env python3.8
import json
from pathlib import Path
word = input('\nAdd a word: ')
sorted_dates = []
fname = 'history.txt'
ss = Path(fname).read_text().splitlines()
for line in ss:
if not line:
continue
data = json.loads(line.replace("'", '"'))
if word == data[0]:
sorted_dates.append(data[-1])
highest_count = max(sorted_dates)
print(f'{sorted_dates = }')
print(f'{highest_count = }')
Good, I currently have the following code:
n = 0
with open('/home/user/test.filter') as f:
lines = f.readlines()
for l in lines:
if lines[n].startswith('-A vlan_8'):
if "-o bond1.8" in lines[n]:
f = open('vlan8.filter_in', 'a')
f.write(l)
else:
f = open('vlan8.filter_out', 'a')
f.write(l)
if lines[n].startswith('-A vlan_10'):
if "-o bond1.10" in lines[n]:
f = open('vlan10.filter_in', 'a')
f.write(l)
else:
f = open('vlan10.filter_out', 'a')
f.write(l)
if lines[n].startswith('-A vlan_15'):
if "-o bond1.15" in lines[n]:
f = open('vlan15.filter_in', 'a')
f.write(l)
else:
f = open('vlan15.filter_out', 'a')
f.write(l)
# [...]
n = n + 1
I thought about optimizing it with some accumulator or list to not make the script so extensive. Any suggestions?
Sure you can. Maintain a list of these numbers as so:
numList = [8, 10, 15, ...]
Now, all you need is a little string formatting.
with open('/home/user/test.filter') as f:
lines = f.readlines()
for i, l in enumerate(lines): # I've used enumerate to eliminate n but really you don't need it
for num in numList:
if l.startswith('-A vlan_%d' %num):
if "-o bond1.%d" %num in l:
f = open('vlan%d.filter_in' %num, 'a')
else:
f = open('vlan%d.filter_out' %num, 'a')
f.write(l)
f.close()
break
I think you want to make the code cleaner, not faster. If that is so maybe this would work:
import re
parameter_re = re.compile(r'^-A vla_(\d+).*')
with open('data.csv') as f:
lines = f.readlines()
for line in lines:
# Match required parameter
match = parameter_re.match(line)
# Skip line if it doesn't match the required parameter
if not match:
continue
# Extract number from parameter
number = int(match.group(1))
# Create output parameter string
output_str = '-o bond1.%d' % number
# Select correct filename to save the line
if output_str in line:
output_filename = 'vlan%d.filter_in' % number
else:
output_filename = 'vlan%d.filter_out' % number
# Write the line to the correct file
with open(output_filename, 'a') as f:
f.write(line)
And if you want to make it shorter (which I don't recommend, better for it to be readable):
import re
with open('data.csv') as f:
lines = f.readlines()
for line in lines:
match = re.match(r'^-A vla_(\d+).*', line)
if not match:
continue
number = int(match.group(1))
if '-o bond1.%d' % number in line:
output_filename = 'vlan%d.filter_in' % number
else:
output_filename = 'vlan%d.filter_out' % number
with open(output_filename, 'a') as f:
f.write(line)
I have a text file like below
Mo, M, 15,
Jen, F, 14
My code below replaces the age for 'Mo'
newAge = "20"
result = ""
with open("file.txt") as f:
for line in f:
if line.lower().startswith( "mo," ):
list = line.split()
list[2] = str( newAge )
line = ", ".join( list )
result += line + '\n'
f = open("file.txt", 'w')
f.write(result)
f.close()
How ever the file afterwards look like
[,',,M,,o,,M,,2,,0,,',]
How to I format it to look like the:
Mo, M, 20,
Use the csv module for both reading and writing the file. Below is a tested example.
newAge = ' 20'
result = []
with open('file.txt','rb') as fin, open('file_out.txt','wb') as fou:
cr = csv.reader(fin)
cw = csv.writer(fou)
for line in cr:
if line[0].lower() == "mo":
line[2] = newAge
cw.writerow(line)
You could try this instead:
newAge = "20"
result = ""
with open("file.txt") as f:
for line in f:
if line.lower().startswith( "mo," ):
list = line.split()
list[2] = newAge
line = ''
for element in list:
line += str(element)
line += ', '
result += line + '\n'
with open('file.txt', 'w') as inf:
inf.write(result)
If you're particular about the space at the end of the last element, you could even do:
newAge = "20"
result = ""
with open("file.txt") as f:
for line in f:
if line.lower().startswith( "mo," ):
list = line.split()
list[2] = newAge
line = ''
for index, element in enumerate(list):
line += str(element)
if not index is len(list) -1:
line += ', '
else:
line += ','
result += line + '\n'
with open('file.txt', 'w') as inf:
inf.write(result)
like #azalea mentioned, use .split(', '). Also, you should only use the newline character on strings you build because the existing strings already contain the new line.
newAge = "20"
result = ""
with open("file.txt") as f:
for line in f:
if line.lower().startswith("mo"):
list = line.split(', ')
list[2] = str(newAge)
line = ", ".join(list) + '\n'
result += line
f = open("file2.txt", 'w')
f.write(result)
f.close()
You split the line by space...then you should join it by space!
newAge = "20"
result = ""
with open("file.txt") as f:
for line in f:
if line.lower().startswith( "mo," ):
list = line.split()
list[2] = str( newAge )
line = " ".join( list )+"\n"
result += line
f = open("file.txt", 'w')
f.write(result)
f.close()
I prefer using keeping it simple . Just use the string module.
You can use it in this way.
import string
toFind = '15'
newAge = '20'
with open('file.txt','r') as f:
text = f.readlines()
text[0] = string.replace(text[0],toFind,newAge)
with open('file.txt','w') as f:
for item in text:
f.write(item)
I hope this helps!
I have a text file that has following structure:
mom:2
dad:3
mom:4
dad:2
me:4
And I need to make a dictionary that would display each name only once, but adding the numeric values together. In this case it would look like this:
{dad':5, 'me':4, 'mom':6}
How I should approach this problem?
I've tried
d = {}
try:
file = open(file.txt, "r")
for line in file:
(a, b) = line.split(":")
d[a] = float(b)
except IOError:
print()
but i haven't found a way to count up the values.
with open('file.txt', 'r') as f:
fp = f.readlines()
t = [l.strip().split(':') for l in fp if l != '\n']
d = {}
for l in t:
d[l[0]] = d.setdefault(l[0], 0) + int(l[1])