I have a .txt with:
#Date 111111:UhUidsiIds
#Name Sebastian-Forset
#Date 222222:UdfasdUDsa
#Name Sebastian_Forset2
#Date 333333:UDsafduD
#Name Solaris Mage
#Date 444444:Ghdsasra
#Name Marge S
and a file whith:
#Name Sebastian Forset
#Date 191020
#Name Sebastian Forset2
#Date 201020
#Date Homer S
#Date 281902
The names are the same, with some differences of characters (spaces, -, _ etc.)
I would copy the numbers of the second file to the first file in order to have a final file txt with:
#Name Sebastian Forset
#Date 191020:UhUidsiIds
#Name Sebastian Forset2
#Date 201020:UdfasdUDsa
#Name Solaris Mage
#Date 281902:UDsafduD
#Name Marge S
#Date 444444:Ghdsasra
This is my code, but merge the file, copy only same name
def isInFile(l, f):
with open(f, 'r') as f2:
for line in f2:
if l == line:
return True
return False
def similitudes(file1, file2):
same = 0
data = ''
copy = False
with open(file1, 'r') as f1:
for line in f1:
if copy == True:
data += line
if line == '\n' or line[0:6] != '#Name ':
copy = False
if (line[0:6] == '#Name ') or line[0:6] == '#Date ':
print line
if isInFile(line, file2) == True:
copy = True
data += line
print "true"
else:
print "ok"
same += 1
return data
def main(argv=2):
print (sys.argv[1])
print (sys.argv[2])
if argv == 2:
out = open('final.txt', 'w')
data = (
similitudes(sys.argv[1], sys.argv[2]) + '\n'
)
out.write(data)
out.close()
else:
print ("This program need 2 files")
exit (0)
return 0
if __name__ == '__main__':
status = main()
sys.exit(status)
First, list out the characters that will differ. Let's say "-" , "_" and " ".
Now split the two strings using these delimiters. you can use "re" package in python.
>>> a='Mr-Sebastian_Forset '
>>> import re
>>> re.split('- |_ | ',a)
['Mr', 'Sebastian', 'Forset']
If the resultant lists for the two strings are equal, paste the number in second file in first one.
You can use the same delimiter concept to split the number and paste it in other file.
Adding another answer, which will points out the bug in your code
Coming to the following piece of code
if (line[0:6] == '#Name ') or line[0:6] == '#Date ':
print line
if isInFile(line, file2) == True:
copy = True
data += line
Here, you are checking If your line starts with either "#Name " or "#Date ", and calling isInFile() method with line and file2 as arguments.
This is the first issue, there is no use of sending just one line that starts with "#Name " in your case.
If the current line starts with "#Date ", send the previous line and file as arguments to this method.
And second Issue is with the isInFile() definition, which is doing effectively nothing.
if l == line:
return true
You are just checking if two lines in file1 and file2 are same and if yes, you writing this line in sysout.
So, your program will just print the common lines between file1 and file2.
Modified code should like the below one:
def isInFile(l, f):
line_found = false
required_line = null
with open(f, 'r') as f2:
for line in f2:
if line_found:
required_line = line
break
elif l == line:
line_found = true
return (line_found, required_line)
def similitudes(file1, file2):
same = 0
data = ''
copy = False
previous_line = null
with open(file1, 'r') as f1:
for line in f1:
if copy == True:
data += line
if line == '\n' or line[0:6] != '#Name ':
copy = False
if (line[0:6] == '#Name '):
print line
previous_line = line
elif line[0:6] == '#Date ':
print line
file2_line_info = isInFile(previous_line, file2)
if file2_line_info[0] == True:
copy = True
data += file2_line_info[1]
print "true"
return data
def main(argv=2):
print (sys.argv[1])
print (sys.argv[2])
if argv == 2:
out = open('final.txt', 'w')
data = (
similitudes(sys.argv[1], sys.argv[2]) + '\n'
)
out.write(data)
out.close()
else:
print ("This program need 2 files")
exit (0)
return 0
if __name__ == '__main__':
status = main()
sys.exit(status)
Note: This is not the pythonic way of doing things. As I have mentioned in the above answer https://stackoverflow.com/a/34696778/3534696 use "re" module and solve the problem efficiently.
Read the first file into a dictionary, using maketrans/translate to clean up the name.
Using zip(file, file) to read 2 lines of the file at a time makes it much easier to handle.
And using .split(' ', 1)[1] to get rid of the first column.
And .strip() to get rid of any surrounding whitespace (i.e. \n)
Then you can read the second file updating the dictionary.
In Python3 this looks like:
>>> punc = str.maketrans('_-', ' ') # import string & string.maketrans() in Py2
>>> with open(filename1) as file1, open(filename2) as file2:
... data = {name.split(' ', 1)[1].strip().translate(punc):
... date.split(' ', 1)[1].strip().split(':')
... for name, date in zip(file1, file1)}
... for n, d in zip(file2, file2):
... data[n.split(' ', 1)[1].strip()][0] = d.split(' ', 1)[1].strip()
>>> data
{'Marge S': ['444444', 'Ghdsasra'],
'Sebastian Forset': ['191020', 'UhUidsiIds'],
'Sebastian Forset2': ['201020', 'UdfasdUDsa'],
'Solaris Mage': ['281902', 'UDsafduD']}
After that it is just a matter of writing the dictionary out to a new file.
>>> with open(<output>, 'w+') as output:
... for name, date in data.items():
... output.write('#Name {}\n'.format(name))
... output.write('#Date {}:{}\n'.format(*date))
Note: I had to change 'Homer S' to 'Solaris Mage' in the second file to get the stated output.
Related
I have a very large file of about 900k values. It is a repetition of values like
/begin throw
COLOR red
DESCRIPTION
"cashmere sofa throw"
10
10
156876
DIMENSION
140
200
STORE_ADDRESS 59110
/end throw
The values keep changing, but I need it like below:
/begin throw
STORE_ADDRESS 59110
COLOR red
DESCRIPTION "cashmere sofa throw" 10 10 156876
DIMENSION 140 200
/end throw
Currently, my approach is removing the new line and including space in them:
the store address is constant throughout the file so I thought of removing it from the index and inserting it before the description
text_file = open(filename, 'r')
filedata = text_file.readlines();
for num,line in enumerate(filedata,0):
if '/begin' in line:
for index in range(num, len(filedata)):
if "store_address 59110 " in filedata[index]:
filedata.remove(filedata[index])
filedata.insert(filedata[index-7])
break
if "DESCRIPTION" in filedata[index]:
try:
filedata[index] = filedata[index].replace("\n", " ")
filedata[index+1] = filedata[index+1].replace(" ","").replace("\n", " ")
filedata[index+2] = filedata[index+2].replace(" ","").replace("\n", " ")
filedata[index+3] = filedata[index+3].replace(" ","").replace("\n", " ")
filedata[index+4] = filedata[index+4].replace(" ","").replace("\n", " ")
filedata[index+5] = filedata[index+5].replace(" ","").replace("\n", " ")
filedata[index+6] = filedata[index+6].replace(" ","").replace("\n", " ")
filedata[index+7] = filedata[index+7].replace(" ","").replace("\n", " ")
filedata[index+8] = filedata[index+8].replace(" ","")
except IndexError:
print("Error Index DESCRIPTION:", index, num)
if "DIMENSION" in filedata[index]:
try:
filedata[index] = filedata[index].replace("\n", " ")
filedata[index+1] = filedata[index+1].replace(" ","").replace("\n", " ")
filedata[index+2] = filedata[index+2].replace(" ","").replace("\n", " ")
filedata[index+3] = filedata[index+3].replace(" ","")
except IndexError:
print("Error Index DIMENSION:", index, num)
After which I write filedata into another file.
This approach is taking too long to run(almost an hour and a half) because as mentioned earlier it is a large file.
I was wondering if there was a faster approach to this issue
You can read the file structure by structure so that you don't have to store the whole content in memory and manipulate it there. By structure, I mean all the values between and including /begin throw and /end throw. This should be much faster.
def rearrange_structure_and_write_into_file(structure, output_file):
# TODO: rearrange the elements in structure and write the result into output_file
current_structure = ""
with open(filename, 'r') as original_file:
with open(output_filename, 'w') as output_file:
for line in original_file:
current_structure += line
if "/end throw" in line:
rearrange_structure_and_write_into_file(current_structure, output_file)
current_structure = ""
The insertion and removal of values from a long list is likely to make this code slower than it needs to be, and also makes it vulnerable to any errors and difficult to reason about. If there are any entries without store_address then the code would not work correctly and would search through the remaining entries until it finds a store address.
A better approach would be to break down the code into functions that parse each entry and output it:
KEYWORDS = ["STORE_ADDRESS", "COLOR", "DESCRIPTION", "DIMENSION"]
def parse_lines(lines):
""" Parse throw data from lines in the old format """
current_section = None
r = {}
for line in lines:
words = line.strip().split(" ")
if words[0] in KEYWORDS:
if words[1:]:
r[words[0]] = words[1]
else:
current_section = r[words[0]] = []
else:
current_section.append(line.strip())
return r
def output_throw(throw):
""" Output a throw entry as lines of text in the new format """
yield "/begin throw"
for keyword in KEYWORDS:
if keyword in throw:
value = throw[keyword]
if type(value) is list:
value = " ".join(value)
yield f"{keyword} {value}"
yield "/end throw"
with open(filename) as in_file, open("output.txt", "w") as out_file:
entry = []
for line in in_file:
line = line.strip()
if line == "/begin throw":
entry = []
elif line == "/end throw":
throw = parse_lines(entry)
for line in output_throw(throw):
out_file.write(line + "\n")
else:
entry.append(line)
Or if you really need to maximize performance by removing all unnecessary operations you could read, parse and write in a single long condition, like this:
with open(filename) as in_file, open("output.txt", "w") as out_file:
entry = []
in_section = True
def write(line):
out_file.write(line + "\n")
for line in in_file:
line = line.strip()
first = line.split()[0]
if line == "/begin throw":
in_section = False
write(line)
entry = []
elif line == "/end throw":
in_section = False
for line_ in entry:
write(line_)
write(line)
elif first == "STORE_ADDRESS":
in_section = False
write(line)
elif line in KEYWORDS:
in_section = True
entry.append(line)
elif first in KEYWORDS:
in_section = False
entry.append(line)
elif in_section:
entry[-1] += " " + line
I have INI file formatted like this:
But i need it to look like this:
What would be the easiest solution to write such converter?
I tried to do it in Python, but it don't work as expected. My code is below.
def fix_INI_file(in_INI_filepath, out_INI_filepath):
count_lines = len(open( in_INI_filepath).readlines() )
print("Line count: " + str(count_lines))
in_INI_file = open(in_INI_filepath, 'rt')
out_arr = []
temp_arr = []
line_flag = 0
for i in range(count_lines):
line = in_INI_file.readline()
print (i)
if line == '':
break
if (line.startswith("[") and "]" in line) or ("REF:" in line) or (line == "\n"):
out_arr.append(line)
else:
temp_str = ""
line2 = ""
temp_str = line.strip("\n")
wh_counter = 0
while 1:
wh_counter += 1
line = in_INI_file.readline()
if (line.startswith("[") and "]" in line) or ("REF:" in line) or (line == "\n"):
line2 += line
break
count_lines -= 1
temp_str += line.strip("\n") + " ; "
temp_str += "\n"
out_arr.append(temp_str)
out_arr.append(line2 )
out_INI_file = open(out_INI_filepath, 'wt+')
strr_blob = ""
for strr in out_arr:
strr_blob += strr
out_INI_file.write(strr_blob)
out_INI_file.close()
in_INI_file.close()
Fortunately, there's a much easier way to handle this than by parsing the text by hand. The built-in configparser module supports keys without values via the allow_no_values constructor argument.
import configparser
read_config = configparser.ConfigParser(allow_no_value=True)
read_config.read_string('''
[First section]
s1value1
s1value2
[Second section]
s2value1
s2value2
''')
write_config = configparser.ConfigParser(allow_no_value=True)
for section_name in read_config.sections():
write_config[section_name] = {';'.join(read_config[section_name]): None}
with open('/tmp/test.ini', 'w') as outfile:
write_config.write(outfile)
While I don't immediately see a way to use the same ConfigParser object for reading and writing (it maintains default values for the original keys), using the second object as a writer should yield what you're looking for.
Output from the above example:
[First section]
s1value1;s1value2
[Second section]
s2value1;s2value2
I need to write my Python shell to an output text file. I have some of it written into an output text file but all I need is to now add the number of lines and numbers in each line to my output text file.
I have tried to add another for loop outside the for loop. I've tried putting it inside the for loop and it was just complicated.
Text file list of numbers:
1.0, 1.12, 1.123
1.0,1.12,1.123
1
Code:
import re
index = 0
comma_string = ', '
outfile = "output2.txt"
wp_string = " White Space Detected"
tab_string = " tab detected"
mc_string = " Missing carriage return"
ne_string = " No Error"
baconFile = open(outfile,"wt")
with open("Version2_file.txt", 'r') as f:
for line in f:
flag = 0
carrera = ""
index = index +1
print("Line {}: ".format(index))
baconFile.write("Line {}: ".format(index))
if " " in line: #checking for whitespace
carrera = carrera + wp_string + comma_string + carrera
flag = 1
a = 1
if "\t" in line: #checking for tabs return
carrera = carrera + tab_string + comma_string + carrera
flag = 1
if '\n' not in line:
carrera = carrera + mc_string + ne_string + carrera
flag = 1
if flag == 0: #checking if no error is true by setting flag equal to zero
carrera = ne_string
print('\t'.join(str(len(g)) for g in re.findall(r'\d+\.?(\d+)?', line )))
print (carrera)
baconFile.write('\t'.join(str(len(g)) for g in re.findall(r'\d+\.?(\d+)?', line ) ))
baconFile.write(carrera + "\n")
with open("Version2_file.txt", 'r') as f:
content = f.readlines()
print('Number of Lines: {}'.format(len(content)))
for i in range(len(content)):
print('Numbers in Line {}: {}'.format(i+1, len(content[i].split(','))))
baconFile.write('Number of lines: {}'.format(len(content)))
baconFile.write('Numbers in Line {}: {}'.format(i+1, len(content[i].split(','))))
baconFile.close()
Expected to write in output file:
Line 1: 1 2 3 Tab detected, whitespace detected
Line 2: 1 2 3 No error
Line 3: 1 Missing carriage return No error
Number of Lines: 3
Numbers in Line 1: 3
Numbers in Line 2: 3
Numbers in Line 3: 1
Actual from output file:
Line 1: 1 3 2White Space Detected, tab detected, White Space Detected,
Line 2: 1 3 2No Error
Line 3: 0Missing carriage returnNo Error
Number of lines: 3Numbers in Line 1: 3Number of lines: 3Numbers in Line 2: 3Numb
You have closed baconFile in the first open block, but do not open it again in the second open block. Additionally, you never write to baconFile in the second open block, which makes sense considering you've not opened it there, but then you can't expect to have written to it. It seems you simply forgot to add some write statements. Perhaps you confused write with print. Add those write statements in and you should be golden.
baconFile = open(outfile,"wt")
with open("Version2_file.txt", 'r') as f:
for line in f:
# ... line processing ...
baconFile.write(...) # line format info here
# baconFile.close() ## <-- move this
with open("Version2_file.txt", 'r') as f:
content = f.readlines()
baconFile.write(...) # number of lines info here
for i in range(len(content)):
baconFile.write(...) # numbers in each line info here
baconFile.close() # <-- over here
Here's a useful trick you can use to make print statements send their output to a specified file instead of the screen (i.e. stdout):
from contextlib import contextmanager
import os
import sys
#contextmanager
def redirect_stdout(target_file):
save_stdout = sys.stdout
sys.stdout = target_file
yield
sys.stdout = save_stdout
# Sample usage
with open('output2.txt', 'wt') as target_file:
with redirect_stdout(target_file):
print 'hello world'
print 'testing', (1, 2, 3)
print 'done' # Won't be redirected.
Contents of output2.txt file after running the above:
hello world
testing (1, 2, 3)
Here is this block of code I'm trying to finish:
elif parameter == 'statistics':
outfile.write(stats(infile))
for line in infile:
outfile.write(line)
So essentially, I am trying to write the statistics of the file into the new file that is being copied. The statistics works and everything as when I open the file, the statistics are written in. However, I noticed because of the two outfile.write it seems to close after the first one, so only the statistics go in and not the rest of the content in the original file.
The error that I am getting is this:
ValueError: I/O operation on closed file.
I am unsure why the file is closing.
EDIT: Here is the whole code, as requested
def copy_file():
infile_name = input("Please enter the name of the file to copy: ")
infile = open(infile_name, 'r', encoding='utf8')
parameter = input("Please enter a parameter(line numbers, Gutenberg trim, statistics, none): ")
outfile_name = input("Please enter the name of the new copy: ")
outfile = open(outfile_name, 'w', encoding='utf8')
counter = 1
if parameter == 'line numbers':
for line in infile:
outfile.write(f' {counter:6}: {line}')
counter += 1
elif parameter == 'Gutenberg trim':
copyStart = False
for line in infile:
#print(line.strip())
if '*** START' in line.strip():
copyStart = True
continue
elif '*** END' in line.strip():
copyStart = False
break
if copyStart == True:
outfile.write(line)
elif parameter == 'statistics':
outfile.write(stats(infile))
for line in infile:
outfile.write(line)
else:
for line in infile:
outfile.write(line)
infile.close()
outfile.close()
copy_file()
EDIT2: So sorry for not including it. Here is the stats function:
def stats(text) -> str:
with text as infile:
totallines = 0
emplines = 0
characters = 0
for line in infile:
totallines += 1
characters += len(line)
if len(line.strip()) == 0:
emplines += 1
lines = totallines - emplines
totalaveChars = characters/totallines
nonempaveChars = characters/lines
result = (f'{totallines:5} lines in list \n'
f'{emplines:5} empty lines in list \n'
f'{totalaveChars:5.1f} average characters per line \n'
f'{nonempaveChars:5.1f} average chars per non-empty line')
return result
print(stats(open('ASH.txt', 'r', encoding='utf8')))
Here is the result from stats:
13052 lines in list
2666 empty lines in list
44.6 average characters per line
56.0 average chars per non-empty line
The issue is in the stats function. The with statement will close the file with the local name text, which is infile in your case!
def stats(text) -> str:
totallines = 0
emplines = 0
characters = 0
for line in text:
totallines += 1
characters += len(line)
if len(line.strip()) == 0:
emplines += 1
lines = totallines - emplines
totalaveChars = characters/totallines
nonempaveChars = characters/lines
result = (f'{totallines:5} lines in list \n'
f'{emplines:5} empty lines in list \n'
f'{totalaveChars:5.1f} average characters per line \n'
f'{nonempaveChars:5.1f} average chars per non-empty line')
return result
In your main program, you passed to the function stats the variable infile, which is a file. You do not need to reopen it with with inside the stats functions. Moreover, with will ensure the closing at the end. Thus in your main loop, the infile is closed after the call on stats.
Try the following;
def copy_file():
infile_name = input("Please enter the name of the file to copy: ")
parameter = input("Please enter a parameter(line numbers, Gutenberg trim, statistics, none): ")
outfile_name = input("Please enter the name of the new copy: ")
counter = 1
with open(infile_name, 'r', encoding='utf8') as infile:
with open(outfile_name, 'w', encoding='utf8') as outfile:
if parameter == 'line numbers':
for line in infile:
outfile.write(f' {counter:6}: {line}')
counter += 1
elif parameter == 'Gutenberg trim':
copyStart = False
for line in infile:
#print(line.strip())
if '*** START' in line.strip():
copyStart = True
continue
elif '*** END' in line.strip():
copyStart = False
break
if copyStart == True:
outfile.write(line)
elif parameter == 'statistics':
outfile.write(stats(infile))
for line in infile:
outfile.write(line)
else:
for line in infile:
outfile.write(line)
copy_file()
Using with open(filename, 'r') as file: it will automatically close the file once the operation has finished, and not before.
elif parameter == 'statistics':
outfile.write(stats(infile))
for line in infile:
outfile.write(line)
... only the statistics go in and not the rest of the content in the
original file ...
My educated guess is that the stats function consumes and possibly
closes the input stream (IS).
If stats is somehow well behaved and limits itself to consuming the
IS, one can rewind it
...
infile.seek(0) # rewind the input stream
for line in infile:
outfile.write(line)
If, on the other hand, stats is a bit disruptive and closes
altogether the IS one can use the .name attribute of the file object
to reopen it, like this
...
for line in open(infile.name):
outfile.write(line)
This second solution works even in the first, milder hypotesis and
works even if the code was passed the infile file object from a
outer call.
Another possibility, if you can access and modify the stats source
code, is to undo the reading performed by the function, memorizing
the current position in the input stream before any read operation
and later rewind the IS to that position
def stats(infile):
...
current_pos = infile.tell()
# do your stuff
...
infile.seek(current_pos)
return workload
For this to work, of course, the file object has not to be closed
before the .seek(), either explicitly (by a .close()) or
implicitly (by falling outside the scope of a with block).
If this is your situation (closed file), please remove either the
explicit infile.close() or the (unnecessary) with statement and
the rewind will be correct.
I am trying to create a program that gives the user a short quiz and create a score, which I have done, then I would like to add them to a list in a .txt file. In the program I will ask them their name, so say I have a list such as this;
Bob,7
Bill,5
Jane,6
and someone takes the quiz and inputs the name Bob and gets a score 4 the list will update to;
Bob,4
Bill,5
Jane,6
or someone new takes a quiz, Sarah it will change to;
Bob,4
Bill,5
Jane,6
Sarah,7
So far I have;
import random
file = open("scores.txt", "r")
UserScore=random.randint(0,10)
lines = file.readlines()
file.close()
student=input('What is your name? ')
file = open("scores.txt", "w")
for line in lines:
line = line.strip()
name, score = line.strip().split(",")
if name!=student:
file.write(line)
else:
file.write(name +',' +str(UserScore))
I've randomised the score for now to make it easier to read, however that will be from what the user answered correctly, and I thought this code would read the file then check each name from each line and if the name they entered is the same to the name in the list the line will be replaced with the name and score. However, the file just ends up blank, what am I doing wrong?
Here is what I think is a better idea using the Python pickle module:
In [1]: import pickle
In [2]: scores={'Bob':75, 'Angie':60, 'Anita':80} #create a dict called scores
In [3]: pickle.dump(scores,open('scores.dat','wb')) #dump the pickled object into the file
In [4]: !ls scores.dat #verify that the file has been created
scores.dat
In [5]: !cat scores.dat #list out the file
(dp0
S'Bob'
p1
I75
sS'Angie'
p2
I60
sS'Anita'
p3
I80
s.
In [9]: tscores = pickle.load(open('scores.dat','rb')) #Verification: load the pickled object from the file into a new dict
In [10]: tscores #Verification: list out the new dict
Out[10]: {'Angie': 60, 'Anita': 80, 'Bob': 75}
In [11]: scores == tscores #Verify that the dict object is equivalent to the newly created dict object
Out[11]: True
I tried your code and the first time you run it, then you rewrite the file in one single line. So the next time you run the script on this single line file, you get an unpack exception in the split function and hence you write nothing to the file, resulting in an empty file.
A solution could be to add the newline char again when writing the lines to the file.
import random
file = open("scores.txt", "r")
UserScore=random.randint(0,10)
lines = file.readlines()
file.close()
student=input('What is your name? ')
file = open("scores.txt", "w")
for line in lines:
line = line.strip()
name, score = line.strip().split(",")
if name!=student:
file.write(line + '\n')
else:
file.write(name +',' +str(UserScore) + '\n')
This should do what you want
import random
file = open("scores.txt", "r")
UserScore=random.randint(0,10)
lines = file.readlines()
file.close()
student=input('What is your name? ')
flag = True
file = open("scores.txt", "w")
for line in lines:
line = line.strip()
name, score = line.strip().split(",")
if name!=student:
file.write(line + '\n')
else:
file.write(name +',' +str(UserScore) + '\n')
flag = False
if flag:
file.write(student +',' +str(UserScore) + '\n')
I adjusted a bit of your code and took the liberty to remove the random part and name, score part. But I got some working code. I assume you can make it work for your situation.
file = open("scores.txt", "r+")
lines = file.readlines()
file.close()
us = 15
student = input('What is your name? ')
ls = []
file = open("scores.txt", "r+")
found_student = False
for line in lines:
line = line.strip()
ls = line.split(",")
print("Parsing: " + str(ls))
if not line:
print("Empty line")
pass
elif ls[0] != student:
file.write(line + "\n")
else:
found_student = True
file.write(ls[0] + ',' + str(us) + "\n")
if not found_student:
file.write(student + ',' + str(us) + "\n" )
file.close()