Python - Reversing a Whole File

Python - Reversing a Whole File - python

I need to be able to reverse a whole file, or at least the contents of it. My code so far is:
def reverse_file(of, rf):
oldfile = open(of, 'r')
reversefile = open(rf, 'w')
filedata = oldfile.read()
rdata = str_reverse(filedata)
reversefile.write(rdata)
oldfile.close()
reversefile.close()
The problem is I need to define str_reverse and I'm not sure how to create a function that reverses everything. Any help?

If you want to reverse the entire file, you can just call write with data[::-1]
def reverse_file(of, rf):
with open(of) as oldfile:
with open(rf, "w") as reversefile:
reversefile.write(oldfile.read()[::-1])
example:
% cat testdata
line1
line2
line3
% cat reverse_file.py
def reverse_file(of, rf):
with open(of) as oldfile:
with open(rf, "w") as reversefile:
reversefile.write(oldfile.read()[::-1])
if __name__ == "__main__":
reverse_file("testdata", "newdata")
% python reverse_file.py
% cat newdata
3enil
2enil
1enil

To support files that do not fit in memory (based on #Darius Bacon's answer):
import os
from io import DEFAULT_BUFFER_SIZE
def reverse_blocks(file, blocksize=DEFAULT_BUFFER_SIZE):
"""Yield blocks from the file in reverse order."""
file.seek(0, os.SEEK_END) # move file position to the end
position = file.tell()
while position > 0:
delta = min(blocksize, position)
file.seek(position - delta, os.SEEK_SET)
yield file.read(delta)
position -= blocksize
# reverse input and write it to output
with open("input", "rb") as infile, open("output", "wb") as outfile:
for block in reverse_blocks(infile):
outfile.write(block[::-1])

Related

Store the contents of text file in return variable and manipulate wherever required in python?

I have below function & I am trying to get/store the contents of text file in another temp file(removing unnecessary line) with appending special character.
But I also want the same content which is in temp text file with different special character next time but I am not able to do that.Below function is creating a temp file.To get desired output I need to create file every time with same function again which is not good way.Is there anything we can do without creating a temp/extra file and store the contents in return variable and append the special character whatever we want multiple times
import os
import re
def mainfest():
pathfile = "abc_12.txt"
with open(pathfile, 'r') as firstfile, open('tmp.txt', 'r') as s:
for line in firstfile:
if line.strip().startswith("-") or line.startswith("<"):
print"ignore")
elif re.search('\\S', line):
name = str(os.path.basename(line))
s.write("*" +fname)
def contents():
temppath = "temp.txt"
with open(temp path, 'r') as f:
lines = f.readlines()
lines+= lines
return lines
manifest()
value = contents()
file abc_12.txt
---ABC-123
nice/abc.py
xml/abc.py
<<NOP-123
bac\nice.py
abc.py
---CDEF-345
jkl.oy
abc.py
I want the contents of abc_12.txt file I can get in return something like that
abc.py
abc.py
nice.py
abc.py
jkl.oy
abc.py
and manipulate them wherever I want similar to below output
Output 1:
* abc.py
* abc.py
* nice.py
* abc.py
* jkl.oy
* abc.py
Output 2:
##abc.py
##abc.py
##nice.py
##abc.py
##jkl.oy
##abc.py

Maybe first you should read file, search names and keep on list
def read_data():
results = []
with open("abc_12.txt") as infile:
for line in infile:
if line.strip().startswith(("-", "<")): # `startswith`/`endswith` can use tuple
print("ignore:", line)
elif re.search('\\S', line):
name = os.path.basename(line)
results.append(name)
return results
And later you can use this list to create temp file or other file
data = read_data()
with open('temp.txt', 'w') as outfile:
for line in data:
outfile.write(f'* {line}')
#print(f'* {line}', end='')
with open('other.txt', 'w') as outfile:
for line in data:
outfile.write(f'##{line}')
#print(f'##{line}', end='')
EDIT:
Minimal working code.
I used io.StringIO only to simulate file in memory - so everyone can simply copy and test it.
import os
import re
import io
text = r'''---ABC-123
nice/abc.py
xml/abc.py
<<NOP-123
bac\nice.py
abc.py
---CDEF-345
jkl.oy
abc.py
'''
def read_data():
results = []
with io.StringIO(text) as infile:
#with open("abc_12.txt") as infile:
for line in infile:
line = line.strip()
if line:
if line.startswith(("-", "<")): # `startswith`/`endswith` can use tuple
print("ignore:", line)
else:
name = os.path.basename(line)
results.append(name)
return results
data = read_data()
with open('temp.txt', 'w') as outfile:
for line in data:
outfile.write(f'* {line}\n')
print(f'* {line}')
with open('other.txt', 'w') as outfile:
for line in data:
outfile.write(f'##{line}\n')
print(f'##{line}')
EDIT:
If you don't want to save in file then you still need for-loop to create string
data = read_data()
string_1 = ''
for line in data:
string_1 += f'* {line}\n'
string_2 = ''
for line in data:
string_2 += f'##{line}\n'
or to create new list (and eventually string)
data = read_data()
list_1 = []
for line in data:
list_1.append(f'* {line}')
list_2 = []
for line in data:
list_2.append(f'##{line}')
string_1 = "\n".join(list_1)
string_2 = "\n".join(list_2)

multiprocessing always returns an empty file when doing apply_async

I have a file (input.txt) containing half-a-million lines, and I want to encrypt these lines with my encrypt function, and save them to one single file called output.txt. For example the input.txt is
aab
abb
abc
Then I want to have my output.txt to be
001
011
012
Simple for loop version
I have a working for loop, however it takes nearly 9 hours to encrypt all the lines:
encryption_map = {}
encryption_map['a']=0
encryption_map['b']=1
encryption_map['c']=2
def encrypt(input_str):
output_int = ''
for i in input_str:
for ch in i.split('\n')[0]: # remove line break symbol \n
output_int += str(encryption_map[ch])
return output_int
text_path = 'input.txt'
with open(text_path, 'r') as input_file:
lines = input_file.readlines()
with open('output.txt', 'w') as output_file:
for l in lines:
output_int = encrypt(l)
output_file.write(output_int + '\n')
apply_async version
Since I want to keep the same ordering, in the output.txt, it seems I have to use apply_async. Then my code becomes:
import multiprocessing as mp
encryption_map = {}
encryption_map['a']=0
encryption_map['b']=1
encryption_map['c']=2
def encrypt(input_str):
output_int = ''
for i in input_str:
for ch in i.split('\n')[0]: # remove line break symbol \n
output_int += str(encryption_map[ch])
return output_int
def write_result(output):
output_file.write(ipa_output + '\n')
# output_file.flush() # This line is suggested by another stack question
pool = mp.Pool(20)
text_path = 'input.txt'
with open(text_path, 'r') as input_file:
lines = input_file.readlines()
with open('output.txt', 'w') as output_file:
for l in lines:
pool.apply_async(encrypt, args=l, callback=write_result)
pool.close()
pool.join()
It runs much faster, however, the output.txt is always empty. What's wrong with my code? I found one post that also has difficulty in writing out the file, and they suggest us to put f.flush() inside the write function, but it also doesn't work.

You need to write args=(line,) like this:
import multiprocessing as mp
encryption_map = {}
encryption_map['a'] = 0
encryption_map['b'] = 1
encryption_map['c'] = 2
output_file = open('output.txt', 'w')
def encrypt(input_str):
output_int = ''
for i in input_str:
for ch in i.split('\n')[0]:
output_int += str(encryption_map[ch])
return output_int
def write_result(output):
output_file.write(output + '\n')
def main():
#mp.set_start_method('spawn') # Only needed on OSX
pool = mp.Pool(2)
with open('input.txt') as input_file:
lines = input_file.readlines()
for line in lines:
pool.apply_async(encrypt, args=(line,), callback=write_result)
pool.close()
pool.join()
output_file.close()
if __name__ == '__main__':
main()
EDIT:
In the above code, since we are using apply_async, the order of lines in the output might not be the same as that of the input.
If we want to preserve order, then we can either use map/map_async/imap.
In this case, imap might be the best option since the callback operation (IO bound) is much slower than the worker operation (CPU bound):
import multiprocessing as mp
encryption_map = {}
encryption_map['a'] = 0
encryption_map['b'] = 1
encryption_map['c'] = 2
output_file = open('output.txt', 'w')
def encrypt(input_str):
output_int = ''
for i in input_str:
for ch in i.split('\n')[0]:
output_int += str(encryption_map[ch])
return output_int
def main():
mp.set_start_method('spawn') # Only needed on OSX
pool = mp.Pool(2)
with open('input.txt') as input_file:
lines = input_file.readlines()
for output in pool.imap(encrypt, lines):
output_file.write(output + '\n')
pool.close()
pool.join()
if __name__ == '__main__':
main()

How to erase line from text file in Python?

I'm trying to make a code to rewrite a specific line from a .txt file.
I can get to write in the line i want, but i can't erase the previous text on the line.
Here is my code:
(i'm trying a couple of things)
def writeline(file,n_line, text):
f=open(file,'r+')
count=0
for line in f:
count=count+1
if count==n_line :
f.write(line.replace(str(line),text))
#f.write('\r'+text)
You can use this code to make a test file for testing:
with open('writetest.txt','w') as f:
f.write('1 \n2 \n3 \n4 \n5')
writeline('writetest.txt',4,'This is the fourth line')
Edit: For Some reason, if i use 'if count==5:' the code compiles ok (even if it doen't erase the previous text), but if i do 'if count==n_line: ', the file ends up with a lot of garbage.
The Answers work, but i would like to know what are the problems with my code, and why i can't read and write. Thanks!

You are reading from the file and also writing to it. Don't do that. Instead, you should write to a NamedTemporaryFile and then rename it over the original file after you finish writing and close it.
Or if the size of the file is guaranteed to be small, you can use readlines() to read all of it, then close the file, modify the line you want, and write it back out:
def editline(file,n_line,text):
with open(file) as infile:
lines = infile.readlines()
lines[n_line] = text+' \n'
with open(file, 'w') as outfile:
outfile.writelines(lines)

Use temporary file:
import os
import shutil
def writeline(filename, n_line, text):
tmp_filename = filename + ".tmp"
count = 0
with open(tmp_filename, 'wt') as tmp:
with open(filename, 'rt') as src:
for line in src:
count += 1
if count == n_line:
line = line.replace(str(line), text + '\n')
tmp.write(line)
shutil.copy(tmp_filename, filename)
os.remove(tmp_filename)
def create_test(fname):
with open(fname,'w') as f:
f.write('1 \n2 \n3 \n4 \n5')
if __name__ == "__main__":
create_test('writetest.txt')
writeline('writetest.txt', 4, 'This is the fourth line')

Splitting a text file

I have a folder with a set of text documents. I want to split each document to two or three documents, each one should be 45-70kb.
How сan I do it? I tried:
def split_file(filename, pattern, size):
with open(filename, 'rb') as f:
for index, line in enumerate(f, start=1):
with open(pattern.format(index), 'wb') as out:
n=0
for line in chain([line], f):
out.write(line)
n += len(line)
if n >= 450000 and n <=700000:
break
if __name__ == '__main__':
split_file('folderadress', 'part_{0:03d}.txt', 20000)
but it seems to me it's completely wrong.

This uses a different approach to yours. I have set the maximum size for each file to be 1000 bytes for testing purposes:
import glob
import os
dname = './gash' # directory name
unit_size = 1000 # maximum file size
for fname in glob.iglob("%s/*" % dname):
with open(fname, 'rb') as fo:
data = True
n = 1
while data:
# read returns "" (False) on EOF
data = fo.read(unit_size)
if data:
sub_fname = fname + str(n)
with open(sub_fname, 'wb') as out:
out.write(data)
n += 1
What this might do is to split a line between files, however you do not state if this could be an issue or not.

f.seek() and f.tell() to read each line of text file

I want to open a file and read each line using f.seek() and f.tell():
test.txt:
abc
def
ghi
jkl
My code is:
f = open('test.txt', 'r')
last_pos = f.tell() # get to know the current position in the file
last_pos = last_pos + 1
f.seek(last_pos) # to change the current position in a file
text= f.readlines(last_pos)
print text
It reads the whole file.

ok, you may use this:
f = open( ... )
f.seek(last_pos)
line = f.readline() # no 's' at the end of `readline()`
last_pos = f.tell()
f.close()
just remember, last_pos is not a line number in your file, it's a byte offset from the beginning of the file -- there's no point in incrementing/decrementing it.

Is there any reason why you have to use f.tell and f.seek? The file object in Python is iterable - meaning that you can loop over a file's lines natively without having to worry about much else:
with open('test.txt','r') as file:
for line in file:
#work with line

A way for getting current position When you want to change a specific line of a file:
cp = 0 # current position
with open("my_file") as infile:
while True:
ret = next(infile)
cp += ret.__len__()
if ret == string_value:
break
print(">> Current position: ", cp)

Skipping lines using islice works perfectly for me and looks like is closer to what you're looking for (jumping to a specific line in the file):
from itertools import islice
with open('test.txt','r') as f:
f = islice(f, last_pos, None)
for line in f:
#work with line
Where last_pos is the line you stopped reading the last time. It will start the iteration one line after last_pos.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python - Reversing a Whole File - python

Related

Store the contents of text file in return variable and manipulate wherever required in python?

multiprocessing always returns an empty file when doing apply_async

How to erase line from text file in Python?

Splitting a text file

f.seek() and f.tell() to read each line of text file

Categories

Resources