mmap write followed by flush does not seem to write to disk - python

I want the following to replace the binary string \x01\x02\x03 in the file test.bin with the binary string \x04\x05\x06. When I run the script it runs without error. However, when I run cmp -bl test.bin test.bin.old (where test.bin.old is an unmodified copy of the original file) then I get nothing (i.e. test.bin has not been modified). What's wrong please?
#!/usr/bin/python
import mmap
filename = "test.bin"
with open(filename, "r+b") as f:
mm = mmap.mmap(f.fileno(), 0, mmap.ACCESS_WRITE)
oldstr = '\x01\x02\x03'
foundpos = mm.find(oldstr)
if foundpos == -1:
print "Could not find string to replace"
exit(-1)
mm.seek(foundpos)
mm.write('\x04\x05\x06')
mm.flush()
mm.close()
f.close()
exit(0)

Related

Check if command line argument is already used

I'm trying to check reverse lookup of IP address (argument). and then write the result to txt file.
How I can check if the IP address (argument) is already registered in the file? If so, I need to get out of the script.
My script:
import sys, os, re, shlex, urllib, subprocess
cmd = 'dig -x %s #192.1.1.1' % sys.argv[1]
proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
out, err = proc.communicate()
# Convert to list of str lines
out = out.decode().split('\n')
# Only write the line containing "PTR"
with open("/tmp/test.txt", "w") as f:
for line in out:
if "PTR" in line:
f.write(line)
If the file is not too large you could do:
with open('file.txt','r') as f:
content = f.read()
if ip in content:
sys.exit(0)
Now if the file is big and you want to avoid possible memory problems you could use mmap like so:
import mmap
with open("file.txt", "r+b") as f:
# memory-map the file, size 0 means whole file
mm = mmap.mmap(f.fileno(), 0)
if mm.find(ip) != -1:
sys.exit(0)
The mmap.find(string[, start[, end]]) is well documented here.
Something like:
otherIps = [line.strip() for line in open("<path to ipFile>", 'r')]
theIp = "192.168.1.1"
if theIp in otherIps:
sys.exit(0)
otherIps contains a list of the ip addresses on ipFile, then you need to check if theIp is already on otherIps, if so, exit the script.

Python how to make an executable from a text file

So what i want to do is write a .exe bynary to a .txt file and then write the .txt file to a .exe bynary, i tried this:
with open("old_File.exe", "rb") as f:
text_file = open("File.txt", "w")
byte = f.read(1)
while byte != "":
text_file.write(str(byte))
byte = f.read(1)
text_file.close()
with open("New_File.exe", "wb") as f:
text_file = open("File.txt", "r")
byte = text_file.read(12)
while byte != "":
print byte
f.write(byte)
byte = text_file.read(12)
text_file.close()
f.close()
but if i run the New_File.exe windows tels me it is not a valid aplication.
What am doing wrong?
The answer is:
The second time you were reading the *.txt file, you didn't open it in read binary mode, just in read, which is in fact, read text mode.
With older versions of Python it was platform dependent, i.e. this will be a problem only on Windows.
In Python 3, this will make you a problem on any platform.
Advice: Don't read a file in so small chunks if you don't have to, you will throttle poor Windows. Do it with at least 1024. It's often done with 4096 bytes. If the file is small, just do newfile.write(oldfile.read()) Todays PCs have enough RAM to put few MB in it without any problem.
And, there is no need for str(byte) as it is already a string.
To copy two files and preserve metadata, use shutil.copy2. This is a much safer way to copy files.
i foud the answer myself:
exe = open("exe.exe", "rb")
txt = open("txt.txt", "wb")
data = exe.read(100000)
while data != "":
txt.write(data)
data = exe.read(100000)
exe.close()
txt.close()
you actually have to write the binary on the text file instead of writing it as a string on the file itself.
#create new file
N_exe = open("N-exe.exe", "w+")
N_exe.close()
N_exe = open("N-exe.exe", "wb")
Txt = open("txt.txt", "rb")
data = Txt.read(100000)
while data != "":
N_exe.write(data)
data = Txt.read(100000)
N_exe.close()
Txt.close()

Error when trying to read and write multiple files

I modified the code based on the comments from experts in this thread. Now the script reads and writes all the individual files. The script reiterates, highlight and write the output. The current issue is, after highlighting the last instance of the search item, the script removes all the remaining contents after the last search instance in the output of each file.
Here is the modified code:
import os
import sys
import re
source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
filepath = source+'\\'+f
infile = open(filepath, 'r+')
source_content = infile.read()
color = ('red')
regex = re.compile(r"(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
i = 0; output = ""
for m in regex.finditer(source_content):
output += "".join([source_content[i:m.start()],
"<strong><span style='color:%s'>" % color[0:],
source_content[m.start():m.end()],
"</span></strong>"])
i = m.end()
outfile = open(filepath, 'w+')
outfile.seek(0)
outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()
The error message tells you what the error is:
No such file or directory: 'sample1.html'
Make sure the file exists. Or do a try statement to give it a default behavior.
The reason why you get that error is because the python script doesn't have any knowledge about where the files are located that you want to open.
You have to provide the file path to open it as I have done below. I have simply concatenated the source file path+'\\'+filename and saved the result in a variable named as filepath. Now simply use this variable to open a file in open().
import os
import sys
source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
filepath = source+'\\'+f # This is the file path
infile = open(filepath, 'r')
Also there are couple of other problems with your code, if you want to open the file for both reading and writing then you have to use r+ mode. More over in case of Windows if you open a file using r+ mode then you may have to use file.seek() before file.write() to avoid an other issue. You can read the reason for using the file.seek() here.

Python non-specific write exception

Currently teaching myself Python, and learning file I/O by writing a script to both read from and add text to an existing file. The script runs up until I call the write() method, at which point it throws out a non-specific exception - this is the traceback:
File "test.py", line 13, in <module>
f.write(txt)
IOError: [Errno 0] Error
My code:
from sys import argv
script, filename = argv
f = open(filename, 'a+')
print("The contents of %s are:") % filename
print f.read()
txt = raw_input("What would you like to add? ")
f.write(txt)
print("The new contents are:")
print f.read()
f.close()
My environment is Python 2.7.3 in Win7, PowerShell, and Notepad++.
What is causing this? How would I fix it? In my understanding, the a+ access mode should allow me to both read and append to the file. Changing the access mode to r+ yields the same exception.
Clarifications:
I have an existing text file (a.txt) with a single word in it that I pass as an argument to the script, like so:
python test.py a.txt
I am under an admin account in Windows.
Results:
At the minimum, adding two seek() commands fixes the issue - detailed in the answer post.
A problem when one tries to add a text of little size: it remains in the buffer, that keeps the text before the real writing is done after receiving more data.
So, to be sure to write really, do as it is described in the doc concerning os.fsync() and flush()
By the way, it is better to use the with statement.
And it's still more better to use binary mode. In your case, there shouldn't be a problem because you just add text after the reading and just use seek(o,o) . But when one wants to move correctly the file's pointer into the bytes of the file, it is absolutely necessary to use binary mode [ the 'b' in open(filename, 'rb+') ]
I personnaly never use 'a+', I've never understood what are its effects.
from sys import argv
from os import fsync
script, filename = argv
with open(filename, 'rb+') as f:
print("The contents of %s are:") % filename
print f.read()
f.seek(0,2)
txt = raw_input("What would you like to add? ")
f.write(txt)
f.flush()
fsync(f.fileno())
f.seek(0,0)
print("The new contents are:")
print f.read()
For some reason print f.read() doesn't work for me on OS X when you have opened the file in a+ mode.
On Max OS X, changing the open mode to r+ and then adding a f.seek(0) line before the second read makes it work. Sadly, this doesn't help windows.
This is the working code on Mac OS:
from sys import argv
script, filename = argv
f = open(filename, 'r+')
print("The contents of %s are:") % filename
print f.read()
txt = raw_input("What would you like to add? ")
f.write(txt)
print("The new contents are:")
f.seek(0)
print f.read()
f.close()
This is the only way I could get it to work on windows 7:
from sys import argv
script, filename = argv
f = open(filename, 'r')
print("The contents of %s are:") % filename
print f.read()
f.close()
txt = raw_input("What would you like to add? ")
f = open(filename, 'a')
f.write(txt)
f.close()
f = open(filename, 'r')
print("The new contents are:")
print f.read()
f.close()
Which seems super hacky. This should also work on Mac OS X too.

How to modify a text file?

I'm using Python, and would like to insert a string into a text file without deleting or copying the file. How can I do that?
Unfortunately there is no way to insert into the middle of a file without re-writing it. As previous posters have indicated, you can append to a file or overwrite part of it using seek but if you want to add stuff at the beginning or the middle, you'll have to rewrite it.
This is an operating system thing, not a Python thing. It is the same in all languages.
What I usually do is read from the file, make the modifications and write it out to a new file called myfile.txt.tmp or something like that. This is better than reading the whole file into memory because the file may be too large for that. Once the temporary file is completed, I rename it the same as the original file.
This is a good, safe way to do it because if the file write crashes or aborts for any reason, you still have your untouched original file.
Depends on what you want to do. To append you can open it with "a":
with open("foo.txt", "a") as f:
f.write("new line\n")
If you want to preprend something you have to read from the file first:
with open("foo.txt", "r+") as f:
old = f.read() # read everything in the file
f.seek(0) # rewind
f.write("new line\n" + old) # write the new line before
The fileinput module of the Python standard library will rewrite a file inplace if you use the inplace=1 parameter:
import sys
import fileinput
# replace all occurrences of 'sit' with 'SIT' and insert a line after the 5th
for i, line in enumerate(fileinput.input('lorem_ipsum.txt', inplace=1)):
sys.stdout.write(line.replace('sit', 'SIT')) # replace 'sit' and write
if i == 4: sys.stdout.write('\n') # write a blank line after the 5th line
Rewriting a file in place is often done by saving the old copy with a modified name. Unix folks add a ~ to mark the old one. Windows folks do all kinds of things -- add .bak or .old -- or rename the file entirely or put the ~ on the front of the name.
import shutil
shutil.move(afile, afile + "~")
destination= open(aFile, "w")
source= open(aFile + "~", "r")
for line in source:
destination.write(line)
if <some condition>:
destination.write(<some additional line> + "\n")
source.close()
destination.close()
Instead of shutil, you can use the following.
import os
os.rename(aFile, aFile + "~")
Python's mmap module will allow you to insert into a file. The following sample shows how it can be done in Unix (Windows mmap may be different). Note that this does not handle all error conditions and you might corrupt or lose the original file. Also, this won't handle unicode strings.
import os
from mmap import mmap
def insert(filename, str, pos):
if len(str) < 1:
# nothing to insert
return
f = open(filename, 'r+')
m = mmap(f.fileno(), os.path.getsize(filename))
origSize = m.size()
# or this could be an error
if pos > origSize:
pos = origSize
elif pos < 0:
pos = 0
m.resize(origSize + len(str))
m[pos+len(str):] = m[pos:origSize]
m[pos:pos+len(str)] = str
m.close()
f.close()
It is also possible to do this without mmap with files opened in 'r+' mode, but it is less convenient and less efficient as you'd have to read and temporarily store the contents of the file from the insertion position to EOF - which might be huge.
As mentioned by Adam you have to take your system limitations into consideration before you can decide on approach whether you have enough memory to read it all into memory replace parts of it and re-write it.
If you're dealing with a small file or have no memory issues this might help:
Option 1)
Read entire file into memory, do a regex substitution on the entire or part of the line and replace it with that line plus the extra line. You will need to make sure that the 'middle line' is unique in the file or if you have timestamps on each line this should be pretty reliable.
# open file with r+b (allow write and binary mode)
f = open("file.log", 'r+b')
# read entire content of file into memory
f_content = f.read()
# basically match middle line and replace it with itself and the extra line
f_content = re.sub(r'(middle line)', r'\1\nnew line', f_content)
# return pointer to top of file so we can re-write the content with replaced string
f.seek(0)
# clear file content
f.truncate()
# re-write the content with the updated content
f.write(f_content)
# close file
f.close()
Option 2)
Figure out middle line, and replace it with that line plus the extra line.
# open file with r+b (allow write and binary mode)
f = open("file.log" , 'r+b')
# get array of lines
f_content = f.readlines()
# get middle line
middle_line = len(f_content)/2
# overwrite middle line
f_content[middle_line] += "\nnew line"
# return pointer to top of file so we can re-write the content with replaced string
f.seek(0)
# clear file content
f.truncate()
# re-write the content with the updated content
f.write(''.join(f_content))
# close file
f.close()
Wrote a small class for doing this cleanly.
import tempfile
class FileModifierError(Exception):
pass
class FileModifier(object):
def __init__(self, fname):
self.__write_dict = {}
self.__filename = fname
self.__tempfile = tempfile.TemporaryFile()
with open(fname, 'rb') as fp:
for line in fp:
self.__tempfile.write(line)
self.__tempfile.seek(0)
def write(self, s, line_number = 'END'):
if line_number != 'END' and not isinstance(line_number, (int, float)):
raise FileModifierError("Line number %s is not a valid number" % line_number)
try:
self.__write_dict[line_number].append(s)
except KeyError:
self.__write_dict[line_number] = [s]
def writeline(self, s, line_number = 'END'):
self.write('%s\n' % s, line_number)
def writelines(self, s, line_number = 'END'):
for ln in s:
self.writeline(s, line_number)
def __popline(self, index, fp):
try:
ilines = self.__write_dict.pop(index)
for line in ilines:
fp.write(line)
except KeyError:
pass
def close(self):
self.__exit__(None, None, None)
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
with open(self.__filename,'w') as fp:
for index, line in enumerate(self.__tempfile.readlines()):
self.__popline(index, fp)
fp.write(line)
for index in sorted(self.__write_dict):
for line in self.__write_dict[index]:
fp.write(line)
self.__tempfile.close()
Then you can use it this way:
with FileModifier(filename) as fp:
fp.writeline("String 1", 0)
fp.writeline("String 2", 20)
fp.writeline("String 3") # To write at the end of the file
If you know some unix you could try the following:
Notes: $ means the command prompt
Say you have a file my_data.txt with content as such:
$ cat my_data.txt
This is a data file
with all of my data in it.
Then using the os module you can use the usual sed commands
import os
# Identifiers used are:
my_data_file = "my_data.txt"
command = "sed -i 's/all/none/' my_data.txt"
# Execute the command
os.system(command)
If you aren't aware of sed, check it out, it is extremely useful.

Categories

Resources