How can I preserve the file structure after substitution?
# -*- coding: cp1252 -*-
import os
import os.path
import sys
import fileinput
path = "C:\\Search_replace" # Insert the path to the directory of interest
#os.path.exists(path)
#raise SystemExit
Abspath = os.path.abspath(path)
print(Abspath)
dirList = os.listdir(path)
print ('seaching in', os.path.abspath(path))
for fname in dirList:
if fname.endswith('.txt') or fname.endswith('.srt'):
#print fname
full_path=Abspath + "\\" + fname
print full_path
for line in fileinput.FileInput(full_path, inplace=1):
line = line.replace("þ", "t")
line = line.replace("ª", "S")
line = line.replace("º", "s")
print line
print "done"
The question is not great in the clarity department, but if you want Python to print stuff to standard out without a newline at the end you can use sys.stdout.write() instead of print().
If you want to perform substitution and save it to a file, you can do what Senthil Kumaran suggested.
Instead of print line in the fileinput line, do a sys.stdout.write(line) at the end. And don't use print something in the other places in the loop.
Instead of using fileinput for word replace, you also use this simple method for word substitution:
import shutil
o = open("outputfile","w") #open an outputfile for writing
with open("inputfile") as infile:
for line in infile:
line = line.replace("someword","newword")
o.write(line + "\n")
o.close()
shutil.move("outputfile","inputfile")
When you iterate the lines of the file with
for line in fileinput.FileInput(full_path,inplace=1)
the line will contain the line data including the linefeed character if this isn't the last line. So usually in this kind of pattern you will either want to strip the extra whitespace with
line = line.rstrip()
or print them out without appending your own linefeed (like print does) by using
sys.stdout.write(line)
Related
I want to loop over the contents of a text file and do a search and replace on some lines and write the result back to the file. I could first load the whole file in memory and then write it back, but that probably is not the best way to do it.
What is the best way to do this, within the following code?
f = open(file)
for line in f:
if line.contains('foo'):
newline = line.replace('foo', 'bar')
# how to write this newline back to the file
The shortest way would probably be to use the fileinput module. For example, the following adds line numbers to a file, in-place:
import fileinput
for line in fileinput.input("test.txt", inplace=True):
print('{} {}'.format(fileinput.filelineno(), line), end='') # for Python 3
# print "%d: %s" % (fileinput.filelineno(), line), # for Python 2
What happens here is:
The original file is moved to a backup file
The standard output is redirected to the original file within the loop
Thus any print statements write back into the original file
fileinput has more bells and whistles. For example, it can be used to automatically operate on all files in sys.args[1:], without your having to iterate over them explicitly. Starting with Python 3.2 it also provides a convenient context manager for use in a with statement.
While fileinput is great for throwaway scripts, I would be wary of using it in real code because admittedly it's not very readable or familiar. In real (production) code it's worthwhile to spend just a few more lines of code to make the process explicit and thus make the code readable.
There are two options:
The file is not overly large, and you can just read it wholly to memory. Then close the file, reopen it in writing mode and write the modified contents back.
The file is too large to be stored in memory; you can move it over to a temporary file and open that, reading it line by line, writing back into the original file. Note that this requires twice the storage.
I guess something like this should do it. It basically writes the content to a new file and replaces the old file with the new file:
from tempfile import mkstemp
from shutil import move, copymode
from os import fdopen, remove
def replace(file_path, pattern, subst):
#Create temp file
fh, abs_path = mkstemp()
with fdopen(fh,'w') as new_file:
with open(file_path) as old_file:
for line in old_file:
new_file.write(line.replace(pattern, subst))
#Copy the file permissions from the old file to the new file
copymode(file_path, abs_path)
#Remove original file
remove(file_path)
#Move new file
move(abs_path, file_path)
Here's another example that was tested, and will match search & replace patterns:
import fileinput
import sys
def replaceAll(file,searchExp,replaceExp):
for line in fileinput.input(file, inplace=1):
if searchExp in line:
line = line.replace(searchExp,replaceExp)
sys.stdout.write(line)
Example use:
replaceAll("/fooBar.txt","Hello\sWorld!$","Goodbye\sWorld.")
This should work: (inplace editing)
import fileinput
# Does a list of files, and
# redirects STDOUT to the file in question
for line in fileinput.input(files, inplace = 1):
print line.replace("foo", "bar"),
Based on the answer by Thomas Watnedal.
However, this does not answer the line-to-line part of the original question exactly. The function can still replace on a line-to-line basis
This implementation replaces the file contents without using temporary files, as a consequence file permissions remain unchanged.
Also re.sub instead of replace, allows regex replacement instead of plain text replacement only.
Reading the file as a single string instead of line by line allows for multiline match and replacement.
import re
def replace(file, pattern, subst):
# Read contents from file as a single string
file_handle = open(file, 'r')
file_string = file_handle.read()
file_handle.close()
# Use RE package to allow for replacement (also allowing for (multiline) REGEX)
file_string = (re.sub(pattern, subst, file_string))
# Write contents to file.
# Using mode 'w' truncates the file.
file_handle = open(file, 'w')
file_handle.write(file_string)
file_handle.close()
As lassevk suggests, write out the new file as you go, here is some example code:
fin = open("a.txt")
fout = open("b.txt", "wt")
for line in fin:
fout.write( line.replace('foo', 'bar') )
fin.close()
fout.close()
If you're wanting a generic function that replaces any text with some other text, this is likely the best way to go, particularly if you're a fan of regex's:
import re
def replace( filePath, text, subs, flags=0 ):
with open( filePath, "r+" ) as file:
fileContents = file.read()
textPattern = re.compile( re.escape( text ), flags )
fileContents = textPattern.sub( subs, fileContents )
file.seek( 0 )
file.truncate()
file.write( fileContents )
A more pythonic way would be to use context managers like the code below:
from tempfile import mkstemp
from shutil import move
from os import remove
def replace(source_file_path, pattern, substring):
fh, target_file_path = mkstemp()
with open(target_file_path, 'w') as target_file:
with open(source_file_path, 'r') as source_file:
for line in source_file:
target_file.write(line.replace(pattern, substring))
remove(source_file_path)
move(target_file_path, source_file_path)
You can find the full snippet here.
fileinput is quite straightforward as mentioned on previous answers:
import fileinput
def replace_in_file(file_path, search_text, new_text):
with fileinput.input(file_path, inplace=True) as file:
for line in file:
new_line = line.replace(search_text, new_text)
print(new_line, end='')
Explanation:
fileinput can accept multiple files, but I prefer to close each single file as soon as it is being processed. So placed single file_path in with statement.
print statement does not print anything when inplace=True, because STDOUT is being forwarded to the original file.
end='' in print statement is to eliminate intermediate blank new lines.
You can used it as follows:
file_path = '/path/to/my/file'
replace_in_file(file_path, 'old-text', 'new-text')
Create a new file, copy lines from the old to the new, and do the replacing before you write the lines to the new file.
Expanding on #Kiran's answer, which I agree is more succinct and Pythonic, this adds codecs to support the reading and writing of UTF-8:
import codecs
from tempfile import mkstemp
from shutil import move
from os import remove
def replace(source_file_path, pattern, substring):
fh, target_file_path = mkstemp()
with codecs.open(target_file_path, 'w', 'utf-8') as target_file:
with codecs.open(source_file_path, 'r', 'utf-8') as source_file:
for line in source_file:
target_file.write(line.replace(pattern, substring))
remove(source_file_path)
move(target_file_path, source_file_path)
Using hamishmcn's answer as a template I was able to search for a line in a file that match my regex and replacing it with empty string.
import re
fin = open("in.txt", 'r') # in file
fout = open("out.txt", 'w') # out file
for line in fin:
p = re.compile('[-][0-9]*[.][0-9]*[,]|[-][0-9]*[,]') # pattern
newline = p.sub('',line) # replace matching strings with empty string
print newline
fout.write(newline)
fin.close()
fout.close()
if you remove the indent at the like below, it will search and replace in multiple line.
See below for example.
def replace(file, pattern, subst):
#Create temp file
fh, abs_path = mkstemp()
print fh, abs_path
new_file = open(abs_path,'w')
old_file = open(file)
for line in old_file:
new_file.write(line.replace(pattern, subst))
#close temp file
new_file.close()
close(fh)
old_file.close()
#Remove original file
remove(file)
#Move new file
move(abs_path, file)
I have some problem to open and read a txt-file in Python. The txt file contains text (cat text.txt works fine in Terminal). But in Python I only get 5 empty lines.
print open('text.txt').read()
Do you know why?
I solved it. Was a utf-16 file.
print open('text.txt').read().decode('utf-16-le')
if this prints the lines in your file then perhaps the file your program is selecting is empty? I don't know, but try this:
import tkinter as tk
from tkinter import filedialog
import os
def fileopen():
GUI=tk.Tk()
filepath=filedialog.askopenfilename(parent=GUI,title='Select file to print lines.')
(GUI).destroy()
return (filepath)
filepath = fileopen()
filepath = os.path.normpath(filepath)
with open (filepath, 'r') as fh:
print (fh.read())
or alternatively, using this method of printing lines:
fh = open(filepath, 'r')
for line in fh:
line=line.rstrip('\n')
print (line)
fh.close()
or if you want the lines loaded into a list of strings:
lines = []
fh = open(filepath, 'r')
for line in fh:
line=line.rstrip('\n')
lines.append(line)
fh.close()
for line in lines:
print (line)
When you open file I think you have to specify how do you want to open it. In your example you should open it for reading like:
print open('text.txt',"r").read()
Hope this does the trick.
I was wondering if there is a way to edit those lines of a file that contain certain characters.
Something like this:
file.readlines()
for line in file:
if 'characters' in line:
file[line] = 'edited line'
If it matters: I'm using python 3.5
I think what you want is something like:
lines = file.readlines()
for index, line in enumerate(lines):
if 'characters' in line:
lines[index] = 'edited line'
You can't edit the file directly, but you can write out the modified lines over the original (or, safer, write to a temporary file and renamed once you've validated it).
You can use tempfile.NamedTemporaryFile to create a temporary file object and write your lines in it the use shutil module to replace the temp file with your preceding file.
from tempfile import NamedTemporaryFile
import shutil
tempfile = NamedTemporaryFile(delete=False)
with open(file_name) as infile,tempfile:
for line in infile:
if 'characters' in line:
tempfile.write('edited line')
else:
tempfile.write(line)
shutil.move(tempfile.name, file_name)
I'am new to Python 3 and could really use a little help. I have a txt file containing:
InstallPrompt=
DisplayLicense=
FinishMessage=
TargetName=D:\somewhere
FriendlyName=something
I have a python script that in the end, should change just two lines to:
TargetName=D:\new
FriendlyName=Big
Could anyone help me, please? I have tried to search for it, but I didnt find something I could use. The text that should be replaced could have different length.
import fileinput
for line in fileinput.FileInput("file",inplace=1):
sline=line.strip().split("=")
if sline[0].startswith("TargetName"):
sline[1]="new.txt"
elif sline[0].startswith("FriendlyName"):
sline[1]="big"
line='='.join(sline)
print(line)
A very simple solution for what you're doing:
#!/usr/bin/python
import re
import sys
for line in open(sys.argv[1],'r').readlines():
line = re.sub(r'TargetName=.+',r'TargetName=D:\\new', line)
line = re.sub(r'FriendlyName=.+',r'FriendlyName=big', line)
print line,
You would invoke this from the command line as ./test.py myfile.txt > output.txt
Writing to a temporary file and the renaming is the best way to make sure you won't get a damaged file if something goes wrong
import os
from tempfile import NamedTemporaryFile
fname = "lines.txt"
with open(fname) as fin, NamedTemporaryFile(dir='.', delete=False) as fout:
for line in fin:
if line.startswith("TargetName="):
line = "TargetName=D:\\new\n"
elif line.startswith("FriendlyName"):
line = "FriendlyName=Big\n"
fout.write(line.encode('utf8'))
os.rename(fout.name, fname)
Is this a config (.ini) file you're trying to parse? The format looks suspiciously similar, except without a header section. You can use configparser, though it may add extra space around the "=" sign (i.e. "TargetName=D:\new" vs. "TargetName = D:\new"), but if those changes don't matter to you, using configparser is way easier and less error-prone than trying to parse it by hand every time.
txt (ini) file:
[section name]
FinishMessage=
TargetName=D:\something
FriendlyName=something
Code:
import sys
from configparser import SafeConfigParser
def main():
cp = SafeConfigParser()
cp.optionxform = str # Preserves case sensitivity
cp.readfp(open(sys.argv[1], 'r'))
section = 'section name'
options = {'TargetName': r'D:\new',
'FriendlyName': 'Big'}
for option, value in options.items():
cp.set(section, option, value)
cp.write(open(sys.argv[1], 'w'))
if __name__ == '__main__':
main()
txt (ini) file (after):
[section name]
FinishMessage =
TargetName = D:\new
FriendlyName = Big
subs_names.py script works both Python 2.6+ and Python 3.x:
#!/usr/bin/env python
from __future__ import print_function
import sys, fileinput
# here goes new values
substitions = dict(TargetName=r"D:\new", FriendlyName="Big")
inplace = '-i' in sys.argv # make substitions inplace
if inplace:
sys.argv.remove('-i')
for line in fileinput.input(inplace=inplace):
name, sep, value = line.partition("=")
if name in substitions:
print(name, sep, substitions[name], sep='')
else:
print(line, end='')
Example:
$ python3.1 subs_names.py input.txt
InstallPrompt=
DisplayLicense=
FinishMessage=
TargetName=D:\new
FriendlyName=Big
If you are satisfied with the output then add -i parameter to make changes inplace:
$ python3.1 subs_names.py -i input.txt
I'm using Python, and would like to insert a string into a text file without deleting or copying the file. How can I do that?
Unfortunately there is no way to insert into the middle of a file without re-writing it. As previous posters have indicated, you can append to a file or overwrite part of it using seek but if you want to add stuff at the beginning or the middle, you'll have to rewrite it.
This is an operating system thing, not a Python thing. It is the same in all languages.
What I usually do is read from the file, make the modifications and write it out to a new file called myfile.txt.tmp or something like that. This is better than reading the whole file into memory because the file may be too large for that. Once the temporary file is completed, I rename it the same as the original file.
This is a good, safe way to do it because if the file write crashes or aborts for any reason, you still have your untouched original file.
Depends on what you want to do. To append you can open it with "a":
with open("foo.txt", "a") as f:
f.write("new line\n")
If you want to preprend something you have to read from the file first:
with open("foo.txt", "r+") as f:
old = f.read() # read everything in the file
f.seek(0) # rewind
f.write("new line\n" + old) # write the new line before
The fileinput module of the Python standard library will rewrite a file inplace if you use the inplace=1 parameter:
import sys
import fileinput
# replace all occurrences of 'sit' with 'SIT' and insert a line after the 5th
for i, line in enumerate(fileinput.input('lorem_ipsum.txt', inplace=1)):
sys.stdout.write(line.replace('sit', 'SIT')) # replace 'sit' and write
if i == 4: sys.stdout.write('\n') # write a blank line after the 5th line
Rewriting a file in place is often done by saving the old copy with a modified name. Unix folks add a ~ to mark the old one. Windows folks do all kinds of things -- add .bak or .old -- or rename the file entirely or put the ~ on the front of the name.
import shutil
shutil.move(afile, afile + "~")
destination= open(aFile, "w")
source= open(aFile + "~", "r")
for line in source:
destination.write(line)
if <some condition>:
destination.write(<some additional line> + "\n")
source.close()
destination.close()
Instead of shutil, you can use the following.
import os
os.rename(aFile, aFile + "~")
Python's mmap module will allow you to insert into a file. The following sample shows how it can be done in Unix (Windows mmap may be different). Note that this does not handle all error conditions and you might corrupt or lose the original file. Also, this won't handle unicode strings.
import os
from mmap import mmap
def insert(filename, str, pos):
if len(str) < 1:
# nothing to insert
return
f = open(filename, 'r+')
m = mmap(f.fileno(), os.path.getsize(filename))
origSize = m.size()
# or this could be an error
if pos > origSize:
pos = origSize
elif pos < 0:
pos = 0
m.resize(origSize + len(str))
m[pos+len(str):] = m[pos:origSize]
m[pos:pos+len(str)] = str
m.close()
f.close()
It is also possible to do this without mmap with files opened in 'r+' mode, but it is less convenient and less efficient as you'd have to read and temporarily store the contents of the file from the insertion position to EOF - which might be huge.
As mentioned by Adam you have to take your system limitations into consideration before you can decide on approach whether you have enough memory to read it all into memory replace parts of it and re-write it.
If you're dealing with a small file or have no memory issues this might help:
Option 1)
Read entire file into memory, do a regex substitution on the entire or part of the line and replace it with that line plus the extra line. You will need to make sure that the 'middle line' is unique in the file or if you have timestamps on each line this should be pretty reliable.
# open file with r+b (allow write and binary mode)
f = open("file.log", 'r+b')
# read entire content of file into memory
f_content = f.read()
# basically match middle line and replace it with itself and the extra line
f_content = re.sub(r'(middle line)', r'\1\nnew line', f_content)
# return pointer to top of file so we can re-write the content with replaced string
f.seek(0)
# clear file content
f.truncate()
# re-write the content with the updated content
f.write(f_content)
# close file
f.close()
Option 2)
Figure out middle line, and replace it with that line plus the extra line.
# open file with r+b (allow write and binary mode)
f = open("file.log" , 'r+b')
# get array of lines
f_content = f.readlines()
# get middle line
middle_line = len(f_content)/2
# overwrite middle line
f_content[middle_line] += "\nnew line"
# return pointer to top of file so we can re-write the content with replaced string
f.seek(0)
# clear file content
f.truncate()
# re-write the content with the updated content
f.write(''.join(f_content))
# close file
f.close()
Wrote a small class for doing this cleanly.
import tempfile
class FileModifierError(Exception):
pass
class FileModifier(object):
def __init__(self, fname):
self.__write_dict = {}
self.__filename = fname
self.__tempfile = tempfile.TemporaryFile()
with open(fname, 'rb') as fp:
for line in fp:
self.__tempfile.write(line)
self.__tempfile.seek(0)
def write(self, s, line_number = 'END'):
if line_number != 'END' and not isinstance(line_number, (int, float)):
raise FileModifierError("Line number %s is not a valid number" % line_number)
try:
self.__write_dict[line_number].append(s)
except KeyError:
self.__write_dict[line_number] = [s]
def writeline(self, s, line_number = 'END'):
self.write('%s\n' % s, line_number)
def writelines(self, s, line_number = 'END'):
for ln in s:
self.writeline(s, line_number)
def __popline(self, index, fp):
try:
ilines = self.__write_dict.pop(index)
for line in ilines:
fp.write(line)
except KeyError:
pass
def close(self):
self.__exit__(None, None, None)
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
with open(self.__filename,'w') as fp:
for index, line in enumerate(self.__tempfile.readlines()):
self.__popline(index, fp)
fp.write(line)
for index in sorted(self.__write_dict):
for line in self.__write_dict[index]:
fp.write(line)
self.__tempfile.close()
Then you can use it this way:
with FileModifier(filename) as fp:
fp.writeline("String 1", 0)
fp.writeline("String 2", 20)
fp.writeline("String 3") # To write at the end of the file
If you know some unix you could try the following:
Notes: $ means the command prompt
Say you have a file my_data.txt with content as such:
$ cat my_data.txt
This is a data file
with all of my data in it.
Then using the os module you can use the usual sed commands
import os
# Identifiers used are:
my_data_file = "my_data.txt"
command = "sed -i 's/all/none/' my_data.txt"
# Execute the command
os.system(command)
If you aren't aware of sed, check it out, it is extremely useful.