How to replace a string in a file? - python

I have 2 numbers in two similar files. There is a new.txt and original.txt. They both have the same string in them except for a number. The new.txt has a string that says boothNumber="3". The original.txt has a string that says boothNumber="1".
I want to be able to read the new.txt, pick the number 3 out of it and replace the number 1 in original.txt.
Any suggestions? Here is what I am trying.
import re # used to replace string
import sys # some of these are use for other code in my program
def readconfig():
with open("new.text") as f:
with open("original.txt", "w") as f1:
for line in f:
match = re.search(r'(?<=boothNumber=")\d+', line)
for line in f1:
pattern = re.search(r'(?<=boothNumber=")\d+', line)
if re.search(pattern, line):
sys.stdout.write(re.sub(pattern, match, line))
When I run this, my original.txt gets completely cleared of any text.
I did a traceback and I get this:
in readconfig
for line in f1:
io.UnsupportedOperationo: not readable
UPDATE
I tried:
def readconfig(original_txt_path="original.txt",
new_txt_path="new.txt"):
with open(new_txt_path) as f:
for line in f:
if not ('boothNumber=') in line:
continue
booth_number = int(line.replace('boothNumber=', ''))
# do we need check if there are more than one 'boothNumber=...' line?
break
with open(original_txt_path) as f1:
modified_lines = [line.startswith('boothNumber=') if not line
else 'boothNumber={}'.format(booth_number)
for line in f1]
with open(original_txt_path, mode='w') as f1:
f1.writelines(modified_lines)
And I get error:
booth_number = int(line.replace('boothNumber=', ''))
ValueError: invalid literal for int() with base 10: '
(workstationID="1" "1" window=1= area="" extra parts of the line here)\n
the "1" after workstationID="1" is where the boothNumber=" " would normally go. When I open up original.txt, I see that it actually did not change anything.
UPDATE 3
Here is my code in full. Note, the file names are changed but I'm still trying to do the same thing. This is another idea or revision I had that is still not working:
import os
import shutil
import fileinput
import re # used to replace string
import sys # prevents extra lines being inputed in config
# example: sys.stdout.write
def convertconfig(pattern):
source = "template.config"
with fileinput.FileInput(source, inplace=True, backup='.bak') as file:
for line in file:
match = r'(?<=boothNumber=")\d+'
sys.stdout.write(re.sub(match, pattern, line))
def readconfig():
source = "bingo.config"
pattern = r'(?<=boothNumber=")\d+' # !!!!!!!!!! This probably needs fixed
with fileinput.FileInput(source, inplace=True, backup='.bak') as file:
for line in file:
if re.search(pattern, line):
fileinput.close()
convertconfig(pattern)
def copyfrom(servername):
source = r'//' + servername + '/c$/remotedirectory'
dest = r"C:/myprogram"
file = "bingo.config"
try:
shutil.copyfile(os.path.join(source, file), os.path.join(dest, file))
except:
print ("Error")
readconfig()
# begin here
os.system('cls' if os.name == 'nt' else 'clear')
array = []
with open("serverlist.txt", "r") as f:
for servername in f:
copyfrom(servername.strip())
bingo.config is my new file
template.config is my original
It's replacing the number in template.config with the literal string "r'(?<=boothNumber=")\d+'"
So template.config ends up looking like
boothNumber="r'(?<=boothNumber=")\d+'"
instead of
boothNumber="2"

To find boothNumber value we can use next regular expression (checked with regex101)
(?<=\sboothNumber=\")(\d+)(?=\")
Something like this should work
import re
import sys # some of these are use for other code in my program
BOOTH_NUMBER_RE = re.compile('(?<=\sboothNumber=\")(\d+)(?=\")')
search_booth_number = BOOTH_NUMBER_RE.search
replace_booth_number = BOOTH_NUMBER_RE.sub
def readconfig(original_txt_path="original.txt",
new_txt_path="new.txt"):
with open(new_txt_path) as f:
for line in f:
search_res = search_booth_number(line)
if search_res is None:
continue
booth_number = int(search_res.group(0))
# do we need check if there are more than one 'boothNumber=...' line?
break
else:
# no 'boothNumber=...' line was found, so next lines will fail,
# maybe we should raise exception like
# raise Exception('no line starting with "boothNumber" was found')
# or assign some default value
# booth_number = -1
# or just return?
return
with open(original_txt_path) as f:
modified_lines = []
for line in f:
search_res = search_booth_number(line)
if search_res is not None:
line = replace_booth_number(str(booth_number), line)
modified_lines.append(line)
with open(original_txt_path, mode='w') as f:
f.writelines(modified_lines)
Test
# Preparation
with open('new.txt', mode='w') as f:
f.write('some\n')
f.write('<jack Fill workstationID="1" boothNumber="56565" window="17" Code="" area="" section="" location="" touchScreen="False" secureWorkstation="false">')
with open('original.txt', mode='w') as f:
f.write('other\n')
f.write('<jack Fill workstationID="1" boothNumber="23" window="17" Code="" area="" section="" location="" touchScreen="False" secureWorkstation="false">')
# Invocation
readconfig()
# Checking output
with open('original.txt') as f:
for line in f:
# stripping newline character
print(line.rstrip('\n'))
gives
other
<jack Fill workstationID="1" boothNumber="56565" window="17" Code="" area="" section="" location="" touchScreen="False" secureWorkstation="false">

Related

Store the contents of text file in return variable and manipulate wherever required in python?

I have below function & I am trying to get/store the contents of text file in another temp file(removing unnecessary line) with appending special character.
But I also want the same content which is in temp text file with different special character next time but I am not able to do that.Below function is creating a temp file.To get desired output I need to create file every time with same function again which is not good way.Is there anything we can do without creating a temp/extra file and store the contents in return variable and append the special character whatever we want multiple times
import os
import re
def mainfest():
pathfile = "abc_12.txt"
with open(pathfile, 'r') as firstfile, open('tmp.txt', 'r') as s:
for line in firstfile:
if line.strip().startswith("-") or line.startswith("<"):
print"ignore")
elif re.search('\\S', line):
name = str(os.path.basename(line))
s.write("*" +fname)
def contents():
temppath = "temp.txt"
with open(temp path, 'r') as f:
lines = f.readlines()
lines+= lines
return lines
manifest()
value = contents()
file abc_12.txt
---ABC-123
nice/abc.py
xml/abc.py
<<NOP-123
bac\nice.py
abc.py
---CDEF-345
jkl.oy
abc.py
I want the contents of abc_12.txt file I can get in return something like that
abc.py
abc.py
nice.py
abc.py
jkl.oy
abc.py
and manipulate them wherever I want similar to below output
Output 1:
* abc.py
* abc.py
* nice.py
* abc.py
* jkl.oy
* abc.py
Output 2:
##abc.py
##abc.py
##nice.py
##abc.py
##jkl.oy
##abc.py
Maybe first you should read file, search names and keep on list
def read_data():
results = []
with open("abc_12.txt") as infile:
for line in infile:
if line.strip().startswith(("-", "<")): # `startswith`/`endswith` can use tuple
print("ignore:", line)
elif re.search('\\S', line):
name = os.path.basename(line)
results.append(name)
return results
And later you can use this list to create temp file or other file
data = read_data()
with open('temp.txt', 'w') as outfile:
for line in data:
outfile.write(f'* {line}')
#print(f'* {line}', end='')
with open('other.txt', 'w') as outfile:
for line in data:
outfile.write(f'##{line}')
#print(f'##{line}', end='')
EDIT:
Minimal working code.
I used io.StringIO only to simulate file in memory - so everyone can simply copy and test it.
import os
import re
import io
text = r'''---ABC-123
nice/abc.py
xml/abc.py
<<NOP-123
bac\nice.py
abc.py
---CDEF-345
jkl.oy
abc.py
'''
def read_data():
results = []
with io.StringIO(text) as infile:
#with open("abc_12.txt") as infile:
for line in infile:
line = line.strip()
if line:
if line.startswith(("-", "<")): # `startswith`/`endswith` can use tuple
print("ignore:", line)
else:
name = os.path.basename(line)
results.append(name)
return results
data = read_data()
with open('temp.txt', 'w') as outfile:
for line in data:
outfile.write(f'* {line}\n')
print(f'* {line}')
with open('other.txt', 'w') as outfile:
for line in data:
outfile.write(f'##{line}\n')
print(f'##{line}')
EDIT:
If you don't want to save in file then you still need for-loop to create string
data = read_data()
string_1 = ''
for line in data:
string_1 += f'* {line}\n'
string_2 = ''
for line in data:
string_2 += f'##{line}\n'
or to create new list (and eventually string)
data = read_data()
list_1 = []
for line in data:
list_1.append(f'* {line}')
list_2 = []
for line in data:
list_2.append(f'##{line}')
string_1 = "\n".join(list_1)
string_2 = "\n".join(list_2)

How can I delete "\n" lines from a file in Python?

I need to check if the .csv file I'm working with ends with more than 1 "\n" line. If it finds more than a blank line, it removes them all but one.
My code is:
import os
from pathlib import Path
def remove_blanks():
dirname = os.path.dirname(os.path.abspath(__file__))
path: Path = Path(os.path.join(dirname, "data.csv"))
with open(path, "r+") as op:
lines = op.readlines()
for line in lines:
if line == "\n":
op.write(line.rstrip("\n"))
The .csv file is something like ['01-01-2019,0,0,0\n', '18-05-2019,33,31,48\n', '\n', '\n', '\n'] and the output I'd want is ['01-01-2019,0,0,0\n', '18-05-2019,33,31,48\n', '\n'] but it doesn't seem to be able to delete any line.
The simplest way would be to keep track if you've seen an empty line, then write one just before you write a non-empty line.
pre = ""
for line in lines:
if line == "\n":
pre = line
else:
op.write(pre)
op.write(line)
pre = "\n"
op.write(pre)
This reduces any sequence of empty lines to a single empty line, and writes that single line just before writing a non-empty line or the end of the file. When pre is the empty string, writing it is a no-op.
If you want to preserve multiple blank lines in the middle of the file, build up the sequence of blank lines in pre as you find them, and at the end of the file, only write a single blank line (rather than pre itself) if pre is not empty.
pre = ""
for line in lines:
if line == "\n":
pre += line
else:
op.write(pre)
op.write(line)
pre = ""
if pre:
op.write("\n")
Oops, never rewrite the file that you are reading: it is likely not to work or at best will lead to a maintenance nightmare.
If the file is small enough to fit in main memory, this slight change in your code could be enough:
import os.path
from pathlib import Path
def remove_blanks():
dirname = os.path.dirname(os.path.abspath(__file__))
path: Path = Path(os.path.join(dirname, "data.csv"))
with open(path, "r") as op:
lines = op.readlines() # read lines in memory
with open(path("w") as op: # re-write everything from the beginning
flag = False
for line in lines:
if line == "\n":
if not flag:
op.write(line)
flag = True
else:
op.write(line)
# flag = False # uncomment if you want to keep one blank line
# per group of consecutive lines
You could try using the Counter().
import os
from pathlib import Path
from collections import Counter
def remove_blanks():
dirname = os.path.dirname(os.path.abspath(__file__))
path: Path = Path(os.path.join(dirname, "data.csv"))
with open(path, "r+") as op:
lines = op.readlines()
for line in lines:
count = Counter()
# Add 1 for every time word appears in line
for word in line:
count[word] += 1
# Change the number of newlines to 1
if count['\n'] > 1:
count['\n'] = 1
# Returns list with the number of elements
line = list(count.elements())
I managed to work this out, with this code:
import os
from pathlib import Path
def remove_blanks():
dirname = os.path.dirname(os.path.abspath(__file__))
path: Path = Path(os.path.join(dirname, "data.csv"))
with open(path, "r") as op:
lines = op.readlines() # read lines in memory
with open(path, "w") as op: # re-write everything from the beginning
for line in lines:
if line != "\n":
op.write(line)
else:
continue
It can remove every new line in excess, no matter where it is in the file.
Thanks to everyone who tried to help me!

Python looping text file to find and print

This Python code runs but prints the last page of the text file. Not sure why, but I aim to print the whole line of text that is underneath a specified line of text (a line that contains the specific string ** Direct **). How can I loop through a text file, search each line for the specified string, and whenever it is found print the row directly below it? I have searched many online forums and have not found an easily understandable example. I use Python Sypder 2.7. Any help appreciated
import os
Path = '..\\Test.txt'
if os.path.isfile(Path):
with open(Path) as f:
for line in f:
print line
else:
print Path, "doesn't exist"
f.close()
Python 3.x:
dummy.txt
Mango
Anday Wala Burger
40
Aloo
Anday
Ghobi
Anday Wala Burger
30
Kheerey
Anday Wala Burger
py:
searchString = 'Anday Wala Burger'
with open('dummy.txt', "r") as input:
try:
for line in input:
if searchString in line:
print(next(input), end='')
except StopIteration:
pass
OUTPUT:
40
30
EDIT:
Python 2.7:
dummyFile= "dummy.txt"
searchString = 'Anday Wala Burger'
with open(dummyFile) as f:
content = f.readlines()
# you may also want to remove empty lines
content = [l.strip() for l in content if l.strip()]
# flag
nextLine = False
for line in content:
if searchString in line:
nextLine = not nextLine
else:
if nextLine:
print(line)
nextLine = not nextLine
else:
pass
OUTPUT:
40
30
You need to make some changes:
1.-Read the lines
2.- Compare with your text
import os
Path = '..\\Test.txt'
look='**Direct**'
if os.path.isfile(Path):
with open(Path, "r") as f:
for line in f:
if look in line:
print (line)
else:
print (Path, "doesn't exist")
Check out the re module. It includes the re.search() function, which searches for a pattern inside a string.
To print the next line you can take advantage of the fact that file objects are iterable, by using f.next().
For example, you could do:
import os
import re
Path = 'foo.txt'
Pattern = "spam" # String to be searched
if os.path.isfile(Path):
with open(Path) as f:
for line in f:
if re.search(Pattern, line):
print(f.next())
else:
print(Path, "doesn't exist")
By the way, you don't need that final f.close(). It is already taken care of by the with statement.
Check if one is in any line in the file and the print the next line as found will be the next lineno file will be set as currentlineno+1
Contents of file temp
one
two
three
four
Python file
with open('temp') as f:
found=-1
#no lines are found to print so found is negative in value since line no in a file cannot be negative
for index,line in enumerate(f):
if found==index:
print(line)
if 'one' in line:
found=index+1
# when the condition is True found is set to the line no that we need to print
Output
two

How to turn integer in document into a variable

The bingo.config has a string with a random integer in it.
Inside the file, it looks like
boothNumber="2"
My template.config file has the same string with a different integer or number.
I am trying to replace this with the current integer in bingo.config
My problem is that the number in template.config is not being replaced by the number in bingo.config
It's replacing the number in template.config with the literal string "r'(?<=boothNumber=")\d+'"
So template.config ends up looking like
boothNumber="r'(?<=boothNumber=")\d+'"
instead of
boothNumber="2"
It looks like my issue is stemming from how I save the variable "pattern" in the "readconfig" function.
Any ideas on how I can save the integer from bingo.config into a proper variable that can be used?
import os
import shutil
import fileinput
import re # used to replace string
import sys # prevents extra lines being inputed in config
# example: sys.stdout.write
def convertconfig(pattern):
source = "template.config"
with fileinput.FileInput(source, inplace=True, backup='.bak') as file:
for line in file:
match = r'(?<=boothNumber=")\d+'
sys.stdout.write(re.sub(match, pattern, line))
def readconfig():
source = "bingo.config"
pattern = r'(?<=boothNumber=")\d+' # !!!!!!!!!! This probably needs fixed
with fileinput.FileInput(source, inplace=True, backup='.bak') as file:
for line in file:
if re.search(pattern, line):
fileinput.close()
convertconfig(pattern)
def copyfrom(servername):
source = r'//' + servername + '/c$/remotedirectory'
dest = r"C:/myprogram"
file = "bingo.config"
try:
shutil.copyfile(os.path.join(source, file), os.path.join(dest, file))
except:
print ("Error")
readconfig()
# begin here
os.system('cls' if os.name == 'nt' else 'clear')
array = []
with open("serverlist.txt", "r") as f:
for servername in f:
copyfrom(servername.strip())

Searching and extracting WH-word from a file line by line with Python and regex

I have a file that has one sentence per line. I am trying to read the file and search if the sentence is a question using regex and extract the wh-word from the sentences and save them back into another file according the order it appeared in the first file.
This is what I have so far..
def whWordExtractor(inputFile):
try:
openFileObject = open(inputFile, "r")
try:
whPattern = re.compile(r'(.*)who|what|how|where|when|why|which|whom|whose(\.*)', re.IGNORECASE)
with openFileObject as infile:
for line in infile:
whWord = whPattern.search(line)
print whWord
# Save the whWord extracted from inputFile into another whWord.txt file
# writeFileObject = open('whWord.txt','a')
# if not whWord:
# writeFileObject.write('None' + '\n')
# else:
# whQuestion = whWord
# writeFileObject.write(whQuestion+ '\n')
finally:
print 'Done. All WH-word extracted.'
openFileObject.close()
except IOError:
pass
The result after running the code above: set([])
Is there something I am doing wrong here? I would be grateful if someone can point it out to me.
Something like this:
def whWordExtractor(inputFile):
try:
with open(inputFile) as f1:
whPattern = re.compile(r'(.*)who|what|how|where|when|why|which|whom|whose(\.*)', re.IGNORECASE)
with open('whWord.txt','a') as f2: #open file only once, to reduce I/O operations
for line in f1:
whWord = whPattern.search(line)
print whWord
if not whWord:
f2.write('None' + '\n')
else:
#As re.search returns a sre.SRE_Match object not string, so you will have to use either
# whWord.group() or better use whPattern.findall(line)
whQuestion = whWord.group()
f2.write(whQuestion+ '\n')
print 'Done. All WH-word extracted.'
except IOError:
pass
Not sure if it's what you're looking for, but you could try something like this:
def whWordExtractor(inputFile):
try:
whPattern = re.compile(r'who|what|how|where|when|why|which|whom|whose', re.IGNORECASE)
with open(inputFile, "r") as infile:
for line in infile:
whMatch = whPattern.search(line)
if whMatch:
whWord = whMatch.group()
print whWord
# save to file
else:
# no match
except IOError:
pass
Change '(.*)who|what|how|where|when|why|which|whom|whose(\.*)' to
".*(?:who|what|how|where|when|why|which|whom|whose).*\."

Categories

Resources