Removing characters in a file and also making it neat - python

I want to basically remove all the characters in delete list from the file (Line 11 to 15). What would be the neatest way to delete the words without making the code not neat. I am not sure whether to open the file again here which I know would not be the right way but I can't think of a different solution. Any help would be appreciated.
from os import write
import re
def readText():
with open(r'C:\Users\maxth\Desktop\TextCounter\Text.txt') as f:
print(f.read())
def longestWord():
with open(r'C:\Users\maxth\Desktop\TextCounter\Text.txt', 'r+') as f:
users_text = f.read()
#I want to basically remove all the char in delete list from the file. What would be the neatest way to delete the words without making the code not neat. I am not sure wether to open the file again here and re write it or what!
deleteList = ['!','£','$','%','^','&','*','()','_','+']
for line in f:
for word in deleteList:
line = line.replace(word, '')
longest = max(users_text.split(), key=len)
count_longest = str(len(longest))
print('The longest word in the file is: ' + long)
print('Thats a total of '+count_longest+' letters!')
def writeWord():
with open(r'C:\Users\maxth\Desktop\TextCounter\Text.txt', 'w') as f:
users_text = input('Enter your desired text to continue. \n: ')
f.write(users_text)
f.close()
with open(r'C:\Users\maxth\Desktop\TextCounter\Text.txt', 'r') as file:
print(file.read())
longestWord()

Had to re work it and implement it in a different def. Need to add relative paths and will be alot cleaner aswell.
from os import write
import re
def longestWord():
with open(r'C:\Users\maxth\Desktop\TextCounter\Text.txt', 'r+') as f:
users_text = f.read()
longest = max(users_text.split(), key=len)
count_longest = str(len(longest))
print('The longest word in the file is: ' + longest)
print('Thats a total of '+count_longest+' letters!')
def writeWord():
with open(r'C:\Users\maxth\Desktop\TextCounter\Text.txt', 'w') as f:
users_text = input('Enter your desired text to continue. \n: ')
cleanText = re.sub('[^a-zA-Z0-9 \n\.]', ' ', users_text)
f.write(cleanText)
with open(r'C:\Users\maxth\Desktop\TextCounter\Text.txt', 'r') as clean:
print('\nRemoved any illegal characters. Here is your text:\n\n' + cleanText + '\n')
f.close()
while True:
print("""
Welcome to Skies word text counter!
====================================================
""")
writeWord()
longestWord()
userDecide = input("""
====================================================
Would you like to enter new text and repeat?
Type 'yes' to continue else program will terminate.
====================================================
: """)
if not userDecide.lower == 'yes':
print('Application closing...')
exit()

Related

updating a leaderboard for noughts and crosses game [duplicate]

How do I search and replace text in a file using Python 3?
Here is my code:
import os
import sys
import fileinput
print ("Text to search for:")
textToSearch = input( "> " )
print ("Text to replace it with:")
textToReplace = input( "> " )
print ("File to perform Search-Replace on:")
fileToSearch = input( "> " )
#fileToSearch = 'D:\dummy1.txt'
tempFile = open( fileToSearch, 'r+' )
for line in fileinput.input( fileToSearch ):
if textToSearch in line :
print('Match Found')
else:
print('Match Not Found!!')
tempFile.write( line.replace( textToSearch, textToReplace ) )
tempFile.close()
input( '\n\n Press Enter to exit...' )
Input file:
hi this is abcd hi this is abcd
This is dummy text file.
This is how search and replace works abcd
When I search and replace 'ram' by 'abcd' in above input file, it works as a charm. But when I do it vice-versa i.e. replacing 'abcd' by 'ram', some junk characters are left at the end.
Replacing 'abcd' by 'ram'
hi this is ram hi this is ram
This is dummy text file.
This is how search and replace works rambcd
As pointed out by michaelb958, you cannot replace in place with data of a different length because this will put the rest of the sections out of place. I disagree with the other posters suggesting you read from one file and write to another. Instead, I would read the file into memory, fix the data up, and then write it out to the same file in a separate step.
# Read in the file
with open('file.txt', 'r') as file :
filedata = file.read()
# Replace the target string
filedata = filedata.replace('abcd', 'ram')
# Write the file out again
with open('file.txt', 'w') as file:
file.write(filedata)
Unless you've got a massive file to work with which is too big to load into memory in one go, or you are concerned about potential data loss if the process is interrupted during the second step in which you write data to the file.
fileinput already supports inplace editing. It redirects stdout to the file in this case:
#!/usr/bin/env python3
import fileinput
with fileinput.FileInput(filename, inplace=True, backup='.bak') as file:
for line in file:
print(line.replace(text_to_search, replacement_text), end='')
As Jack Aidley had posted and J.F. Sebastian pointed out, this code will not work:
# Read in the file
filedata = None
with file = open('file.txt', 'r') :
filedata = file.read()
# Replace the target string
filedata.replace('ram', 'abcd')
# Write the file out again
with file = open('file.txt', 'w') :
file.write(filedata)`
But this code WILL work (I've tested it):
f = open(filein,'r')
filedata = f.read()
f.close()
newdata = filedata.replace("old data","new data")
f = open(fileout,'w')
f.write(newdata)
f.close()
Using this method, filein and fileout can be the same file, because Python 3.3 will overwrite the file upon opening for write.
You can do the replacement like this
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
for line in f1:
f2.write(line.replace('old_text', 'new_text'))
f1.close()
f2.close()
You can also use pathlib.
from pathlib2 import Path
path = Path(file_to_search)
text = path.read_text()
text = text.replace(text_to_search, replacement_text)
path.write_text(text)
(pip install python-util)
from pyutil import filereplace
filereplace("somefile.txt","abcd","ram")
Will replace all occurences of "abcd" with "ram".
The function also supports regex by specifying regex=True
from pyutil import filereplace
filereplace("somefile.txt","\\w+","ram",regex=True)
Disclaimer: I'm the author (https://github.com/MisterL2/python-util)
Open the file in read mode. Read the file in string format. Replace the text as intended. Close the file. Again open the file in write mode. Finally, write the replaced text to the same file.
try:
with open("file_name", "r+") as text_file:
texts = text_file.read()
texts = texts.replace("to_replace", "replace_string")
with open(file_name, "w") as text_file:
text_file.write(texts)
except FileNotFoundError as f:
print("Could not find the file you are trying to read.")
Late answer, but this is what I use to find and replace inside a text file:
with open("test.txt") as r:
text = r.read().replace("THIS", "THAT")
with open("test.txt", "w") as w:
w.write(text)
DEMO
With a single with block, you can search and replace your text:
with open('file.txt','r+') as f:
filedata = f.read()
filedata = filedata.replace('abc','xyz')
f.truncate(0)
f.write(filedata)
Your problem stems from reading from and writing to the same file. Rather than opening fileToSearch for writing, open an actual temporary file and then after you're done and have closed tempFile, use os.rename to move the new file over fileToSearch.
My variant, one word at a time on the entire file.
I read it into memory.
def replace_word(infile,old_word,new_word):
if not os.path.isfile(infile):
print ("Error on replace_word, not a regular file: "+infile)
sys.exit(1)
f1=open(infile,'r').read()
f2=open(infile,'w')
m=f1.replace(old_word,new_word)
f2.write(m)
Using re.subn it is possible to have more control on the substitution process, such as word splitted over two lines, case-(in)sensitive match. Further, it returns the amount of matches which can be used to avoid waste of resources if the string is not found.
import re
file = # path to file
# they can be also raw string and regex
textToSearch = r'Ha.*O' # here an example with a regex
textToReplace = 'hallo'
# read and replace
with open(file, 'r') as fd:
# sample case-insensitive find-and-replace
text, counter = re.subn(textToSearch, textToReplace, fd.read(), re.I)
# check if there is at least a match
if counter > 0:
# edit the file
with open(file, 'w') as fd:
fd.write(text)
# summary result
print(f'{counter} occurence of "{textToSearch}" were replaced with "{textToReplace}".')
Some regex:
add the re.I flag, short form of re.IGNORECASE, for a case-insensitive match
for multi-line replacement re.subn(r'\n*'.join(textToSearch), textToReplace, fd.read()), depending on the data also '\n{,1}'. Notice that for this case textToSearch must be a pure string, not a regex!
Besides the answers already mentioned, here is an explanation of why you have some random characters at the end:
You are opening the file in r+ mode, not w mode. The key difference is that w mode clears the contents of the file as soon as you open it, whereas r+ doesn't.
This means that if your file content is "123456789" and you write "www" to it, you get "www456789". It overwrites the characters with the new input, but leaves any remaining input untouched.
You can clear a section of the file contents by using truncate(<startPosition>), but you are probably best off saving the updated file content to a string first, then doing truncate(0) and writing it all at once.
Or you can use my library :D
I got the same issue. The problem is that when you load a .txt in a variable you use it like an array of string while it's an array of character.
swapString = []
with open(filepath) as f:
s = f.read()
for each in s:
swapString.append(str(each).replace('this','that'))
s = swapString
print(s)
I tried this and used readlines instead of read
with open('dummy.txt','r') as file:
list = file.readlines()
print(f'before removal {list}')
for i in list[:]:
list.remove(i)
print(f'After removal {list}')
with open('dummy.txt','w+') as f:
for i in list:
f.write(i)
you can use sed or awk or grep in python (with some restrictions). Here is a very simple example. It changes banana to bananatoothpaste in the file. You can edit and use it. ( I tested it worked...note: if you are testing under windows you should install "sed" command and set the path first)
import os
file="a.txt"
oldtext="Banana"
newtext=" BananaToothpaste"
os.system('sed -i "s/{}/{}/g" {}'.format(oldtext,newtext,file))
#print(f'sed -i "s/{oldtext}/{newtext}/g" {file}')
print('This command was applied: sed -i "s/{}/{}/g" {}'.format(oldtext,newtext,file))
if you want to see results on the file directly apply: "type" for windows/ "cat" for linux:
####FOR WINDOWS:
os.popen("type " + file).read()
####FOR LINUX:
os.popen("cat " + file).read()
I have done this:
#!/usr/bin/env python3
import fileinput
import os
Dir = input ("Source directory: ")
os.chdir(Dir)
Filelist = os.listdir()
print('File list: ',Filelist)
NomeFile = input ("Insert file name: ")
CarOr = input ("Text to search: ")
CarNew = input ("New text: ")
with fileinput.FileInput(NomeFile, inplace=True, backup='.bak') as file:
for line in file:
print(line.replace(CarOr, CarNew), end='')
file.close ()
I modified Jayram Singh's post slightly in order to replace every instance of a '!' character to a number which I wanted to increment with each instance. Thought it might be helpful to someone who wanted to modify a character that occurred more than once per line and wanted to iterate. Hope that helps someone. PS- I'm very new at coding so apologies if my post is inappropriate in any way, but this worked for me.
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
n = 1
# if word=='!'replace w/ [n] & increment n; else append same word to
# file2
for line in f1:
for word in line:
if word == '!':
f2.write(word.replace('!', f'[{n}]'))
n += 1
else:
f2.write(word)
f1.close()
f2.close()
def word_replace(filename,old,new):
c=0
with open(filename,'r+',encoding ='utf-8') as f:
a=f.read()
b=a.split()
for i in range(0,len(b)):
if b[i]==old:
c=c+1
old=old.center(len(old)+2)
new=new.center(len(new)+2)
d=a.replace(old,new,c)
f.truncate(0)
f.seek(0)
f.write(d)
print('All words have been replaced!!!')
I have worked this out as an exercise of a course: open file, find and replace string and write to a new file.
class Letter:
def __init__(self):
with open("./Input/Names/invited_names.txt", "r") as file:
# read the list of names
list_names = [line.rstrip() for line in file]
with open("./Input/Letters/starting_letter.docx", "r") as f:
# read letter
file_source = f.read()
for name in list_names:
with open(f"./Output/ReadyToSend/LetterTo{name}.docx", "w") as f:
# replace [name] with name of the list in the file
replace_string = file_source.replace('[name]', name)
# write to a new file
f.write(replace_string)
brief = Letter()
Like so:
def find_and_replace(file, word, replacement):
with open(file, 'r+') as f:
text = f.read()
f.write(text.replace(word, replacement))
def findReplace(find, replace):
import os
src = os.path.join(os.getcwd(), os.pardir)
for path, dirs, files in os.walk(os.path.abspath(src)):
for name in files:
if name.endswith('.py'):
filepath = os.path.join(path, name)
with open(filepath) as f:
s = f.read()
s = s.replace(find, replace)
with open(filepath, "w") as f:
f.write(s)

Search text in file and print all text

okay. you didnt understand anything from the title. let me explain.
now ı have a file. There is some text in this file. for example "jack.123 jackie.321"
I want to check if the word jack exists in the file and ı wanna print "jack.123".
its my problem. ı didnt print all text.
def append(name,password):
f = open("myfile.txt", "w")
f.write("{},{}".format(name,password))
append("jack",".123")
append("jackie" , ".321")
f = open("myfile.txt" ,"r")
if "jack" in f.read():
print("query found")
Open the file and read all its contents then split on whitespace. That effectively gives you all the words in the file.
Iterate over the list of words checking to see if a word starts with the name you're searching for followed by '.'.
Note that there may be more than one occurrence so build a list.
def find_name(filename, name):
if not name[-1] == '.':
name += '.'
found = []
with open(filename) as myfile:
for word in myfile.read().split():
if word.startswith(name):
found.append(word)
return found
print(*find_name('myfile.txt', 'jack'))
def new_pass(name, passwd):
"creates file and write name and passwd to it"
with open("myfile1.txt", "a") as f:
f.write(name + "." + passwd + "\n")
new_pass("jack", "123")
new_pass("jack", "183")
new_pass("jack", "129")
new_pass("jack", "223")
def check_word(file, word):
"""checks if a word exists and returns its first occurence """
with open(file) as f:
l = f.read().split("\n")
for i in l:
if i.startswith(word):
print("query found")
return i
print(check_word("myfile1.txt", "jack"))
In python you can do it like that :
with open('file.txt', 'r') as file:
data = file.read()
if "jack" in data:
print("jack")
If I understood uncorrectly let me know

reading a text file, then writing to another text without a user input word

I need help reading through a text file to search for a user input word, then copying data from that text file to another text file without the user input word.
def remove(infile, outfile, target):
for line in infile:
for word in line.split():
if word != target:
outfile.write(word)
def main():
outfile = open('outputFile.txt', 'w')
infile = open('inputFile.txt', 'r')
#Ask user for word they would like to omit from outfile
target = input("What word would you like to delete from input file?: ")
remove(infile, outfile, target)
infile.close()
outfile.close()
main()
The code above works correctly. It searches the first text file for the user input word and writes to a new file, but it doesn't write out with the proper spacing. It is all on one line and doesn't have any spaces. How do I write to the new file with the same spaces, and \n as the original document?
def remove(infile, outfile, target):
for line in infile:
for word in line.split(' '):
if word != target:
outfile.write(word+' ') # You forgot to add the space that was removed while splitting the line.
outfile.write('\n') # you also forgot to add the newline
def main():
outfile = open('outputFile.txt', 'w')
infile = open('inputFile.txt', 'r')
#Ask user for word they would like to omit from outfile
target = input("What word would you like to delete from input file?: ")
remove(infile, outfile, target)
infile.close()
outfile.close()
main()

Find a dot in a text file and add a newline to the file in Python?

I read from a file, if it finds a ".", it should add a newline "\n" to the text and write it back to the file. I tried this code but still have the problem.
inp = open('rawCorpus.txt', 'r')
out = open("testFile.text", "w")
for line in iter(inp):
l = line.split()
if l.endswith(".")
out.write("\n")
s = '\n'.join(l)
print(s)
out.write(str(s))
inp.close()
out.close()
Try This ( Normal way ):
with open("rawCorpus.txt", 'r') as read_file:
raw_data = read_file.readlines()
my_save_data = open("testFile.text", "a")
for lines in raw_data:
if "." in lines:
re_lines = lines.replace(".", ".\r\n")
my_save_data.write(re_lines)
else:
my_save_data.write(lines + "\n")
my_save_data.close()
if your text file is not big you can try this too :
with open("rawCorpus.txt", 'r') as read_file:
raw_data = read_file.read()
re_data = raw_data.replace(".", ".\n")
with open("testFile.text", "w") as save_data:
save_data.write(re_data)
UPDATE ( output new lines depends on your text viewer too! because in some text editors "\n" is a new line but in some others "\r\n" is a new line. ) :
input sample :
This is a book. i love it.
This is a apple. i love it.
This is a laptop. i love it.
This is a pen. i love it.
This is a mobile. i love it.
Code:
last_buffer = []
read_lines = [line.rstrip('\n') for line in open('input.txt')]
my_save_data = open("output.txt", "a")
for lines in read_lines:
re_make_lines = lines.split(".")
for items in re_make_lines:
if items.replace(" ", "") == "":
pass
else:
result = items.strip() + ".\r\n"
my_save_data.write(result)
my_save_data.close()
Ouput Will Be :
This is a book.
i love it.
This is a apple.
i love it.
This is a laptop.
i love it.
This is a pen.
i love it.
This is a mobile.
i love it.
You are overwriting the string s in every loop with s = '\n'.join(l).
Allocate s = '' as empty string before the for-loop and add the new lines during every loop, e.g. with s += '\n'.join(l) (short version of s = s + '\n'.join(l)
This should work:
inp = open('rawCorpus.txt', 'r')
out = open('testFile.text', 'w')
s = '' # empty string
for line in iter(inp):
l = line.split('.')
s += '\n'.join(l) # add new lines to s
print(s)
out.write(str(s))
inp.close()
out.close()
Here is my own solution, but still I want one more newline after ".", that this solution not did this
read_lines = [line.rstrip('\n') for line in open('rawCorpus.txt')]
words = []
my_save_data = open("my_saved_data.txt", "w")
for lines in read_lines:
words.append(lines)
for word in words:
w = word.rstrip().replace('.', '\n.')
w = w.split()
my_save_data.write(str("\n".join(w)))
print("\n".join(w))
my_save_data.close()

PYTHON: search for exact string in text document

I am trying to search a text file/database for an exact username but I am getting multiple outputs for my query
Here is my code:
import re
txtFile = open("test.txt", "r")
userName = raw_input("USER: ")
for line in txtFile:
if re.match(userName, line):
print line
When I enter a generic username like "dragon" I get multiple results:
>>>
USER: dragon
dragon:46.245.173.123
dragonsAreCool:3.13.136.5
How would I make the search exact instead of getting multiple results that include my query?
import re
userName = raw_input("USER: ")
with open("test.txt", "r") as txtFile:
for line in txtFile:
if re.match(userName + ':', line):
print line
break # if you sure there is only one user, add break, make you code faster, else delete it.
or:
for line in txtFile:
if line.startswith(userName + ':')
print line
break # if you sure there is only one user, add break, make you code faster, else delete it.

Categories

Resources