Replace space with underscore using python - python

I need some help: I have a txt file with spaces between words, and I want to replace each space with an underscore.
fileHandler = open('nog_rename_update.txt')
for eachline in fileHandler:
new_name = fileHandler.replace(" ","_")
print(new_name)
That's my code but it keeps throwing error messages
new_name = fileHandler.replace(" ","_")
AttributeError: '_io.TextIOWrapper' object has no attribute 'replace'
example files that I want to remove space and add underscore

Here's a generic approach that should work for you:
# Demo input containing runs of several consecutive spaces.
teststring = 'hello   world this  is just a test. don\'t    mind me 123.'

# replace multiple spaces with one space
# NOTE: the search string must be TWO spaces. Looping on a single space
# (replacing ' ' with ' ') never changes the string and never terminates.
while '  ' in teststring:
    teststring = teststring.replace('  ', ' ')

# replace space with underscore (_)
teststring = teststring.replace(' ', '_')
print(teststring)
assert teststring == "hello_world_this_is_just_a_test._don't_mind_me_123."  # True
Using a file example:
fname = 'mah_file.txt'

# Read the whole file up front so it can be reopened for writing below.
with open(fname) as in_file:
    contents = in_file.read()

# Collapse runs of spaces to a single space. The search string must be TWO
# spaces; searching for one space would loop forever.
while '  ' in contents:
    contents = contents.replace('  ', ' ')

# write updated contents back to file
with open(fname, 'w') as out_file:
    out_file.write(contents.replace(' ', '_'))

This opens the files, reads line by line, splits the line into two parts, and combines the two parts with an underscore. I stored it in a list that you can use to do your next step.
with open('nog_rename_update.txt') as f:
    new_list = []
    for line in f:
        # split the line on whitespace (this also drops the trailing newline)
        parts = line.split()
        if not parts:
            # blank line: nothing to join, and indexing it would raise IndexError
            continue
        # join every word with an underscore; for a two-word line this is
        # exactly parts[0] + "_" + parts[1], but it also copes with one word
        # or more than two
        new_list.append("_".join(parts))
# print the list to see results
print(new_list)
#
# add code to loop through the new list and to write to a file
#

Try out this
# Read the whole file; the `with` block closes the handle as soon as the
# text has been read instead of leaving it open until garbage collection.
with open('nog_rename_update.txt') as fileHandler:
    contents = fileHandler.read()

# str.replace works on the text that was read, not on the file object itself.
new_name = contents.replace(" ", "_")
print(new_name)

# Read the source text; `with` closes the file even if reading fails.
with open("test.txt", "r") as src:
    text = src.read()

# Replace unconditionally: str.replace returns the text unchanged when there
# is nothing to replace. Guarding with `if ' ' in text` would leave text2
# empty for space-free input and write an empty output file.
text2 = text.replace(' ', '_')
print(text2)

# Write the converted text to the destination file.
with open("testfile.txt", "w") as dst:
    dst.write(text2)

Here is another, less verbose solution. Simply use re.sub:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
with open(file_name, "r") as file:
for line in file:
print(re.sub("( )+", "_", line), end="")
And if you want to replace the spaces in your text file:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
lines = []
with open(file_name, "r") as file:
lines = [
re.sub("( )+", "_", line) for line in file.readlines()
]
with open(file_name, "w") as file:
file.writelines(lines)
Or use fileinput:
import re
import fileinput
file_name = r"D:\projects\playground\python\data\test.txt"
with fileinput.FileInput(file_name, inplace=True, backup=".bak") as file:
for line in file:
print(re.sub("( )+", "_", line), end="")

Related

Python to read txt files and delete lines that contains same part

I have tons (1000+) of txt files that look like this:
TextTextText('aaa/bbb`ccc' , "ddd.eee");
TextTextText('yyy/iii`ooo' , "rrr.ttt");
TextTextText('aaa/fff`ggg' , "hhh.jjj");
What I want to achieve is to delete all lines that contain the same "aaa" part and leave only one line with it (remove all duplicates).
my code so far:
import os
from collections import Counter
sourcepath = os.listdir('Process_Directory3/')
for file in sourcepath:
inputfile = 'Process_Directory3/' + file
outputfile = "Output_Directory/" + file
lines_seen = set()
outfile = open(outputfile, "w")
for line in open(inputfile, "r"):
print(line)
cut_line = line.split("'")
new_line = cut_line[1]
cut_line1 = new_line.split("/")
new_line1 = cut_line1[0]
if new_line1 not in lines_seen:
outfile.write(new_line1)
lines_seen.add(new_line1)
outfile.close()
My code is not working at all; I don't get any results.
Console Report:
Line13 in <module>
new_line = cut_line[1]
IndexError: list index out of range
Sorry for my bad writing, it's my first post so far :D
Best Regards
Update:
I added
startPattern = "TextTextText"
if(startPattern in line):
to make sure I target only lines that begin with "TextTextText", but for some reason I am getting a .txt file in the destination folder that contains only one line of content: "aaa".
In the end of the day, here is a fully working code:
import os
sourcepath = os.listdir('Process_Directory3/')
for file in sourcepath:
    inputfile = 'Process_Directory3/' + file
    outputfile = "Output_Directory/" + file
    # Keys (the text between the first ' and the following /) already written
    # to the current output file; reset for every input file.
    lines_seen = set()
    # `with` closes both files once per input file, even if an error occurs.
    with open(inputfile, "r") as infile, open(outputfile, "w") as outfile:
        for line in infile:
            if not line.startswith("TextTextText"):
                # Lines that are not TextTextText(...) entries are copied as-is.
                outfile.write(line)
                continue
            cut_line = line.split("'")
            if len(cut_line) < 2:
                # No quoted part to build a key from: drop the line. This is
                # what the previous bare `except: pass` did for such lines,
                # without also hiding unrelated errors such as failed writes.
                continue
            key = cut_line[1].split("/")[0]
            if key not in lines_seen:
                # First occurrence of this key: keep the complete line.
                outfile.write(line)
                lines_seen.add(key)
Thanks for a great help guys!
Use a try-except block in inner for loop. This will prevent your program from being interrupted if any error is encountered due to any line which doesn't contain ' or /.
Update:
I've tried the code given below and it worked fine for me.
sourcepath = os.listdir('Process_Directory3/')
for file in sourcepath:
    inputfile = 'Process_Directory3/' + file
    outputfile = "Output_Directory/" + file
    # Keys already written to this output file; reset for every input file.
    lines_seen = set()
    # The files are closed here, once per input file (the original close()
    # call was having bad indentation).
    with open(inputfile, "r") as infile, open(outputfile, "w") as outfile:
        for line in infile:
            try:
                # Only this lookup can fail: a line without ' splits into a
                # single element, so index 1 does not exist.
                new_line = line.split("'")[1]
            except IndexError:
                # Skip lines that have no quoted part.
                continue
            # split("/") always returns at least one element, so [0] is safe.
            new_line1 = new_line.split("/")[0]
            if new_line1 not in lines_seen:
                outfile.write(line)  # Replaced new_line1 with line
                lines_seen.add(new_line1)
Input file:
TextTextText('aaa/bbb`ccc' , "ddd.eee");
TextTextText('yyy/iii`ooo' , "rrr.ttt");
TextTextText('aaa/fff`ggg' , "hhh.jjj");
TextTextText('WWW/fff`ggg' , "hhh.jjj");
TextTextText('yyy/iii`ooo' , "rrr.ttt");
Output File:
TextTextText('aaa/bbb`ccc' , "ddd.eee");
TextTextText('yyy/iii`ooo' , "rrr.ttt");
TextTextText('WWW/fff`ggg' , "hhh.jjj");
It looks like you encountered a line inside your file which has no ', in which case line.split("'") produces a list with a single element, for example
line = "blah blah blah"
cut_line = line.split("'")
print(cut_line) # ['blah blah blah']
so trying to get cut_line[1] results in an error, as there is only cut_line[0]. As this piece of your code is inside a loop, you can avoid that by skipping to the next iteration using the continue keyword if cut_line does not have enough elements. Just replace:
cut_line = line.split("'")
new_line = cut_line[1]
by:
cut_line = line.split("'")
if len(cut_line) < 2:
continue
new_line = cut_line[1]
This will result in ignoring all lines without '.
I think using a regular expression would make it easier. I have made a simplified working code using re.
import re
# Sample input: an empty line, an unrelated line, and three entries of which
# the first and the last share the same 'aaa/ key.
lines = [
    "",
    "dfdsa sadfsadf sa",
    "TextTextText('aaa/bbb`ccc' ,dsafdsafsA ",
    "TextTextText('yyy/iii`ooo' ,SDFSDFSDFSA ",
    "TextTextText('aaa/fff`ggg' ,SDFSADFSDF ",
]
lines_seen = set()  # keys ('xxx/) that have already been kept
out_lines = []      # first line seen for each key, in input order

for line in lines:
    # SEARCH FOR 'xxx/ TEXT in the line -----------------------------------
    # Only the first match is used, so re.search is enough here.
    re_result = re.search(r"'[a-z]+\/", line)
    if re_result:
        key = re_result.group(0)
        print(f're_result {key}')
        if key not in lines_seen:
            print(f'>>> newly found {key}')
            lines_seen.add(key)
            out_lines.append(line)

print('------------')
for line in out_lines:
    print(line)
Result
re_result 'aaa/
>>> newly found 'aaa/
re_result 'yyy/
>>> newly found 'yyy/
re_result 'aaa/
------------
TextTextText('aaa/bbb`ccc' ,dsafdsafsA
TextTextText('yyy/iii`ooo' ,SDFSDFSDFSA
You can experiment with regular expressions here regex101.com.
Try r"'.+/" any character between ' and /, or r"'[a-zA-Z]+/" lower and uppercase letters between ' and /.

I want to add a string in between of 2 strings in file, but in output whole text is getting appended at the end of file

Code :
fo = open("backup.txt", "r")
filedata = fo.read()
with open("backup.txt", "ab") as file :
file.write(filedata[filedata.index('happy'):] + " appending text " + filedata[:filedata.rindex('ending')])
with open("backup.txt", "r") as file :
print "In meddival : \n",file.read()
Expected Output :
I noticed that every now and then I need to Google fopen all over again. happy appending text ending
Actual output :
I noticed that every now and then I need to Google fopen all over again. happy endinghappy ending appending text I noticed that every now and then I need to Google fopen all over again. happy
Okay, this will definitely fix your problem.
fo = open("backup.txt", "r")
filedata = fo.read()
ix = filedata.index('ending')
new_str = ' '.join([filedata[:ix], 'appending text', filedata[ix:]])
with open("backup.txt", "ab") as file:
file.write(new_str)
with open("backup.txt", "r") as file :
print "In meddival : \n",file.read()
As you can see, I am getting the index of the beginning of the ending word.
Then, I use join to insert the appending text between happy and ending.
Note: You're adding to your file another line with the changes you've made. To overwrite the old line, replace the a with w in the with open("backup.txt", "ab")...
There are more ways for doing that
You can split the string to words, find the index of the 'ending' word and insert the 'appending text' before it.
text_list = filedata.split()
ix = text_list.index('ending')
text_list.insert(ix, 'appending text')
new_str = ' '.join(text_list)
You can also do this one:
word = 'happy'
# Cut the text at every occurrence of the marker word; split() removes the
# marker itself, so it is put back as part of the separator below.
text_parts = filedata.split(word)
# str.join takes exactly ONE iterable argument, so the pieces go in a list
# (passing them as two separate arguments raises TypeError).
appending_text = ' '.join([word, 'appending text'])
# Re-assemble the text; the new text is inserted after each occurrence of word.
new_str = appending_text.join(text_parts)
You need to split your file content
fo = open("backup.txt", "r")
filedata = fo.read().split()
with open("backup.txt", "ab") as file:
file.write(' '.join(filedata[filedata.index('happy'):]) + " appending text " + ' '.join(filedata[:filedata.index('ending')]))
with open("backup.txt", "r") as file :
print "In meddival : \n",file.read()

Adding filename to last column in csv using python

I have a folder full of .mpt files, each of them having the same data format.
I need to delete the first 57 lines from all files and append these files into one csv - output.csv.
I have that section already:
import glob
import os
dir_name = 'path name'
lines_to_ignore = 57
input_file_format = '*.mpt'
output_file_name = "output.csv"
def convert():
files = glob.glob(os.path.join(dir_name, input_file_format))
with open(os.path.join(dir_name, output_file_name), 'w') as out_file:
for f in files:
with open(f, 'r') as in_file:
content = in_file.readlines()
content = content[lines_to_ignore:]
for i in content:
out_file.write(i)
print("working")
convert()
print("done")
This part works ok.
How do I add the filename of each .mpt file as the last column of output.csv?
Thank you!
This is a quick 'n dirty solution.
In this loop the variable i is just a string (a line from a CSV file):
for i in content:
out_file.write(i)
So you just need to 1) strip off the end-of-line character(s) (either "\n" or "\r\n") and 2) append "," followed by the file name.
If you're using Unix, try:
for i in content:
i = i.rstrip("\n") + "," + output_file_name + "\n"
out_file.write(i)
This assumes that the field separator is a comma. Another option is:
for i in content:
    # rstrip() with no argument removes all trailing whitespace, including
    # the line ending.
    i = i.rstrip() + "," + output_file_name
    # print() appends the newline itself. The `file=` keyword is the Python 3
    # spelling of the Python 2 redirection form `print >>out_file, i`.
    print(i, file=out_file)
This will strip all white space from the end of i.
Add quotes if you need to quote the output file name:
# "..." stands for whichever rstrip argument you chose; note the "+" before
# the closing quote character, without which the line is a syntax error.
i = i.rstrip(...) + ',"' + output_file_name + '"'
The relevant part:
with open(f, 'r') as in_file:
content = in_file.readlines()
content = content[lines_to_ignore:]
for i in content:
new_line = ",".join([i.rstrip(), f]) + "\n" #<-- this is new
out_file.write(new_line) #<-- this is new

Python Regex Find Pattern, Remove Rest of String, Write new String to File

I have the codez:
import re
pattern = ','
firstNames = "dictionary//first_names.txt"
new_file = []
def openTxtFile(txtFile):
file = open (txtFile,"r")
data = file.read()
print (data)
file.close
def parseTextFile(textFile):
openTxtFile(firstNames)
for line in lines:
match = re.search(pattern, line)
if match:
new_line = match.group() + '\n'
print (new_line)
new_file.append(new_line)
with open(firstNames, 'w') as f:
f.seek(0)
f.writelines(new_file)
I am trying to take the original file, match each line on a ",", and write the string before the "," line by line to a new file. I'm having trouble putting all this together, thanks!
Use the csv module, since your original file is comma separated:
import csv
# newline='' lets the csv module do its own line-ending handling, as the csv
# documentation recommends for files passed to csv.reader.
with open('input_file.txt', newline='') as f:
    reader = csv.reader(f)
    # Keep the first field of every row. Blank lines are returned as empty
    # lists, so they are skipped to avoid an IndexError on row[0].
    names = [row[0] for row in reader if row]

# Write one name per line to the new file.
with open('new_file.txt', 'w') as f:
    for name in names:
        f.write('{0}\n'.format(name))

Problems with Python's file.write() method and string handling

The problem I am having at this point in time (being new to Python) is writing strings to a text file. The issue I'm experiencing is one where either the strings don't have linebreaks in between them or there is a linebreak after every character. Code to follow:
import string, io
FileName = input("Arb file name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
m_line = m_line.split(": ", 1)
if len(m_line) > 1:
del m_line[0]
#print(m_line)
MyString = str(m_line)
MyString = MyString.strip("'[]")
TempFile.write(MyString)
MyFile.close()
TempFile.close()
My input looks like this:
1 Jargon
2 Python
3 Yada Yada
4 Stuck
My output when I do this is:
JargonPythonYada YadaStuck
I then modify the source code to this:
import string, io
FileName = input("Arb File Name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
m_line = m_line.split(": ", 1)
if len(m_line) > 1:
del m_line[0]
#print(m_line)
MyString = str(m_line)
MyString = MyString.strip("'[]")
#print(MyString)
TempFile.write('\n'.join(MyString))
MyFile.close()
TempFile.close()
Same input and my output looks like this:
J
a
r
g
o
nP
y
t
h
o
nY
a
d
a
Y
a
d
aS
t
u
c
k
Ideally, I would like each of the words to appear on a separate line without the numbers in front of them.
Thanks,
MarleyH
You have to write the '\n' after each line, since you're stripping the original '\n';
Your idea of using '\n'.join() doesn't work because it will use \n to join the string, inserting it between each char of the string. You need a single \n after each name, instead.
import string, io
FileName = input("Arb file name (.txt): ")
# Both files are closed automatically when the with block ends.
with open(FileName, 'r') as MyFile:
    with open('TempFile.txt', 'w', encoding='UTF-8') as TempFile:
        for line in MyFile:
            # Take the LAST piece of the split: it is the text after ": "
            # when the separator is present, and the whole stripped line when
            # it is not. Indexing [1] would raise IndexError in that case.
            value = line.strip().split(": ", 1)[-1]
            # write() adds no newline of its own, so append one explicitly.
            TempFile.write(value + '\n')
fileName = input("Arb file name (.txt): ")
tempName = 'TempFile.txt'
with open(fileName) as inf, open(tempName, 'w', encoding='UTF-8') as outf:
for line in inf:
line = line.strip().split(": ", 1)[-1]
#print(line)
outf.write(line + '\n')
Problems:
the result of str.split() is a list (this is why, when you cast it to str, you get ['my item']).
write does not add a newline; if you want one, you have to add it explicitly.

Categories

Resources