Problems with Python's file.write() method and string handling - python

The problem I am having at this point in time (being new to Python) is writing strings to a text file. The issue I'm experiencing is one where either the strings don't have linebreaks inbetween them or there is a linebreak after every character. Code to follow:
import string, io
FileName = input("Arb file name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
m_line = m_line.split(": ", 1)
if len(m_line) > 1:
del m_line[0]
#print(m_line)
MyString = str(m_line)
MyString = MyString.strip("'[]")
TempFile.write(MyString)
MyFile.close()
TempFile.close()
My input looks like this:
1 Jargon
2 Python
3 Yada Yada
4 Stuck
My output when I do this is:
JargonPythonYada YadaStuck
I then modify the source code to this:
import string, io
FileName = input("Arb File Name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
m_line = m_line.split(": ", 1)
if len(m_line) > 1:
del m_line[0]
#print(m_line)
MyString = str(m_line)
MyString = MyString.strip("'[]")
#print(MyString)
TempFile.write('\n'.join(MyString))
MyFile.close()
TempFile.close()
Same input and my output looks like this:
J
a
r
g
o
nP
y
t
h
o
nY
a
d
a
Y
a
d
aS
t
u
c
k
Ideally, I would like each of the words to appear on a seperate line without the numbers in front of them.
Thanks,
MarleyH

You have to write the '\n' after each line, since you're stripping the original '\n';
Your idea of using '\n'.join() doesn't work because it will use\n to join the string, inserting it between each char of the string. You need a single \n after each name, instead.
import string, io
FileName = input("Arb file name (.txt): ")
with open(FileName, 'r') as MyFile:
with open('TempFile.txt', 'w', encoding='UTF-8') as TempFile:
for line in MyFile:
line = line.strip().split(": ", 1)
TempFile.write(line[1] + '\n')

fileName = input("Arb file name (.txt): ")
tempName = 'TempFile.txt'
with open(fileName) as inf, open(tempName, 'w', encoding='UTF-8') as outf:
for line in inf:
line = line.strip().split(": ", 1)[-1]
#print(line)
outf.write(line + '\n')
Problems:
the result of str.split() is a list (this is why, when you cast it to str, you get ['my item']).
write does not add a newline; if you want one, you have to add it explicitly.

Related

Replace space with underscore using python

I need some help, I have a a txt file with spaces between words, I want to replace the space with underscore.
fileHandler = open('nog_rename_update.txt')
for eachline in fileHandler:
new_name = fileHandler.replace(" ","_")
print(new_name)
That's my code but it keeps throwing error messages
new_name = fileHandler.replace(" ","_")
AttributeError: '_io.TextIOWrapper' object has no attribute 'replace'
example files that I want to remove space and add underscore
Here's a generic approach that should work for you:
teststring = 'hello world this is just a test. don\'t mind me 123.'
# replace multiple spaces with one space
while ' ' in teststring:
teststring = teststring.replace(' ', ' ')
# replace space with underscore (_)
teststring = teststring.replace(' ', '_')
print(teststring)
assert teststring == "hello_world_this_is_just_a_test._don't_mind_me_123." # True
Using a file example:
fname = 'mah_file.txt'
with open(fname) as in_file:
contents = in_file.read()
while ' ' in contents:
contents = contents.replace(' ', ' ')
# write updated contents back to file
with open(fname, 'w') as out_file:
out_file.write(contents.replace(' ', '_'))
This opens the files, reads line by line, splits the line into two parts, and combines the two parts with an underscore. I stored it in a list that you can use to do your next step.
with open('nog_rename_update.txt') as f:
new_list = []
for line in f:
# split the line
split = line.split()
new_list.append(split[0]+"_"+split[1])
# print the list to see results
print(new_list)
#
# add code to loop through the new list and to write to a file
#
Try out this
fileHandler = open('nog_rename_update.txt').read()
new_name = fileHandler.replace(" ", "_")
print(new_name)
f = open("test.txt", "r")
text=f.read()
f.close()
f=open("testfile.txt", "w+")
text2=''
if ' ' in text:
text2 = text.replace(' ' , '_')
print(text2)
f.write(text2)
f.close()
Here is another, less verbose solution. Simply use re.sub:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
with open(file_name, "r") as file:
for line in file:
print(re.sub("( )+", "_", line), end="")
And if you want to replace the spaces in your text file:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
lines = []
with open(file_name, "r") as file:
lines = [
re.sub("( )+", "_", line) for line in file.readlines()
]
with open(file_name, "w") as file:
file.writelines(lines)
Or use fileinput:
import re
import fileinput
file_name = r"D:\projects\playground\python\data\test.txt"
with fileinput.FileInput(file_name, inplace=True, backup=".bak") as file:
for line in file:
print(re.sub("( )+", "_", line), end="")

Remove the last empty line from each text file

I have many text files, and each of them has a empty line at the end. My scripts did not seem to remove them. Can anyone help please?
# python 2.7
import os
import sys
import re
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
all_file = open(filepath,'r')
lines = all_file.readlines()
output = 'F:/WF/new/' + filename
# Read in each row and parse out components
for line in lines:
# Weed out blank lines
line = filter(lambda x: not x.isspace(), lines)
# Write to the new directory
f = open(output,'w')
f.writelines(line)
f.close()
You can use Python's rstrip() function to do this as follows:
filename = "test.txt"
with open(filename) as f_input:
data = f_input.read().rstrip('\n')
with open(filename, 'w') as f_output:
f_output.write(data)
This will remove all empty lines from the end of the file. It will not change the file if there are no empty lines.
you can remove last empty line by using:
with open(filepath, 'r') as f:
data = f.read()
with open(output, 'w') as w:
w.write(data[:-1])
You can try this without using the re module:
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
f = open(filepath).readlines()
new_file = open(filepath, 'w')
new_file.write('')
for i in f[:-1]:
new_file.write(i)
new_file.close()
For each filepath, the code opens the file, reads in its contents line by line, then writes over the file, and lastly writes the contents of f to the file, except for the last element in f, which is the empty line.
You can remove the last blank line by the following command. This worked for me:
file = open(file_path_src,'r')
lines = file.read()
with open(file_path_dst,'w') as f:
for indx, line in enumerate(lines):
f.write(line)
if indx != len(lines) - 1:
f.write('\n')
i think this should work fine
new_file.write(f[:-1])

how to read the content of .txt file using python?

output_filename = r"C:\Users\guage\Output.txt"
RRA:
GREQ-299684_6j
GREQ-299684_6k
CZM:
V-GREQ-299684_6k
V-GREQ-299524_9
F_65624_1
R-GREQ-299680_5
DUN:
FB_71125_1
FR:
VQ-299659_18
VR-GREQ-299659_19
VEQ-299659_28
VR-GREQ-299659_31
VR-GREQ-299659_32
VEQ-299576_1
GED:
VEQ-299622_2
VR-GREQ-299618_13
VR-GREQ-299559_1
VR-GREQ-299524_14
FB_65624_1
VR-GREQ-299645_1
MNT:
FB_71125_1
FB_71125_2
VR-534_4
The above is the content of the the .txt file. how can I read it separately the content of it. for example -
RRA:VR-GREQ-299684_6j VR-GREQ-299684_6k VR-GREQ-299606_3 VR-GREQ-299606_4 VR-GREQ-299606_5 VR-GREQ-299606_7
and save it in a variable or something similar to it. Later I want to read CZM separately and so on. I did as below.
with open(output_filename, 'r') as f:
excel = f.read()
But how to read it separately ? can someone tell me how to do it ?
Something like this:
def read_file_with_custom_record_separator(file_path, delimiter='\n'):
fh = open(file_path)
data = ""
for line in fh:
if line.strip().endswith(delimiter) and data != "":
print "VARIABLE:\n<", data, ">\n"
data = line
else:
data += line
print "LAST VARIABLE:\n<", data, ">\n"
And then:
read_file_with_custom_record_separator("input.txt", ":")
You can make use of the file text : as indicator to create a new file like this:
savefilename = ""
with open(filename, 'r') as f:
for line in f:
line = line.strip() # get rid of the unnecessary white chars
lastchar = line[-1:] # get the last char
if lastchar == ":": # if the last char is ":"
savefilename = line[0:-1] # get file name from line (except the ":")
sf = open(savefilename + ".txt", 'w') # create a new file
else:
sf.write(line + "\n") # write the data to the opened file
Then you should get collection of files:
RRA.txt
CZM.txt
DUN.txt
# etc
which contains all the appropriate data:
RRA.txt
VR-GREQ-299684_6j
VR-GREQ-299684_6k
VR-GREQ-299606_3
VR-GREQ-299606_4
VR-GREQ-299606_5
VR-GREQ-299606_7
CZM.txt
VR-GREQ-299684_6k
VR-GREQ-299606_6
VR-GREQ-299606_8
VR-GREQ-299640_1
VR-GREQ-299640_5
VR-GREQ-299524_9
FB_65624_1
VR-GREQ-299680_5
DUN.txt
FB_71125_1
# and so on
You can replace the sf = open and the sf.write which whatever way you feel best to separate the data. Here, I use files...
You can iterate over the file and use the lines and indices to your advantage; something like this:
with open(output_filename, 'r') as f:
for index, line in enumerate(f):
# here you have access to each line and its index
# so you can save any number of lines you wish
What about reading it into a list, then process its element as you prefer
>>> f = open('myfile.txt', 'r').readlines()
>>> len(f)
46
>>> f[0]
RRA:
>>> f[-1]
VR-GREQ-299534_4
>>> f[:3]
['RRA:\n', 'VR-GREQ-299684_6j \n', 'VR-GREQ-299684_6k \n']
>>>
>>> [l for l in f if l.startswith('FB_')]
['FB_65624_1 \n', 'FB_71125_1 \n', 'FB_69228_1 \n', 'FB_65624_1 \n', 'FB_71125_1 \n', 'FB_71125_2 \n']
>>>

Reading comma separated values from text file in python

I have a text file consisting of 100 records like
fname,lname,subj1,marks1,subj2,marks2,subj3,marks3.
I need to extract and print lname and marks1+marks2+marks3 in python. How do I do that?
I am a beginner in python.
Please help
When I used split, i got an error saying
TypeError: Can't convert 'type' object to str implicitly.
The code was
import sys
file_name = sys.argv[1]
file = open(file_name, 'r')
for line in file:
fname = str.split(str=",", num=line.count(str))
print fname
If you want to do it that way, you were close. Is this what you were trying?
file = open(file_name, 'r')
for line in file.readlines():
fname = line.rstrip().split(',') #using rstrip to remove the \n
print fname
Note: its not a tested code. but it tries to solve your problem. Please give it a try
import csv
with open(file_name, 'rb') as csvfile:
marksReader = csv.reader(csvfile)
for row in marksReader:
if len(row) < 8: # 8 is the number of columns in your file.
# row has some missing columns or empty
continue
# Unpack columns of row; you can also do like fname = row[0] and lname = row[1] and so on ...
(fname,lname,subj1,marks1,subj2,marks2,subj3,marks3) = *row
# you can use float in place of int if marks contains decimals
totalMarks = int(marks1) + int(marks2) + int(marks3)
print '%s %s scored: %s'%(fname, lname, totalMarks)
print 'End.'
"""
sample file content
poohpool#signet.com; meixin_kok#hotmail.com; ngai_nicole#hotmail.com; isabelle_gal#hotmail.com; michelle-878#hotmail.com;
valerietan98#gmail.com; remuskan#hotmail.com; genevieve.goh#hotmail.com; poonzheng5798#yahoo.com; burgergirl96#hotmail.com;
insyirah_powergals#hotmail.com; little_princess-angel#hotmail.com; ifah_duff#hotmail.com; tweety_butt#hotmail.com;
choco_ela#hotmail.com; princessdyanah#hotmail.com;
"""
import pandas as pd
file = open('emaildump.txt', 'r')
for line in file.readlines():
fname = line.split(';') #using split to form a list
#print(fname)
df1 = pd.DataFrame(fname,columns=['Email'])
print(df1)

Python Regex Find Pattern, Remove Rest of String, Write new String to File

I have the codez:
import re
pattern = ','
firstNames = "dictionary//first_names.txt"
new_file = []
def openTxtFile(txtFile):
file = open (txtFile,"r")
data = file.read()
print (data)
file.close
def parseTextFile(textFile):
openTxtFile(firstNames)
for line in lines:
match = re.search(pattern, line)
if match:
new_line = match.group() + '\n'
print (new_line)
new_file.append(new_line)
with open(firstNames, 'w') as f:
f.seek(0)
f.writelines(new_file)
I am trying to take the original file, match it on a "," and return line by line to a New file the string before the "," I'm having trouble putting all this together, thanks!
Use the csv module, since your original file is comma separated:
import csv
with open('input_file.txt') as f:
reader = csv.reader(f)
names = [line[0] for line in reader]
with open('new_file.txt','w') as f:
for name in names:
f.write('{0}\n'.format(name))

Categories

Resources