Python: Filter in File I/O - python

I'm trying to take out the strings "a" and "b" from a document. Here's what I am doing but it's not working because I can't use replace in a list.
def filter_ab(filename):
fileRef=open(filename)
file_list=fileRef.readlines()
filter="ab"
for k in file_list:
for j in k:
if j in filter:
file_list=file_list.replace(j,"")

Can you use something like this:
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
for line in f1:
f2.write(line.replace('a', '').replace('b', ''))
f1.close()
f2.close()

Just use this:
def filter_ab(filename):
lines = []
with open(filename, "r") as fh:
for line in fh.readlines():
line = line.replace("a", "")
line = line.replace("b", "")
lines.append(line)
with open(filename, "w") as fh:
for line in lines:
fh.write(line)

str.translate is actually quite convenient for something like this
with open('file1.txt', 'r') as f1, open('file2.txt', 'w') as f2:
for line in f1:
f2.write(line.translate(None, 'ab'))

Related

How do you replace a specific line of a file with python?

line_row = -1
file = open(file_path, 'r')
for number_of_lines in file:
line_row = line_row + 1
if '1234' in number_of_lines:
lines = file.readlines()
line = lines[line_row]
print(lines)
lines[line_row] = 'hello'
file = open(file_path, "w")
file.writelines(lines)
file.close()
When I run this, it will delete everything that is before the nth line. I want it to replace only the nth line. can you help me?
try this, using enumerate
with open(file_path, 'r') as f:
lines = f.readlines()
for i, line in enumerate(lines):
if "some text" in line:
lines[i] = "updated text"
with open(file_path, "w") as f:
f.writelines(lines)

I need to search a string in data file. but the string is written in another file

I have key words to be search in one file let say abc.txt and in another file I have my data, def.txt.
I want a code in python to find key words written in abc.txt, in def.txt and if present, print those line in a new file.
Thank you.
I tried writing a code but it didn't work.
following is the code I write.
f = open('/home/vivek/Documents/abc.txt')
f1 = open('output.txt', 'a')
f2 = open('/home/vivek/Documents/def.txt', 'r')
# doIHaveToCopyTheLine=False
for line in f.readlines():
if f2 in line:
f1.write(line)
f1.close()
f.close()
f2.close()
Load the keywords into a list then you can check the other file line-by-line, and write to outfile as you find keywords in the line.
with open('/path/to/keywords.txt') as f:
keywords = set(line.strip() for line in f) # assuming words are separated by line
with open('/path/to/search_me.txt') as f, open('/path/to/outfile.txt', 'w') as outfile:
for line in f:
if any(kw in line for kw in keywords):
outfile.write(line)
You should record all the words in abc.txt use a set and then search them in def.txt
word_set = set()
with open('/home/vivek/Documents/abc.txt') as f:
for line in f:
word_set.add(line.strip())
f1 = open('output.txt', 'a')
with open('/home/vivek/Documents/def.txt') as f:
for line in f:
find = False
for word in word_set:
if word in line:
find = True
break
if find:
f1.write(line)
f1.close()
You can try this code:
with open("keyword.txt", "r") as keyword_file:
keywords = keyword_file.read().strip()
keywords = keywords.split()
with open("data.txt", "r") as data_file, open("output.txt", "w") as output_file:
for line in data_file.readlines():
line = line.strip()
for word in keywords:
if line.find(word) != -1:
print line
output_file.writelines(line + '\n')
break
In addition to sytech's answer you may try this:
with open('def.txt') as kw_obj, open('abc.txt') as in_obj:
keywords = set(kw_obj.read().split())
in_lines = in_obj.readlines()
match_lines = [line for keyword in keywords for line in in_lines if keyword in line]
if match_lines:
with open('out.txt', 'w') as out:
out.write(''.join(match_lines))

Need to delete a specific word from a specific line in Python

My textfile is like this:
"A rainbow has seven colors.
A rainbow rises just after the rainfall."
I want the word "rainbow" to be deleted from the 1st line but not from the second line and in the new file all the lines to be printed as it is without the rainbow in the first line. With a code like this:
infile = "old.txt"
outfile = "new.txt"
delete_word = ["rainbow"]
fin = open(infile)
fout = open(outfile, "w+")
for line in fin:
for word in delete_word:
if 'has' in line:
line = line.replace(word, "")
fout.write(line)
fin.close()
fout.close()
I am getting only the first line printed without 'rainbow'.
infile = "old.txt"
outfile = "new.txt"
delete_word = "rainbow"
with open(infile, "r") as fin, open(outfile, "a") as fout:
for line in fin:
for word == delete_word:
line = line.replace(word, "")
fout.write(line)
Try:
infile = "old.txt"
outfile = "new.txt"
delete_word = ["rainbow"]
lines = [1] #List of lines you want to delete the word from
fin = open(infile)
fout = open(outfile, "w+")
line_count = 0
for line in fin:
line_count += 1
for word in line.split():
if word in delete_word and line_count in lines:
line = line.replace(word, "")
fout.write(line)
fin.close()
fout.close()

find and replace whole line python

I have a file like this..
xxxxxxxxxxxxxxx
xxxxxxxxxxxxxxx
xxxxxxxxxxxxxxx
a b c invalid #seperated by tab
xxxxxxxxxxxxxxx
xxxxxxxxxxxxxxx
I need to replace a b c invalid to a b reviewed rd # separated by tab
Basically any line that ends with invalid, I need to replace that line with reviewed rd // separated by tab but I have to keep the first and second words on that line (only replace 3rd and 4th).
I have started doing something like this, but this won't exactly do what I want.
f1 = open('fileInput', 'r')
f2 = open('fileInput'+".tmp", 'w')
for line in f1:
f2.write(line.replace('invalid', ' reviewed'+\t+'rd'))
f1.close()
f2.close()
regex can be an option but I'm not that good with it yet. Can someone help.
P.S. a,b and c's are variables.. I can't do an exact search on 'a','b','c'.
f1 = open('fileInput', 'r')
f2 = open('fileInput+".tmp"', 'w')
for line in f1:
if line[:-1].endswith("invalid"):
f2.write("\t".join(line.split("\t")[:2] + ["reviewed", "rd"]) + "\n")
else:
f2.write(line)
f1.close()
f2.close()
import re
pattern = re.compile(r'\t\S+\tinvalid$')
with open('data') as fin:
with open('output', 'w') as fout:
for line in fin:
fout.write(pattern.sub('\treviewd\trd', line))
with open('input.tab') as fin, open('output.tab', 'wb') as fout:
tabin = csv.reader(fin, delimiter='\t')
tabout = csv.writer(fout, delimiter='\t')
for row in tabin:
if len(tabin) != 4:
continue # or raise - whatever
if row[-1] == 'invalid':
tabout.writerow(row[:2] + ['reviewed', 'rd'])

Return lines from file in order of a list

I've tried to put together a solution from similar questions but have failed miserably. I just don't know enough about Python yet :(
I have an inputlist containing elements in a particular order ex: ["GRE", "KIN", "ERD", "KIN"]
I have a datafile containing the elements, plus other data ex:
"ERD","Data","Data"...
"KIN","Data","Data"...
"FAC","Data","Data"...
"GRE","Data","Data"...
I need to create an outputlist that contains the lines from the datafile in the order they appear in the inputlist.
The code below returns the outputlist in the order the appear in the datafile, which is not the intended behavior... :-\
with open(inputfile, 'r') as f:
names = [line.strip() for line in f]
outputlist = []
with open(datafile, 'r') as f:
for line in f:
name = line.split(',')[0]
if name[1:-1] in names:
outputlist.append(line)
output = open(outputfile, 'w')
output.writelines(outputlist)
How can I have it return the list in the proper order? Thanks in advance for your help :-)
Edit
Thank's to Oscar, this is the solution I implemented:
datafile = 'C:\\testing\\bldglist.txt'
inputfile = 'C:\\testing\\inputlist.txt'
outputfile = "C:\\testing\\output.txt"
with open(inputfile, 'r') as f:
inputlist = [line.strip() for line in f]
def outputList(inputlist, datafile, outputfile):
d = {}
with open(datafile, 'r') as f:
for line in f:
line = line.strip()
key = line.split(',')[0]
d[key] = line
with open(outputfile, 'w') as f:
f.write('"Abbrev","Xcoord","Ycoord"\n')
for key in inputlist:
f.write(d[key] + '\n')
outputList(inputlist, datafile, outputfile)
This is the easy solution. It reads the entire input file into memory as a dictionary of first letter: line. It's then easy to write the lines in the write order.
If the file is very large (gigabytes) or you don't have a lot of memory, there are other ways. But they're not nearly as nice.
I haven't tested this.
import csv
data = {}
with open(datafile) as f:
for line in csv.reader(f):
data[line[0]] = line
with open(outputfile, "w") as f:
f = csv.writer(f)
for entry in inputlist:
f.writerow(data[entry])
Assuming a data file with this format:
"ERD","Data","Data"...
"KIN","Data","Data"...
"FAC","Data","Data"...
"GRE","Data","Data"...
Try this solution:
def outputList(inputlist, datafile, outputfile):
d = {}
with open(datafile, 'r') as f:
for line in f:
line = line.lstrip()
key = line.split(',')[0]
d[key] = line
with open(outputfile, 'w') as f:
for key in inputlist:
f.write(d[key])
Use it like this:
outputList(['"GRE"', '"KIN"', '"ERD"', '"KIN"'],
'/path/to/datafile',
'/path/to/outputfile')
It will write the output file with the expected order.
1) Create a list with the elements you wish to map to. In this case, ["GRE", "KIN", "ERD", "FAC"]
2) Read the file and map (using a dictionary of lists) the first elements.
3) Output to a file.
import csv
out_index=["GRE", "KIN", "ERD", "FAC"]
d={}
with open('/Users/andrew/bin/SO/abcd.txt','r') as fr:
for e in csv.reader(fr):
if e[0] not in d: d[e[0]]=[]
for ea in e[1:]:
d[e[0]].append(ea)
for i in out_index:
print i,":"
for e in d[i]:
print ' ',e
Given this example data:
"ERD","Data-a1","Data-a2"
"KIN","Data-b1","Data-b2"
"FAC","Data-c1","Data-c2"
"GRE","Data-d1","Data-d2"
"ERD","Data-a3","Data-a4"
"GRE","Data-d3","Data-d4"
Output:
GRE :
Data-d1
Data-d2
Data-d3
Data-d4
KIN :
Data-b1
Data-b2
ERD :
Data-a1
Data-a2
Data-a3
Data-a4
FAC :
Data-c1
Data-c2
Done!

Categories

Resources