isolate and work with certain lines in text file - python

I'm using python. I've got a text file that has the following format:
###########
text lines
###########
text lines
###########
text lines
###########
I want to run an algorithm for each segment found between two "######" lines.
How do I reference the text lines between two "######" lines?
The number of lines between two "######" lines is not fixed.
thanks

You can do this quite easily with split():
# Split the whole file on the delimiter line and print the non-blank segments.
with open('myfile.txt') as f:
    data = f.read().split('###########')
# Filter on the stripped text: a piece holding only whitespace (such as the
# lone newline left after the final delimiter) is truthy and would otherwise
# show up in the result as ''.
print([txt.strip() for txt in data if txt.strip()])

Would this work?
# Print every line that does not contain a '######' delimiter.
# The with-block closes the file even if processing a line raises.
with open("textfile.txt") as f:
    for line in f:
        if '######' not in line:
            print(line)  # evaluate and process the line here

Or how about this:
# Print the file's lines with every exact '###########' delimiter line removed.
with open("f.txt") as f:
    kept = [x for x in f.read().split("\n") if x != '###########']
# Re-join with newlines; joining on '' would run the kept lines together.
print('\n'.join(kept))

file.txt:
###########
line1
#
line2
######
line3
line4
line5
##########
line6
line7
code:
import re
FILE_PATH = "file.txt"

# blocks: finished groups of text lines; lines: the group being collected.
blocks = []
lines = []
with open(FILE_PATH) as handle:
    for raw in handle:
        # A delimiter is a line made only of '#' (plus its optional newline).
        if re.fullmatch(r"#+(\n){0,1}", raw) is None:
            # Ordinary text line: collect it without its trailing newline
            # (drop the rstrip to keep the newline).
            lines.append(raw.rstrip("\n"))
            continue
        # Delimiter line: close the current group if it has any content.
        if lines:
            blocks.append(lines)
            lines = []

# Store the last group when the file does not end with a delimiter line.
if lines:
    blocks.append(lines)

print(blocks)  # [['line1'], ['line2'], ['line3', 'line4', 'line5'], ['line6', 'line7']]

Considering "######" doesn't contain "\n":
def get_chunks(filename):
    """Return every second '######'-separated segment of *filename*.

    The file's text is split on six consecutive '#' characters. Segments are
    taken from index 2 in steps of 2, and the final segment is never included.
    """
    with open(filename) as f:
        pieces = f.read().split('#' * 6)
    return pieces[2:-1:2]
else:
import re
def get_chunks(filename):
    """Return selected segments of *filename* split on runs of six '#'.

    The separator is six '#' characters, each optionally followed by a
    newline. Because the pattern contains a capturing group, re.split also
    returns the captured text between segments; the slice then takes every
    fourth item starting at index 4 and never includes the final item.
    """
    with open(filename) as f:
        text = f.read()
    parts = re.split(r'(#\n?){6}', text)
    return parts[4:-1:4]

Related

Extract blocks of text that starts with "Start Text" until it encounters another "Start Text"

Here is the code I have to extract blocks of text of a file that starts with "Start Text" until it encounters another "Start Text".
# Code as posted in the question (it reproduces the problem described below):
# the buffer is flushed as soon as a "Start Text" line is seen, i.e. after
# that header has already been appended to the previous block's lines.
with open('temp.txt', "r") as f:
    buff = []
    i = 1
    for line in f:
        if line.strip():  # skips the empty lines
            buff.append(line)
        if line.startswith("Start Text"):
            output = open('file' + '%d.txt' % i, 'w')
            output.write(''.join(buff))
            output.close()
            i += 1
            buff = []  # buffer reset
INPUT: "temp.txt" has the following structure:
Start Text - ABCD
line1
line2
line3
Start Text - EFG
line4
Start Text - P3456
line5
line6
DESIRED OUTPUT: I am trying to create multiple text files below with extracted blocks of texts.
file1.txt
Start Text - ABCD
line1
line2
line3
file2.txt
Start Text - EFG
line4
file3.txt
Start Text - P3456
line5
line6
UNDESIRED OUTPUT (What the code produces)
file1.txt
Start Text - ABCD
file2.txt
Start Text - EFG
line1
line2
line3
file3.txt
line4
Start Text - P3456
Here is the issue I am facing. The code creates three files but does not write “Start Text” lines into their respective text blocks. I am not sure what I am missing. I will appreciate any pointers.
When the code sees "Start Text" in a line, it writes that line and all the previous lines to the output file.
This explains why the first output file contains only the header -- that is the first line in the input file, so obviously there aren't any previous lines.
It seems like what you really want is for the header and the following lines to be written.
I've updated your code to not write a file after seeing the very first header, and also to write a file after the input file is exhausted.
def _write_block(index, block_lines):
    """Write the collected lines of one block to 'file<index>.txt'."""
    # The with-block closes the output file even if the write fails.
    with open('file' + '%d.txt' % index, 'w') as output:
        output.write(''.join(block_lines))


buff = []
i = 1
with open('temp.txt', "r") as f:
    for line in f:
        if line.startswith("Start Text"):
            # write a file only if buff isn't empty. (if it is
            # empty, this must be the very first header, so we
            # don't need to write an output file yet)
            if buff:
                _write_block(i, buff)
                i += 1
                buff = []  # buffer reset
        # The header is appended after the flush, so it opens the new block.
        if line.strip():
            buff.append(line)
# write the final section
if buff:
    _write_block(i, buff)
You're almost there. Notice that when startswith() matches, you write out the buffer and then clear it. When control returns to the loop, the current line has not been stored anywhere, so it is lost. Try adding it to the new buffer for the next round of lines.
...
buff = [] # buffer reset
buff.append(line) # add 'Start Text' line to next buffer
Note that your code currently never writes out the final block of text. Consider writing out the last buffer as well (i.e., when no lines are left).
[EDIT after question edit]
As the other answer replies, the check for startswith() causes a write to file after the line is found. However, the line has already been added to the buffer. Try switching the statements, to first detect the startswith, then write everything out if it was the case (if the buffer is not empty!), then continue by adding the line to the buffer.
(the note still stands)
def parse_file(fname):
    """Yield each "Start Text" block of *fname* as one string.

    A block starts at a line beginning with "Start Text" and runs until the
    next such line. Blank lines are skipped. Any non-blank lines before the
    first header form a block of their own.
    """
    with open(fname, "r") as f:
        buff = []
        for line in f:
            if line.strip():  # skips the empty lines
                # A new header closes the block collected so far.
                if line.startswith("Start Text") and buff:
                    yield ''.join(buff)
                    buff = []
                buff.append(line)
        # Emit the last block: nothing follows it to trigger the yield above.
        if buff:
            yield ''.join(buff)
# Write each block produced by parse_file to its own file, numbered from 1.
for number, block in enumerate(parse_file('sample.txt'), 1):
    with open(f'file{number}.txt', 'w') as out:
        out.write(block)
I don't think you need to build a buffer. You can just process line by line as you iterate over the input file.
class MyTempFile():
    """Context manager that streams written text into temp1.txt, temp2.txt, ...

    Call next_file() to switch to the next numbered file; write() sends
    non-blank data to the file that is currently open and is a no-op until
    the first next_file() call.
    """

    def __init__(self):
        self.fd = None       # currently open output file, or None
        self.newfile = None  # generator that rotates output files, created lazily

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Close whatever is still open and forget the rotation generator.
        self.closefd()
        self.newfile = None

    def closefd(self):
        """Close the current output file, if there is one."""
        if self.fd is None:
            return
        self.fd.close()
        self.fd = None

    def newfile_impl(self):
        """Generator: each step closes the current file and opens the next one."""
        i = 1
        while True:
            self.closefd()
            self.fd = open(f'temp{i}.txt', 'w')
            yield
            i += 1

    def write(self, data):
        """Write *data* to the current file unless it is blank or no file is open."""
        if self.fd is None or not data.strip():
            return
        self.fd.write(data)

    def next_file(self):
        """Advance to the next numbered output file."""
        if self.newfile is None:
            self.newfile = self.newfile_impl()
        next(self.newfile)
# Stream temp.txt into numbered files, starting a new file at every header.
with MyTempFile() as mtf, open('temp.txt') as infile:
    for row in infile:
        starts_block = row.startswith('Start Text')
        if starts_block:
            mtf.next_file()
        mtf.write(row)

How to read line in text file and replace the whole line in Python?

I want to replace a whole line in a text document, if there is a line that begins with "truck_placement"
Can I remove the whole line when it contains "truck_placement" and then write the new text?
I tried it, but it only inserts the new text and doesn't replace the whole line.
That's the current code:
cordget = coordinatesentry.get()
# Read the whole save file. The with-block guarantees the file is closed;
# a bare `fin.close` (no parentheses) only names the method and never calls it.
with open(save_file, "r") as fin:
    filedata = fin.read()
# NOTE: this inserts cordget right after the prefix and leaves the rest of
# the original line in place -- the behaviour the question is asking about.
newdata = filedata.replace("truck_placement: ", "truck_placement: " + cordget)
with open(save_file, "w") as fin:
    fin.write(newdata)
Your best bet is to append all the lines without "truck_placement" to a new file. This can be done with the following code:
# Copy every line that does not mention "truck_placement" into a new file.
# Mode "w" (not "a") so that re-running does not append a second copy, and
# the with-block closes both files even if an error occurs.
with open("truck.txt", "r") as original, open("new_truck.txt", "w") as new:
    for line in original:
        if "truck_placement" not in line:
            new.write(line)
You can either read the whole file into one string and replace the line using regular expression:
import re
cordget = "(value, one) (value, two)"
save_file = "sample.txt"
with open(save_file, "r") as f:
    data = f.read()
# Match from "truck_placement: " to the end of that line. '.' does not match
# a newline, so the match stops at the line end and a final line without a
# trailing newline is replaced too.
# The replacement is a function so cordget is inserted literally; in a
# template string any backslash in cordget would be read as an escape or a
# group reference.
data = re.sub(r'(truck_placement: ).*', lambda m: m.group(1) + cordget, data)
with open(save_file, "w") as f:
    # data is a single string, so write() is the matching call.
    f.write(data)
Or you could read the file as a list of all lines and overwrite the specific line:
cordget = "(value, one) (value, two)"
save_file = "sample.txt"
with open(save_file, "r") as f:
    data = f.readlines()
# Swap every line mentioning "truck_placement" for a freshly built one and
# keep all other lines untouched.
data = [
    f"truck_placement: {cordget}\n" if "truck_placement" in entry else entry
    for entry in data
]
with open(save_file, "w") as f:
    f.writelines(data)

Open a JS file and edit a line with Python

I'm trying to modify a specific line in a js file using python.
Here's the js file :
...
hide: [""]
...
Here's my python code :
# Load config.js, then rewrite it with every 'hide' replaced by 'something'.
with open('./config.js', 'r') as f:
    lines = f.readlines()
with open('./config.js', 'w') as f:
    f.writelines(text.replace('hide', 'something') for text in lines)
So it works but this is not what I want to do.
I want to write 'something' between the brackets and not replace 'hide'.
So I don't know how to do it: Do I have to replace the whole line or can I just add a word between the brackets?
Thanks
If you want to replace text at this exact line you could just do:
new_value = 'Something New'
with open('./config.js','r') as f:
    lines = f.readlines()
with open('./config.js','w') as f:
    for line in lines:
        if line.startswith('hide'):
            # Lines from readlines() keep their newline; the replacement must
            # end with one too, or the next line is glued onto this one.
            line = 'hide: ["{}"]\n'.format(new_value)
        f.write(line)
or alternatively in the conditional
if line.startswith('hide'):
line = line.replace('""', '"Something new"')
Here's a way to replace any value in the brackets of a hide line, even when the line starts with leading whitespace.
lines = '''\
first line
hide: [""]
hide: ["something"]
last line\
'''
new_value = 'new value'
# Swap whatever sits between [" and "] on each (possibly indented) hide line.
for line in lines.splitlines():
    if line.lstrip().startswith('hide'):
        head = line[:line.index('[') + 2]  # everything up to and including ["
        tail = line[line.index(']') - 1:]  # from the closing "] onward
        line = head + new_value + tail
    print(line)
Output:
first line
hide: ["new value"]
hide: ["new value"]
last line
You can use fileinput and replace it inplace:
import fileinput
import sys
def replaceAll(file, searchExp, replaceExp):
    """Replace every occurrence of *searchExp* with *replaceExp* in *file*, in place.

    Parameters:
        file: path of the file to rewrite (passed to fileinput.input).
        searchExp: literal text to look for.
        replaceExp: literal text to put in its place.
    """
    # With inplace=True, fileinput redirects sys.stdout into the file being
    # rewritten. The with-block closes the input and restores stdout even if
    # an exception interrupts the loop.
    with fileinput.input(file, inplace=True) as stream:
        for line in stream:
            # str.replace returns the line unchanged when there is no match.
            sys.stdout.write(line.replace(searchExp, replaceExp))
# Rewrite config.js in place, turning the empty hide list into ["something"].
replaceAll("config.js",'hide: [""]','hide: ["something"]')
Reference
If hide: [""] is not ambiguous, you could simply load the whole file, replace and write it back:
newline = 'Something new'
# Read the whole file, patch the empty hide list, and write the result back.
with open('./config.js','r') as f:
    txt = f.read()
replacement = 'hide: ["' + newline + '"]'
txt = txt.replace('hide: [""]', replacement)
with open('./config.js','w') as f:
    f.write(txt)
As long as you don't have "hide" anywhere else in the file, then you could just do
# Read and write the same file: './config.js' in both places (the read path
# previously pointed at '/config.js' in the filesystem root).
with open('./config.js','r') as f:
    lines = f.readlines()
with open('./config.js','w') as f:
    for line in lines:
        # The search text must include the colon to match the file's
        # `hide: [""]` entry; without it nothing was ever replaced.
        line = line.replace('hide: [""]', 'hide: ["something"]')
        f.write(line)
You can do this using re.sub()
import re
# Insert 'something' between every [" and "] pair in config.js.
with open('./config.js','r') as f:
    lines = f.readlines()
# Group 1 is the opening [" and group 2 the closing "]; the replacement
# re-emits both with the new text in between.
template = r'\1' + 'something' + r'\2'
with open('./config.js','w') as f:
    f.writelines(re.sub(r'(\[")("\])', template, row) for row in lines)
It works by searching for a regular expression, but forms a group out of what you want on the left ((\[")) and the right (("\])). You then concatenate these either side of the text you want to insert (in this example 'something').
The bounding ( ) makes a group which can be accessed in the replace with r'\1', then second group is r'\2'.

Clean the csv data using python

I tried to remove the unwanted characters / # http from each line
codes below:
import csv
# Code as posted in the question: copy in.csv to out.csv, applying at most
# one clean-up rule per line. A line matching no branch is not written at all.
with open('C:\\project\\in.csv','r') as input_file:
    with open('C:\\project\\out.csv','w') as output_file:
        for L in input_file:
            # NOTE: lines read by iterating a file keep their trailing
            # newline, so this test is only true for a last line that has
            # no newline after the "/".
            if L.endswith("/"):
                newL=L.replace("/","")
                output_file.write(newL)
            # NOTE: str.find returns -1 (truthy) when "#" is absent and 0
            # (falsy) only when "#" is the first character, so this branch
            # runs for almost every line that reaches it.
            elif L.find("#"):
                newL,sep,tail=L.partition("#")
                output_file.write(newL)
            # NOTE: only reached when the line starts with "#", so the
            # 'http:' test below cannot succeed.
            elif L.startswith('http:'):
                newL=L.replace('http:','https:')
                output_file.write(newL)
here is the mini example in.csv file for testing:
line1/
line2#sdgsgs
https://line3
http://line4
line5/
after make clean, I want it to be like :
line1
line2
https://line3
https://line4
line5
But the result is not what I want. Can someone give me a hand?
Many Thanks, Henry
In this version a line can contain all of the replacement chars:
#!/usr/bin/env python
import csv
# Cleaned lines, in input order.
Output = []
with open('C:\\project\\in.csv', 'r') as input_file:
    for line in input_file:
        line = line.strip()
        if line.endswith("/"):
            # Drop only the trailing slash; replace("/", "") would also
            # remove the slashes inside a URL such as "http://host/".
            line = line[:-1]
        if "#" in line:
            # Keep only the text before the first "#".
            line, sep, tail = line.partition("#")
        if line.startswith('http:'):
            # Rewrite just the leading scheme, not later occurrences.
            line = line.replace('http:', 'https:', 1)
        Output.append(line)
with open('C:\\project\\out.csv', 'w') as output_file:
    for output in Output:
        output_file.write("{}\n".format(output))
Will output:
line1
line2
https://line3
https://line4
line5

Match the last word and delete the entire line

Input.txt File
12626232 : Bookmarks
1321121:
126262
Here the part before the colon (e.g. 126262:) can be any text or digits, so basically I want to check whether the last character of a line is : (colon) and, if so, delete the entire line.
Output.txt File
12626232 : Bookmarks
My Code:
def function_example():
    """Rewrite 'input.txt', keeping only the lines with no ':' anywhere in them."""
    fn = 'input.txt'
    f = open(fn)
    # A line is dropped as soon as it contains a colon at any position.
    output = [row for row in f if ":" not in row]
    f.close()
    f = open(fn, 'w')
    f.writelines(output)
    f.close()
Problem: When I match on :, it removes every line that contains a colon, but I only want to remove a line when the colon is at the end of the line.
Any suggestion will be appreciated. Thanks.
I saw as following but not sure how to use it in here
a = "abc here we go:"
print a[:-1]
I believe with this you should be able to achieve what you want.
# Print every line of the file that does not end with a colon.
with open(fname) as f:
    for line in f:
        if not line.strip().endswith(':'):
            # print() in call form also runs on Python 3. The line's own
            # newline is removed first so the output is not double-spaced.
            print(line.rstrip('\n'))
Here fname is the variable pointing to the file location.
You were almost there with your function. You were checking if : appears anywhere in the line, when you need to check if the line ends with it:
def function_example():
    """Rewrite 'input.txt' in place, dropping every line that ends with ':'.

    Trailing whitespace (including the newline) is ignored when checking the
    last character. The with-blocks close the file even if reading or writing
    raises.
    """
    fn = 'input.txt'
    with open(fn) as f:
        # Only the end of the line is checked, not a colon anywhere in it.
        output = [line for line in f if not line.strip().endswith(":")]
    with open(fn, 'w') as f:
        f.writelines(output)
You could have also done if not line.strip()[-1:] == ':': (note [-1:] for the last character; [:-1] is everything except the last character), but endswith() is better suited for your use case.
Here is a compact way to do what you are doing above:
def function_example(infile, outfile, limiter=':'):
    ''' Filters all lines in :infile: that end in :limiter:
    and writes the remaining lines to :outfile:

    Trailing whitespace (including the newline) is ignored when checking
    how a line ends.
    '''
    # `in` is a reserved word and cannot be used as a variable name, so the
    # two handles are called src and dst.
    with open(infile) as src, open(outfile, 'w') as dst:
        for line in src:
            if not line.strip().endswith(limiter):
                dst.write(line)
The with statement creates a context and automatically closes files when the block ends.
To search if the last letter is : Do following
if line.strip().endswith(':'):
...Do Something...
You can use a regular expression
import re
# Matches a line whose last non-whitespace character is ':'.
# The `$` anchor is required: without it a colon anywhere in the line matches.
regex = re.compile(r':\s*$')
file_name = "path_to_file"
with open(file_name) as _file:
    lines = _file.readlines()
# Keep the lines that do NOT end with ':'; those are the ones to preserve.
new_lines = [line for line in lines if not regex.search(line)]
with open(file_name, "w") as _file:
    _file.writelines(new_lines)

Categories

Resources