how to get the contents in brackets in a file Python - python

I am working with files right now and I want to get the text that is enclosed in brackets. This is what I mean by getting text from brackets:
{
this is text for a
this is text for a
this is text for a
this is text for a
}
[
this is text for b
this is text for b
this is text for b
this is text for b
]
The content of a is "this is text for a" and the content of b is "this is text for b".
My code does not print the contents of a properly: it shows both a and b from my file.
My code:
with open('file.txt','r') as read_obj:
for line in read_obj.readlines():
var = line[line.find("{")+1:line.rfind("}")]
print(var)

iterate over the file
for each line check the first character
if the first character is either '[' or '{' start accumulating lines
if the first character is either ']' or '}' stop accumulating lines
def split_bracket_groups(lines):
    """Collect the lines enclosed by ``{ }`` and the lines enclosed by ``[ ]``.

    A line whose first character is ``{`` or ``[`` opens a group and a line
    whose first character is ``}`` or ``]`` closes it. The delimiter lines
    themselves are never collected.

    Args:
        lines: iterable of text lines, e.g. an open file object.

    Returns:
        A ``(a_s, b_s)`` tuple: the lines found inside ``{ }`` and the lines
        found inside ``[ ]``, each with its line ending intact.
    """
    a_s = []
    b_s = []
    group = None  # list currently being filled; None while outside brackets
    for line in lines:
        first = line[:1]  # slicing (unlike line[0]) tolerates an empty string
        if first in ('{', '['):
            group = a_s if first == '{' else b_s
        elif first in ('}', ']'):
            # Test for the closer *before* appending, otherwise the closing
            # bracket line itself ends up in the captured group.
            group = None
        elif group is not None:
            group.append(line)
    return a_s, b_s


if __name__ == "__main__":
    # `path` is the location of the input file and must be set beforehand.
    with open(path) as f:
        a_s, b_s = split_bracket_groups(f)
    print(a_s)
    print(b_s)
Relies on the file to be structured exactly as shown in the example.

This is what regular expressions are made for. Python has a built-in module named re to perform regular expression queries.
In your case, simply:
import re

fname = "foo.txt"

# One non-greedy capture per delimiter pair: whatever sits between them.
pattern_a = r"\{(.*?)\}"  # type A: { ... }
pattern_b = r"\[(.*?)\]"  # type B: [ ... ]

# Slurp the whole file into a single string.
with open(fname, "r") as f:
    data = f.read()

# Drop every newline so that each bracketed section sits on one line and
# "." (which does not match a newline by default) can span all of it.
data = data.replace("\n", "")

# Type A matches first, then type B.
print(re.findall(pattern_a, data))
print(re.findall(pattern_b, data))
Output:
['this is text for athis is text for athis is text for athis is text for a']
['this is text for bthis is text for bthis is text for bthis is text for b']

Read the file and split the content to a list.
Define a brackets list and exclude them through a loop and write the rest to a file.
# Lines that consist of exactly one of these characters are dropped.
brackets = ['[', ']', '{', '}']

# Context managers close both files even when an error occurs.
with open("content.txt", 'r') as file_obj:
    content_list = file_obj.read().splitlines()

# Open the output once (still in append mode) instead of once per line.
with open("new_content.txt", 'a') as writer:
    for i in content_list:
        if i not in brackets:
            writer.write(i + '\n')

def iter_bracketed_lines(lines):
    """Yield the lines found between an opening and a closing bracket line.

    A line that is exactly ``{`` or ``[`` switches output on; a line that is
    exactly ``}`` or ``]`` switches it off. Bracket lines are not yielded.

    Args:
        lines: iterable of text lines, e.g. an open file object.

    Yields:
        Each enclosed line with its trailing newline removed.
    """
    # The state must live outside the loop; resetting it on every iteration
    # would forget that a bracket was opened on an earlier line.
    inside = False
    for line in lines:
        text = line.rstrip('\n')
        # Compare the text directly: str.find() returns -1 (truthy) on a miss
        # and 0 (falsy) on a hit at index 0, so it cannot be used as a test.
        if text in ('{', '['):
            inside = True
        elif text in ('}', ']'):
            inside = False
        elif inside:
            yield text


if __name__ == "__main__":
    with open('D:\\Tests 1\\t1.txt', 'r') as f1:
        for text in iter_bracketed_lines(f1):
            print(text)

Related

How to extract specific information from a file in python

I have a file with following format:
device={
id=1
tag=10
name=device1
}
device={
id=2
tag=20
name=device2
}
device={
id=3
tag=30
name=device3
}
So let's say I am only interested in device with id=2 and I want to extract its tag number(this is configurable and will be changed from some other code). So I need to extract tag number of the device id 2. How can I do this in python. I have done following:
ID='id=2'
with open("file.txt") as file:
for line in file:
if line.strip() == ID:
#Here I do not know what to write
# to extract 20
Thanks
With re.search function:
import re

id_num = 'id=2'
with open('file.txt', 'r') as f:
    # re.escape() keeps id_num a literal even if it ever contains regex
    # metacharacters; the raw string keeps "\s" a regex escape rather than an
    # (invalid) string escape.
    tag_num = re.search(re.escape(id_num) + r'\s+tag=([0-9]+)', f.read())

# re.search() returns None when nothing matches, so guard before .group().
if tag_num is None:
    print('no tag found for ' + id_num)
else:
    print(tag_num.group(1))
The output:
20
f.read() - reads the file contents (as text)
r'' + id_num + '\s+tag=([0-9]+)' - constructing regex pattern, so it would become id=2\s+tag=([0-9]+) where \s is one or many whitespace characters(including newlines) and ([0-9]+) is the 1st captured group containing tag number
tag_num.group(1) - extracting the value of the 1st captured/parenthesized group 1 from the match object tag_num
You can read the next line using file.readline(). Try this code:
ID = 'id=2'
result = None  # remains None when ID never appears in the file
with open("file.txt") as file:
    while True:
        line = file.readline()
        if line == '':
            # readline() returns '' only at end of file.
            break
        if line.strip() == ID:
            nextline = file.readline()
            # partition() yields '' instead of raising when the following
            # line is missing or contains no '='.
            result = nextline.strip().partition('=')[2]
            break  # found it; no need to read the rest of the file
# ID is the "id=<n>" line to look for; it must be defined before this runs.
with open("file.txt") as file:
    for line in file:
        if line.strip() == ID:
            # next() works on Python 2.6+ and 3; the default avoids a
            # StopIteration when ID is the last line of the file.
            d = next(file, '')
            # strip() removes the newline so only the value is printed.
            print(d.strip().partition('=')[2])
            break

Copy complete sentences from text file and add to list

I am trying to extract complete sentences from a long text file and adding them as strings to a list in Python 2.7. I want to automate this and not just cut and paste in the list.
Here is what I have:
from sys import argv
script, filename = argv # script = alien.py; filename = roswell.txt
listed = []
text = open(filename, 'rw')
for i in text:
lines = readline(i)
listed.append(lines)
print listed
text.close()
Nothing loads to the list.
You can do it with a while loop:
listed = []
with open(filename, "r") as text:
    # Call readline() repeatedly until it returns '' (end of file) and keep
    # every line that was read, in order.
    listed.extend(iter(text.readline, ''))
print(listed)
In the previous example, I assumed that each sentence is written on a different line, if that's not the case, use this code instead:
listed = []
with open(filename, "r") as text:
    for line in text:
        for sentence in line.split("."):
            # Drop the surrounding whitespace/newline and skip the empty
            # fragments that split() produces after a trailing period.
            sentence = sentence.strip()
            if sentence:
                listed.append(sentence)
print(listed)
And on a side note, try using with open(...) as text: instead of text = open(...)
Normally sentences are separated by '. ', not '\n'. Under this condition, use split with period+space(without return-enter):
# The context manager closes the file whether or not reading succeeds.
with open(filename, "r") as fd:
    # Read everything at once and cut it at every period followed by a space.
    listed = fd.read().split(". ")
    print(listed)

how to read the content of .txt file using python?

output_filename = r"C:\Users\guage\Output.txt"
RRA:
GREQ-299684_6j
GREQ-299684_6k
CZM:
V-GREQ-299684_6k
V-GREQ-299524_9
F_65624_1
R-GREQ-299680_5
DUN:
FB_71125_1
FR:
VQ-299659_18
VR-GREQ-299659_19
VEQ-299659_28
VR-GREQ-299659_31
VR-GREQ-299659_32
VEQ-299576_1
GED:
VEQ-299622_2
VR-GREQ-299618_13
VR-GREQ-299559_1
VR-GREQ-299524_14
FB_65624_1
VR-GREQ-299645_1
MNT:
FB_71125_1
FB_71125_2
VR-534_4
The above is the content of the .txt file. How can I read each section of it separately? For example:
RRA:VR-GREQ-299684_6j VR-GREQ-299684_6k VR-GREQ-299606_3 VR-GREQ-299606_4 VR-GREQ-299606_5 VR-GREQ-299606_7
and save it in a variable or something similar to it. Later I want to read CZM separately and so on. I did as below.
with open(output_filename, 'r') as f:
excel = f.read()
But how to read it separately ? can someone tell me how to do it ?
Something like this:
def read_file_with_custom_record_separator(file_path, delimiter='\n'):
    """Print the records of a file, one record per header line.

    A header is any line that, once stripped of surrounding whitespace, ends
    with ``delimiter``. Each header starts a new record; the record consists
    of the header line plus every line up to the next header.

    Args:
        file_path: path of the text file to read.
        delimiter: suffix that marks a header line, e.g. ``":"``.

    Returns:
        None. Every record is printed, the final one labelled
        ``LAST VARIABLE``.
    """
    data = ""
    # The context manager closes the file even if reading fails.
    with open(file_path) as fh:
        for line in fh:
            if line.strip().endswith(delimiter) and data != "":
                # A new header was reached: emit the finished record first.
                # A single string argument prints the same on Python 2 and 3.
                print("VARIABLE:\n< " + data + " >\n")
                data = line
            else:
                data += line
    print("LAST VARIABLE:\n< " + data + " >\n")
And then:
read_file_with_custom_record_separator("input.txt", ":")
You can make use of the file text : as indicator to create a new file like this:
savefilename = ""
sf = None  # output file of the section currently being copied
with open(filename, 'r') as f:
    try:
        for line in f:
            line = line.strip()  # get rid of the unnecessary white chars
            if line.endswith(":"):
                # A header line starts a new section: finish the previous
                # output file before opening the next one.
                if sf is not None:
                    sf.close()
                savefilename = line[:-1]  # header without the trailing ":"
                sf = open(savefilename + ".txt", 'w')
            elif sf is not None:
                # Data that precedes the first header has no file to go to
                # and is skipped.
                sf.write(line + "\n")
    finally:
        # Close the last section's file so its data is flushed to disk.
        if sf is not None:
            sf.close()
Then you should get collection of files:
RRA.txt
CZM.txt
DUN.txt
# etc
which contains all the appropriate data:
RRA.txt
VR-GREQ-299684_6j
VR-GREQ-299684_6k
VR-GREQ-299606_3
VR-GREQ-299606_4
VR-GREQ-299606_5
VR-GREQ-299606_7
CZM.txt
VR-GREQ-299684_6k
VR-GREQ-299606_6
VR-GREQ-299606_8
VR-GREQ-299640_1
VR-GREQ-299640_5
VR-GREQ-299524_9
FB_65624_1
VR-GREQ-299680_5
DUN.txt
FB_71125_1
# and so on
You can replace the sf = open and the sf.write calls with whatever way you feel is best to separate the data. Here, I use files.
You can iterate over the file and use the lines and indices to your advantage; something like this:
with open(output_filename, 'r') as f:
for index, line in enumerate(f):
# here you have access to each line and its index
# so you can save any number of lines you wish
What about reading it into a list, then process its element as you prefer
>>> f = open('myfile.txt', 'r').readlines()
>>> len(f)
46
>>> f[0]
RRA:
>>> f[-1]
VR-GREQ-299534_4
>>> f[:3]
['RRA:\n', 'VR-GREQ-299684_6j \n', 'VR-GREQ-299684_6k \n']
>>>
>>> [l for l in f if l.startswith('FB_')]
['FB_65624_1 \n', 'FB_71125_1 \n', 'FB_69228_1 \n', 'FB_65624_1 \n', 'FB_71125_1 \n', 'FB_71125_2 \n']
>>>

Python Regex Find Pattern, Remove Rest of String, Write new String to File

I have the codez:
import re
pattern = ','
firstNames = "dictionary//first_names.txt"
new_file = []
def openTxtFile(txtFile):
file = open (txtFile,"r")
data = file.read()
print (data)
file.close
def parseTextFile(textFile):
openTxtFile(firstNames)
for line in lines:
match = re.search(pattern, line)
if match:
new_line = match.group() + '\n'
print (new_line)
new_file.append(new_line)
with open(firstNames, 'w') as f:
f.seek(0)
f.writelines(new_file)
I am trying to take the original file, match each line on a "," and write, line by line, the part of the string before the "," to a new file. I'm having trouble putting all this together, thanks!
Use the csv module, since your original file is comma separated:
import csv

with open('input_file.txt') as f:
    reader = csv.reader(f)
    # A blank line is parsed as an empty row, for which line[0] would raise
    # IndexError, so empty rows are skipped.
    names = [line[0] for line in reader if line]

with open('new_file.txt', 'w') as f:
    for name in names:
        f.write('{0}\n'.format(name))

Insert text into a text file following specific text using Python

I have to edit some text files to include new information, but I will need to insert that information at specific locations in the file based on the surrounding text.
This doesn't work the way I need it to:
with open(full_filename, "r+") as f:
lines = f.readlines()
for line in lines:
if 'identifying text' in line:
offset = f.tell()
f.seek(offset)
f.write('Inserted text')
...in that it adds the text to the end of the file. How would I write it to the next line following the identifying text?
(AFAICT, this is not a duplicate of similar questions, since none of those were able to provide this answer)
If you don't need to work in place, then maybe something like:
with open("old.txt") as f_old:
    with open("new.txt", "w") as f_new:
        for line in f_old:
            # Every input line is copied through unchanged ...
            f_new.write(line)
            if 'identifier' not in line:
                continue
            # ... and the extra text follows each line holding the marker.
            f_new.write("extra stuff\n")
(or, to be Python-2.5 compatible):
f_old = open("old.txt")
try:
    f_new = open("new.txt", "w")
    try:
        for line in f_old:
            f_new.write(line)
            if 'identifier' in line:
                f_new.write("extra stuff\n")
    finally:
        # try/finally stands in for "with": both files are closed even if
        # reading or writing fails part-way through.
        f_new.close()
finally:
    f_old.close()
which turns
>>> !cat old.txt
a
b
c
d identifier
e
into
>>> !cat new.txt
a
b
c
d identifier
extra stuff
e
(Usual warning about using 'string1' in 'string2': 'name' in 'enamel' is True, 'hello' in 'Othello' is True, etc., but obviously you can make the condition arbitrarily complicated.)
You could use a regex and then replace the text.
import re

c = "This is a file's contents, apparently you want to insert text"
# Strings are immutable: re.sub() returns a new string and leaves its
# argument untouched, so the result has to be bound back to c.
c = re.sub('text', 'text here', c)
print(c)
returns "This is a file's contents, apparently you want to insert text here"
Not sure if it'll work for your usecase but it's nice and simple if it fits.
This will look for any string, in the file (not specific, to be at the start of the line only, i.e. can exist spread over multiple lines as well).
Typically you can follow the algo as:
lookup for the string in the file, and capture "location"
then split the file about this "location", and attempt to create new files as
write start-to-loc content to new file
next, write your "NEW TEXT" to the new file
next, loc-to-end content to new file
Let us see code:
#!/usr/bin/python
import os

SEARCH_WORD = 'search_text_here'
file_name = 'sample.txt'
add_text = 'my_new_text_here'


def insert_before_last(text, word, addition):
    """Return ``text`` with ``addition`` inserted before the last ``word``.

    Args:
        text: the full content to search.
        word: the string to look for; its last occurrence is used.
        addition: the string to insert immediately before that occurrence.

    Returns:
        The new content, or ``text`` unchanged when ``word`` does not occur.
    """
    loc = text.rfind(word)
    if loc == -1:
        return text
    return text[:loc] + addition + text[loc:]


if __name__ == "__main__":
    # Search and slice the same text-mode string, so the offset found is
    # always valid for the content that gets written out.
    with open(file_name, 'r') as f_old:
        content = f_old.read()

    found_at = content.rfind(SEARCH_WORD)
    # final_loc keeps its original meaning: 1-based position of the last
    # match, or -1 when the word is absent.
    final_loc = found_at + 1 if found_at > -1 else -1
    if final_loc > -1:
        print("loc: " + str(final_loc))

    # Write start-to-loc, then the new text, then loc-to-end to a new file.
    with open("new.txt", "w") as f_new:
        f_new.write(insert_before_last(content, SEARCH_WORD, add_text))

Categories

Resources