I need to search a bunch of text files which may contain content in this form:
//for nv version
tf = new TextField();
tf.width = 600;
tf.height = 300;
tf.textColor = 0x00ffff;
tf.mouseEnabled = false;
this.addChild(tf);
Sys.__consoleWindowFuncBasic = log;
//nv end
and delete the part between the two marker lines, then save the files.
I split the text into lines and check it line by line, which makes the work very heavy. Is there an easier way of doing this?
Check this out
# Copy the input file to the output file, dropping every line from the
# begin-marker line through the end-marker line (the marker lines themselves
# are dropped as well). Markers are compared after stripping whitespace.
beginMarker = "//for nv version"
endMarker = "//nv end"
include = True  # False while we are inside a marked region
with open('path/to/input') as infile, open('path/to/output', 'w') as outfile:
    for line in infile:
        if include:
            if line.strip() != beginMarker:
                outfile.write(line)
            else:
                # Begin marker reached: stop copying until the end marker.
                include = False
        else:
            if line.strip() == endMarker:
                # End marker reached: resume copying with the next line.
                include = True
Maybe you can try using a regular expression to replace the matched lines with an empty string.
@yuwang provided a sed version; here is a Python version (see the Python re docs):
>>> import re
>>> s = """some words
... other words
... //for nv version
... tf = new TextField();
... tf.width = 600;
... tf.height = 300;
... tf.textColor = 0x00ffff;
... tf.mouseEnabled = false;
... this.addChild(tf);
... Sys.__consoleWindowFuncBasic = log;
... //nv end
... yet some other words
... """
>>> p = r"//for nv version(.*?)//nv end\n" # reluctant instead of greedy
>>> print re.sub(p, "", s, flags=re.S) # use re.S to make `.` matchs `\n`
some words
other words
yet some other words
Related
I am working with files right now and I want to get the text from between brackets. This is what I mean by getting text from brackets:
{
this is text for a
this is text for a
this is text for a
this is text for a
}
[
this is text for b
this is text for b
this is text for b
this is text for b
]
The content of a is "this is text for a" and the content of b is "this is text for b".
My code does not seem to print the contents of a properly; it shows both a and b from my file.
My code:
# Question's attempt: slice every line independently between its first "{"
# and its last "}".
# str.find / str.rfind return -1 when the character is absent, so for a line
# containing neither brace the slice below is line[0:-1], i.e. the line
# without its final character.
with open('file.txt','r') as read_obj:
for line in read_obj.readlines():
var = line[line.find("{")+1:line.rfind("}")]
print(var)
iterate over the file
for each line check the first character
if the first character is either '[' or '{' start accumulating lines
if the first character is either ']' or '}' stop accumulating lines
# Collect the lines found between a "{" line and a "}" line into a_s, and the
# lines found between a "[" line and a "]" line into b_s. A line counts as a
# bracket line when its first character is that bracket; bracket lines
# themselves are not collected.
a_s = []
b_s = []
capture = False  # True while we are between an opening and a closing line
group = None     # list currently being filled, or None outside brackets
with open(path) as f:
    for line in f:
        first = line[:1]
        if first in ('{', '['):
            capture = True
            group = a_s if first == '{' else b_s
        elif first in ('}', ']'):
            # Check for the closing bracket before appending, so the closing
            # line does not end up in the captured content.
            capture = False
            group = None
        elif capture:
            group.append(line)
print(a_s)
print(b_s)
Relies on the file to be structured exactly as shown in the example.
This is what regular expressions are made for. Python has a built-in module named re to perform regular expression queries.
In your case, simply:
import re
fname = "foo.txt"

# Load the whole file as one string.
with open(fname, "r") as f:
    data = f.read()

# Drop every newline so that each bracketed section sits on a single line.
data = "".join(data.split("\n"))

# Type A is the text inside {...}; type B is the text inside [...].
pattern_a = r"\{(.*?)\}"
pattern_b = r"\[(.*?)\]"

# Show all type-A sections, then all type-B sections.
print(re.findall(pattern_a, data))
print(re.findall(pattern_b, data))
Output:
['this is text for athis is text for athis is text for athis is text for a']
['this is text for bthis is text for bthis is text for bthis is text for b']
Read the file and split the content to a list.
Define a brackets list and exclude them through a loop and write the rest to a file.
# Append every line of content.txt that is not a lone bracket to
# new_content.txt.
brackets = ['[', ']', '{', '}']

# Use context managers so both files are closed, and open the output file
# once instead of reopening it for every line written.
with open("content.txt", 'r') as file_obj:
    content_list = file_obj.read().splitlines()

with open("new_content.txt", 'a') as writer:
    for i in content_list:
        if i not in brackets:
            writer.write(i + '\n')
# Print the lines located between an opening bracket line ("{" or "[") and
# the matching closing bracket line ("}" or "]").
flag = 0  # 1 while inside a bracketed section; must persist across lines
with open('D:\\Tests 1\\t1.txt', 'r') as f1:
    for line in f1:
        text = line.split('\n')[0]
        # Compare the line content directly: str.find returns -1 (truthy)
        # when nothing is found and 0 (falsy) for a match at the start, so
        # its result cannot be used as a boolean.
        if text in ('{', '['):
            flag = 1
        elif text in ('}', ']'):
            flag = 0
        elif flag == 1:
            print(text)
My file has something like this
#email = "abc";
%area = (
"abc" => 10,
"xyz" => 10,
);
Is there any regex I can use to match a block that begins with %area = ( and continues over the following lines until ); is found? This is so that I can remove those lines from the file.
The regex that I tried, ^%area = \(.*|\n\), somehow does not continue matching onto the next line.
So my final file will only have
#email = "abc";
Assuming a file file contains:
#email = "abc";
%area = (
"abc" => 10,
"xyz" => 10,
);
Would you please try the following:
import re
# Read the whole file, then print it with the "%area = ... );" block removed.
with open("file") as f:
    s = f.read()
area_block = re.compile(r'^%area =.*?\);', flags=(re.DOTALL|re.MULTILINE))
print(area_block.sub('', s))
Output:
#email = "abc";
If you want to clean-up the remaining empty lines, please try instead:
# Also swallow the newlines around the block and leave a single "\n" behind.
print(re.sub(r'\n*^%area =.*?\);\n*', '\n', s, flags=(re.DOTALL|re.MULTILINE)))
Then the result looks like:
#email = "abc";
The re.DOTALL flag makes . match any character including a newline.
The re.MULTILINE flag allows ^ and $ to match, respectively,
just after and just before newlines within the string.
[EDIT]
If you want to overwrite the original file, please try:
import re
# Read the file, strip the "%area = ... );" block together with the newlines
# around it, and write the result back over the same file.
with open("file") as f:
    s = f.read()
cleaned = re.sub(r'\n*^%area =.*?\);\n*', '\n', s, flags=(re.DOTALL|re.MULTILINE))
with open("file", "w") as f:
    f.write(cleaned)
To capture and remove your area group, you can use; link
# Raw string so "\(" reaches the regex engine unchanged; the lazy "*?" stops
# at the first ");" instead of running on to the last one in the text.
re.sub(r'%area = \((.|\n)*?\);', '', string)
#'#email = "abc";\n\n'
However, this will include two new lines after your #email line. You could add \n\n to the regex to capture that as well;
# Raw string so "\(" reaches the regex engine unchanged; the lazy "*?" stops
# at the first ");" instead of running on to the last one in the text.
re.sub(r'\n\n%area = \((.|\n)*?\);', '', string)
#'#email = "abc";'
However, if the email always follows the same logic, you would be best searching for that line only. link
# group() returns the whole match: "#email = " followed by the rest of that
# line. The lookahead (?=\n) requires a newline after the match without
# including it in the result.
re.search('(#email = ).*(?=\n)', string).group()
#'#email = "abc";'
The function:
# Question's original attempt: rewrite file_name, writing `string` on its own
# line after the first line that starts with `after`.
def newstr(string, file_name, after):
lines = []
with open(file_name, 'r') as f:
for line in f:
lines.append(line)
with open(file_name, 'w+') as f:
# flag stays True until the first insertion has been written.
flag = True
for line in lines:
f.write(line)
# str.startswith compares from the very first character of the line, so
# leading whitespace in the line is part of the comparison.
if line.startswith(after) and flag:
f.write(string+"\n")
flag = False
What I'm running is:
newstr('hello', "test.txt", "new code(")
Test.txt contents:
package example.main;
import example
public class Main {
public static void Main() {
new code("<RANDOM>");
new code("<RANDOM>");
}
}
What I expect:
package example.main;
import example
public class Main {
public static void Main() {
new code("<RANDOM>");
hello
new code("<RANDOM>");
}
}
But when I run the script nothing changes.
You have lines with whitespace indentation, such as
new code("<RANDOM>");
That line has spaces at the start; if you looked at the string representation you'd see:
>>> repr(line)
' new code("<RANDOM>");\n'
That line does not start with 'new code(', it starts with ' new code(':
>>> line.startswith('new code(')
False
str.startswith() does not ignore spaces.
Either strip off the whitespace, or include whitespace in the after variable:
>>> line.strip().startswith('new code(') # str.strip removes whitespace from start and end
True
>>> line.startswith(' new code(')
True
You could also use regular expressions to match lines, so using the pattern r'^\s*new code\('.
Your mistake is that the line does not start with the text you are looking for. It starts with " new code(", not "new code(". So you either need to look for " new code(" or you need to strip the whitespace from the line, i.e. line.lstrip().startswith(...).
BTW. instead of the loop you use to read in the file you could just say lines = f.readlines().
The IDE creates a lot of leading whitespace,
so you need to strip it before you can use startswith: change the check to line.lstrip().startswith so that the leading whitespace gets removed.
Next, for the writing, you can use a regex to copy the matched line's leading whitespace onto your new line, like this:
f.write(re.search(r"\s*", line).group()+string+"\n")
Fixed code:
import re
def newstr(string, file_name, after):
    """Insert ``string`` as a new line after the first line starting with ``after``.

    The file is rewritten in place. Leading whitespace is ignored when
    matching, and the inserted line gets the same leading whitespace as the
    matched line. Only the first matching line triggers an insertion; if no
    line matches, the file content is written back unchanged.

    Args:
        string: Text to insert (without a trailing newline).
        file_name: Path of the text file to modify.
        after: Prefix that the left-stripped target line must start with.
    """
    with open(file_name, 'r') as f:
        lines = f.readlines()

    with open(file_name, 'w') as f:
        inserted = False
        for line in lines:
            f.write(line)
            if inserted or not line.lstrip().startswith(after):
                continue
            # The last line of a file may lack its newline; add one so the
            # inserted text is not glued onto the matched line.
            if not line.endswith("\n"):
                f.write("\n")
            # Reuse the matched line's leading whitespace as indentation.
            indent = re.match(r"\s*", line).group()
            f.write(indent + string + "\n")
            inserted = True
newstr('hello', "test.txt", "new code(")
I just want to check whether there is a better way of doing this than what I came up with.
I need to parse a .py file; more precisely, I have to look for a specific list named id_list that contains several int numbers. The numbers can be written in several formats.
For example:
id_list = [123456, 789123, 456789]
id_list = [ 123456,
789123,
456789 ]
id_list = [ 123456
,789123
,456789 ]
What i came up with works just fine, but for the sake of perfectionism i want to know if there is "smoother" way of doing so.
# Question's parser: once a line containing 'id_list' has been seen, append
# to parsed_string every non-newline character from the first '[' up to and
# including the next ']'.
with open(filepath, 'rb') as input_file:
parsed_string = ''
# NOTE(review): start_flag is assigned here but never read in this snippet,
# while id_detected is read below and only assigned inside an `if` --
# presumably this line was meant to initialise id_detected; confirm.
start_flag = False
start_parsing = False
for line in input_file:
if 'id_list' in line:
id_detected = True
if id_detected:
for char in line:
if char == '[':
start_parsing = True
if start_parsing and char != '\n':
parsed_string += char
# The closing bracket has already been appended above; reset the state
# and stop scanning this line.
if char == ']':
id_detected = False
start_parsing = False
break
After that has been done, I'm just filtering parsed_string:
# Keep only the digits and the commas of parsed_string.
new_string = "".join(ch for ch in parsed_string if ch.isdigit() or ch == ',')
Which gets me string containing numbers and commas: 123456,789123,456789
So to wrap this up, is there anything that i could improve?
You can use a regular expression to solve:
import re
# Find "id_list = [ ... ]" anywhere in the file (the list may span several
# lines) and turn its comma-separated content into a list of ints.
# Text mode is used so that the str pattern matches on Python 3 as well.
with open(filepath, 'r') as input_file:
    text = input_file.read()

# re.DOTALL lets "." match newlines, so a multi-line list is captured too.
match = re.search(r'id_list\s*=\s*\[(.*?)\]', text, flags=re.DOTALL)
if match is None:
    print("Not found")
else:
    id_list_str = match.group(1)
    # Skip empty pieces so that "[]" or a trailing comma does not make int()
    # fail; int() itself tolerates the surrounding whitespace and newlines.
    id_list = [int(piece) for piece in id_list_str.split(',') if piece.strip()]
    print(id_list)
just use import and from
If you don't want to import the whole python files just import the elements you need
example
# Import by module name: the ".py" suffix is not part of the module name.
from filename import id_list
I have to edit some text files to include new information, but I will need to insert that information at specific locations in the file based on the surrounding text.
This doesn't work the way I need it to:
# Question's attempt at inserting text after a matching line.
with open(full_filename, "r+") as f:
# readlines() reads through to the end of the file, so the position that
# f.tell() reports below is the end of the file, not the matched line.
lines = f.readlines()
for line in lines:
if 'identifying text' in line:
offset = f.tell()
f.seek(offset)
f.write('Inserted text')
...in that it adds the text to the end of the file. How would I write it to the next line following the identifying text?
(AFAICT, this is not a duplicate of similar questions, since none of those were able to provide this answer)
If you don't need to work in place, then maybe something like:
# Copy old.txt to new.txt, adding an extra line after each line that
# contains the identifier.
with open("old.txt") as f_old, open("new.txt", "w") as f_new:
    for line in f_old:
        if 'identifier' not in line:
            f_new.write(line)
            continue
        f_new.write(line)
        f_new.write("extra stuff\n")
(or, to be Python-2.5 compatible):
# Copy old.txt to new.txt, adding an extra line after each line that
# contains the identifier. No `with` statement here, to stay compatible with
# Python 2.5; try/finally makes sure both files are closed even if reading
# or writing fails part-way through.
f_old = open("old.txt")
try:
    f_new = open("new.txt", "w")
    try:
        for line in f_old:
            f_new.write(line)
            if 'identifier' in line:
                f_new.write("extra stuff\n")
    finally:
        f_new.close()
finally:
    f_old.close()
which turns
>>> !cat old.txt
a
b
c
d identifier
e
into
>>> !cat new.txt
a
b
c
d identifier
extra stuff
e
(Usual warning about using 'string1' in 'string2': 'name' in 'enamel' is True, 'hello' in 'Othello' is True, etc., but obviously you can make the condition arbitrarily complicated.)
You could use a regex and then replace the text.
import re
c = "This is a file's contents, apparently you want to insert text"
# re.sub returns a new string and leaves its input untouched (str is
# immutable), so the result has to be bound back to c before printing.
c = re.sub('text', 'text here', c)
print(c)
returns "This is a file's contents, apparently you want to insert text here"
Not sure if it'll work for your usecase but it's nice and simple if it fits.
This will look for any string anywhere in the file (it does not have to be at the start of a line, and it may even be spread over multiple lines).
Typically you can follow this algorithm:
look up the string in the file and capture its "location"
then split the file at this "location" and create a new file as follows:
write the start-to-loc content to the new file
next, write your "NEW TEXT" to the new file
next, write the loc-to-end content to the new file
Let us see code:
#!/usr/bin/python
# Searches file_name for SEARCH_WORD and writes new.txt as a copy of the
# file's content with add_text spliced in at a position derived from the
# search. Python 2 syntax (print statement).
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the statements inside the `with` / `while` below cannot be read off the
# text; the comments describe individual statements only.
import os
SEARCH_WORD = 'search_text_here'
file_name = 'sample.txt'
add_text = 'my_new_text_here'
# -1 until the search loop records a location.
final_loc=-1
with open(file_name, 'rb') as file:
fsize = os.path.getsize(file_name)
# The first read covers the whole file; bsize is shrunk after each hit.
bsize = fsize
word_len = len(SEARCH_WORD)
while True:
found = 0
pr = file.read(bsize)
# Offset of SEARCH_WORD inside the chunk just read, or -1 if absent.
pf = pr.find(SEARCH_WORD)
if pf > -1:
found = 1
# NOTE(review): presumably converts the in-chunk offset into an absolute
# file offset -- confirm for reads that return fewer than bsize bytes.
pos_dec = file.tell() - (bsize - pf)
# Continue just past the match; the next read covers the rest of the file.
file.seek(pos_dec + word_len)
bsize = fsize - file.tell()
if file.tell() < fsize:
seek = file.tell() - word_len + 1
file.seek(seek)
if 1==found:
final_loc = seek
print "loc: "+str(final_loc)
else:
break
# create file with doxygen comments
f_old = open(file_name,'r+')
f_new = open("new.txt", "w")
f_old.seek(0)
fStr = str(f_old.read())
# The content is split at index final_loc-1 and add_text is written between
# the two parts.
# NOTE(review): if final_loc is still -1 at this point, the split index is
# -2, i.e. two characters before the end of the content -- confirm whether a
# "not found" case needs separate handling.
f_new.write(fStr[:final_loc-1]);
f_new.write(add_text);
f_new.write(fStr[final_loc-1:])
f_new.close()
f_old.close()