Stop replacement by pattern? - python

Say my file look like this:
some lines
tom
some lines
beginword a b
some lines
endword
jim
some lines
beginword x y
some lines
endword
...
Want to be:
some lines
tom
some lines
beginword ZZ b
some lines
endword
jim
some lines
beginword x y
some lines
endword
So this is my python code:
input = open("file", "r")
output = open("file_updated", "w")
dummy = ""
item = []
for line in input:
dummy += line
if line.find("tom" + "\n") != -1:
for line in input:
if line.find("beginword") != -1:
item = line.split()
dummy += item[0] + " w " + item[-1] + "\n"
else:
dummy += line
output.write(dummy)
input.close()
output.close()
It replace all lines contain "beginword", include the lines belong to "jim", how can I stop the replacement by "endword" belong to "tom"?

Use break statement
input = open("file", "r")
output = open("file_updated", "w")
dummy = ""
item = []
for line in input:
dummy += line
if line.find("tom" + "\n") != -1:
for line in input:
# check for endword and exit for loop
if line.find("endword" + "\n") == 0:
dummy += line
break
if line.find("beginword") != -1:
item = line.split()
dummy += item[0] + " w " + item[-1] + "\n"
else:
dummy += line
output.write(dummy)
input.close()
output.close()
Also, have a look at these:
reading and writing files
regular expressions

Related

Read the file and split by the sum of words, upload to a folder

I figured out how to count words, how to split them - how to combine + put a ***** mark at the end of a sentence for > 200 and < 250 words (after the dot)???
# text counting method
file = open("test.txt", "r")
number_of_lines = 0
number_of_words = 0
number_of_characters = 0
for line in file:
line = line.strip("\n")
words = line.split()
number_of_lines += 1
number_of_words += len(words)
number_of_characters += len(line)
file.close()
print("lines:", number_of_lines, "words:", number_of_words, "characters:", number_of_characters, ( "sum_chsrters:", number_of_characters // 1000, "sum_wolds:", number_of_words // 120 ))
# text splitting method
with open('test.txt') as fo:
op = ''
start = 0
cntr = 1
for x in fo.read().split("\n"):
if (x=='*****'):
if (start==1):
with open(str(cntr) + '.txt', 'w') as opf:
opf.write(op)
opf.close()
op=''
cntr+=1
else:
start=1
else:
if (op==''):
op = x
else:
op = op + '\n' + x
fo.close()
P.S - I'm a beginner. It needs to be split file (sum +10,000 characters) into 10 parts (1000 characters or 120 words each) and put in a folder (01.txt, 02.txt, ...,10.txt)

How to not print a line (rather than printing 0) if it doesn't satisfy if statement - Python

I want to print out the line count and hashtag count only if the line starts with "RT" AND has at least 1 hashtag in it.
At the moment my code will print the line if it starts with "RT" but will give a 0 for the hashtag count is 0. However I don't want it to print the line at all.
i.e. there should be no lines of code that print out as "49 : 0"
PLEASE HELP - feel like I'm nearly there but need to add some more code into the second for loop.
hashtag_count = 0
line_count = 0
for line in open("tweets.txt"):
line_split = line.split()
hashtag_count = 0
if line.startswith("RT "):
for word in line_split:
if "#" in word:
hashtag_count += 1
print(str(line_count) + " : " + str(hashtag_count))
line_count += 1
You have reset value of hashtag_count in loop, remove that statement
hashtag_count = 0
line_count = 0
for line in open("tweets.txt"):
line_strip = line.strip()
line_split = line_strip.split()
hashtag_count = 0 #Remove this line <----------------------------
if line_strip.startswith("RT "):
for word in line_split
if word.startswith("#"):
hashtag_count += 1
print(str(line_count) + " : " + str(hashtag_count))
line_count += 1
After removal it will return number of hashtag_count correctly.
if hashtag_count:
print(str(line_count) + " : " + str(hashtag_count))
P.S.
RT
Russia Today? :-O
Maybe I missed something but why you're iterating over words?
If you just need to print only line number with # count for this lines starting with 'RT', try the following:
with open("tweets.txt") as f:
for line_number, line in enumerate(f, 1):
if line.startswith('RT'):
hash_count = line.count('#')
print("{} : {}".format(line_number, hash_count))

compare an exact word with the txt file

i am trying to get the exact word match from my file along with their line no.
like when i search for abc10 it gives me all the possible answers e.g abc102 abc103 etc
how can i limitize my code to only print what i commanded..
here is my code!
lineNo = 0
linesFound = []
inFile= open('rxmop.txt', 'r')
sKeyword = input("enter word ")
done = False
while not done :
pos = inFile.tell()
sLine = inFile.readline()
if sLine == "" :
done = True
break
if (sLine.find( sKeyword ) != -1):
print ("Found at line: "+str(lineNo))
tTuple = lineNo, pos
linesFound.append( tTuple )
lineNo = lineNo + 1
done = False
while not done :
command = int( input("Enter the line you want to view: ") )
if command == -1 :
done = True
break
for tT in linesFound :
if command == tT[0] :
inFile.seek( tT[1] )
lLine = inFile.readline()
print ("The line at position " + str(tT[1]) + "is: " + lLine)
"like when i search for abc10 it gives me all the possible answers e.g abc102 abc103 etc"
You split each record and compare whole "words" only.
to_find = "RXOTG-10"
list_of_possibles = ["RXOTG-10 QTA5777 HYB SY G12",
"RXOTG-100 QTA9278 HYB SY G12"]
for rec in list_of_possibles:
words_list=rec.strip().split()
if to_find in words_list:
print "found", rec
else:
print " NOT found", rec

Python Count paragraph

Hello all so i've been tasked to count lines and paragraphs. Counting every line is obviously easy but im stuck on counting the paragraphs. If a paragraph has no character it will give back the number zero and for every paragraph is an increment higher. For example an input file is: Input and an Output should come out Output
so my code is:
def insert_line_para_nums(infile, outfile):
f = open(infile, 'r')
out = open(outfile, 'w')
linecount = 0
for i in f:
paragraphcount = 0
if '\n' in i:
linecount += 1
if len(i) < 2: paragraphcount *= 0
elif len(i) > 2: paragraphcount = paragraphcount + 1
out.write('%-4d %4d %s' % (paragraphcount, linecount, i))
f.close()
out.close()
def insert_line_para_nums(infile, outfile):
f = open(infile, 'r')
out = open(outfile, 'w')
linecount = 0
paragraphcount = 0
empty = True
for i in f:
if '\n' in i:
linecount += 1
if len(i) < 2:
empty = True
elif len(i) > 2 and empty is True:
paragraphcount = paragraphcount + 1
empty = False
if empty is True:
paragraphnumber = 0
else:
paragraphnumber = paragraphcount
out.write('%-4d %4d %s' % (paragraphnumber, linecount, i))
f.close()
out.close()
This is one way to do it, and not the prettiest.
import re
f = open('a.txt', 'r')
paragraph = 0
lines = f.readlines()
for idx, line in enumerate(lines):
if not line == '\n':
m = re.search(r'\w', line)
str = m.group(0)
try:
# if the line is a newline, and the previous line has a str in it, then
# count it as a paragraph.
if line == '\n' and str in lines[idx-1]:
paragraph +=1
except:
pass
if lines[-1] != '\n': # if the last line is not a new line, count a paragraph.
paragraph +=1
print paragraph

Python - how to print amount of numbers, periods, and commas in file

def showCounts(fileName):
lineCount = 0
wordCount = 0
numCount = 0
comCount = 0
dotCount = 0
with open(fileName, 'r') as f:
for line in f:
words = line.split()
lineCount += 1
wordCount += len(words)
for word in words:
# ###text = word.translate(string.punctuation)
exclude = set(string.punctuation)
text = ""
text = ''.join(ch for ch in text if ch not in exclude)
try:
if int(text) >= 0 or int(text) < 0:
numCount += 1
# elif text == ",":
# comCount += 1
# elif text == ".":
# dotCount += 1
except ValueError:
pass
print("Line count: " + str(lineCount))
print("Word count: " + str(wordCount))
print("Number count: " + str(numCount))
print("Comma count: " + str(comCount))
print("Dot count: " + str(dotCount) + "\n")
Basically it will show the number of lines and the number of words, but I can't get it to show the number of numbers, commas, and dots. I have it read a file that the user enters and then show the amount of lines and words, but for some reason it says 0 for numbers commas and dots. I commented out the part where it gave me trouble. If i remove the comma then i just get an error. thanks guys
This code loops over every character in each line, and adds 1 to its variable:
numCount = 0
dotCount = 0
commaCount = 0
lineCount = 0
wordCount = 0
fileName = 'test.txt'
with open(fileName, 'r') as f:
for line in f:
wordCount+=len(line.split())
lineCount+=1
for char in line:
if char.isdigit() == True:
numCount+=1
elif char == '.':
dotCount+=1
elif char == ',':
commaCount+=1
print("Number count: " + str(numCount))
print("Comma count: " + str(commaCount))
print("Dot count: " + str(dotCount))
print("Line count: " + str(lineCount))
print("Word count: " + str(wordCount))
Testing it out:
test.txt:
Hello, my name is B.o.b. I like biking, swimming, and running.
I am 125 years old, and I was 124 years old 1 year ago.
Regards,
B.o.b
Running:
bash-3.2$ python count.py
Number count: 7
Comma count: 5
Dot count: 7
Line count: 6
Word count: 27
bash-3.2$
Everything makes sense here, except the lineCount the reason why this is 6 is because of newlines. In my editor (nano), it adds a newline to the end of any file by default. So just imagine the text file to be this:
>>> x = open('test.txt').read()
>>> x
'Hello, my name is B.o.b. I like biking, swimming, and running.\n\nI am 125 years old, and I was 124 years old 1 year ago.\n\nRegards,\nB.o.b \n'
>>> x.count('\n')
6
>>>
Hope this helps!
For the punctuations, why not just do:
def showCounts(fileName):
...
...
with open(fileName, 'r') as fl:
f = fl.read()
comCount = f.count(',')
dotCount = f.count('.')
You could use the Counter class to take care of it you:
from collections import Counter
with open(fileName, 'r') as f:
data = f.read().strip()
lines = len(data.split('\n'))
words = len(data.split())
counts = Counter(data)
numbers = sum(v for (k,v) in counts.items() if k.isdigit())
print("Line count: {}".format(lines))
print("Word count: {}".format(words))
print("Number count: {}".format(numbers))
print("Comma count: {}".format(counts[',']))
print("Dot count: {}".format(counts['.']))

Categories

Resources