Identifying spaces between commas - python

I need to identify if theres a space between a number and comma then that number is invalid. So if the number has more or less than 2 decimal places and/or white spaces in between the commas then it is INVALID but if it has no whitespaces in between the commas and has 2 decimal places then it it a VALID number. That's why the first number in Line 1 is VALID
There's two methods, I prefer to work on method 2 but I thought if I put two methods it might help any of you to add on
#-----------Method 1------------------------------------------
res = 0
outfile = "output2.txt"
baconFile = open(outfile,"wt")
index = 0
invalid_string = "INVALID"
valid_string = "VALID"
with open('file.txt') as file:
for line in file:
carrera = ''
index = index + 1
print("Line {}: ".format(index), end='')
baconFile.write("Line {}: ".format(index))
number_list = line.strip().split(',')
for number in number_list:
if len(number.split('.')[-1]) == 2:
#res += 1
## print("VALID")
carrera = valid_string
if len(number.split('.')[-1]) != 2:
#res += 1
carrera = invalid_string
if len(number.split(',')[-1]) == " ": #checking for whitespace
carrera = invalid_string
print (carrera, end=' ')
baconFile.write(carrera + " ")
print('\n', end='')
baconFile.write('\n')
baconFile.close()
#-----------Method 2------------------------------------------
res = 0
outfile = "output2.txt"
baconFile = open(outfile,"wt")
index = 0
invalid_string = "INVALID"
valid_string = "VALID"
with open('file.txt') as file:
for line in file:
index = index + 1
o = "Line {}: ".format(index)
number_list = line.strip().split(',')
for x in number_list:
if len(x.split('.')[-1]) == 2:
o += valid_string + " "
if len(x.split('.')[-1]) != 2:
o += invalid_string + " "
if len(x.split(',')[-1]) == " ":
o += valid_string + " "
Here's my list of numbers in Text.file:
1,1.02, 123.0005
1.02, 1.02 , 1.02
Expected:
Line 1: INVALID VALID INVALID
Line 2: VALID INVALID INVALID (since there's spaces between the last number that's why it is INVALID)
ACTUAL:
Line 1: INVALID VALID INVALID
Line 2: VALID INVALID VALID

You can split the strings with , and decide if the string is valid or invalid based on whether the string stars with a whitespace
#Open the files
with open('file.txt') as fp:
#Extract out non-empty lines from file
lines = [line for line in fp.readlines() if line.strip()]
res = []
#Iterate over the lines
for idx, line in enumerate(lines):
#Number is valid if it doesn't start with a whitespace, has a decimal part and the decimal part is two digits long
res = ['VALID' if not item.startswith(' ') and '.' in item and len(item.split('.')[1]) == 2 else 'INVALID' for item in line.split(',')]
#Print the result
print("Line {}: {}".format(idx+1, ' '.join(res)))
The output will be
Line 1: INVALID VALID INVALID
Line 2: VALID INVALID INVALID

try this:
line="1,1.02, 123.0005"
reslt=line.split(",")
Res=" "
for i in reslt:
if " "in i:
line1="INVALID "
else:
line1="VALID "
Res +="".join(line1)
print("line1:"+Res)
READ from file :
nbline
with open('file.txt') as f:
for line in f.readlines():
print(line)
reslt=line.split(",")
Res=" "
for i in reslt:
if " "in i:
line1="INVALID "
else:
line1="VALID "
Res +="".join(line1)
nbline = nbline+1
print("line {}:{}".format(nbline,Res))
output:
line1: VALID VALID INVALID

A list comprehension based on splitting on commas, and a little string trickery would be much simpler:
line="1,1.02, 123.0005"
result = " ".join("IN"*(" " in s)+"VALID" for s in line.split(","))
print(result) # VALID VALID INVALID

With decimal.Decimal object, you can retrieve the exponent, which somehow tells you the number of decimal places (see docs):
import decimal
o += " ".join(['INVALID' if x[0] == ' ' or decimal.Decimal(x).as_tuple().exponent != -2 else 'VALID' for x in line.split(',')])
Output
#with line = "1,1.02, 123.0005"
'Line 1: INVALID VALID INVALID'
#with line = "1.02, 1.02 , 1.02"
'Line 2: VALID INVALID INVALID'

Related

Truncate ''space'' issue x/002

import re
with open("./teste/counter.txt", "r+") as count:
countread = count.read()
inputvar = input("Counting - write anything: ")
if countread == "":
print("Countread is ''None''. Adding to text file number ''1''.")
count.write('1')
else:
count.truncate(0)
countread = countread.replace(' ', '')
countplus = int(countread) + 1
print(countread)
count.write(str(countplus))
count.close()
I am trying to erase the file with count.truncate(0) but after it adds 1, and goes to 2 in my text file, at 3 I get the error:
ValueError: invalid literal for int() with base 10: '\x002'
For the line ''countplus = ...''
EDIT: By the way the ''countread replace'' was a try to fix this issue.
Fixed it with this
while 3>2:
with open("./teste/counter.txt", "r+") as count:
countread = count.read()
if countread == "":
countread = "0"
inputvar = input("Counting " + countread + " write anything: ")
if countread == "0":
count.write('1')
else:
countplus = int(countread) + 1
count.truncate(0)
count.seek(0)
countread = count.read()
count.write(str(countplus))
count.close()

How to read and write code output into a text file line by line

For example, my list looks like this from my text file:
1,2.12,3.123
Then it prints this to my output text file (decimal places from each number):
Line 1: INVALID VALID INVALID
However, if my list from my text file is like this:
1,2.12,3.123
1,1.00
Then it prints this to my output text file:
Line 1: Line 2: INVALID
VALID
INVALID
Line 3: Line 4: INVALID
VALID
INVALID
How do I get it to print this to my output text file:
Line 1: INVALID VALID INVALID
LINE 2: INVALID VALID
I tried to add an index = index +1 so it can add 1 to every line.
from functools import reduce
res = 0
outfile = "output2.txt"
baconFile = open(outfile,"wt")
index = 0
invalid_string = "INVALID"
valid_string = "VALID"
for line in open("file.txt"): # read file line-by-line
carrera = ''
index = index +1 # Count lines that contain what the command wants
print("Line {}: ".format(index))
baconFile.write("Line {}: ".format(index))
with open('file.txt') as file:
number_list = file.readline().strip().split(',')
for line in number_list:
if len(line.split('.')[-1]) == 2:
# res += 1
## print("VALID")
carrera = valid_string
if len(line.split('.')[-1]) != 2:
#res += 1
carrera = invalid_string
print (carrera)
baconFile.write(carrera + " ")
# print(res)
baconFile.close()
Expected:
Line 1: INVALID VALID INVALID
LINE 2: INVALID VALID
Actual:
Line 1: Line 2: INVALID
VALID
INVALID
Line 3: Line 4: INVALID
VALID
INVALID
Iterate over each line in a file with for line in file, and print without a newline with print('...', end=''). Don't open() it twice.
res = 0
outfile = "output2.txt"
baconFile = open(outfile,"wt")
index = 0
invalid_string = "INVALID"
valid_string = "VALID"
index = 1
with open('file.txt') as file:
for line in file:
print("Line {}: ".format(index), end='')
baconFile.write("Line {}: ".format(index))
number_list = line.strip().split(',')
for number in number_list:
if len(number.split('.')[-1]) == 2:
#res += 1
## print("VALID")
carrera = valid_string
if len(number.split('.')[-1]) != 2:
#res += 1
carrera = invalid_string
print (carrera, end='')
baconFile.write(carrera + " ")
print('\n', end='')
baconFile.write('\n')
index += 1
(To other answerers: I avoid enumerate because the code already has index in it.)
I made some modifications for formatting:
res = 0
outfile = "output2.txt"
baconFile = open(outfile,"wt")
index = 0
invalid_string = "INVALID"
valid_string = "VALID"
with open('file.txt') as file:
for line in file:
index = index +1
o = "Line {}: ".format(index)
number_list = line.strip().split(',')
for x in number_list:
if len(x.split('.')[-1]) == 2:
o += valid_string + " "
if len(x.split('.')[-1]) != 2:
o += invalid_string + " "
print(o)
baconFile.write(o + "\n")
baconFile.close()
EDIT: accidentally left the 'carrera' variable there, thought I might edit it out, since it no longer serves a purpose

Stop replacement by pattern?

Say my file look like this:
some lines
tom
some lines
beginword a b
some lines
endword
jim
some lines
beginword x y
some lines
endword
...
Want to be:
some lines
tom
some lines
beginword ZZ b
some lines
endword
jim
some lines
beginword x y
some lines
endword
So this is my python code:
input = open("file", "r")
output = open("file_updated", "w")
dummy = ""
item = []
for line in input:
dummy += line
if line.find("tom" + "\n") != -1:
for line in input:
if line.find("beginword") != -1:
item = line.split()
dummy += item[0] + " w " + item[-1] + "\n"
else:
dummy += line
output.write(dummy)
input.close()
output.close()
It replace all lines contain "beginword", include the lines belong to "jim", how can I stop the replacement by "endword" belong to "tom"?
Use break statement
input = open("file", "r")
output = open("file_updated", "w")
dummy = ""
item = []
for line in input:
dummy += line
if line.find("tom" + "\n") != -1:
for line in input:
# check for endword and exit for loop
if line.find("endword" + "\n") == 0:
dummy += line
break
if line.find("beginword") != -1:
item = line.split()
dummy += item[0] + " w " + item[-1] + "\n"
else:
dummy += line
output.write(dummy)
input.close()
output.close()
Also, have a look at these:
reading and writing files
regular expressions

compare an exact word with the txt file

i am trying to get the exact word match from my file along with their line no.
like when i search for abc10 it gives me all the possible answers e.g abc102 abc103 etc
how can i limitize my code to only print what i commanded..
here is my code!
lineNo = 0
linesFound = []
inFile= open('rxmop.txt', 'r')
sKeyword = input("enter word ")
done = False
while not done :
pos = inFile.tell()
sLine = inFile.readline()
if sLine == "" :
done = True
break
if (sLine.find( sKeyword ) != -1):
print ("Found at line: "+str(lineNo))
tTuple = lineNo, pos
linesFound.append( tTuple )
lineNo = lineNo + 1
done = False
while not done :
command = int( input("Enter the line you want to view: ") )
if command == -1 :
done = True
break
for tT in linesFound :
if command == tT[0] :
inFile.seek( tT[1] )
lLine = inFile.readline()
print ("The line at position " + str(tT[1]) + "is: " + lLine)
"like when i search for abc10 it gives me all the possible answers e.g abc102 abc103 etc"
You split each record and compare whole "words" only.
to_find = "RXOTG-10"
list_of_possibles = ["RXOTG-10 QTA5777 HYB SY G12",
"RXOTG-100 QTA9278 HYB SY G12"]
for rec in list_of_possibles:
words_list=rec.strip().split()
if to_find in words_list:
print "found", rec
else:
print " NOT found", rec

Python - how to print amount of numbers, periods, and commas in file

def showCounts(fileName):
lineCount = 0
wordCount = 0
numCount = 0
comCount = 0
dotCount = 0
with open(fileName, 'r') as f:
for line in f:
words = line.split()
lineCount += 1
wordCount += len(words)
for word in words:
# ###text = word.translate(string.punctuation)
exclude = set(string.punctuation)
text = ""
text = ''.join(ch for ch in text if ch not in exclude)
try:
if int(text) >= 0 or int(text) < 0:
numCount += 1
# elif text == ",":
# comCount += 1
# elif text == ".":
# dotCount += 1
except ValueError:
pass
print("Line count: " + str(lineCount))
print("Word count: " + str(wordCount))
print("Number count: " + str(numCount))
print("Comma count: " + str(comCount))
print("Dot count: " + str(dotCount) + "\n")
Basically it will show the number of lines and the number of words, but I can't get it to show the number of numbers, commas, and dots. I have it read a file that the user enters and then show the amount of lines and words, but for some reason it says 0 for numbers commas and dots. I commented out the part where it gave me trouble. If i remove the comma then i just get an error. thanks guys
This code loops over every character in each line, and adds 1 to its variable:
numCount = 0
dotCount = 0
commaCount = 0
lineCount = 0
wordCount = 0
fileName = 'test.txt'
with open(fileName, 'r') as f:
for line in f:
wordCount+=len(line.split())
lineCount+=1
for char in line:
if char.isdigit() == True:
numCount+=1
elif char == '.':
dotCount+=1
elif char == ',':
commaCount+=1
print("Number count: " + str(numCount))
print("Comma count: " + str(commaCount))
print("Dot count: " + str(dotCount))
print("Line count: " + str(lineCount))
print("Word count: " + str(wordCount))
Testing it out:
test.txt:
Hello, my name is B.o.b. I like biking, swimming, and running.
I am 125 years old, and I was 124 years old 1 year ago.
Regards,
B.o.b
Running:
bash-3.2$ python count.py
Number count: 7
Comma count: 5
Dot count: 7
Line count: 6
Word count: 27
bash-3.2$
Everything makes sense here, except the lineCount the reason why this is 6 is because of newlines. In my editor (nano), it adds a newline to the end of any file by default. So just imagine the text file to be this:
>>> x = open('test.txt').read()
>>> x
'Hello, my name is B.o.b. I like biking, swimming, and running.\n\nI am 125 years old, and I was 124 years old 1 year ago.\n\nRegards,\nB.o.b \n'
>>> x.count('\n')
6
>>>
Hope this helps!
For the punctuations, why not just do:
def showCounts(fileName):
...
...
with open(fileName, 'r') as fl:
f = fl.read()
comCount = f.count(',')
dotCount = f.count('.')
You could use the Counter class to take care of it you:
from collections import Counter
with open(fileName, 'r') as f:
data = f.read().strip()
lines = len(data.split('\n'))
words = len(data.split())
counts = Counter(data)
numbers = sum(v for (k,v) in counts.items() if k.isdigit())
print("Line count: {}".format(lines))
print("Word count: {}".format(words))
print("Number count: {}".format(numbers))
print("Comma count: {}".format(counts[',']))
print("Dot count: {}".format(counts['.']))

Categories

Resources