regex expression for delete python comment [duplicate] - python

This question already has answers here:
Script to remove Python comments/docstrings
(7 answers)
Closed 2 years ago.
I want to delete all comment in python file.
file like this:
--------------- comment.py ---------------
# this is comment line.
age = 18 # comment in line
msg1 = "I'm #1." # comment. there's a # in code.
msg2 = 'you are #2. ' + 'He is #3' # strange sign ' # ' in comment.
print('Waiting your answer')
I write many regex to extract all comments, some like this:
(?(?<=['"])(?<=['"])\s*#.*$|\s*#.*$)
get: #1." # comment. there's a # in code.
(?<=('|")[^\1]*\1)\s*#.*$|\s*#.*$
wrong. it's not 0-width in lookaround (?<=..)
But it dosn't work right. What's the right regex?
Could you help me, please?

You can try using tokenize instead of regex, as #OlvinRoght said, parsing code using regex maybe is bad idea in this case. As you can see here, you can try something like this to detect the comments:
import tokenize
fileObj = open('yourpath\comment.py', 'r')
for toktype, tok, start, end, line in tokenize.generate_tokens(fileObj.readline):
# we can also use token.tok_name[toktype] instead of 'COMMENT'
# from the token module
if toktype == tokenize.COMMENT:
print('COMMENT' + " " + tok)
Output:
COMMENT # -*- coding: utf-8 -*-
COMMENT # this is comment line.
COMMENT # comment in line
COMMENT # comment. there's a # in code.
COMMENT # strange sign ' # ' in comment.
Then, to get the expected result, that is python file without comments, you can try this:
nocomments=[]
for toktype, tok, start, end, line in tokenize.generate_tokens(fileObj.readline):
if toktype != tokenize.COMMENT:
nocomments.append(tok)
print(' '.join(nocomments))
Output:
age = 18
msg1 = "I'm #1."
msg2 = 'you are #2. ' + 'He is #3'
print ( 'Waiting your answer' )

Credit: https://gist.github.com/BroHui/aca2b8e6e6bdf3cb4af4b246c9837fa3
This will do. It uses tokenize. You can modify this code as per your use.
""" Strip comments and docstrings from a file.
"""
import sys, token, tokenize
def do_file(fname):
""" Run on just one file.
"""
source = open(fname)
mod = open(fname + ",strip", "w")
prev_toktype = token.INDENT
first_line = None
last_lineno = -1
last_col = 0
tokgen = tokenize.generate_tokens(source.readline)
for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
if 0: # Change to if 1 to see the tokens fly by.
print("%10s %-14s %-20r %r" % (
tokenize.tok_name.get(toktype, toktype),
"%d.%d-%d.%d" % (slineno, scol, elineno, ecol),
ttext, ltext
))
if slineno > last_lineno:
last_col = 0
if scol > last_col:
mod.write(" " * (scol - last_col))
if toktype == token.STRING and prev_toktype == token.INDENT:
# Docstring
mod.write("#--")
elif toktype == tokenize.COMMENT:
# Comment
mod.write("\n")
else:
mod.write(ttext)
prev_toktype = toktype
last_col = ecol
last_lineno = elineno
if __name__ == '__main__':
do_file("text.txt")
text.txt:
# this is comment line.
age = 18 # comment in line
msg1 = "I'm #1." # comment. there's a # in code.
msg2 = 'you are #2. ' + 'He is #3' # strange sign ' # ' in comment.
print('Waiting your answer')
Output:
age = 18
msg1 = "I'm #1."
msg2 = 'you are #2. ' + 'He is #3'
print('Waiting your answer')
Input:
msg1 = "I'm #1." # comment. there's a # in code. the regex#.*$ will match #1." # comment. there's a # in code. . Right match shoud be # comment. there's a # in code.
Output:
msg1 = "I'm #1."

Related

Specific words from a text file with PLY

I am making a lexical analyzer for determined words that are in a .txt file, for this I declare determined words reserved and I try to print only the selected words on the screen, but the result I get is that it takes all the words in the txt file and prints them. I've been following the tutorial and the official Ply documentation in http://www.dabeaz.com/ply/ply.html#ply_nn6 but I still don't achieve my goal. Could someone help me with this? Thank you very much.
import ply.lex as lex
import re
import os
import sys
reservadas = {
'if' : 'if',
'then' : 'then',
'else' : 'else',
'while' : 'while',
}
tokens = ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE',
'ODD','ASSIGN','NE','LT','LTE','GT','GTE',
'LPARENT', 'RPARENT','COMMA','SEMMICOLOM',
'DOT','UPDATE'
] + list(reservadas.values())
#tokens = tokens+reservadas
# reservadas = {
# 'begin':'BEGIN',
# 'end':'END',
# 'if':'IF',
# 'then':'THEN',
# 'while':'WHILE',
# 'do':'DO',
# 'call':'CALL',
# 'const':'CONST',
# 'int':'VAR',
# 'procedure':'PROCEDURE',
# 'out':'OUT',
# 'in':'IN',
# 'else':'ELSE'
# }
#tokens = tokens+list(reservadas.values())
t_ignore = '\t '
t_ignore_PLUS = r'\+'
t_ignore_MINUS = r'\-'
t_ignore_TIMES = r'\*'
t_ignore_DIVIDE = r'/'
t_ignore_ODD = r'ODD'
t_ignore_ASSIGN = r'='
t_ignore_NE = r'<>'
t_ignore_LT = r'<'
t_ignore_LTE = r'<='
t_ignore_GT = r'>'
t_ignore_GTE = r'>='
t_ignore_LPARENT = r'\('
t_ignore_RPARENT = r'\)'
t_ignore_COMMA = r','
t_ignore_SEMMICOLOM = r';'
t_ignore_DOT = r'\.'
t_ignore_UPDATE = r':='
def t_ID(t):
r'[a-zA-Z_][a-zA-Z_0-9]*'
t.type = reservadas.get(t.value,'ID') # Check for reserved words
return t
def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)
#dsfjksdlgjklsdgjsdgslxcvjlk-,.
def t_COMMENT(t):
r'\//.*'
r'\/*.*'
r'\*/.*'
pass
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
pass
def t_error(t):
print ("----- '%s'" % t.value[0])
t.lexer.skip(1)
while True:
tok = analizador.token()
if not tok : break
print (tok)
the output I get with the above code is:
LexToken(ID,'FSR',1,3)
LexToken(ID,'testing',1,7)
LexToken(ID,'sketch',1,15)
'---- '
'---- '
LexToken(ID,'Connect',3,28)
LexToken(ID,'one',3,36)
LexToken(ID,'end',3,40)
LexToken(ID,'of',3,44)
LexToken(ID,'FSR',3,47)
LexToken(ID,'to',3,51)
LexToken(ID,'V',3,55)
LexToken(ID,'the',3,58)
LexToken(ID,'other',3,62)
LexToken(ID,'end',3,68)
LexToken(ID,'to',3,72)
LexToken(ID,'Analog',3,75)
'---- '
.
.
.
.
LexToken(ID,'Serial',21,694)
LexToken(ID,'print',21,701)
----- '"'
LexToken(ID,'Analog',21,708)
LexToken(ID,'reading',21,715)
----- '"'
'---- '
LexToken(ID,'Serial',22,732)
LexToken(ID,'println',22,739)
LexToken(ID,'fsrReading',22,747)
'---- '
'---- '
LexToken(ID,'LEDbrightness',26,898)
LexToken(ID,'map',26,914)
LexToken(ID,'fsrReading',26,918)
'---- '
LexToken(ID,'analogWrite',28,996)
LexToken(ID,'LEDpin',28,1008)
LexToken(ID,'LEDbrightness',28,1016)
'---- '
LexToken(ID,'IF',29,1034)
'---- '
LexToken(if,'if',30,1038)
'---- '
LexToken(ID,'delay',31,1044)
'---- '
----- '}'
Press any key to continue . . .
my expectation for the exit would be this:
LexToken(ID,'IF',29,1034)
'---- '
LexToken(if,'if',30,1038)
I am analyzing a code of arduino, and all those words are comments, but I only need you to look for the conditionals if or IF, or other reserved words like for, but the main idea is that with a list of reserved words you identify them and show me only those selected
If you want to discard tokens that are not in your 'reserved' list, adjust the t_ID function like so:
def t_ID(t):
r'[a-zA-Z_][a-zA-Z_0-9]*'
reserved_type = reservadas.get(t.value, False)
if reserved_type:
t.type = reserved_type
return t # Return token with reserved type
return None # Discard non-reserved tokens
Additionally, your comment token function is probably misapplied here.
def t_COMMENT(t):
r'\//.*'
r'\/*.*'
r'\*/.*'
pass
You can't use multiple rules or span a rule over multiple strings like this. Because the docstring (which ply uses to get the regex) will only contain the very first string.
Secondly, I think the regex needs adjusting for comments, assuming you're tokenizing C or a C-like language. Particularly, it needs to account for the possibility that comments span multiple lines.
To fix, apply the following for dealing with comments:
def t_block_comment(tok):
r'/\*((.|\n))*?\*/'
tok.lexer.lineno += tok.value.count('\n')
return None # Discard block comments "/* comment */"
t_ignore_comment = r'//.*' # ignore inline comments "// comment"
You may also need to apply the regex multiline flag:
analizador = lex.lex(reflags=re.MULTILINE)
Lastly, your t_ignore_DIVIDE = r'/' may be preventing your comment rules from applying, too. Consider ordering this after the comment rules.

probleme with old python scrip

I'm still republishing a python 2.x script in 3.x.
at some point, the script must replace the "print" function with "disp" (equivalent in TI basic language) except that it no longer works because of parentheses. anyone have an idea to fix it?
The code :
elif (line.find("print ")==idepth(line)):
line = replace(line,"print ","Disp ")
if (line[-1] == ","):
line = line[:-1].rstrip() # Trailing , not legal for ti basic
thanks in advance
Edit: full code :
import sys
import os
import re
#GUI:
import tkinter as tk
from tkinter import filedialog
import tkinter.simpledialog
import tkinter.messagebox
GUI_MODE = False
TAB_REPLACE = " "
def main():
args = sys.argv[1:]
global GUI_MODE
print (args)
if (len(args)==0):
GUI_MODE=True
root = tk.Tk()
root.withdraw()
inp = filedialog.askopenfilename(title="Select a python script to convert")
if (inp==''):
print ("cancelled")
return 0
else:
inp=args[0]
#now input file is known
file=open(inp)
prog = file.read().replace("\r","").split("\n") # Lines of code
#Get a program name:
if (prog[0][:1]=="#" and (prog[0].upper().find("NAME:")>-1 or prog[0].upper().find("PROGRAM:")>-1 )):
outname=prog[0][prog[0].find(":")+1:]
else:
outname=tkinter.simpledialog.askstring("File name","What do you want to name this program?")
fixed = format(prog)
#Write the converted program in this folder:
print ("\n--Converted to TI-Basic code:--")
print (fixed)
print ("")
print ("Making output files: "+outname+".tib, "+outname+".8xp ...")
#Write the converted program in this folder:
outfile=open(outname+".tib","w")
outfile.write(fixed)
outfile.close()
#Write the .8xp program
outfile=open(outname+".8xp","w")
outfile.write(fixed)
outfile.close()
#assuming the compiler tibasic.exe is in this folder:
if (sys.platform[:3]=="win"):
if (os.system('tibasic.exe '+outname+'.tib')): #Returns non-0, error:
errReport("Error trying to run tibasic.exe! Make sure it is in the current folder.")
else:
if (os.system('wine tibasic.exe '+outname+'.tib')): #Returns non-0, error:
errReport("Error trying to run tibasic.exe! Make sure it is in the current folder, and w.i.n.e is installed.\n"+
"(See http://www.winehq.org/ for installer)")
os.remove(outname+".tib")
a=input("Done! Press enter to exit:") #pause
return 0
def format(linesArray): #converts lines from Python to ti-basic.
for i in range(len(linesArray)):
linesArray[i]=linesArray[i].replace("\t",TAB_REPLACE) #Important! see idepth()
i=0;
linesArray.append("") # 0-indent ending so blockAddEnd won't mess up.
while (i<len(linesArray)):
#Convert control blocks (if, for, while) from indented (python) to END (TI)
line = linesArray[i]
if isBlockStart(line,"for "):
linesArray = blockAddEnd(linesArray, i, "End")
elif isBlockStart(line,"if "):
linesArray = blockAddEnd(linesArray, i, "End")
elif isBlockStart(line,"while "):
linesArray = blockAddEnd(linesArray, i, "End")
elif isBlockStart(line,"repeat "): #not in python, but works on TI.
linesArray = blockAddEnd(linesArray, i, "End")
i+=1
# Don't need indentations anymore, do the rest of the conversions:
for i in range(len(linesArray)):
linesArray[i]=convLine(linesArray[i],i+1)
#Remove blanks:
for i in range(linesArray.count("")):
linesArray.remove("")
return "\n".join(linesArray)
def convLine(line,num): #Line by line conversion.
line = line.rstrip().lstrip() #trim indentation.
lnum = "Line "+str(num)+": "
if line.count("#"):
comment = line[line.find("#"):]
if (comment[0:6] == "#no-ti"):
#Does not work on the ti.
return ""
elif (comment[0:4] == "#ti:"):
# Only for ti:
return comment[4:]
else:
line = line[:line.find("#")] # take comment off code
#No imports in ti-basic!
if line.startswith("import ") or (line.count(" import ") and line.startswith("from ")):
return ""
#Errors and warnings:
if (toolong(line)):
print (lnum+"Warning: Text string too long to fit on a TI83/84 screen. The calculator screen is 16 characters wide, 8 characters high.")
if (line.find("\n")>-1):
print (lnum+"Warning: newline \\n is not allowed in TI-Basic.")
if (line.find("'''")>-1):
print (lnum+"Warning: ''' quotes are not allowed, you must use \" quotes on a single line for TI-Basic.")
if (replace(line,"pow(","")!=line):
errReport(lnum+"TI calculators don't have the pow() command, you must use a**b instead of pow(a,b).")
if (replace(line,"import ","")!=line):
print (lnum+"import ignored. No import statements in TI-Basic!")
return "" # ignore import statements!
if (replace(line,"-=","")!=line):
errReport(lnum+"The -= operator is not allowed.\nTry +=- or a=a+-number instead.")
if (replace(line,"def ","")!=line):
errReport(lnum+"Functions are not supported in TI-Basic! However, you can run another program with \"prgmPRGNAME\".")
if (replace(line,"//","")!=line):
print (lnum+"// division converted to / division: For int division, try int(a/b).")
line=replace(line,"//","/")
if (replace(line,"-","")!=line):
print (lnum+"Warning: The - is changed to negative sign on the calculator. If you wanted to subtract, use a+-b instead of a-b.")
if (replace(line,"open(","")!=line):
errReport(lnum+"Error: TI calculators can't use \"open(filename)\" in programs. To store text, try using variables STR0, STR1, ... STR9.")
if (replace(line,"%","")!=line):
errReport(lnum+"Error: TI83/84 calculators don't have Mod.\n Instead of a % b, try (a/b-int(a/b))*b instead.")
# Replace excess spaces, they cause errors in the calculator:
line=replace(line,", ",",")
line=replace(line," + ","+")
line=replace(line," - ","-")
line=replace(line," +- ","+-")
line=replace(line," * ","*")
line=replace(line," / ","/")
line=replace(line," == ","==")
line=replace(line," > ",">")
line=replace(line," < ","<")
line=replace(line," != ","!=")
#TODO: Arrays converted to lists?
line=replace(line,"theta","[theta]") # variable
line=replace(line,"**","^")
line=mathReplace(line)
#round, max, min already works.
line=replace(line,"float(","(")
line=replace(line,"len(","dim(")
line=replace(line,"math.pi","[pi]")
line=replace(line,"math.e","[e]")
line=replace(line,"eval(","expr(")
line=replace(line,"-","[neg]") # use +- instead of - operator.
line=replace(line,"==","=")
line=replace(line," and ","&")
line=replace(line," or ","|")
line=replace(line,"random.random()","rand")
line=replace(line,"random.randint","RandInt")
line=replace(line,"int(","iPart(")
if (replace(line,"input(","") != line):
line=inputConv(line,num)
if isBlockStart(line,"for "):
line=forConv(line,num)
elif (isBlockStart(line,"if ")):
line = replace(line,"if ","If ")
line = replace(line,":",":Then")
elif (isBlockStart(line,"while ")):
line = replace(line,"while ","While ")
line = replace(line,":","")
elif (isBlockStart(line,"repeat")):
line = replace(line,"repeat","Repeat")
line = replace(line,":","")
elif (isBlockStart(line,"else")):
line = replace(line,"else:","Else")
elif isBlockStart(line,"elif"):
errReport(lnum+"""Error: There is no else-if command on the TI83/84. However, you can use this instead:
if <condition>:
...
else:
if <condition>:
...
else:
...""")
elif (line.find("print ")==idepth(line)):
line = re.sub(r"print *\((.+)\)", r"disp \1", line)
if (line[-1] == ","):
line = line[:-1].rstrip() # Trailing , not legal for ti basic
elif (replace(line,"=","")!=line): #assignment is -> on the calculator.
eqspace = line.find("=")
line = line[eqspace+1:].rstrip().lstrip() + "->" + line[:eqspace].rstrip().lstrip() # sto arrow.
line = fixEQ(line)
return replace(line,"+[neg]","-") #lastly, switch back the negative.
def fixEQ(line):
# fix +=, *=, /=.
# A+=1 changes to 1->A+, so fix it now.
if (line[-1]=="+" or line[-1]=="*" or line[-1]=="/"):
line = line[:-1].rstrip()+line[-1] # remove any spaces in "a +" etc
pre = line[line.find("->")+2:]
#pre = pre[:-1].rstrip()+pre[-1]
line= pre + "("+line[:line.find("->")]+")"+ line[line.find("->"):-1]
return line
def inputConv(line,num):
lnum = "Line "+str(num)+": "
if (replace(line,"raw_input(","")!=line and line==replace(line,"=","")):
#raw_input not assigned to variable is like Pause.
return "Pause "
else:
var = line[:line.find("=")].rstrip().lstrip()
if (len(var)>1 and var!="theta"): # might be invalid.
print (lnum+"Warning: Program tries to store to variable \"%s\"." % var)
prompt = line[line.find("input(")+6:]
prompt = prompt[:prompt.find(")")]
# Now return the TI basic input with var spaces removed:
return "Input "+prompt+","+var
def forConv(line,num):
lnum = "Line "+str(num)+": "
# split "for i in range(...):"
var = line[line.find("for ")+4:line.find(" in range")]
#print var
part = line[line.find("in range(")+9:] # only "...) : "
part = part.rstrip(": ")[:-1] # remove extra " " or ":", remove last ).
#print "'"+line+"'"
out = part.split(",")
if len(out)==1:
return "For(%s,0,(%s)-1)" % (var, out[0])
elif len(out)==2:
return "For(%s,(%s),(%s)-1)" % (var, out[0], out[1])
elif len(out)==3:
return "For(%s,(%s),(%s)-1,(%s)" % (var, out[0], out[1], out[2])
else:
errReport(lnum+"Too many commas in for loop!")
return "couldn't convert: "+line
def blockAddEnd(lines, startLine, endText):
# Takes an array, line #, and end text.
# Adds end for that indentation block.
startInd = idepth(lines[startLine])
if idepth(lines[startLine+1]) <= startInd:
errReport("Expected indent after line "+str(startLine+1)+".")
i = startLine+1
#continue searching for the end while it's indented or it's an else line:
while idepth(lines[i]) > startInd or (isBlockStart(lines[i],"else")):
i+=1
# now insert.
lines.insert(i,endText)
return lines
def idepth(text):
# get indentation depth of line.
depth=0
line = text.replace("\t",TAB_REPLACE) #tab is 4 spaces.
while (line[:1]==" "):
line=line[1:]
depth+=1
return depth
def replace(text, changethis, tothis):
# replaces text, but not in quotes.
arr = text.split("\"")
for i in range(0,len(arr),2):
arr[i]=arr[i].replace(changethis, tothis)
return "\"".join(arr)
def toolong(text):
# checks for too long string:
arr = text.split("\"")
for i in range(1,len(arr),2):
#print arr[i]
if (len(arr[i]) > 16):
return True
return False
def parMatch(text,num): # given "(stuff()...()))", returns the parentheses block.
lnum = "Line "+str(num)+": "
for i in range(len(text)):
part = text[:i-1]
if (part.count("(")==part.count(")")):
return part[1:-1] #without outside parentheses.
errReport(lnum+"Invalid parentheses")
def isBlockStart(line, type):
# Check if the line is start of a <type> block.
# checks if it starts with <type>, and ends with ":".
# example: isBlockStart("for i in range(8) : ","for") is true.
return (line.find(type) == idepth(line) and line.rstrip(" ")[-1]==":")
def errReport(text):
print (text)
if (GUI_MODE):
root = tk.Tk()
root.withdraw()
tkinter.messagebox.showerror("Error",text)
sys.exit(1)
def mathReplace(line):
""" Replaces mathematical functions with ti basic functions. """
#Same function in both Python and TI-basic:
same = ["sin", "cos", "tan", "asin", "acos", "atan", "sinh", "cosh", "tanh", "asinh", "acosh", "atanh"]
line=replace(line,"math.sqrt(","[root]^2(")
line=replace(line,"math.fabs(","abs(")
for func in same:
line = replace(line,"math.%s(" % func,func)
line=replace(line,"math.log(","ln(")
line=replace(line,"math.exp(","e^(")
line=replace(line,"math.floor(","int(")
line=replace(line,"math.log10(","log(")
#same, but without "math." They might use
#from math import sqrt etc...
line=replace(line,"sqrt(","[root]^2(")
line=replace(line,"fabs(","abs(")
for func in same:
line = replace(line, "%s(" % func,func)
#(Redundant lines deleted)
line=replace(line,"log(","ln(")
line=replace(line,"exp(","e^(")
line=replace(line,"floor(","int(")
line=replace(line,"log10(","log(")
return line
if __name__ == '__main__': main()
it's the original code with changement propose below
You can use python's regular expression library to perform more advanced string matching and replacement than replace(). Specifically, re.sub(), which functions the same as replace() but takes regular expressions instead of simple strings.
Be sure to first import it with import re. You can then do the following:
elif (line.find("print ")==idepth(line)):
line = re.sub(r"print *\(\"(.+)\"\)", r"disp \1", line)
if (line[-1] == ","):
line = line[:-1].rstrip() # Trailing , not legal for ti basic
This will look for a string of the format "print("&1")" or "print ("&1")" and replace it with "disp &1", where &1 is the contents between the quotes.
Edit: You had initially specified that you wanted the output "without the quotes", but it seems you've edited that comment. If you want to include the quotes in the output, use this line instead:
line = re.sub(r"print *\((.+)\)", r"disp \1", line)

How to print number of characters based on terminal width that also resize?

Not sure if it's possible, but I was hoping to do something where I can print a hyphen for the width of the terminal on one line. If the window's width is resized, the amount of hyphens displayed would print accordingly.
This is a more elaborated version that allows printing whatever you want always according to the dimension of the terminal. You can also resize the terminal while nothing is being printed and the content will be resized accordingly.
I commented the code a little bit... but if you need I can be more explicit.
#!/usr/bin/env python2
import threading
import Queue
import time
import sys
import subprocess
from backports.shutil_get_terminal_size import get_terminal_size
printq = Queue.Queue()
interrupt = False
lines = []
def main():
ptt = threading.Thread(target=printer) # Turn the printer on
ptt.daemon = True
ptt.start()
# Stupid example of stuff to print
for i in xrange(1,100):
printq.put(' '.join([str(x) for x in range(1,i)])) # The actual way to send stuff to the printer
time.sleep(.5)
def split_line(line, cols):
if len(line) > cols:
new_line = ''
ww = line.split()
i = 0
while len(new_line) <= (cols - len(ww[i]) - 1):
new_line += ww[i] + ' '
i += 1
print len(new_line)
if new_line == '':
return (line, '')
return (new_line, ' '.join(ww[i:]))
else:
return (line, '')
def printer():
while True:
cols, rows = get_terminal_size() # Get the terminal dimensions
msg = '#' + '-' * (cols - 2) + '#\n' # Create the
try:
new_line = str(printq.get_nowait())
if new_line != '!##EXIT##!': # A nice way to turn the printer
# thread out gracefully
lines.append(new_line)
printq.task_done()
else:
printq.task_done()
sys.exit()
except Queue.Empty:
pass
# Build the new message to show and split too long lines
for line in lines:
res = line # The following is to split lines which are
# longer than cols.
while len(res) !=0:
toprint, res = split_line(res, cols)
msg += '\n' + toprint
# Clear the shell and print the new output
subprocess.check_call('clear') # Keep the shell clean
sys.stdout.write(msg)
sys.stdout.flush()
time.sleep(.5)
if __name__ == '__main__':
main()
Check this out:(it worked on windows and python3 )
import os
os.system('mode con: cols=100 lines=40')
input("Press any key to continue...")
os.system('mode con: cols=1000 lines=400')
input("Press any key to continue...")
This is doing exactly what you asked for... with a very small issue: when you make the shell smaller the cursor goes down of one line and the stuff that is above will stay there.... I can try to solve this issue... but the result will be more complicated.
I assumed you are using a unix system.
The code uses threads to be able to keep the line on the screen while doing other things. In this case just sleeping... Moreover, only using a thread is actually possible to have a "fast" answer to the change of the dimension of the terminal.
#!/usr/bin/env python2
import threading
import time
import sys
from backports.shutil_get_terminal_size import get_terminal_size
def main1():
ptt = threading.Thread(target=printer2)
ptt.daemon = True
ptt.start()
time.sleep(10)
def printer2():
while True:
cols, rows = get_terminal_size()
line = '-' * (cols - 2)
sys.stdout.write("\r" + '#' + line + '#')
sys.stdout.flush()
time.sleep(.5)

Python: For loop only loops over the first part of a txt file

Recently I had to make a script for my internship to check if a subnet occurs in a bunch of router/switch configs.
I've made a script that generates the output. Now I need a second script (I couldn't get it to work into one), that reads the output, if the subnet occurs write it to aanwezig.txt, if not write to nietAanwezig.txt.
A lot of other answers helped me to make this script and it works but it only executes for the first 48 IPs and there are over 2000...
The code of checkOutput.py:
def main():
file = open('../iprangesclean.txt', 'rb')
aanwezig = open('../aanwezig.txt', 'w')
nietAanwezig = open('../nietAanwezig.txt', 'w')
output = open('output.txt', 'rb')
for line in file:
originalLine = line
line.rstrip()
line = line.replace(' ', '')
line = line.replace('\n', '')
line = line.replace('\r', '')
one,two,three,four = line.split('.')
# 3Byte IP:
ipaddr = str(one) + "." + str(two) + "." + str(three)
counter = 1
found = 0
for lijn in output:
if re.search("\b{0}\b".format(ipaddr),lijn) and found == 0:
found = 1
else:
found = 2
print counter
counter= counter + 1
if found == 1:
aanwezig.write(originalLine)
print "Written to aanwezig"
elif found == 2:
nietAanwezig.write(originalLine)
print "Written to nietAanwezig"
found = 0
file.close()
aanwezig.close()
nietAanwezig.close()
main()
The format of iprangesclean.txt is like following:
10.35.6.0/24
10.132.42.0/24
10.143.26.0/24
10.143.30.0/24
10.143.31.0/24
10.143.32.0/24
10.35.7.0/24
10.143.35.0/24
10.143.44.0/24
10.143.96.0/24
10.142.224.0/24
10.142.185.0/24
10.142.32.0/24
10.142.208.0/24
10.142.70.0/24
and so on...
Part of output.txt (I can't give you everything because it has user information):
*name of device*.txt:logging 10.138.200.100
*name of device*.txt:access-list 37 permit 10.138.200.96 0.0.0.31
*name of device*.txt:access-list 38 permit 10.138.200.100
*name of device*.txt:snmp-server host 10.138.200.100 *someword*
*name of device*.txt:logging 10.138.200.100
Try this change:
for lijn in output:
found = 0 # put this here
if re.search("\b{0}\b".format(ipaddr),lijn) and found == 0:
found = 1
else:
found = 2
print counter
counter= counter + 1
"""Indent one level so it us in the for statement"""
if found == 1:
aanwezig.write(originalLine)
print "Written to aanwezig"
elif found == 2:
nietAanwezig.write(originalLine)
print "Written to nietAanwezig"
If I understand the problem correctly, this should guide you to the right direction. The if statement is currently not executed in the for statement. If this does solve your problem, then you don't need the found variable either. You can just have something like:
for counter, lijn in enumerate(output, 1):
if re.search("\b{0}\b".format(ipaddr),lijn):
aanwezig.write(originalLine)
print "Written to aanwezig"
else:
nietAanwezig.write(originalLine)
print "Written to nietAanwezig"
print counter
Please let me know if I have misunderstood the question.
Note I haven't tested the code above, try them out as a starting point.

Python how to add exception?

#martineau I have updated my codes, is this what you meant ? How do i handle KeyError instead of NameError ?
url = "http://app2.nea.gov.sg/anti-pollution-radiation-protection/air-pollution/psi/psi-readings-over-the-last-24-hours"
web_soup = soup(urllib2.urlopen(url))
table = web_soup.find(name="div", attrs={'class': 'c1'}).find_all(name="div")[4].find_all('table')[0]
data = {}
cur_time = datetime.datetime.strptime("12AM", "%I%p")
for tr_index, tr in enumerate(table.find_all('tr')):
if 'Time' in tr.text:
continue
for td_index, td in enumerate(tr.find_all('td')):
if not td_index:
continue
data[cur_time] = td.text.strip()
if td.find('strong'):
bold_time = cur_time
data[bold_time] = '20'
cur_time += datetime.timedelta(hours=1)
default_value = '20' # whatever you want it to be
try:
bold = data[bold_time]
except NameError:
bold_time = beforebold = beforebeforebold = default_value
# might want to set "bold" to something, too, if needed
else:
beforebold = data.get(bold_time - datetime.timedelta(hours=1))
beforebeforebold = data.get(bold_time - datetime.timedelta(hours=2))
This is where I print my data to do calculation.
print bold
print beforebold
print beforebeforebold
You need to add something to set data[bold_time]:
if td.find('strong'):
bold_time = cur_time
data[bold_time] = ????? # whatever it should be
cur_time += datetime.timedelta(hours=1)
This should avoid both the NameError and KeyError exceptions as long as the word strong is found. You still might want to code defensively and handle one or both of them gracefully. That what exception where meant to do, handle those exceptional cases that shouldn't happen...
I had read your previous post before it disappeared, and then I've read this one.
I find it a pity to use BeautifulSoup for your goal, because, from the code I see, I find its use complicated, and the fact is that regexes run roughly 10 times faster than BeautifulSoup.
Here's the code with only re, that furnishes the data you are interested in.
I know, there will people to say that HTML text can't be parsed by regexs. I know, I know... but I don't parse the text, I directly find the chunks of text that are interesting. The source code of the webpage of this site is apparently very well structured and it seems there is little risk of bugs. Moreover, tests and verification can be added to keep watch on the source code and to be instantly informed on the possible changings made by the webmaster in the webpage
import re
from httplib import HTTPConnection
hypr = HTTPConnection(host='app2.nea.gov.sg',
timeout = 300)
rekete = ('/anti-pollution-radiation-protection/'
'air-pollution/psi/'
'psi-readings-over-the-last-24-hours')
hypr.request('GET',rekete)
page = hypr.getresponse().read()
patime = ('PSI Readings.+?'
'width="\d+%" align="center">\r\n'
' *<strong>Time</strong>\r\n'
' *</td>\r\n'
'((?: *<td width="\d+%" align="center">'
'<strong>\d+AM</strong>\r\n'
' *</td>\r\n)+.+?)'
'width="\d+%" align="center">\r\n'
' *<strong>Time</strong>\r\n'
' *</td>\r\n'
'((?: *<td width="\d+%" align="center">'
'<strong>\d+PM</strong>\r\n'
' *</td>\r\n)+.+?)'
'PM2.5 Concentration')
rgxtime = re.compile(patime,re.DOTALL)
patline = ('<td align="center">\r\n'
' *<strong>' # next line = group 1
'(North|South|East|West|Central|Overall Singapore)'
'</strong>\r\n'
' *</td>\r\n'
'((?: *<td align="center">\r\n' # group 2 start
' *[.\d-]+\r\n' #
' *</td>\r\n)*)' # group 2 end
' *<td align="center">\r\n'
' *<strong style[^>]+>'
'([.\d-]+)' # group 3
'</strong>\r\n'
' *</td>\r\n')
rgxline = re.compile(patline)
rgxnb = re.compile('<td align="center">\r\n'
' *([.\d-]+)\r\n'
' *</td>\r\n')
m= rgxtime.search(page)
a,b = m.span(1) # m.group(1) contains the data AM
d = dict((mat.group(1),
rgxnb.findall(mat.group(2))+[mat.group(3)])
for mat in rgxline.finditer(page[a:b]))
a,b = m.span(2) # m.group(2) contains the data PM
for mat in rgxline.finditer(page[a:b]):
d[mat.group(1)].extend(rgxnb.findall(mat.group(2))+[mat.group(3)])
print 'last 3 values'
for k,v in d.iteritems():
print '%s : %s' % (k,v[-3:])

Categories

Resources