am trying to figure out how to use this nifty lib to parse BigIP config files...
the grammar should,be something like this:
stanza :: name { content }
name :: several words, might contain alphas nums dot dash underscore or slash
content:: stanza OR ZeroOrMore(printable characters)
To make things slightly more complicated, one exception:
If name starts with "rule ", then content cannot be "stanza"
I started with this:
from pyparsing import *
def parse(config):
def BNF():
"""
Example:
...
ltm virtual /Common/vdi.uis.test.com_80_vs {
destination /Common/1.2.3.4:80
http-class {
/Common/http2https
}
ip-protocol tcp
mask 255.255.255.255
profiles {
/Common/http { }
/Common/tcp { }
}
vlans-disabled
}
...
"""
lcb, rcb, slash, dot, underscore, dash = [c for c in '{}/._-']
name_word = Word(alphas + nums + dot + underscore + slash + dash)
name = OneOrMore(name_word).setResultsName("name")
stanza = Forward()
content = OneOrMore(stanza | ZeroOrMore(OneOrMore(Word(printables)))).setResultsName("content")
stanza << Group(name + lcb + content + rcb).setResultsName("stanza")
return stanza
return [x for x in BNF().scanString(config)]
The code above seems to lock up in some infinite loop. It is also missing my requirement for excluding looking for 'stanza" if "name" starts with "rule ".
OneOrMore(ZeroOrMore(OneOrMore(Word(printables))) will always match, thus leading to the infinite loop.
Also, printables includes a closing curly bracket, which gets consumed by the content term, and is no longer available for the stanza. (If your content can including a closing bracket, you need to define something to escape it, to distinguish a content bracket from a stanza bracket.)
To address the name rule, you need another content definition, one that doesn't include stanza, and a "rule rule".
def parse(config):
def BNF():
lcb, rcb, slash, dot, underscore, dash = [c for c in '{}/._-']
printables_no_rcb = Word(printables, excludeChars=rcb)
name_word = Word(alphas + nums + dot + underscore + slash + dash)
name = OneOrMore(name_word).setResultsName("name")
rule = Group(Literal('rule') + name).setResultsName("name")
rule_content = OneOrMore(printables_no_rcb).setResultsName("content")
stanza = Forward()
content = OneOrMore(stanza | OneOrMore(printables_no_rcb)).setResultsName("content")
stanza << Group(rule + lcb + rule_content + rcb | name + lcb + content + rcb).setResultsName("stanza")
return stanza
return [x for x in BNF().scanString(config)]
Related
After watching ArjanCodes video on dataclasses,
I've been trying to add variables to a python dataclass from a json config file to format the font style of a print function printT in Jupyterlab.
I use ANSI escapes for the formatting which doesn't work anymore if I import the variables to the dataclass. Instead of formatting the text, the ANSI code get's printed out.
# config.json
{
"lb" : "\n",
"solid_line" : "'___'*20 + config.lb",
"dotted_line" : "'---'*20 + config.lb",
"BOLD" : "\\033[1m",
"END" : "\\033[0m"
}
# config.py
from dataclasses import dataclass
import json
#dataclass
class PrintConfig:
lb : str
solid_line : str
dotted_line : str
BOLD : str
END : str
def read_config(config_file : str) -> PrintConfig:
with open(config_file, 'r') as file:
data = json.load(file)
return(PrintConfig(**data))
# helper.py
from config import read_config
config = read_config('config.json')
def printT(title,linebreak= True,addLine = True, lineType = config.solid_line,toDisplay = None):
'''
Prints a line break, the input text and a solid line.
Inputs:
title = as string
linebreak = True(default) or False; Adds a line break before printing the title
addLine = True(default) or False; Adds a line after printing the title
lineType = solid_line(default) or dotted_line; Defines line type
toDisplay = displays input, doesnt work with df.info(),because info executes during input
'''
if linebreak:
print(config.lb)
print(config.BOLD + title + config.END)
if addLine:
print(lineType)
if toDisplay is not None:
display(toDisplay)
# test.ipynb
from helper import printT
printT('Hello World')
Output
\033[1mHello World\033[0m
'___'*20 + config.lb
Desired result
Hello World
It works if I use eval if addLine: print(eval(lineType)) but I'd like to get deeper insights into the mechanics here. Is there a way of getting it to work without eval?
Also this part "solid_line" : "'___'*20 + config.lb" feels wrong.
Markdown as alternative to ANSI
Here's a basic configuration system. I won't add the output since it would need a screenshot but it works on bash/macos. Inspired by and [tip_colors_and_formatting]
And from (https://misc.flogisoft.com/bash/tip_colors_and_formatting):
In Bash, the character can be obtained with the following syntaxes:
\e
\033
\x1B
\e didn't work, so I went on to use to \x1B since that worked in the linked SE answer. \033 works too, I checked.
from dataclasses import dataclass
PREFIX = "\x1B["
#these aren't configurable, they are ANSI constants so probably
#not useful to put them in a config json
CODES = dict(
prefix = PREFIX,
bold = f"1",
reset = f"{PREFIX}0m",
red = "31",
green = "32",
)
#dataclass
class PrintConfig:
bold : bool = False
color : str = ""
def __post_init__(self):
# these are calculated variables, none of client code's
# business:
self.start = self.end = ""
start = ""
if self.bold:
start += CODES["bold"] + ";"
if self.color:
start += CODES[self.color.lower()] + ";"
if start:
self.end = CODES["reset"]
#add the escape prefix, then the codes and close with m
self.start = f"{CODES['prefix']}{start}".rstrip(";") + "m"
def print(self,v):
print(f"{self.start}{v}{self.end}")
normal = PrintConfig()
normal.print("Hello World")
bold = PrintConfig(bold=1)
print(f"{bold=}:")
bold.print(" Hello World")
boldred = PrintConfig(bold=1,color="red")
print(f"{boldred=}:")
boldred.print(" Hello bold red")
#this is how you would do it from json
green = PrintConfig(**dict(color="green"))
green.print(" Little Greenie")
#inspired from https://stackoverflow.com/a/287934
print("\n\ninspired by...")
CSI = "\x1B["
print(CSI+"31;40m" + "Colored Text" + CSI + "0m")
print(CSI+"1m" + "Colored Text" + CSI + "0m")
This string consists of an actual backslash followed by the digits 033, etc.
"BOLD" : "\\033[1m",
To turn on bold on an ansi terminal, you need an escape character (octal 33) followed by [1m. In Python, you can write those escape codes with a single backslash: "\033[1m". In a json file, you must provide the unicode codepoint of the escape character, \u001b. If the rest is in order, you'll see boldface.
"BOLD" : "\u001b[1m",
"END" : "\u001b[0m"
As for the eval part, you have a string containing the expression you need to evaluate. I assume you wrote it this way because you first tried without the double quotes, e.g. ,
"dotted_line" : '---'*20 + config.lb,
and you got a json syntax error. That's not surprising: Json files are data, not code, and they cannot incorporate expressions or variable references. Either place your config in a python file that you include instead of loading json, or move the dependencies to the code. Or both.
In a python file, config.py:
config = {
"lb": "\n",
"solid_line" : '___'*20,
...
In helper.py:
...
if addLine:
print(lineType + config.lb)
I am making a plugin for Sublime Text 3. It contacts my server in Java and receives a response in the form of a list of strings, that contains C code.
To display this code in a popup window you need to pass a string in HTML format to the method show_popup. Accordingly, all C-code characters that can be recognized by the parser as HTML entities should be replaced with their names (&name;) or numbers (&#number;). At first, I just replaced the most common characters with replace(), but it didn't always work out - Parse Error was displayed in the console:
Parse Error: <br> printf ("Decimals: %d %ld\n", 1977, 650000L);
<br> printf ("Preceding with blanks:&nbs
...
y</a></li><p><b>____________________________________________________</b></p>
</ul>
</body>
code: Unexpected character
I've tried to escape html entities with Python's html library:
import html
...
html.escape(string)
But Sublime doesn't see import and print in console that I was using a function without defining it - I guess he didn't see that I connected this library(Whyyy?). cgi.escape - is depricated, so I can't use this. I decided to write the function myself.
Then I saw a very interesting way to replace all the characters whose code is >127 and some other characters (&, <,>) with their numbers:
def escape_html (s):
out = ""
i = 0
while i < len(s):
c = s[i]
number = ord(c)
if number > 127 or c == '"' or c == '\'' or c == '<' or c == '>' or c == '&':
out += "&#"
out += str(number)
out += ";"
else:
out += c
i += 1
out = out.replace(" ", " ")
out = out.replace("\n", "<br>")
return out
This code works perfectly for displaying characters in a browser, but unfortunately it is not supported by Sublime Text 3.
As a result, I came to the conclusion that these characters should be replaced with their equivalent names:
def dumb_escape_html(s):
entities = [["&", "&"], ["<", "<"], [">", ">"], ["\n", "<br>"],
[" ", " "]]
for entity in entities:
s = s.replace(entity[0], entity[1])
return s
But again I faced an obstacle: not all names are supported in Sublime. And again an error Parse Error.
I also attach a link to JSON file, which contains answer from my server, content of which should be displayed in pop-up window: Example of data from sever (codeshare.io)
I absolutely do not understand, in what I make a mistake - I hope, that great programmers know how to solve my problem.
Edit. Minimal, Reproducible Example:
import sublime
import sublime_plugin
import string
import sys
import json
def get_func_name(line, column):
return "printf"
def get_const_data(func_name):
input_file = open ("PATH_TO_JSON/data_printf.json")
results = json.load(input_file)
return results
def dumb_escape_html(s):
entities = [["&", "&"], ["<", "<"], [">", ">"], ["\n", "<br>"],
[" ", " "]]
for entity in entities:
s = s.replace(entity[0], entity[1])
return s
def dumb_unescape_html(s):
entities = [["<", "<"], [">", ">"], ["<br>", "\n"],
[" ", " "], ["&", "&"]]
for entity in entities:
s = s.replace(entity[0], entity[1])
return s
class CoderecsysCommand(sublime_plugin.TextCommand):
def run(self, edit):
v = self.view
cur_line = v.substr(v.line(v.sel()[0]))
for sel in v.sel():
line_begin = v.rowcol(sel.begin())[0]
line_end = v.rowcol(sel.end())[0]
pos = v.rowcol(v.sel()[0].begin()) # (row, column)
try:
func_name = get_func_name(cur_line, pos[1]-1)
li_tree = ""
final_data = get_const_data(func_name)
for i in range(len(final_data)):
source = "source: " + final_data[i]["source"]
escaped = dumb_escape_html(final_data[i]["code"])
divider = "<b>____________________________________________________</b>"
li_tree += "<li><p>%s</p>%s <a href='%s'>Copy</a></li><p>%s</p>" %(source, escaped, escaped, divider)
# The html to be shown.
html = """
<body id=copy-multiline>
Examples of using <b>%s</b> function.
<ul>
%s
</ul>
</body>
""" %(func_name, li_tree)
self.view.show_popup(html, max_width=700, on_navigate=lambda example: self.copy_example(example, func_name, source))
except Exception as ex:
self.view.show_popup("<b style=\"color:#1c87c9\">CodeRec Error:</b> " + str(ex), max_width=700)
def copy_example(self, example, func_name, source):
# Copies the code to the clipboard.
unescaped = dumb_unescape_html(example)
unescaped = "// " + source + unescaped
sublime.set_clipboard(unescaped)
self.view.hide_popup()
sublime.status_message('Example of using ' + func_name + ' copied to clipboard !')
Running a script that writes a block of code into a textarea on a website. The first four "lines" write correctly until var url = "https: at which point the cursor jumps to the upper left of the text area and then continues writing. Each time the / character is come across, the cursor returns to the upper left before continuing writing.
How can I prevent the cursor being affected.
I have tried \/, \\/, {/}, and similar ways to escape the slash.
self.driver.find_element_by_id('textarea').send_keys(
'\nvar device = this\n\nvar url = "' + baseurl + '/' + firmwarename + '"\n\nvar conv = TR69.createConnection(device)\n\ntry {'+
'var uuid = java.util.UUID.randomUUID().toString().replace("-","") \n'+
What it physically does:
myhiddenurl.comSG9C130016_prod-mycomponent-5260-8a.27.103-combined-squashfs.img.gsdf"
var conv = TR69.createConnection(device)
var device = this
var url = "http:
Notice that lines 3 and 4 should be 1 and 2. And that line 1 is the continuation of what is now line 4.
Here is sample code which shows the issue...
firmwarename = "tchrisdemo-code-3-2-3.gsdf"
self.driver.get("https://futureoftesting.us/os.html")
self.driver.find_element_by_id('textarea').clear()
baseurl = "http://myhiddendomain.com/"
self.driver.find_element_by_id('textarea').send_keys(
'\nvar device = this\n\nvar url = "' + baseurl + '/' +
firmwarename + '"\n\nvar conv = TR69.createConnection(device)\n\ntry {'+
'var uuid = java.util.UUID.randomUUID().toString().replace("-","") \n'+
'var dlRequest = new TR69.DownloadRequest() \n' )
Line 5 of the code is the problem...
I've tried a variety of changes akin to your comment. The .format one allowed one allowed one "/" through then jumped to the top of the textarea and continued writing on the next one.
baseurl = r"http://myhiddendomain.com/"
url = "{}/{}".format(baseurl,firmwarename)
self.driver.find_element_by_id('textarea').send_keys(
'\nvar device = this\n\nvar url = "' + baseurl + firmwarename + '"\n\nvar conv = TR69.createConnection(device)\n\ntry {'+
'var uuid = java.util.UUID.randomUUID().toString().replace("-","") \n'+
'var dlRequest = new TR69.DownloadRequest() \nThis is formatting: ' + url)
which sadly generated this:
var dlRequest = new TR69.DownloadRequest()
This is formatting: http:/myhiddendomain.com/
var device = this
Not sure I fully get this solution.
It appears after more searching that the "jumping cursor" is a known problem and that the "devs have to fix it"
Backslash '\' is the escape character
So try \ / (space in between so it doesn't look like a V but you obv want without the space)
Try to parse procedure documentation comment block with pyparsing.
proc test {a b c} {
# Proc description
# :args: a - argument a, b - second argument
# c - third argument
# :return: nothing
puts $a
}
Tokens below created:
EOL = Suppress(pp.LineEnd())
line = pp.SkipTo(EOL)
commentStart = pp.Suppress('#')
commentLine = tclCommentStart + restOfLine
startDocsReturn = commentStart + pp.Keyword(":return:").suppress()
docsReturnLine = startDocsReturn + line
startDocsArgs = commentStart + pp.Keyword(":args:").suppress()
docsArgsLine = startDocsArgs + line
docsDescription = pp.OneOrMore(commentLine, stopOn=startDocsArgs).setParseAction(join_lines)
But it parse correctly if args block is one line. If it is multiline then adding OneOrMore to line token in docsArgsLine doesn't work because the line starts from the sharp character.
What is the correct expression to parse keyword multiline block whish start from special characters?
########################################
# some comment
# other comment
########################################
block1 {
value=data
some_value=some other kind of data
othervalue=032423432
}
block2 {
value=data
some_value=some other kind of data
othervalue=032423432
}
The best way would be to use an existing format such as JSON.
Here's an example parser for your format:
from lepl import (AnyBut, Digit, Drop, Eos, Integer, Letter,
NON_GREEDY, Regexp, Space, Separator, Word)
# EBNF
# name = ( letter | "_" ) , { letter | "_" | digit } ;
name = Word(Letter() | '_',
Letter() | '_' | Digit())
# words = word , space+ , word , { space+ , word } ;
# two or more space-separated words (non-greedy to allow comment at the end)
words = Word()[2::NON_GREEDY, ~Space()[1:]] > list
# value = integer | word | words ;
value = (Integer() >> int) | Word() | words
# comment = "#" , { all characters - "\n" } , ( "\n" | EOF ) ;
comment = '#' & AnyBut('\n')[:] & ('\n' | Eos())
with Separator(~Regexp(r'\s*')):
# statement = name , "=" , value ;
statement = name & Drop('=') & value > tuple
# suite = "{" , { comment | statement } , "}" ;
suite = Drop('{') & (~comment | statement)[:] & Drop('}') > dict
# block = name , suite ;
block = name & suite > tuple
# config = { comment | block } ;
config = (~comment | block)[:] & Eos() > dict
from pprint import pprint
pprint(config.parse(open('input.cfg').read()))
Output:
[{'block1': {'othervalue': 32423432,
'some_value': ['some', 'other', 'kind', 'of', 'data'],
'value': 'data'},
'block2': {'othervalue': 32423432,
'some_value': ['some', 'other', 'kind', 'of', 'data'],
'value': 'data'}}]
Well, the data looks pretty regular. So you could do something like this (untested):
class Block(object):
def __init__(self, name):
self.name = name
infile = open(...) # insert filename here
current = None
blocks = []
for line in infile:
if line.lstrip().startswith('#'):
continue
elif line.rstrip().endswith('{'):
current = Block(line.split()[0])
elif '=' in line:
attr, value = line.strip().split('=')
try:
value = int(value)
except ValueError:
pass
setattr(current, attr, value)
elif line.rstrip().endswith('}'):
blocks.append(current)
The result will be a list of Block instances, where block.name will be the name ('block1', 'block2', etc.) and other attributes correspond to the keys in your data. So, blocks[0].value will be 'data', etc. Note that this only handles strings and integers as values.
(there is an obvious bug here if your keys can ever include 'name'. You might like to change self.name to self._name or something if this can happen)
HTH!
If you do not really mean parsing, but rather text processing and the input data is really that regular, then go with John's solution. If you really need some parsing (like there are some a little more complex rules to the data that you are getting), then depending on the amount of data that you need to parse, I'd go either with pyparsing or simpleparse. I've tried both of them, but actually pyparsing was too slow for me.
You might look into something like pyparsing.
Grako (for grammar compiler) allows to separate the input format specification (grammar) from its interpretation (semantics). Here's grammar for your input format in Grako's variety of EBNF:
(* a file contains zero or more blocks *)
file = {block} $;
(* a named block has at least one assignment statement *)
block = name '{' {assignment}+ '}';
assignment = name '=' value NEWLINE;
name = /[a-z][a-z0-9_]*/;
value = integer | string;
NEWLINE = /\n/;
integer = /[0-9]+/;
(* string value is everything until the next newline *)
string = /[^\n]+/;
To install grako, run pip install grako. To generate the PEG parser from the grammar:
$ grako -o config_parser.py Config.ebnf
To convert stdin into json using the generated config_parser module:
#!/usr/bin/env python
import json
import string
import sys
from config_parser import ConfigParser
class Semantics(object):
def file(self, ast):
# file = {block} $
# all blocks should have unique names within the file
return dict(ast)
def block(self, ast):
# block = name '{' {assignment}+ '}'
# all assignment statements should use unique names
return ast[0], dict(ast[2])
def assignment(self, ast):
# assignment = name '=' value NEWLINE
# value = integer | string
return ast[0], ast[2] # name, value
def integer(self, ast):
return int(ast)
def string(self, ast):
return ast.strip() # remove leading/trailing whitespace
parser = ConfigParser(whitespace='\t\n\v\f\r ', eol_comments_re="#.*?$")
ast = parser.parse(sys.stdin.read(), rule_name='file', semantics=Semantics())
json.dump(ast, sys.stdout, indent=2, sort_keys=True)
Output
{
"block1": {
"othervalue": 32423432,
"some_value": "some other kind of data",
"value": "data"
},
"block2": {
"othervalue": 32423432,
"some_value": "some other kind of data",
"value": "data"
}
}