pattern to dictionary of lists Python

pattern to dictionary of lists Python - python

I have a file like this
module1 instance1(.wire1 (connectionwire1), .wire2 (connectionwire2),.... ,wire100 (connectionwire100)) ; module 2 instance 2(.wire1 (newconnectionwire1), .wire2 (newconnectionwire2),.... ,wire99 (newconnectionwire99))
Ther wires are repeated along modules. There can be many modules.
I want to build a dictionary like this (not every wire in 2nd module is a duplicate).
[wire1:[(module1, instance1, connection1), (module2, instance2,newconnection1), wire2:[(module1 instance1 connection2),(module2, instance2,newconnection1)]... wire99:module2, instance2, connection99), ]
I am splitting the string on ; then splitting on , and then ( to get wire and connectionwire strings . I am not sure how to fill the data structure though so the wire is the key and module, instancename and connection are elements.
Goal- get this datastructure- [ wire: (module, instance, connectionwire) ]
filedata=file.read()
realindex=list(find_pos(filedata,';'))
tempindex=0
for l in realindex:
module=filedata[tempindex:l]
modulename=module.split()[0]
openbracketindex=module.find("(")
closebracketindex=module.strip("\n").find(");")
instancename=module[:openbracketindex].split()[1]
tempindex=l
tempwires=module[openbracketindex:l+1]
#got to split wires on commas
for tempw in tempwires.split(","):
wires=tempw
listofwires.append(wires)

Using the re module.
import re
from collections import defaultdict
s = "module1 instance1(.wire1 (connectionwire1), .wire2 (connectionwire2), .wire100 (connectionwire100)) ; module2 instance2(.wire1 (newconnectionwire1), .wire2 (newconnectionwire2), wire99 (newconnectionwire99))'
d = defaultdict(list)
module_pattern = r'(\w+)\s(\w+)\(([^;]+)'
mod_rex = re.compile(module_pattern)
wire_pattern = r'\.(\w+)\s\(([^\)]+)'
wire_rex = re.compile(wire_pattern)
for match in mod_rex.finditer(s):
#print '\n'.join(match.groups())
module, instance, wires = match.groups()
for match in wire_rex.finditer(wires):
wire, connection = match.groups()
#print '\t', wire, connection
d[wire].append((module, instance, connection))
for k, v in d.items():
print k, ':', v
Produces
wire1 : [('module1', 'instance1', 'connectionwire1'), ('module2', 'instance2', 'newconnectionwire1')]
wire2 : [('module1', 'instance1', 'connectionwire2'), ('module2', 'instance2', 'newconnectionwire2')]
wire100 : [('module1', 'instance1', 'connectionwire100')]

Answer provided by wwii using re is correct. I'm sharing an example of how you can solve your problem using pyparsing module which makes parsing human readable and easy to do.
from pyparsing import Word, alphanums, Optional, ZeroOrMore, Literal, Group, OneOrMore
from collections import defaultdict
s = 'module1 instance1(.wire1 (connectionwire1), .wire2 (connectionwire2), .wire100 (connectionwire100)) ; module2 instance2(.wire1 (newconnectionwire1), .wire2 (newconnectionwire 2), .wire99 (newconnectionwire99))'
connection = Word(alphanums)
wire = Word(alphanums)
module = Word(alphanums)
instance = Word(alphanums)
dot = Literal(".").suppress()
comma = Literal(",").suppress()
lparen = Literal("(").suppress()
rparen = Literal(")").suppress()
semicolon = Literal(";").suppress()
wire_connection = Group(dot + wire("wire") + lparen + connection("connection") + rparen + Optional(comma))
wire_connections = Group(OneOrMore(wire_connection))
module_instance = Group(module("module") + instance("instance") + lparen + ZeroOrMore(wire_connections("wire_connections")) + rparen + Optional(semicolon))
module_instances = OneOrMore(module_instance)
results = module_instances.parseString(s)
# create a dict
d = defaultdict(list)
for r in results:
m = r['module']
i = r['instance']
for wc in r['wire_connections']:
w = wc['wire']
c = wc['connection']
d[w].append((m, i, c))
print d
Output:
defaultdict(<type 'list'>, {'wire1': [('module1', 'instance1', 'connectionwire1'), ('module2', 'instance2', 'newconnectionwire1')], 'wire2': [('module1', 'instance1', 'connectionwire2'), ('module2', 'instance2', 'newconnectionwire2')], 'wire100': [('module1', 'instance1', 'connectionwire100')], 'wire99': [('module2', 'instance2', 'newconnectionwire99')]})

Related

PyParsing: parse if not a keyword

I am trying to parse a file as follows:
testp.txt
title = Test Suite A;
timeout = 10000
exp_delay = 500;
log = TRUE;
sect
{
type = typeA;
name = "HelloWorld";
output_log = "c:\test\out.log";
};
sect
{
name = "GoodbyeAll";
type = typeB;
comm1_req = 0xDEADBEEF;
comm1_resp = (int, 1234366);
};
The file first contains a section with parameters and then some sects. I can parse a file containing just parameters and I can parse a file just containing sects but I can't parse both.
from pyparsing import *
from pathlib import Path
command_req = Word(alphanums)
command_resp = "(" + delimitedList(Word(alphanums)) + ")"
kW = Word(alphas+'_', alphanums+'_') | command_req | command_resp
keyName = ~Literal("sect") + Word(alphas+'_', alphanums+'_') + FollowedBy("=")
keyValue = dblQuotedString.setParseAction( removeQuotes ) | OneOrMore(kW,stopOn=LineEnd())
param = dictOf(keyName, Suppress("=")+keyValue+Optional(Suppress(";")))
node = Group(Literal("sect") + Literal("{") + OneOrMore(param) + Literal("};"))
final = OneOrMore(node) | OneOrMore(param)
param.setDebug()
p = Path(__file__).with_name("testp.txt")
with open(p) as f:
try:
x = final.parseFile(f, parseAll=True)
print(x)
print("...")
dx = x.asDict()
print(dx)
except ParseException as pe:
print(pe)
The issue I have is that param matches against sect so it expects a =. So I tried putting in ~Literal("sect") in keyName but that just leads to another error:
Exception raised:Found unwanted token, "sect", found '\n' (at char 188), (line:4, col:56)
Expected end of text, found 's' (at char 190), (line:6, col:1)
How do I get it use one parse method for sect and another (param) if not sect?
My final goal would be to have the whole lot in a Dict with the global params and sects included.
EDIT
Think I've figured it out:
This line...
final = OneOrMore(node) | OneOrMore(param)
...should be:
final = ZeroOrMore(param) + ZeroOrMore(node)
But I wonder if there is a more structured way (as I'd ultimately like a dict)?

Dealing with ZeroOrMore in pyparsing

I'm trying to parse pactl list with pyparsing: So far all parse is working correctly but I cannot make ZeroOrMore to work correctly.
I can find foo: or foo: bar and try to deal with that with ZeroOrMore but it doesn't work, I have to add special case "Argument:" to find results without value, but there're Argument: foo results (with value) so it will not work, and I expect any other property to exist without value.
With this definition, and a fixed pactl list output:
#!/usr/bin/env python
#
# parsing pactl list
#
from pyparsing import *
import os
from subprocess import check_output
import sys
data = '''
Module #6
Argument:
Name: module-alsa-card
Usage counter: 0
Properties:
module.author = "Lennart Poettering"
module.description = "ALSA Card"
module.version = "14.0-rebootstrapped"
'''
indentStack = [1]
stmt = Forward()
identifier = Word(alphanums+"-_.")
sect_def = Group(Group(identifier) + Suppress("#") + Group(Word(nums)))
inner_section = indentedBlock(stmt, indentStack)
section = (sect_def + inner_section)
value = Group(Group(Combine(OneOrMore(identifier|White(' ')))) + Suppress(":") + Group(Combine(ZeroOrMore(Word(alphanums+'-/=_".')|White(' ', max=1)))))
prop_name = Literal("Properties:")
prop_section = indentedBlock(stmt, indentStack)
prop_val = Group(Group(identifier) + Suppress("=") + Group(Combine(OneOrMore(Word(alphanums+'-"/.')|White(' \t')))))
prop = (prop_name + prop_section)
stmt << ( section | prop | ("Argument:") | value | prop_val )
syntax = OneOrMore(stmt)
parseTree = syntax.parseString(data)
parseTree.pprint()
This gets:
$ ./pactl.py
Module #6
Argument:
Name: module-alsa-card
Usage counter: 0
Properties:
module.author = "Lennart Poettering"
module.description = "ALSA Card"
module.version = "14.0-rebootstrapped"
[[['Module'], ['6']],
[['Argument:'],
[[['Name'], ['module-alsa-card']]],
[[['Usage counter'], ['0']]],
['Properties:',
[[[['module.author'], ['"Lennart Poettering"']]],
[[['module.description'], ['"ALSA Card"']]],
[[['module.version'], ['"14.0-rebootstrapped"']]]]]]]
So far so good, but removing special case for Argument: it gets into error, as ZeroOrMore doesn't behave as expected:
#!/usr/bin/env python
#
# parsing pactl list
#
from pyparsing import *
import os
from subprocess import check_output
import sys
data = '''
Module #6
Argument:
Name: module-alsa-card
Usage counter: 0
Properties:
module.author = "Lennart Poettering"
module.description = "ALSA Card"
module.version = "14.0-rebootstrapped"
'''
indentStack = [1]
stmt = Forward()
identifier = Word(alphanums+"-_.")
sect_def = Group(Group(identifier) + Suppress("#") + Group(Word(nums)))
inner_section = indentedBlock(stmt, indentStack)
section = (sect_def + inner_section)
value = Group(Group(Combine(OneOrMore(identifier|White(' ')))) + Suppress(":") + Group(Combine(ZeroOrMore(Word(alphanums+'-/=_".')|White(' ', max=1))))).setDebug()
prop_name = Literal("Properties:")
prop_section = indentedBlock(stmt, indentStack)
prop_val = Group(Group(identifier) + Suppress("=") + Group(Combine(OneOrMore(Word(alphanums+'-"/.')|White(' \t')))))
prop = (prop_name + prop_section)
stmt << ( section | prop | value | prop_val )
syntax = OneOrMore(stmt)
parseTree = syntax.parseString(data)
parseTree.pprint()
This results in:
$ ./pactl.py
Module #6
Argument:
Name: module-alsa-card
Usage counter: 0
Properties:
module.author = "Lennart Poettering"
module.description = "ALSA Card"
module.version = "14.0-rebootstrapped"
Match Group:({Group:(Combine:({{W:(ABCD...) | <SP>}}...)) Suppress:(":") Group:(Combine:([{W:(ABCD...) | <SP>}]...))}) at loc 19(3,9)
Matched Group:({Group:(Combine:({{W:(ABCD...) | <SP>}}...)) Suppress:(":") Group:(Combine:([{W:(ABCD...) | <SP>}]...))}) -> [[['Argument'], ['Name']]]
Match Group:({Group:(Combine:({{W:(ABCD...) | <SP>}}...)) Suppress:(":") Group:(Combine:([{W:(ABCD...) | <SP>}]...))}) at loc 1(2,1)
Exception raised:Expected ":", found '#' (at char 8), (line:2, col:8)
Traceback (most recent call last):
File "/home/alberto/projects/node/pacmd_list_json/./pactl.py", line 55, in <module>
parseTree = syntax.parseString(partial)
File "/usr/local/lib/python3.9/site-packages/pyparsing.py", line 1955, in parseString
raise exc
File "/usr/local/lib/python3.9/site-packages/pyparsing.py", line 6336, in checkUnindent
raise ParseException(s, l, "not an unindent")
pyparsing.ParseException: Expected {{Group:({Group:(W:(ABCD...)) Suppress:("#") Group:(W:(0123...))}) indented block} | {"Properties:" indented block} | Group:({Group:(Combine:({{W:(ABCD...) | <SP>}}...)) Suppress:(":") Group:(Combine:([{W:(ABCD...) | <SP>}]...))}) | Group:({Group:(W:(ABCD...)) Suppress:("=") Group:(Combine:({{W:(ABCD...) | <SP><TAB>}}...))})}, found ':' (at char 41), (line:4, col:13)
See from setDebug value grammar ZeroOrMore is getting the tokens from next line [[['Argument'], ['Name']]]
I tried LineEnd() and other tricks but none works.
Any idea on how to deal with ZeroOrMore to stop on LineEnd() or without special cases?
NOTE: Real output can be retrieved using:
env = os.environ.copy()
env['LANG'] = 'C'
data = check_output(
['pactl', 'list'], universal_newlines=True, env=env)

indentedBlock is not the easiest pyparsing element to work with. But there are a few things that you are doing that are getting in your way.
To debug this, I broke down some of your more complex expressions, use setName() to give them names, and then added .setDebug(). Like this:
identifier = Word(alphas, alphanums+"-_.").setName("identifier").setDebug()
This will tell pyparsing to output a message whenever this expression is about to be matched, if it matched successfully, or if not, the exception that was raised.
Match identifier at loc 1(2,1)
Matched identifier -> ['Module']
Match identifier at loc 15(3,5)
Matched identifier -> ['Argument']
Match identifier at loc 15(3,5)
Matched identifier -> ['Argument']
Match identifier at loc 23(3,13)
Exception raised:Expected identifier, found ':' (at char 23), (line:3, col:13)
It looks like these expressions are messing up the indentedBlock matching, by processing whitespace that should be indentation space:
Combine(OneOrMore(Word(alphanums+'-"/.')|White(' \t')))
The " character in the Word and the whitespace lead me to believe you are trying to match quoted strings. I replaced this expression with:
Combine(OneOrMore(Word(alphas, alphanums+'-/.') | quotedString))
You also need to take care not to read past the end of the line, or you'll also mess up the indentedBlock indentation tracking. I added this expression for a newline at the top:
NL = LineEnd()
and then used it as the stopOn argument to OneOrMore and ZeroOrMore:
prop_val_value = Combine(OneOrMore(Word(alphas, alphanums+'-/.') | quotedString(), stopOn=NL)).setName("prop_val_value")#.setDebug()
prop_val = Group(identifier + Suppress("=") + Group(prop_val_value)).setName("prop_val")#.setDebug()
Here is the parser I ended up with:
indentStack = [1]
stmt = Forward()
NL = LineEnd()
identifier = Word(alphas, alphanums+"-_.").setName("identifier").setDebug()
sect_def = Group(Group(identifier) + Suppress("#") + Group(Word(nums))).setName("sect_def")#.setDebug()
inner_section = indentedBlock(stmt, indentStack)
section = (sect_def + inner_section)
#~ value = Group(Group(Combine(OneOrMore(identifier|White(' ')))) + Suppress(":") + Group(Combine(ZeroOrMore(Word(alphanums+'-/=_".')|White(' ', max=1))))).setDebug()
value_label = originalTextFor(OneOrMore(identifier)).setName("value_label")#.setDebug()
value = Group(value_label
+ Suppress(":")
+ Optional(~NL + Group(Combine(ZeroOrMore(Word(alphanums+'-/=_.') | quotedString(), stopOn=NL))))).setName("value")#.setDebug()
prop_name = Literal("Properties:")
prop_section = indentedBlock(stmt, indentStack)
#~ prop_val = Group(Group(identifier) + Suppress("=") + Group(Combine(OneOrMore(Word(alphanums+'-"/.')|White(' \t')))))
prop_val_value = Combine(OneOrMore(Word(alphas, alphanums+'-/.') | quotedString(), stopOn=NL)).setName("prop_val_value")#.setDebug()
prop_val = Group(identifier + Suppress("=") + Group(prop_val_value)).setName("prop_val")#.setDebug()
prop = (prop_name + prop_section).setName("prop")#.setDebug()
stmt << ( section | prop | value | prop_val )
Which gives this:
[[['Module'], ['6']],
[[['Argument']],
[['Name', ['module-alsa-card']]],
[['Usage counter', ['0']]],
['Properties:',
[[['module.author', ['"Lennart Poettering"']]],
[['module.description', ['"ALSA Card"']]],
[['module.version', ['"14.0-rebootstrapped"']]]]]]]

Visualizing nested function calls in python

I need a way to visualize nested function calls in python, preferably in a tree-like structure. So, if I have a string that contains f(g(x,h(y))), I'd like to create a tree that makes the levels more readable. For example:
f()
|
g()
/ \
x h()
|
y
Or, of course, even better, a tree plot like the one that sklearn.tree.plot_tree creates.
This seems like a problem that someone has probably solved long ago, but it has so far resisted my attempts to find it. FYI, this is for the visualization of genetic programming output that tends to have very complex strings like this.
thanks!
update:
toytree and toyplot get pretty close, but just not quite there:
This is generated with:
import toytree, toyplot
mystyle = {"layout": 'down','node_labels':True}
s = '((x,(y)));'
toytree.tree(s).draw(**mystyle);
It's close, but the node labels aren't strings...
Update 2:
I found another potential solution that gets me closer in text form:
https://rosettacode.org/wiki/Visualize_a_tree#Python
tree2 = Node('f')([
Node('g')([
Node('x')([]),
Node('h')([
Node('y')([])
])
])
])
print('\n\n'.join([drawTree2(True)(False)(tree2)]))
This results in the following:
That's right, but I had to hand convert my string to the Node notation the drawTree2 function needs.

Here's a solution using pyparsing and asciitree. This can be adapted to parse just about anything and to generate whatever data structure is required for plotting. In this case, the code generates nested dictionaries suitable for input to asciitree.
#!/usr/bin/env python3
from collections import OrderedDict
from asciitree import LeftAligned
from pyparsing import Suppress, Word, alphas, Forward, delimitedList, ParseException, Optional
def grammar():
lpar = Suppress('(')
rpar = Suppress(')')
identifier = Word(alphas).setParseAction(lambda t: (t[0], {}))
function_name = Word(alphas)
expr = Forward()
function_arg = delimitedList(expr)
function = (function_name + lpar + Optional(function_arg) + rpar).setParseAction(lambda t: (t[0] + '()', OrderedDict(t[1:])))
expr << (function | identifier)
return function
def parse(expr):
g = grammar()
try:
parsed = g.parseString(expr, parseAll=True)
except ParseException as e:
print()
print(expr)
print(' ' * e.loc + '^')
print(e.msg)
raise
return dict([parsed[0]])
if __name__ == '__main__':
expr = 'f(g(x,h(y)))'
tree = parse(expr)
print(LeftAligned()(tree))
Output:
f()
+-- g()
+-- x
+-- h()
+-- y
Edit
With some tweaks, you can build an edge list suitable for plotting in your favorite graph library (igraph example below).
#!/usr/bin/env python3
import igraph
from pyparsing import Suppress, Word, alphas, Forward, delimitedList, ParseException, Optional
class GraphBuilder(object):
def __init__(self):
self.labels = {}
self.edges = []
def add_edges(self, source, targets):
for target in targets:
self.add_edge(source, target)
return source
def add_edge(self, source, target):
x = self.labels.setdefault(source, len(self.labels))
y = self.labels.setdefault(target, len(self.labels))
self.edges.append((x, y))
def build(self):
g = igraph.Graph()
g.add_vertices(len(self.labels))
g.vs['label'] = sorted(self.labels.keys(), key=lambda l: self.labels[l])
g.add_edges(self.edges)
return g
def grammar(gb):
lpar = Suppress('(')
rpar = Suppress(')')
identifier = Word(alphas)
function_name = Word(alphas).setParseAction(lambda t: t[0] + '()')
expr = Forward()
function_arg = delimitedList(expr)
function = (function_name + lpar + Optional(function_arg) + rpar).setParseAction(lambda t: gb.add_edges(t[0], t[1:]))
expr << (function | identifier)
return function
def parse(expr, gb):
g = grammar(gb)
g.parseString(expr, parseAll=True)
if __name__ == '__main__':
expr = 'f(g(x,h(y)))'
gb = GraphBuilder()
parse(expr, gb)
g = gb.build()
layout = g.layout('tree', root=len(gb.labels)-1)
igraph.plot(g, layout=layout, vertex_size=30, vertex_color='white')

Convert lvm.conf to python dict using pyparsing

I'm trying to convert lvm.conf to python (JSON like) object.
LVM (Logical Volume Management) configuration file looks like this:
# Configuration section config.
# How LVM configuration settings are handled.
config {
# Configuration option config/checks.
# If enabled, any LVM configuration mismatch is reported.
# This implies checking that the configuration key is understood by
# LVM and that the value of the key is the proper type. If disabled,
# any configuration mismatch is ignored and the default value is used
# without any warning (a message about the configuration key not being
# found is issued in verbose mode only).
checks = 1
# Configuration option config/abort_on_errors.
# Abort the LVM process if a configuration mismatch is found.
abort_on_errors = 0
# Configuration option config/profile_dir.
# Directory where LVM looks for configuration profiles.
profile_dir = "/etc/lvm/profile"
}
local {
}
log {
verbose=0
silent=0
syslog=1
overwrite=0
level=0
indent=1
command_names=0
prefix=" "
activation=0
debug_classes=["memory","devices","activation","allocation","lvmetad","metadata","cache","locking","lvmpolld","dbus"]
}
I'd like to get Python dict, like this:
{ "section_name"":
{"value1" : 1,
"value2" : "some_string",
"value3" : [list, of, strings]}... and so on.}
The parser function:
def parseLvmConfig2(path="/etc/lvm/lvm.conf"):
try:
EQ, LBRACE, RBRACE, LQ, RQ = map(pp.Suppress, "={}[]")
comment = pp.Suppress("#") + pp.Suppress(pp.restOfLine)
configSection = pp.Word(pp.alphas + "_") + LBRACE
sectionKey = pp.Word(pp.alphas + "_")
sectionValue = pp.Forward()
entry = pp.Group(sectionKey + EQ + sectionValue)
real = pp.Regex(r"[+-]?\d+\.\d*").setParseAction(lambda x: float(x[0]))
integer = pp.Regex(r"[+-]?\d+").setParseAction(lambda x: int(x[0]))
listval = pp.Regex(r'(?:\[)(.*)?(?:\])').setParseAction(lambda x: eval(x[0]))
pp.dblQuotedString.setParseAction(pp.removeQuotes)
struct = pp.Group(pp.ZeroOrMore(entry) + RBRACE)
sectionValue << (pp.dblQuotedString | real | integer | listval)
parser = pp.ZeroOrMore(configSection + pp.Dict(struct))
res = parser.parseFile(path)
print(res)
except (pp.ParseBaseException, ) as e:
print("lvm.conf bad format {0}".format(e))
The result is messy and the question is, how to make pyparsing do the job, without additional logic?
UPDATE(SOLVED):
For anyone who wants to understand pyparsing better, please check #PaulMcG explanation below. (Thanks for pyparsing, Paul! )
import pyparsing as pp
def parseLvmConf(conf="/etc/lvm/lvm.conf", res_type="dict"):
EQ, LBRACE, RBRACE, LQ, RQ = map(pp.Suppress, "={}[]")
comment = "#" + pp.restOfLine
integer = pp.nums
real = pp.Word(pp.nums + "." + pp.nums)
pp.dblQuotedString.setParseAction(pp.removeQuotes)
scalar_value = real | integer | pp.dblQuotedString
list_value = pp.Group(LQ + pp.delimitedList(scalar_value) + RQ)
key = pp.Word(pp.alphas + "_", pp.alphanums + '_')
key_value = pp.Group(key + EQ + (scalar_value | list_value))
struct = pp.Forward()
entry = key_value | pp.Group(key + struct)
struct <<= pp.Dict(LBRACE + pp.ZeroOrMore(entry) + RBRACE)
parser = pp.Dict(pp.ZeroOrMore(entry))
parser.ignore(comment)
try:
#return lvm.conf as dict
if res_type == "dict":
return parser.parseFile(conf).asDict()
# return lvm.conf as list
elif res_type == "list":
return parser.parseFile(conf).asList()
else:
#return lvm.conf as ParseResults
return parser.parseFile(conf)
except (pp.ParseBaseException,) as e:
print("lvm.conf bad format {0}".format(e))

Step 1 should always be to at least rough out a BNF for the format you are going to parse. This really helps organize your thoughts, and gets you thinking about the structure and data you are parsing, before starting to write actual code.
Here is a BNF that I came up with for this config (it looks like a Python string because that makes it easy to paste into your code for future reference - but pyparsing does not work with or require such strings, they are purely a design tool):
BNF = '''
key_struct ::= key struct
struct ::= '{' (key_value | key_struct)... '}'
key_value ::= key '=' (scalar_value | list_value)
key ::= word composed of alphas and '_'
list_value ::= '[' scalar_value [',' scalar_value]... ']'
scalar_value ::= real | integer | double-quoted-string
comment ::= '#' rest-of-line
'''
Notice that the opening and closing {}'s and []'s are at the same level, rather than having an opener in one expression and a closer in another.
This BNF also will allow for structs nested within structs, which is not strictly required in the sample text you posted, but since your code looked to be supporting that, I included it.
Translating to pyparsing is pretty straightforward from here, working bottom-up through the BNF:
EQ, LBRACE, RBRACE, LQ, RQ = map(pp.Suppress, "={}[]")
comment = "#" + pp.restOfLine
integer = ppc.integer #pp.Regex(r"[+-]?\d+").setParseAction(lambda x: int(x[0]))
real = ppc.real #pp.Regex(r"[+-]?\d+\.\d*").setParseAction(lambda x: float(x[0]))
pp.dblQuotedString.setParseAction(pp.removeQuotes)
scalar_value = real | integer | pp.dblQuotedString
# `delimitedList(expr)` is a shortcut for `expr + ZeroOrMore(',' + expr)`
list_value = pp.Group(LQ + pp.delimitedList(scalar_value) + RQ)
key = pp.Word(pp.alphas + "_", pp.alphanums + '_')
key_value = pp.Group(key + EQ + (scalar_value | list_value))
struct = pp.Forward()
entry = key_value | pp.Group(key + struct)
struct <<= (LBRACE + pp.ZeroOrMore(entry) + RBRACE)
parser = pp.ZeroOrMore(entry)
parser.ignore(comment)
Running this code:
try:
res = parser.parseString(lvm_source)
# print(res.dump())
res.pprint()
return res
except (pp.ParseBaseException, ) as e:
print("lvm.conf bad format {0}".format(e))
Gives this nested list:
[['config',
['checks', 1],
['abort_on_errors', 0],
['profile_dir', '/etc/lvm/profile']],
['local'],
['log',
['verbose', 0],
['silent', 0],
['syslog', 1],
['overwrite', 0],
['level', 0],
['indent', 1],
['command_names', 0],
['prefix', ' '],
['activation', 0],
['debug_classes',
['memory',
'devices',
'activation',
'allocation',
'lvmetad',
'metadata',
'cache',
'locking',
'lvmpolld',
'dbus']]]]
I think the format you would prefer is one where you can access the values as keys in a nested dict or in a hierarchical object. Pyparsing has a class called Dict that will do this at parse time, so that the results names are automatically assigned for nested subgroups. Change these two lines to have their sub-entries automatically dict-ified:
struct <<= pp.Dict(LBRACE + pp.ZeroOrMore(entry) + RBRACE)
parser = pp.Dict(pp.ZeroOrMore(entry))
Now if we call dump() instead of pprint(), we'll see the hierarchical naming:
[['config', ['checks', 1], ['abort_on_errors', 0], ['profile_dir', '/etc/lvm/profile']], ['local'], ['log', ['verbose', 0], ['silent', 0], ['syslog', 1], ['overwrite', 0], ['level', 0], ['indent', 1], ['command_names', 0], ['prefix', ' '], ['activation', 0], ['debug_classes', ['memory', 'devices', 'activation', 'allocation', 'lvmetad', 'metadata', 'cache', 'locking', 'lvmpolld', 'dbus']]]]
- config: [['checks', 1], ['abort_on_errors', 0], ['profile_dir', '/etc/lvm/profile']]
- abort_on_errors: 0
- checks: 1
- profile_dir: '/etc/lvm/profile'
- local: ''
- log: [['verbose', 0], ['silent', 0], ['syslog', 1], ['overwrite', 0], ['level', 0], ['indent', 1], ['command_names', 0], ['prefix', ' '], ['activation', 0], ['debug_classes', ['memory', 'devices', 'activation', 'allocation', 'lvmetad', 'metadata', 'cache', 'locking', 'lvmpolld', 'dbus']]]
- activation: 0
- command_names: 0
- debug_classes: ['memory', 'devices', 'activation', 'allocation', 'lvmetad', 'metadata', 'cache', 'locking', 'lvmpolld', 'dbus']
- indent: 1
- level: 0
- overwrite: 0
- prefix: ' '
- silent: 0
- syslog: 1
- verbose: 0
You can then access the fields as res['config']['checks'] or res.log.indent.

How to replace text in curly brackets with another text based on comparisons using Python Regex

I am quiet new to regular expressions. I have a string that looks like this:
str = "abc/def/([default], [testing])"
and a dictionary
dict = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
and using Python RE, I want str in this form, after comparisons of each element in dict to str:
str = "abc/def/(2.7, 2.1)"
Any help how to do it using Python RE?
P.S. its not the part of any assignment, instead it is the part of my project at work and I have spent many hours to figure out solution but in vain.

import re
st = "abc/def/([default], [testing], [something])"
dic = {'abc/def/[default]' : '2.7',
'abc/def/[testing]' : '2.1',
'bcd/xed/[something]' : '3.1'}
prefix_regex = "^[\w*/]*"
tag_regex = "\[\w*\]"
prefix = re.findall(prefix_regex, st)[0]
tags = re.findall(tag_regex, st)
for key in dic:
key_prefix = re.findall(prefix_regex, key)[0]
key_tag = re.findall(tag_regex, key)[0]
if prefix == key_prefix:
for tag in tags:
if tag == key_tag:
st = st.replace(tag, dic[key])
print st
OUTPUT:
abc/def/(2.7, 2.1, [something])

Here is a solution using re module.
Hypotheses :
there is a dictionary whose keys are composed of a prefix and a variable part, the variable part is enclosed in brackets ([])
the values are strings by which the variable parts are to be replaced in the string
the string is composed by a prefix, a (, a list of variable parts and a )
the variable parts in the string are enclosed in []
the variable parts in the string are separated by a comma followed by optional spaces
Python code :
import re
class splitter:
pref = re.compile("[^(]+")
iden = re.compile("\[[^]]*\]")
def __init__(self, d):
self.d = d
def split(self, s):
m = self.pref.match(s)
if m is not None:
p = m.group(0)
elts = self.iden.findall(s, m.span()[1])
return p, elts
return None
def convert(self, s):
p, elts = self.split(s)
return p + "(" + ", ".join((self.d[p + elt] for elt in elts)) + ")"
Usage :
s = "abc/def/([default], [testing])"
d = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
sp = splitter(d)
print(sp.convert(s))
output :
abc/def/(2.7, 2.1)

Regex is probably not required here. Hope this helps
lhs,rhs = str.split("/(")
rhs1,rhs2 = rhs.strip(")").split(", ")
lhs+="/"
print "{0}({1},{2})".format(lhs,dict[lhs+rhs1],dict[lhs+rhs2])
output
abc/def/(2.7,2.1)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

pattern to dictionary of lists Python - python

Related

PyParsing: parse if not a keyword

Dealing with ZeroOrMore in pyparsing

Visualizing nested function calls in python

Convert lvm.conf to python dict using pyparsing

How to replace text in curly brackets with another text based on comparisons using Python Regex

Categories

Resources