Abstract regular expressions - python

I am working on processing queries in russian language (translating them into SQL code to be executed). I tokenize the query, do morphological analysis, from lemmas I get links to database objects. So now I want to use patterns like regular expressions to get things like conditions, ORDER BY expressions and so on. But the thing is, regex can only be used with list of characters (strings).
Is there a library/solution for Python (preferably) which works like regular expressions but for any kinds of objects (list of tokens with grammatical properties or database objects), not just strings?
So, as an example, I want to write patterns which would look something like this:
[db-column]((','|'and')[db-column])*
this pattern would match a list of database objects like this: columnA, columnB and columnC.

If someone is interested in the topic, there is a GitHub repository with my work, I wrote the following code for Abstract regular expressions:
"""
AbstractRegularExpressions.py
Abstract Regular Expressions.
They are like regular expressions, but can work with any kinds of objects,
not just characters as in regex.
This code is based on Mark-Jason Dominus's article «How Regexes Work»,
you can find it here: https://perl.plover.com/Regex/article.html
"""
# NFA - Nondeterministic Finite Automata (machine).
# Used for defining custom operators.
# http://code.activestate.com/recipes/384122/
class Infix:
    """Turn a two-argument callable into an infix operator.

    Usage is ``a |op| b`` or ``a <<op>> b``: the reflected operator on the
    left captures ``a`` and yields a partially applied ``Infix``; the
    operator on the right then supplies ``b`` and performs the call.
    """

    def __init__(self, function):
        self.function = function

    def _bind_left(self, left):
        """Return a new ``Infix`` whose function has ``left`` as first argument."""
        def apply_right(right):
            return self.function(left, right)

        return Infix(apply_right)

    def __call__(self, value1, value2):
        # Plain prefix call: op(a, b).
        return self.function(value1, value2)

    # --- ``a |op| b`` ---------------------------------------------------
    def __ror__(self, other):
        return self._bind_left(other)

    def __or__(self, other):
        return self.function(other)

    # --- ``a <<op>> b`` -------------------------------------------------
    def __rlshift__(self, other):
        return self._bind_left(other)

    def __rshift__(self, other):
        return self.function(other)
# Alternatives («|» in regex): a|b|c (in regex) <==> a |OR| b |OR| c (here).
class Cases:
    """A group of alternative sub-patterns (what ``|`` expresses in a regex)."""

    def __init__(self, cases):
        # The list is stored as given, not copied.
        self.cases = cases

    def add(self, case):
        """Append one more alternative and return ``self`` so calls can be chained."""
        alternatives = self.cases
        alternatives.append(case)
        return self
# Operator for joining cases, used like this: a |OR| b |OR| c.
# The first application wraps both operands in a new Cases object; when the
# left operand already is a Cases, the right operand is appended to it
# (Cases.add returns the same object), so a chain produces one flat Cases.
OR = Infix(lambda x, y: x.add(y) if isinstance(x, Cases) else Cases([x, y]))
# Primitives are used to test one token for some predicate.
# In regex there's only one primitive — whether a token is equal to some character or not.
# Examples of primitives: «is token a table column?», «is token's type — string» etc.
class Primitive:
    """A named test applied to a single token.

    ``predicate`` is any callable; ``name`` is what ``str()`` returns for
    this primitive.
    """

    def __init__(self, name, predicate):
        self.name, self.predicate = name, predicate

    def __str__(self):
        return self.name

    def test(self, *args):
        """Forward ``args`` to the predicate and return its result unchanged."""
        check = self.predicate
        return check(*args)
# Transitions are the arrows in NFAs,
# one transition leads from one state to another if the pattern's condition is met for a token.
# If there is no pattern (pattern = None), then you can always go to the next state (epsilon-transition).
class Transition:
    """One arrow of the automaton.

    ``pattern`` is the condition attached to the arrow (``None`` marks an
    epsilon transition); ``nextState`` is the state the arrow leads to,
    with ``None`` standing for the final state.
    """

    def __init__(self, pattern, nextState=None):
        self.pattern, self.nextState = pattern, nextState
# Running counter, only used to label states when pretty-printing machines.
stateID = 0


class State:
    """A node of the automaton: a collection of outgoing transitions.

    Each new state takes the next value of the module-level ``stateID``
    counter as its ``ID``; ``str()`` of a state is that number.
    """

    def __init__(self, transitions):
        global stateID
        self.transitions = transitions
        stateID += 1
        self.ID = stateID

    def __str__(self):
        return f'{self.ID}'
# Recursively creates an NFA
# (makes simple NFAs from primitives and then connects them with connectMachines).
def makeMachine(pattern):
    """Recursively build an automaton for ``pattern`` and return its start state.

    Accepted shapes of ``pattern``:
      * ``Primitive`` or ``Pattern`` - a start state with a single transition
        carrying it (sub-patterns are treated like primitives);
      * ``list`` - a sequence; every member is compiled and the machines are
        chained with ``connectMachines`` (an empty list gives ``None``);
      * ``tuple`` ``(sub_pattern, quantifier)`` with quantifier ``'+'``,
        ``'?'`` or ``'*'``;
      * ``Cases`` - alternatives, merged with ``combineMachines``.

    Any other value falls through and the function returns ``None``.
    """
    # Leaf machine.
    if isinstance(pattern, (Pattern, Primitive)):
        return State({Transition(pattern)})

    # Sequence: start with the last member and prepend the others, right to left.
    if isinstance(pattern, list):
        if not pattern:
            return None
        chained = makeMachine(pattern[-1])
        for member in reversed(pattern[:-1]):
            chained = connectMachines(makeMachine(member), chained)
        return chained

    # Quantifiers (+, ?, *).
    if isinstance(pattern, tuple):
        inner, quantifier = pattern
        machine = makeMachine(inner)

        if quantifier in ('+', '*'):
            # Collect the finishing transitions first: they are rewired below,
            # and new transitions are added to the sets being walked here.
            finishing = [
                (state, transition)
                for state in statesIterator(machine, set())
                for transition in state.transitions
                if transition.nextState is None
            ]
            for state, transition in finishing:
                # Loop back to the start, and keep a twin that still finishes.
                transition.nextState = machine
                state.transitions.add(Transition(transition.pattern, None))

        if quantifier in ('?', '*'):
            # Allow skipping the machine: add a finishing epsilon transition
            # to the start state unless it already has one.
            already_skippable = any(
                t.pattern is None and t.nextState is None
                for t in machine.transitions
            )
            if not already_skippable:
                machine.transitions.add(Transition(None))

        return machine

    # For cases (a |OR| b |OR| c).
    if isinstance(pattern, Cases):
        return combineMachines([makeMachine(case) for case in pattern.cases])
# Goes through each state of a machine.
def statesIterator(state, passedStates=None):
    """Yield the states reachable from ``state``, depth-first.

    Args:
        state: start state, or ``None`` (nothing is yielded).
        passedStates: set of states that must not be entered again. It is
            updated in place as the traversal proceeds. When omitted, a
            fresh set is created for this traversal; the previous mutable
            default ``set()`` was shared between calls, so a second
            traversal silently skipped states seen by an earlier one.

    Yields:
        ``state`` itself (always, even if it is already in
        ``passedStates``), then every state reachable through
        ``transition.nextState`` that is not in ``passedStates``.
    """
    if state is None:
        return
    if passedStates is None:
        passedStates = set()
    yield state
    passedStates.add(state)
    for transition in state.transitions:
        if transition.nextState not in passedStates:
            yield from statesIterator(transition.nextState, passedStates)
# Connects two machines into one (replaces finish states of machineA with start states of machineB).
def connectMachines(machineA, machineB):
    """Chain two machines: every finishing transition of ``machineA`` is
    redirected to the start state of ``machineB``.

    ``machineA`` is modified in place and returned.
    """
    # Gather the finishing transitions before rewiring any of them, so the
    # traversal of machineA never wanders into machineB.
    dangling = [
        transition
        for state in statesIterator(machineA, set())
        for transition in state.transitions
        if transition.nextState is None
    ]
    for transition in dangling:
        transition.nextState = machineB
    return machineA
# Connects cases of machines (machineA |OR| machineB |OR| ... |OR| machineN) into one machine.
def combineMachines(machines):
    """Merge alternative machines into one.

    The result is a new start state holding the transitions of every given
    machine's start state.
    """
    merged = set()
    for machine in machines:
        merged.update(machine.transitions)
    return State(merged)
# Pattern, as in regular expressions.
# Example: p = Pattern('name', [a, b, c, (d, '+') |OR| e])
# which gives you a regex «abc(d+|e)»,
# where a, b, c, d and e are another patterns or primitives.
class Pattern:
    """A named pattern compiled into an automaton.

    ``pattern`` is anything ``makeMachine`` accepts; the resulting start
    state is kept in ``self.machine``. ``str()`` of a pattern is its name.
    """

    def __init__(self, name, pattern):
        global stateID
        self.name = name
        # Restart the state counter so this pattern's states are numbered afresh.
        stateID = 0
        self.machine = makeMachine(pattern)

    def __str__(self):
        return self.name
# Structure stores tokens of a found pattern.
class Structure:
    """Named container for the elements collected by a matched pattern."""

    def __init__(self, name, elements=None):
        self.name = name
        # A fresh list per instance when no elements are supplied.
        self.elements = [] if elements is None else elements

    def __str__(self):
        rendered = list(map(str, self.elements))
        return f' --- {self.name} --- {rendered}'
# Class for storing a token and pattern which found this token.
class PatternToken:
    """Pairs a token with the pattern that matched it.

    ``str()`` renders as ``"<pattern>: <token.text>"``.
    """

    def __init__(self, pattern, token):
        self.pattern, self.token = pattern, token

    def __str__(self):
        return '{}: {}'.format(self.pattern, self.token.text)
# Used for storing linked matched tokens.
class CurrentState:
    """One link in the chain of matched tokens built while running an automaton.

    Attributes:
        token: the token consumed at this step, or ``None`` for steps that
            only mark entering/leaving a sub-pattern.
        transition: the transition taken at this step.
        previousState: the preceding ``CurrentState`` (``None`` at the start).
        patternsStack: list of ``CurrentState`` objects of the sub-patterns
            currently being matched.
    """

    def __init__(self, token, transition=None, previousState=None, patternsStack=None):
        self.transition = transition
        self.token = token
        self.previousState = previousState
        # A fresh list per instance: the former mutable default ``[]`` was
        # shared by every instance created without an explicit stack.
        self.patternsStack = [] if patternsStack is None else patternsStack

    def __str__(self):
        t = self.token.text if self.token is not None else ''
        return f'== [{self.transition.pattern.name} ({len(self.patternsStack)}): {t}]\n{self.previousState}'

    def connect(self, name):
        """Fold the chain ending at this state into one ``Structure``.

        The chain is walked backwards through ``previousState``. Steps whose
        transition carries a ``Pattern`` open or close a nested
        ``Structure``; steps carrying a ``Primitive`` become
        ``PatternToken`` elements.

        Returns:
            ``((min_index, max_index), Structure(name, elements))`` where the
            indexes are the smallest and largest ``token.index`` of the
            primitive steps.

        Raises:
            ValueError: if the chain contains no primitive step (``min`` of
                an empty list).
        """
        state = self
        structuresStack = []
        result = []
        indexes = []
        while True:
            token = state.token
            transition = state.transition
            # Pattern
            if isinstance(transition.pattern, Pattern):
                if len(structuresStack) == 0 or structuresStack[-1].name != transition.pattern.name:
                    # First marker seen for this sub-pattern: open a structure.
                    structuresStack.append(Structure(transition.pattern.name))
                else:
                    # Second marker with the same name: the structure is complete.
                    structure = structuresStack.pop()
                    # Elements were gathered walking backwards; restore order.
                    structure.elements = structure.elements[::-1]
                    if len(structuresStack) == 0:
                        result.append(structure)
                    else:
                        structuresStack[-1].elements.append(structure)
            # Primitive
            elif isinstance(transition.pattern, Primitive):
                indexes.append(token.index)
                if len(structuresStack) == 0:
                    result.append(PatternToken(transition.pattern, token))
                else:
                    structuresStack[-1].elements.append(PatternToken(transition.pattern, token))
            state = state.previousState
            if state is None:
                break
        return ((min(indexes), max(indexes)), Structure(name, result[::-1]))
# Used for running a pattern on a list of tokens.
class Automata:
    """Runs one Pattern over a sequence of tokens, fed one at a time.

    currentStates holds the partial matches that can still consume a token;
    finalStates accumulates finished matches as CurrentState chains.
    """
    def __init__(self, pattern):
        self.pattern = pattern
        self.finalStates = set()
        self.currentStates = set()
    def feedToken(self, token):
        """Advance every partial match with ``token`` and also try to start a new match at it."""
        # Work on a snapshot: processTransition refills self.currentStates.
        currentStates = set(self.currentStates)
        self.currentStates = set()
        for state in currentStates:
            for transition in state.transition.nextState.transitions:
                self.processTransition(transition, token, state)
        # A match may begin at any token, so the start transitions are tried every time.
        for transition in self.pattern.machine.transitions:
            self.processTransition(transition, token)
    def __str__(self):
        return "\n\n".join([str(state) for state in self.finalStates])
    def processTransition(self, transition, token, previousState=None):
        """Try to take ``transition`` with ``token``, continuing the chain that ends at ``previousState``."""
        # Epsilon
        if (transition.pattern == None):
            if (transition.nextState != None):
                # Nothing is consumed: try the transitions of the next state with the same token.
                for t in transition.nextState.transitions:
                    self.processTransition(t, token, previousState)
            # NOTE(review): previousState is None when feedToken starts a fresh match; if the
            # pattern's start state has a finishing epsilon transition (makeMachine adds one
            # for '?' and '*'), this attribute access would raise AttributeError - confirm.
            elif (len(previousState.patternsStack) == 0):
                self.finalStates.add(previousState)
            else:
                # A sub-pattern finished: unwind enclosing patterns until one of them has a
                # next state to continue from, or the stack is empty (whole match finished).
                newState = previousState
                while True:
                    patternsStack = list(newState.patternsStack)
                    patternState = patternsStack.pop()
                    newState = CurrentState(None, patternState.transition, newState, patternsStack)
                    if (newState.transition.nextState != None):
                        for t in newState.transition.nextState.transitions:
                            self.processTransition(t, token, newState)
                        break
                    if (len(newState.patternsStack) == 0):
                        self.finalStates.add(newState)
                        break
        # Primitive
        elif (isinstance(transition.pattern, Primitive)):
            if (transition.pattern.test(token)):
                patternsStack = previousState.patternsStack if previousState != None else []
                # The stack is copied so sibling branches do not share it.
                newState = CurrentState(token, transition, previousState, list(patternsStack))
                if (newState.transition.nextState != None):
                    # More to match: wait for the next token.
                    self.currentStates.add(newState)
                elif (len(newState.patternsStack) == 0):
                    self.finalStates.add(newState)
                else:
                    # Same unwinding as in the epsilon branch, but the token has been
                    # consumed, so the resumed state is parked in currentStates.
                    while True:
                        patternsStack = list(newState.patternsStack)
                        patternState = patternsStack.pop()
                        newState = CurrentState(None, patternState.transition, newState, patternsStack)
                        if (newState.transition.nextState != None):
                            self.currentStates.add(newState)
                            break
                        if (len(newState.patternsStack) == 0):
                            self.finalStates.add(newState)
                            break
        # Pattern
        elif (isinstance(transition.pattern, Pattern)):
            patternsStack = previousState.patternsStack if previousState != None else []
            newState = CurrentState(None, transition, previousState, list(patternsStack))
            # The marker state pushes itself so the unwinding loops can find its transition later.
            newState.patternsStack.append(newState)
            for t in transition.pattern.machine.transitions:
                self.processTransition(t, token, newState)
# Pretty-print a machine.
def printMachine(machine):
    """Print every transition of ``machine`` as ``<state>: --<pattern>-><next state>``."""
    padding = 0
    for state in statesIterator(machine, set()):
        for t in state.transitions:
            print(padding*' ' + f'{state}: --{t.pattern}->{t.nextState}')
        # NOTE(review): the source lost its indentation; the increment is assumed to be
        # per state (all transitions of one state share an indent) - confirm.
        padding += 1
# Pretty-print a pattern.
def printPattern(pattern):
    """Print a banner with the pattern's name, followed by its machine."""
    banner = f'-=-=-=-=-= {pattern.name} =-=-=-=-=-'
    print(banner)
    printMachine(pattern.machine)
Which allows you to write templates like these:
# Selecting
# NOTE(review): `operator`, `connector` and `literal` are not defined in this
# snippet; presumably they are Primitives/Patterns declared elsewhere - confirm.
# Primitives: single-token tests on the token's `type` attribute.
table = Primitive('table', lambda token: token.type == 'table')
column = Primitive('column', lambda token: token.type == 'column')
# Zero or more operators followed by a column.
columnExpr = Pattern('columnExpr', [(operator, '*'), column])
# Same, but the last element may be a column or a literal.
# NOTE(review): registered under the name 'columnExpr' as well - confirm this is intended.
columnLiteralExpr = Pattern('columnExpr', [(operator, '*'), column |OR| literal])
listOfTables = Pattern('listOfTables', [table])
# A column expression, any number of "connector + column expression" pairs, then an optional table.
listOfColumns = Pattern('listOfColumns', [columnExpr, ([connector, columnExpr], '*'), (table, '?')])
selectExpr = Pattern('selectExpr', [listOfColumns |OR| listOfTables])

Related

CS50's Introduction to Artificial Intelligence with Python - Knowledge

I'm studying Harvard's Introduction to Artificial Intelligence with Python course, and I'm enjoying it a lot. I downloaded the logic file to use Boolean algebra and Knowledge, i.e. simple operations (OR, AND, NOT...). Before I explain my doubt, I will share the Knowledge classes from the Harvard source code; I hope there are no issues with it:
link to this class:
Harvard class
logic.py
import itertools
class Sentence():
    """Base class of all logical sentences.

    Subclasses override ``evaluate``, ``formula`` and ``symbols``.
    ``validate`` and ``parenthesize`` are class-level helpers called as
    ``Sentence.validate(x)`` / ``Sentence.parenthesize(s)``, so they must be
    class methods; without the decorator those calls fail with a missing
    argument.
    """

    def evaluate(self, model):
        """Evaluates the logical sentence."""
        raise Exception("nothing to evaluate")

    def formula(self):
        """Returns string formula representing logical sentence."""
        return ""

    def symbols(self):
        """Returns a set of all symbols in the logical sentence."""
        return set()

    @classmethod
    def validate(cls, sentence):
        """Raise ``TypeError`` unless ``sentence`` is a ``Sentence`` instance."""
        if not isinstance(sentence, Sentence):
            raise TypeError("must be a logical sentence")

    @classmethod
    def parenthesize(cls, s):
        """Parenthesizes an expression if not already parenthesized.

        Empty strings, purely alphabetic strings and strings already wrapped
        in one matching pair of parentheses are returned unchanged.
        """
        def balanced(s):
            """Checks if a string has balanced parentheses."""
            count = 0
            for c in s:
                if c == "(":
                    count += 1
                elif c == ")":
                    if count <= 0:
                        return False
                    count -= 1
            return count == 0

        # The interior must be balanced on its own: "(a) ∧ (b)" starts and
        # ends with parentheses but still needs wrapping.
        if not len(s) or s.isalpha() or (
            s[0] == "(" and s[-1] == ")" and balanced(s[1:-1])
        ):
            return s
        else:
            return f"({s})"
class Symbol(Sentence):
    """A propositional variable, identified by its name."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name

    def __hash__(self):
        return hash(("symbol", self.name))

    def __eq__(self, other):
        if not isinstance(other, Symbol):
            return False
        return self.name == other.name

    def formula(self):
        return self.name

    def symbols(self):
        return {self.name}

    def evaluate(self, model):
        """Look the symbol up in ``model`` (a mapping name -> value) and return its truth value."""
        try:
            truth = bool(model[self.name])
        except KeyError:
            raise Exception(f"variable {self.name} not in model")
        return truth
class Not(Sentence):
    """Negation of a single sentence."""

    def __init__(self, operand):
        Sentence.validate(operand)
        self.operand = operand

    def __repr__(self):
        return f"Not({self.operand})"

    def __hash__(self):
        return hash(("not", hash(self.operand)))

    def __eq__(self, other):
        if not isinstance(other, Not):
            return False
        return self.operand == other.operand

    def evaluate(self, model):
        inner = self.operand.evaluate(model)
        return not inner

    def formula(self):
        wrapped = Sentence.parenthesize(self.operand.formula())
        return "¬" + wrapped

    def symbols(self):
        return self.operand.symbols()
class And(Sentence):
    """Conjunction of any number of sentences (possibly none).

    An empty ``And()`` is a valid starting point that is grown with ``add``.
    """

    def __init__(self, *conjuncts):
        for conjunct in conjuncts:
            Sentence.validate(conjunct)
        self.conjuncts = list(conjuncts)

    def __eq__(self, other):
        return isinstance(other, And) and self.conjuncts == other.conjuncts

    def __hash__(self):
        return hash(
            ("and", tuple(hash(conjunct) for conjunct in self.conjuncts))
        )

    def __repr__(self):
        conjunctions = ", ".join(
            [str(conjunct) for conjunct in self.conjuncts]
        )
        return f"And({conjunctions})"

    def add(self, conjunct):
        """Append another conjunct after validating it."""
        Sentence.validate(conjunct)
        self.conjuncts.append(conjunct)

    def evaluate(self, model):
        # all() of no conjuncts is True.
        return all(conjunct.evaluate(model) for conjunct in self.conjuncts)

    def formula(self):
        if len(self.conjuncts) == 1:
            return self.conjuncts[0].formula()
        return " ∧ ".join([Sentence.parenthesize(conjunct.formula())
                           for conjunct in self.conjuncts])

    def symbols(self):
        """Return the union of the symbols of all conjuncts (empty set if there are none)."""
        # set.union(*[]) raises TypeError when there are no conjuncts;
        # starting from an empty set handles that case.
        return set().union(*(conjunct.symbols() for conjunct in self.conjuncts))
class Or(Sentence):
    """Disjunction of any number of sentences (possibly none)."""

    def __init__(self, *disjuncts):
        for disjunct in disjuncts:
            Sentence.validate(disjunct)
        self.disjuncts = list(disjuncts)

    def __eq__(self, other):
        return isinstance(other, Or) and self.disjuncts == other.disjuncts

    def __hash__(self):
        return hash(
            ("or", tuple(hash(disjunct) for disjunct in self.disjuncts))
        )

    def __repr__(self):
        disjuncts = ", ".join([str(disjunct) for disjunct in self.disjuncts])
        return f"Or({disjuncts})"

    def evaluate(self, model):
        # any() of no disjuncts is False.
        return any(disjunct.evaluate(model) for disjunct in self.disjuncts)

    def formula(self):
        if len(self.disjuncts) == 1:
            return self.disjuncts[0].formula()
        return " ∨ ".join([Sentence.parenthesize(disjunct.formula())
                           for disjunct in self.disjuncts])

    def symbols(self):
        """Return the union of the symbols of all disjuncts (empty set if there are none)."""
        # set.union(*[]) raises TypeError when there are no disjuncts;
        # starting from an empty set handles that case.
        return set().union(*(disjunct.symbols() for disjunct in self.disjuncts))
class Implication(Sentence):
    """Material implication: ``antecedent => consequent``."""

    def __init__(self, antecedent, consequent):
        for part in (antecedent, consequent):
            Sentence.validate(part)
        self.antecedent = antecedent
        self.consequent = consequent

    def __repr__(self):
        return f"Implication({self.antecedent}, {self.consequent})"

    def __hash__(self):
        return hash(("implies", hash(self.antecedent), hash(self.consequent)))

    def __eq__(self, other):
        if not isinstance(other, Implication):
            return False
        return (self.antecedent == other.antecedent
                and self.consequent == other.consequent)

    def evaluate(self, model):
        # False antecedent makes the implication true without looking at the consequent.
        if not self.antecedent.evaluate(model):
            return True
        return self.consequent.evaluate(model)

    def formula(self):
        left = Sentence.parenthesize(self.antecedent.formula())
        right = Sentence.parenthesize(self.consequent.formula())
        return f"{left} => {right}"

    def symbols(self):
        return set.union(self.antecedent.symbols(), self.consequent.symbols())
class Biconditional(Sentence):
    """Logical equivalence: ``left <=> right``."""

    def __init__(self, left, right):
        Sentence.validate(left)
        Sentence.validate(right)
        self.left = left
        self.right = right

    def __eq__(self, other):
        return (isinstance(other, Biconditional)
                and self.left == other.left
                and self.right == other.right)

    def __hash__(self):
        return hash(("biconditional", hash(self.left), hash(self.right)))

    def __repr__(self):
        return f"Biconditional({self.left}, {self.right})"

    def evaluate(self, model):
        """Return True when both sides have the same truth value in ``model``."""
        return ((self.left.evaluate(model)
                 and self.right.evaluate(model))
                or (not self.left.evaluate(model)
                    and not self.right.evaluate(model)))

    def formula(self):
        """Return the formula string, built from the operands' formulas."""
        # Use formula(), like every other connective; str() would embed the
        # repr (e.g. "And(a, b)") instead of the formula ("a ∧ b").
        left = Sentence.parenthesize(self.left.formula())
        right = Sentence.parenthesize(self.right.formula())
        return f"{left} <=> {right}"

    def symbols(self):
        return set.union(self.left.symbols(), self.right.symbols())
def model_check(knowledge, query):
    """Checks if knowledge base entails query.

    Every assignment of True/False to the symbols of ``knowledge`` and
    ``query`` is enumerated; entailment holds when ``query`` is true in
    each assignment that makes ``knowledge`` true.
    """

    def entails_under(unassigned, model):
        """Check entailment over all completions of the partial ``model``."""
        if not unassigned:
            # Complete model: only models satisfying the knowledge base
            # constrain the query.
            if knowledge.evaluate(model):
                return query.evaluate(model)
            return True

        # Pick one unassigned symbol and branch on its two values.
        rest = unassigned.copy()
        chosen = rest.pop()
        with_true = {**model, chosen: True}
        with_false = {**model, chosen: False}
        return (entails_under(rest, with_true) and
                entails_under(rest, with_false))

    all_symbols = set.union(knowledge.symbols(), query.symbols())
    return entails_under(all_symbols, dict())
I know it's a lot of code, but my doubt is very simple. I tested basic Boolean algebra operations such as NOT, AND, and OR. The problem is only with the OR function: it should always return TRUE if at least one operand is true, but it's returning false.
from logic import *
a = Symbol("a")
b = Symbol("b")
# OR
# Error here
orSentence = Or(a, b)
# model_check(knowledge, query) tests entailment over all models; it does not
# evaluate the OR itself. Or(a, b) does not entail a: the model a=False, b=True
# satisfies the knowledge but not the query.
valueOrSentence = model_check(orSentence, a)
print(orSentence.formula() + f" ({valueOrSentence})")
# Likewise Or(a, b) does not entail Not(a): a=True satisfies the knowledge only.
valueOrSentence = model_check(orSentence, Not(a))
print(orSentence.formula() + f" ({valueOrSentence})")
print('---/---/---/')
It should return "true" when check the model, but instead of it it's returning "false"
I prefer to believe there is no error in Harvard's logic.py file; what should I do to fix this "OR" logic?
There is one specific model in which your knowledge is true but the query is not, hence model_check returns False. There is nothing wrong with it.
When model = {'a': False, 'b': True} then orSentence.evaluate(model) would return True but a.evaluate(model) would return False making the overall result of model_check as False.
If you use andSentence = And(a, b) and then run model_check(andSentence, a), it would return True because for every value of model either andSentence (knowledge) and a (query) both are True or both are False.

How to avoid printing a variable without using the say function in python sly?

So I am using a python package sly which has a lexer and parser class. I am making my own programming language called NoobPy. So currently, the code will open test.noob and read each line and parse it. Now, if I were to define a variable, let's say x, and just write x in a line, it would print it, and I don't want that. I want it to print only if it's passed in the say function which I made.
Lexer class
class NoobpyLexer(Lexer):
    # Tokenizer for NoobPy source lines.
    # Rules that need an action are methods decorated with @_(pattern); the
    # decorator is what attaches the regular expression to the rule, so it
    # must be a real decorator and not a comment.
    tokens = {NUMBER, STRING, FALSE, TRUE, NAME, WHILE, IF, ELSE, SAY,
              PLUS, MINUS, TIMES, DIVIDE, ASSIGN,
              EQ, LT, LE, GT, GE, NEQ}
    literals = {'(', ')', ':'}

    # String containing ignored characters
    ignore = ' \t'

    # Regular expression rules for tokens.
    # Two-character operators are listed before their one-character
    # prefixes (== before =, <= before <, >= before >).
    STRING = r'\".*?\"'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    EQ = r'=='
    NEQ = r'!='
    ASSIGN = r'='
    LE = r'<='
    GE = r'>='
    LT = r'<'
    GT = r'>'

    @_(r'\d+')
    def NUMBER(self, t):
        # Convert the matched digits to an int.
        t.value = int(t.value)
        return t

    # @_(r'^((true$|false$)$)')
    # def BOOL(self, t):
    #     return t

    @_(r'true')
    def TRUE(self, t):
        return t

    @_(r'false')
    def FALSE(self, t):
        return t

    # Identifiers and keywords
    NAME = r'\b(?!((true$|false$)$)\b)\w+'  # [a-zA-Z_][a-zA-Z0-9_]*$
    NAME['if'] = IF
    NAME['else'] = ELSE
    NAME['while'] = WHILE
    NAME['say'] = SAY

    ignore_comment = r'\#.*'

    # Line number tracking
    @_(r'\n+')
    def ignore_newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, t):
        # Report the offending character and skip it.
        print("t: ", t)
        print('Line %d: Bad character %r' % (self.lineno, t.value[0]))
        self.index += 1
Parser class
class NoobpyParser(Parser):
    # Grammar for one NoobPy line; each rule returns a tuple-shaped tree node
    # (or a plain value) that NoobpyExecute.walkTree interprets.
    # Every rule method needs its @_(...) decorator: the decorator carries the
    # grammar production, and without it the repeated `statement` / `expr`
    # definitions simply overwrite each other.

    # Get the token list from the lexer (required)
    tokens = NoobpyLexer.tokens

    log = logging.getLogger()
    log.setLevel(logging.ERROR)
    # debugfile = 'parser.out'

    # Lowest precedence first; UMINUS is the fictitious token used by the
    # unary-minus rule via %prec.
    precedence = (
        ('left', PLUS, MINUS),
        ('left', TIMES, DIVIDE),
        ('right', UMINUS)
    )

    def __init__(self):
        self.variables = {}

    # --- statements ---------------------------------------------------------
    @_('')
    def statement(self, p):
        # Empty line: no tree.
        pass

    @_('SAY expr')
    def statement(self, p):
        return 'say', p.expr

    @_('NAME')
    def expr(self, p):
        return 'var', p.NAME

    @_('var_assign')
    def statement(self, p):
        return p.var_assign

    @_('NAME ASSIGN expr')
    def var_assign(self, p):
        return 'var_assign', p.NAME, p.expr

    @_('expr')
    def statement(self, p):
        return p.expr

    # --- expressions --------------------------------------------------------
    @_('expr PLUS expr')
    def expr(self, p):
        return 'add', p.expr0, p.expr1

    @_('expr MINUS expr')
    def expr(self, p):
        return 'sub', p.expr0, p.expr1

    @_('expr TIMES expr')
    def expr(self, p):
        return 'mul', p.expr0, p.expr1

    @_('expr DIVIDE expr')
    def expr(self, p):
        return 'div', p.expr0, p.expr1

    @_('MINUS expr %prec UMINUS')
    def expr(self, p):
        # Negate the payload (element 1) of the operand node; a tuple payload
        # is first collapsed to the sum of its items.
        expression = list(p.expr)
        if isinstance(expression[1], tuple):
            res = 0
            for i in expression[1]:
                res += i
            expression[1] = res
        expression[1] = -expression[1]
        return expression

    @_('expr EQ expr')
    def expr(self, p):
        return 'eq', p.expr0, p.expr1

    @_('"(" expr ")"')
    def expr(self, p):
        return p.expr

    @_('NUMBER')
    def expr(self, p):
        return 'num', p.NUMBER

    @_('STRING')
    def expr(self, p):
        return 'str', p.STRING

    @_('TRUE')
    def expr(self, p):
        return p.TRUE

    @_('FALSE')
    def expr(self, p):
        return p.FALSE
Execute class
class NoobpyExecute:
    """Evaluate one parsed NoobPy statement and print its value.

    Construction does all the work: the tree is walked and a non-``None``
    result of type int, float, str or bool is printed (booleans as
    ``true`` / ``false``).
    """

    def __init__(self, tree, variables):
        self.variables = variables
        outcome = self.walkTree(tree)
        # Exact type check on purpose: bool must not be printed as a number.
        if type(outcome) in [int, float] or isinstance(outcome, str):
            print(outcome)
        elif isinstance(outcome, bool):
            print("true" if outcome is True else "false")

    def walkTree(self, node):
        """Recursively evaluate a tree node and return its value.

        Plain ints, strings and ``None`` evaluate to themselves. Other
        nodes are sequences whose first item is a tag: ``say``, ``num``,
        ``str``, ``eq``, ``add``, ``sub``, ``mul``, ``div``,
        ``var_assign`` or ``var``. Assignments and unknown tags yield
        ``None``; reading an undefined variable prints a message and
        yields ``0``.
        """
        if node is None or isinstance(node, (int, str)):
            return node

        tag = node[0]

        if tag == 'say':
            return self.walkTree(node[1])
        if tag in ('num', 'str'):
            return node[1]

        if tag in ('eq', 'add', 'sub', 'mul', 'div'):
            # Left operand is evaluated before the right one.
            left = self.walkTree(node[1])
            right = self.walkTree(node[2])
            if tag == 'eq':
                return left == right
            if tag == 'add':
                return left + right
            if tag == 'sub':
                return left - right
            if tag == 'mul':
                return left * right
            return left / right

        if tag == 'var_assign':
            self.variables[node[1]] = self.walkTree(node[2])
            return None

        if tag == 'var':
            try:
                return self.variables[node[1]]
            except LookupError:
                print("Undefined name '{}'".format(node[1]))
                return 0

        return None
This:
if __name__ == '__main__':
    # Run a NoobPy source file: every line is tokenized, parsed and executed
    # on its own, sharing one variables dictionary.
    lexer = NoobpyLexer()
    parser = NoobpyParser()
    variables = {}

    cli = argparse.ArgumentParser()
    cli.add_argument(metavar='filename', dest="filename", type=str, help='name of the file you want to run')
    args = cli.parse_args()

    with open(args.filename) as file:
        for line in file:
            NoobpyExecute(parser.parse(lexer.tokenize(line)), variables)
Example test.noob
x = 2
x
^ prints 2
In your NoobPy constructor, you print out the result of evaluating the syntax tree (unless it's None, which will happen if you evaluate an assignment):
if result is None:
pass
elif result is not None and type(result) in [int, float]:
print(result)
elif isinstance(result, str):
print(result)
elif isinstance(result, bool):
if result is True:
print("true")
else:
print("false")
Leaving aside the fact that all that could be simplified, the code seems to clearly indicate the intention of printing the result of the evaluation. If you now don't want to print the result of the evaluation, you shouldn't print the result of the evaluation.
When you see a say function in the tree, you return the result of evaluating its argument:
if node[0] == 'say':
return self.walkTree(node[1])
If you want the say function to have the effect of printing the result of the evaluation of its argument, you should print the result of the evaluation of its argument instead of returning the result of the evaluation of its argument (or as well as returning the result, depending on what you think the semantics of say are).

Why is the logic for this gated SR latch not working?

I'm currently learning Computer Science and I thought I'd try my hand at simulating how a computer works at the level of the computer architecture. The idea is that I would start with the basic components and combine them to create more and more complex parts, like an ALU or a small amount of RAM until I can run simple programs on it. This is being built in Python.
The two most basic kinds of objects I built are:
Signals, which are nodes with a constant value and no input. Their value can also be flipped with an object method some_signal.flip().
Gates. Each gate is a node with a value that depends on the value of its children. They all inherit from a Component class. The calculation for a gate is performed when gate.get_output() is called on any Component gate, which also calls get_output on its children. I'll include the code at the end.
The Signals were no problem. It took me a while but I got each of the logic gates to work as expected. I had to add special code to make the logic gates work when connected to themselves. Now they work as expected.
I used the Gates to construct an SR latch.
class Latch(And):
    """SR latch built from gates: output = (output OR set) AND NOT reset."""
    # NOTE: the parameter name `set` shadows the builtin inside __init__.
    def __init__(self, set, reset):
        self.not_reset = Not(reset)
        # First input of the OR is left unconnected for now; it becomes the feedback input below.
        self.latch_or = Or(None, set)
        # Initialise the AND with its first input unconnected as well.
        super().__init__(None, self.not_reset)
        # Close the loop: the AND (this object) reads the OR, and the OR reads this object.
        self.receiving_from_a = self.latch_or
        self.latch_or.receiving_from_a = self
        # Assigned after super().__init__, which sets this_type itself.
        self.this_type = "Latch"
It works as expected.
The problem happens when I try to attach a data line and a write-enable line (I'm sorry, I don't know what that's called).
# Gated latch wiring: `and1` drives the latch's set input, `not_and2` its reset input.
and1 = And(data, enable)
not_data = Not(data)
and2 = And(not_data, enable)
not_and2 = Not(and2)
# NOTE(review): Latch inverts its reset argument itself (it builds Not(reset)). Here reset is
# Not(and2), which is True whenever enable is False or data is True, i.e. in every state where
# set (and1) could be True. Presumably reset should be `and2` - confirm.
bit = Latch(and1, not_and2)
For some reason I am not able to write the signal from data to bit regardless of the state of enable. I'm baffled because everything worked as expected until this point. Is it a problem with my implementation of the code or did I just construct the final gate wrong?
Here is the full code if you want to run it (If you know how to make this hideable please do):
##GENERIC COMPONENT CLASS (BASE FOR OTHER OBJECTS)
class Component:
    """Generic circuit node: an output value plus up to two upstream nodes (a and b)."""

    def __init__(self, output, receiving_from_a = None, receiving_from_b = None, this_type = "Component"):
        # `output` is this node's value; `receiving_from_*` are the nodes it reads from.
        self.this_type = this_type
        self.output = output
        self.receiving_from_a = receiving_from_a
        self.receiving_from_b = receiving_from_b

    def __repr__(self):
        shown = str(self.get_output())
        return self.this_type + " with output = " + shown

    # --- wiring: setters ------------------------------------------------
    def set_receiving_from(self, receiving_from):
        """Connect the single (a) input."""
        self.receiving_from_a = receiving_from

    def set_receiving_from_a(self, receiving_from_a):
        """Connect input a."""
        self.receiving_from_a = receiving_from_a

    def set_receiving_from_b(self, receiving_from_b):
        """Connect input b."""
        self.receiving_from_b = receiving_from_b

    # --- wiring: getters ------------------------------------------------
    def get_receiving_from(self):
        """Return the node connected to the single (a) input."""
        return self.receiving_from_a

    def get_receiving_from_a(self):
        return self.receiving_from_a

    def get_receiving_from_b(self):
        return self.receiving_from_b

    # --- values ---------------------------------------------------------
    def get_output(self):
        return self.output

    def get_input(self):
        """Return the current output of the node on the single (a) input."""
        upstream = self.get_receiving_from_a()
        return upstream.get_output()

    def get_input_a(self):
        upstream = self.get_receiving_from_a()
        return upstream.get_output()

    def get_input_b(self):
        upstream = self.get_receiving_from_b()
        return upstream.get_output()
#FUNCTIONAL
#Creates a Component to itself
#---------------------------------------------------------------------------------------
###LOGIC GATES
#-----------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------
## TEST SIGNAL DEFINITIONS
class Signal:
    """Test signal: a node with a boolean output and no input; it can be flipped."""

    def __init__(self, output):
        if type(output) != bool:
            # Non-boolean values are reported and `output` is left unset.
            print("Error! Signal has wrong type.")
        else:
            self.output = output

    def __repr__(self):
        return "Signal with output = " + str(self.output)

    def get_output(self):
        return self.output

    def flip(self):
        """Invert the output and return the new value."""
        was_high = (self.output == True)
        self.output = not was_high
        return self.output
#------------------------------------------------
#NOT GATE DEFINITION
class Not(Component):
    """Inverter: output is the negation of its single input."""

    def __init__(self, receiving_from):
        initial = not receiving_from.get_output()
        super().__init__(initial, receiving_from, None, "NOT Gate")

    def set_output(self):
        """Recompute the output from the current input."""
        current = self.get_input()
        self.output = not current

    def get_output(self):
        # Always refresh before reporting.
        self.set_output()
        return self.output
#-------------------------------------------------------------------------------
##OR GATE DEFINITION
class Or(Component):
    """OR gate with optional feedback.

    Either input may be left unconnected at construction time and wired
    later (for example to the gate itself). The output is recomputed on
    every ``get_output`` call.
    """

    def __init__(self, receiving_from_a = None, receiving_from_b = None):
        if (receiving_from_a is not None) and (receiving_from_b is not None):
            initial = receiving_from_a.get_output() or receiving_from_b.get_output()
        else:
            # Not fully wired yet: start low.
            initial = False
        # Pass the gate type in both cases, so that a partially wired gate
        # does not report itself as a generic "Component".
        super().__init__(initial, receiving_from_a, receiving_from_b, "OR Gate")
        self.prev_out = False

    def set_output(self):
        """Recompute the output; an input wired to the gate itself reads the previous output."""
        try:
            self.prev_out = self.output
            if self.get_receiving_from_a() == self:
                self.output = self.prev_out or self.get_input_b()
            elif self.get_receiving_from_b() == self:
                self.output = self.get_input_a() or self.prev_out
            else:
                self.output = (self.get_input_a() or self.get_input_b())
        except RecursionError:
            # A feedback loop through other gates recursed too deep: keep the previous value.
            self.output = self.prev_out

    def get_output(self):
        self.set_output()
        return self.output

    # The helpers below take no instance; @staticmethod makes that explicit
    # and keeps them callable both as Or.helper(...) and on an instance.
    @staticmethod
    def or_connected_to_self_a(signal):
        """Return an OR gate whose input a is its own output and input b is ``signal``."""
        target = Or(None, signal)
        target.set_receiving_from_a(target)
        target.get_output()
        return target

    @staticmethod
    def or_connected_to_self_b(signal):
        """Return an OR gate whose input b is its own output and input a is ``signal``."""
        target = Or(signal, None)
        target.set_receiving_from_b(target)
        target.get_output()
        return target

    @staticmethod
    def or_connected_to_self(signal, char):
        """Dispatch on ``char`` ("a" or "b"); any other value prints an error and returns None."""
        if char == "a":
            return Or.or_connected_to_self_a(signal)
        elif char == "b":
            return Or.or_connected_to_self_b(signal)
        else:
            print("ERROR! Did not specify which side to connect OR gate to")
#-------------------------------------------------------------------------------
##AND GATE DEFINITION
class And(Component):
    """AND gate with optional feedback.

    Either input may be left unconnected at construction time and wired
    later (for example to the gate itself). The output is recomputed on
    every ``get_output`` call.
    """

    def __init__(self, receiving_from_a = None, receiving_from_b = None):
        if (receiving_from_a is not None) and (receiving_from_b is not None):
            initial = receiving_from_a.get_output() and receiving_from_b.get_output()
        else:
            # Not fully wired yet: start high.
            initial = True
        # Pass the gate type in both cases, so that a partially wired gate
        # does not report itself as a generic "Component".
        super().__init__(initial, receiving_from_a, receiving_from_b, "AND Gate")
        self.prev_out = True

    def set_output(self):
        """Recompute the output; an input wired to the gate itself reads the previous output."""
        self.prev_out = self.output
        try:
            if self.get_receiving_from_a() == self:
                self.output = self.prev_out and self.get_input_b()
            elif self.get_receiving_from_b() == self:
                self.output = self.get_input_a() and self.prev_out
            else:
                self.output = (self.get_input_a() and self.get_input_b())
        except RecursionError:
            # A feedback loop through other gates recursed too deep: keep the previous value.
            self.output = self.prev_out

    def get_output(self):
        self.set_output()
        return self.output

    # The helpers below take no instance; @staticmethod makes that explicit
    # and keeps them callable both as And.helper(...) and on an instance.
    @staticmethod
    def and_connected_to_self_a(signal):
        """Return an AND gate whose input a is its own output and input b is ``signal``."""
        target = And(None, signal)
        target.set_receiving_from_a(target)
        target.get_output()
        return target

    @staticmethod
    def and_connected_to_self_b(signal):
        """Return an AND gate whose input b is its own output and input a is ``signal``."""
        target = And(signal, None)
        target.set_receiving_from_b(target)
        target.get_output()
        return target

    @staticmethod
    def and_connected_to_self(signal, char):
        """Dispatch on ``char`` ("a" or "b"); any other value prints an error and returns None."""
        if char == "a":
            return And.and_connected_to_self_a(signal)
        elif char == "b":
            return And.and_connected_to_self_b(signal)
        else:
            print("ERROR! Did not specify which side to connect AND gate to")
#-------------------------------------------------------------------------------------
##XOR GATE
class XOr(And):  # Top node is and
    """XOR gate composed as ``(A or B) and not (A and B)``.

    The gate itself is the top-level AND; the helper gates are kept as
    attributes.
    """

    def __init__(self, receiving_from_a, receiving_from_b):
        """Wire the helper gates and then the top-level AND.

        Args:
            receiving_from_a: component feeding input A.
            receiving_from_b: component feeding input B.
        """
        ##(A or B) and ~(A and B)
        self.A_or_B = Or(receiving_from_a, receiving_from_b)
        self.A_and_B = And(receiving_from_a, receiving_from_b)
        self.not_A_and_B = Not(self.A_and_B)
        super().__init__(self.A_or_B, self.not_A_and_B)
        # Set the label after the parent constructor: And.__init__ passes
        # "AND Gate" up to Component, which presumably overwrites a label
        # assigned earlier (Latch also assigns its label after super()).
        self.this_type = "XOR Gate"
#-------------------------------------------------------------------------------------
##LATCH
class Latch(And):
    """Latch built as ``(self or set) and not reset``.

    The latch is the AND gate; its A input is an OR gate that in turn takes
    the latch's own output as its A input, which forms the feedback loop.
    """

    def __init__(self, set, reset):
        """Create the latch from a ``set`` and a ``reset`` component."""
        self.not_reset = Not(reset)
        self.latch_or = Or(None, set)
        # Start with input A unwired; the feedback loop is closed below.
        super().__init__(None, self.not_reset)
        self.this_type = "Latch"
        # Close the loop: AND.a <- OR, OR.a <- AND (this latch).
        self.receiving_from_a = self.latch_or
        self.latch_or.receiving_from_a = self
#-------------------------------------------------------------------------------------
##FUNCTIONS
def print_truth_table(gate, A, B=None):
    """Print the truth table of ``gate`` by flipping its input signals.

    One row is printed per input combination: the current input values
    followed by the gate output, separated by single spaces. Every signal is
    flipped an even number of times, so the inputs end in their initial
    state.

    Args:
        gate: object with ``get_output()``.
        A: first input signal, providing ``get_output()`` and ``flip()``.
        B: optional second input signal. When omitted, a two-row table for a
            single-input gate is printed (the original raised
            ``AttributeError`` here despite the ``None`` default).
    """
    inputs = [A] if B is None else [A, B]
    for _ in range(1 if B is None else 2):
        for _ in range(2):
            # Inputs are read before the gate, as in the original rows.
            values = [signal.get_output() for signal in inputs]
            values.append(gate.get_output())
            print(" ".join("{}".format(value) for value in values))
            A.flip()
        if B is not None:
            B.flip()
#-------------------------------------------------------------------------------------
# Constant signals: fixed False / True sources that can be wired into gates.
false = Signal(False)
true = Signal(True)
#-------------------------------------------------------------------------------------
## TEST CODE HERE
# Use signal.flip() to flip a signal.
# Some_Gate(signal1, signal2) constructs the specified gate connected to
# signal1 on the left (input A) and signal2 on the right (input B).
#
# Demo circuit: a latch driven by a data line and an enable line.
data = Signal(True)
enable = Signal(False)
# and1 = data AND enable
and1 = And(data, enable)
not_data = Not(data)
# and2 = (NOT data) AND enable
and2 = And(not_data, enable)
not_and2 = Not(and2)
# NOTE(review): Latch negates its reset input itself
# (self.not_reset = Not(reset)), so passing not_and2 makes the latch's hold
# term equal to and2 - confirm that this double negation is intended.
bit = Latch(and1, not_and2)
## Uncomment to test an SR latch
#set = Signal(False)
#reset = Signal(False)
#latch = Latch(set, reset)

How to find all path of a graph

Update
Thanks to the comments of some community members, I realize that there are some similar questions, but mine may be a bit different, so please allow me to explain it further.
I actually hope to use the same method in a real problem. So, briefly:
Reusing edges in different paths is completely allowed.
A unique (or new) path from A to B is defined as a sequence of vertices that differs from every other path in at least one vertex.
Let me use a quiz from Python data structure and algorithm analysis by Bradley N. Miller and David L. Ranum to explain my question.
Question:
Consider the task of converting the word FOOL to SAGE, also called the word ladder problem.
In solving the word ladder problem, only one letter may be replaced at a time, and the result of each step must be an existing word.
Input:
FOUL
FOOL
FOIL
FAIL
COOL
FALL
POOL
PALL
POLL
POLE
PALE
PAGE
SALE
POPE
POPE
SAGE
We can easily find the path from FOOL to SAGE, as Bradley showed:
enter image description here
and I used Breadth First Search (BFS) to solve the problem:
class Vertex:
    """Graph vertex with BFS/DFS bookkeeping and optional multi-predecessor storage.

    ``pred`` starts as an empty list. ``setPred`` either replaces it with a
    single value or appends to it, and ``predNum`` counts only the appended
    predecessors.
    """

    def __init__(self, key, value=None):
        """Create a vertex identified by ``key`` carrying an optional ``value``."""
        self.id = key
        self.connectedTo = {}      # neighbour Vertex -> edge weight
        self.color = 'white'
        self.dist = sys.maxsize
        self.pred = []
        self.disc = 0
        self.fin = 0
        # The original had a trailing comma here, storing a 1-tuple
        # ``(value,)`` instead of the value itself.
        self.value = value
        #self.GraphBulided = False
        self.traverseIndex = 0     # index of the next predecessor to hand out
        self.predNum = 0           # number of predecessors appended so far

    def addNeighbor(self, nbr, weight=0):
        """Add (or overwrite) a directed edge to ``nbr`` with ``weight``."""
        self.connectedTo[nbr] = weight

    def __str__(self):
        return '{} connectedTo: {}'.format(
            self.id, str([x.id for x in self.connectedTo]))

    def setColor(self, color):
        self.color = color

    def setDistance(self, d):
        self.dist = d

    # All predecessors are stored for a later traversal, hence the list mode.
    def setPred(self, p, list=False):
        """Record predecessor ``p``.

        Args:
            p: predecessor vertex (or ``None``).
            list: when false, ``pred`` is replaced by ``p`` and ``predNum`` is
                left untouched; when true, ``p`` is appended to ``pred`` and
                ``predNum`` is incremented. The parameter name shadows the
                builtin but is kept for existing callers.
        """
        if not list:
            self.pred = p
        else:
            self.pred.append(p)
            self.predNum += 1

    def setDiscovery(self, dtime):
        self.disc = dtime

    def setFinish(self, ftime):
        self.fin = ftime

    #def setGraphBulided(self, tag = True):
    #    self.GraphBulided = tag

    def getFinish(self):
        return self.fin

    def getDiscovery(self):
        return self.disc

    def getPred(self):
        """Return the current predecessor.

        In list mode this is the entry at ``traverseIndex`` while that index
        is below ``predNum``, otherwise the last entry. An empty list yields
        ``None`` (the original raised ``IndexError``). Outside list mode the
        stored value is returned as is.
        """
        if not isinstance(self.pred, list):
            return self.pred
        if not self.pred:
            return None
        if self.traverseIndex < self.predNum:
            return self.pred[self.traverseIndex]
        return self.pred[-1]

    def __hash__(self):
        return hash(self.id)

    def getPredById(self):
        """Hand out the next stored predecessor and advance ``traverseIndex``.

        Returns:
            ``[pred, new_index]`` while predecessors remain, otherwise
            ``[None, None]``.
        """
        if self.traverseIndex < self.predNum and isinstance(self.pred, list):
            pred = self.pred[self.traverseIndex]
            self.traverseIndex += 1
            print("vertix {}: {} of {} preds".format(
                self.id, self.traverseIndex, self.predNum))
            return [pred, self.traverseIndex]
        return [None, None]

    def getCurrPredStaus(self):
        """Return how many appended predecessors have not been handed out yet."""
        return self.predNum - self.traverseIndex

    def getDistance(self):
        return self.dist

    def getColor(self):
        return self.color

    def getConnections(self):
        return self.connectedTo.keys()

    def getId(self):
        return self.id

    def getWeight(self, nbr):
        return self.connectedTo[nbr]

    def getValue(self):
        return self.value

    def findPath(self, dest):
        """Placeholder; not implemented."""
        pass
class Graph:
    """Directed graph stored as a dict of ``Vertex`` objects keyed by id."""

    def __init__(self):
        self.vertList = {}            # key -> Vertex
        self.numVertics = 0
        self.verticsInSerach = set()  # vertices recorded during a search
        self.GraphBulided = False

    def addVertex(self, key, value=None):
        """Create a vertex for ``key`` (replacing any existing one) and return it.

        The vertex count only grows when ``key`` is new, so it stays equal to
        the number of stored vertices.
        """
        if key not in self.vertList:
            self.numVertics += 1
        newVertex = Vertex(key, value=value)
        self.vertList[key] = newVertex
        return newVertex

    def getVertex(self, n):
        """Return the vertex stored under ``n``, or ``None`` if absent."""
        return self.vertList.get(n)

    def __contains__(self, n):
        return n in self.vertList

    def addEdge(self, f, t, cost=0, fvalue=None, tvalue=None):
        """Add a directed edge ``f -> t``, creating missing endpoints.

        ``fvalue`` / ``tvalue`` are only used when the respective vertex has
        to be created.
        """
        if f not in self.vertList:
            self.addVertex(f, fvalue)
        if t not in self.vertList:
            self.addVertex(t, tvalue)
        self.vertList[f].addNeighbor(self.vertList[t], cost)

    def getVertices(self):
        return self.vertList.keys()

    def setGraphBulided(self, tag=True):
        self.GraphBulided = tag

    def setSerachedVertixs(self, vertix):
        self.verticsInSerach.add(vertix)

    def getGraphBulided(self):
        return self.GraphBulided

    def getSerachedVertixs(self):
        return self.verticsInSerach

    def __iter__(self):
        return iter(self.vertList.values())

    def __hash__(self):
        # Hash the vertex keys as a tuple. The original joined them as
        # strings (TypeError for non-string keys) and returned None for an
        # empty graph, which makes hash() raise TypeError.
        return hash(tuple(self.vertList))
Here are some additional functions for building graphs
def buildGraph(wordFile, DFSgraph=False):
    """Build the word-ladder graph from a file containing one word per line.

    Words that differ in exactly one position share a bucket such as
    ``'FO_L'``; every pair of distinct words in a bucket is connected in
    both directions.

    Args:
        wordFile: path of the word list.
        DFSgraph: when true, a ``DFSGraph`` is built instead of a ``Graph``.

    Returns:
        The populated graph.
    """
    g = DFSGraph() if DFSgraph else Graph()
    buckets = {}
    # ``with`` closes the file even if reading fails part-way.
    with open(wordFile) as wfile:
        for line in wfile:
            # strip() instead of line[:-1]: the last line may lack a newline,
            # in which case slicing would chop a real letter.
            word = line.strip()
            if not word:
                continue
            for i in range(len(word)):
                bucket = word[:i] + '_' + word[i+1:]
                buckets.setdefault(bucket, []).append(word)
    for words in buckets.values():
        for word1 in words:
            for word2 in words:
                # The inequality test also skips a word listed twice.
                if word1 != word2:
                    g.addEdge(word1, word2)
    return g
class Queue:
    """FIFO queue kept in a plain list.

    New items are inserted at index 0 and the oldest item is removed from
    the end of the list.
    """

    def __init__(self):
        self.items = list()

    def isEmpty(self):
        """Return True when the queue holds no items."""
        return len(self.items) == 0

    def enqueue(self, item):
        """Put ``item`` at the back of the queue (front of the list)."""
        self.items[0:0] = [item]

    def dequeue(self):
        """Remove and return the oldest item; IndexError when empty."""
        return self.items.pop(-1)

    def size(self):
        """Return the number of queued items."""
        count = len(self.items)
        return count
# Breadth-first style walk from `start` that records predecessors on the
# vertices it reaches.
#   g        - graph; vertices that have connections are registered on it via
#              setSerachedVertixs().
#   start    - starting vertex; gets distance 0 and predecessor None.
#   listpred - forwarded to Vertex.setPred(); when true, predecessors are
#              appended to a list instead of overwriting a single value.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the statements below is not certain - in particular whether the enqueue
# call belongs to the `elif` branch or to the white/gray branch. Confirm
# against the original source before relying on the exact visiting order.
def bfs(g, start, listpred = False):
start.setDistance(0)
start.setPred(None)
# Work list of vertices still to expand.
vertQueue = Queue()
vertQueue.enqueue(start)
while (vertQueue.size() > 0):
currentVert = vertQueue.dequeue()
# Only vertices that have outgoing connections are recorded as searched.
if currentVert.getConnections():
g.setSerachedVertixs(currentVert)
for nbr in currentVert.getConnections():
#print('sreach {}'.format(currentVert.getId()))
# Gray neighbours are accepted as well as white ones, which is what lets a
# vertex collect more than one predecessor.
if (nbr.getColor() == 'white' or nbr.getColor() == 'gray'):
nbr.setColor('gray')
nbr.setDistance(currentVert.getDistance() + 1)
# Record currentVert as predecessor unless its id is already stored.
# predNum is only incremented by setPred() in list mode, so with
# listpred false the `elif` branch is the one that runs.
if nbr.predNum > 0 and currentVert.getId() not in [x.getId() for x in nbr.pred]:
nbr.setPred(currentVert, listpred)
elif nbr.predNum == 0:
nbr.setPred(currentVert, listpred)
vertQueue.enqueue(nbr)
# Black marks a vertex as finished; black neighbours fail the colour test above.
currentVert.setColor('black')
Therefore, we can easily find the shortest path we need (if we only store one predecessor per vertex).
# Build the word-ladder graph from the word list, then run the BFS from FOOL
# with listpred=True so that predecessors are appended to each vertex's list.
wordGraph = buildGraph('fourletterwords1.txt', DFSgraph=False)
bfs(wordGraph, wordGraph.getVertex('FOOL'), listpred=True)
def traverse(y):
    """Walk the predecessor chain starting at ``y`` and print it.

    Each predecessor is printed as it is reached; when a vertex has no
    (truthy) predecessor, its id is printed and the walk ends.
    """
    node = y
    while True:
        if not node.getPred():
            break
        print(node.getPred())
        node = node.getPred()
    print(node.getId())
# Print the predecessor chain that leads from SAGE back towards the BFS start.
traverse(wordGraph.getVertex('SAGE'))
However, I still don't know how to trace all the paths correctly, can you give me some suggestions?
FIND path from src to dst ( Dijkstra algorithm )
ADD path to list of paths
LOOP P over list of paths
LOOP V over vertices in P
IF V == src OR V == dst
CONTINUE to next V
COPY graph to working graph
REMOVE V from working graph
FIND path from src to dst in working graph( Dijkstra algorithm )
IF path found
IF path not in list of paths
ADD path to list of paths

Python: Find If Substring Exists in String Given Condition

I'm trying to optimize this solution for a function that accepts 2 arguments: fullstring and substring. The function will return True if the substring exists in the fullstring, and False if it does not. There is one special wildcard that could be entered in the substring that denotes 0 or 1 of the previous symbol, and there can be more than one wildcard in the substring.
For example, "a*" means "" or "a"
The solution I have works fine but I'm trying to reduce the number of for loops (3) and optimize for time complexity. Using regex is not permitted. Is there a more pythonic way to do this?
Current Solution:
def complex_search(fullstring, substring):
    """Return True if ``substring`` occurs in ``fullstring``.

    A ``*`` in ``substring`` makes the preceding symbol optional (zero or one
    occurrence). Any number of wildcards is supported; the original only
    expanded the first one. A ``*`` with no preceding symbol, or one that
    directly follows another ``*``, has no effect.

    The pattern is matched by tracking the set of pattern positions that can
    be reached after each character of ``fullstring``, so the cost is
    O(len(fullstring) * len(substring)) and no list of variants is built.

    Args:
        fullstring: text to search in.
        substring: pattern, optionally containing ``*`` wildcards.

    Returns:
        bool: True when some variant of the pattern is a substring.
    """
    # Parse the pattern into [symbol, is_optional] pairs.
    tokens = []
    for ch in substring:
        if ch != "*":
            tokens.append([ch, False])
        elif tokens:
            tokens[-1][1] = True
    goal = len(tokens)

    def closure(states):
        """Add every position reachable by skipping optional symbols."""
        reached = set(states)
        pending = list(states)
        while pending:
            pos = pending.pop()
            if pos < goal and tokens[pos][1] and pos + 1 not in reached:
                reached.add(pos + 1)
                pending.append(pos + 1)
        return reached

    start = closure({0})
    if goal in start:
        # Empty pattern, or every symbol is optional: matches anywhere.
        return True

    active = set()
    for ch in fullstring:
        # A match may continue from an active position or begin right here.
        advanced = {pos + 1 for pos in active | start
                    if pos < goal and tokens[pos][0] == ch}
        active = closure(advanced)
        if goal in active:
            return True
    return False
>> print(complex_search("dogandcats", "dogs*andcats"))
>> True
Approach
Create all alternatives for the substring based upon '*' in substring (there can be zero or more '*' in substring)
See Function combs(...) below
Use Aho-Corasick to check if one of the substring patterns is in the string. Aho-Corasick is a very efficient algorithm for checking if one or more substrings appear in a string and formed as the basis of the original Unix command fgrep.
For illustrative purposes a Python version of Aho-Corasick is used below, but a C implementation (with Python wrapper) is available at pyahocorasick for higher performance.
See class Aho-Corasick below.
Code
# Note: This is a modification of code explained in https://carshen.github.io/data-structures/algorithms/2014/04/07/aho-corasick-implementation-in-python.html
from collections import deque
class Aho_Corasick():
    """Case-insensitive multi-keyword matcher (Aho-Corasick automaton).

    The trie is stored in ``adj_list``: each state is a dict with the keys
    ``value`` (incoming character), ``next_states`` (child state indices),
    ``fail_state`` and ``output`` (keywords that end at this state,
    including those inherited through the fail link).
    """

    def __init__(self, keywords):
        """Build the automaton from an iterable of keyword strings."""
        self.adj_list = []
        # An empty keyword cannot be stored in the trie; remember it here so
        # that it is still reported as found.
        self._has_empty_keyword = False
        # creates a trie of keywords, then sets fail transitions
        self.create_empty_trie()
        self.add_keywords(keywords)
        self.set_fail_transitions()

    def create_empty_trie(self):
        """Initialize the root of the trie."""
        self.adj_list.append(
            {'value': '', 'next_states': [], 'fail_state': 0, 'output': []})

    def add_keywords(self, keywords):
        """Add all keywords in the iterable ``keywords``."""
        for keyword in keywords:
            self.add_keyword(keyword)

    def find_next_state(self, current_state, value):
        """Return the child of ``current_state`` reached by ``value``, or None."""
        for node in self.adj_list[current_state]["next_states"]:
            if self.adj_list[node]["value"] == value:
                return node
        return None

    def add_keyword(self, keyword):
        """Add a keyword to the trie and mark output at the last node.

        The keyword is lower-cased first. An empty keyword is recorded
        separately (the original raised ``IndexError`` on it).
        """
        keyword = keyword.lower()
        if not keyword:
            self._has_empty_keyword = True
            return
        current_state = 0
        j = 0
        # Follow the part of the keyword that is already in the trie.
        child = self.find_next_state(current_state, keyword[j])
        while child is not None:
            current_state = child
            j += 1
            if j >= len(keyword):
                break
            child = self.find_next_state(current_state, keyword[j])
        # Create states for the remaining characters.
        for ch in keyword[j:]:
            node = {'value': ch, 'next_states': [], 'fail_state': 0,
                    'output': []}
            self.adj_list.append(node)
            new_state = len(self.adj_list) - 1
            self.adj_list[current_state]["next_states"].append(new_state)
            current_state = new_state
        output = self.adj_list[current_state]["output"]
        # Adding the same keyword twice must not report it twice.
        if keyword not in output:
            output.append(keyword)

    def set_fail_transitions(self):
        """Compute fail links breadth-first and merge inherited outputs."""
        q = deque()
        for node in self.adj_list[0]["next_states"]:
            q.append(node)
            self.adj_list[node]["fail_state"] = 0
        while q:
            r = q.popleft()
            for child in self.adj_list[r]["next_states"]:
                q.append(child)
                value = self.adj_list[child]["value"]
                state = self.adj_list[r]["fail_state"]
                # Walk up the fail chain until a state accepts ``value``.
                while (self.find_next_state(state, value) is None
                       and state != 0):
                    state = self.adj_list[state]["fail_state"]
                fail_state = self.find_next_state(state, value)
                if fail_state is None:
                    fail_state = 0
                self.adj_list[child]["fail_state"] = fail_state
                # Keywords ending at the fail state also end here.
                self.adj_list[child]["output"] = (
                    self.adj_list[child]["output"]
                    + self.adj_list[fail_state]["output"])

    def get_keywords_found(self, line):
        """Yield ``{"index": start, "word": keyword}`` for each match in ``line``.

        ``line`` is lower-cased before matching. If an empty keyword was
        added, it is reported once, at index 0.
        """
        line = line.lower()
        if self._has_empty_keyword:
            yield {"index": 0, "word": ""}
        current_state = 0
        for i, c in enumerate(line):
            while (self.find_next_state(current_state, c) is None
                   and current_state != 0):
                current_state = self.adj_list[current_state]["fail_state"]
            current_state = self.find_next_state(current_state, c)
            if current_state is None:
                current_state = 0
            else:
                for j in self.adj_list[current_state]["output"]:
                    yield {"index": i - len(j) + 1, "word": j}

    def pattern_found(self, line):
        """Return True when at least one keyword occurs in ``line``."""
        return next(self.get_keywords_found(line), None) is not None
def combs(word, n=0, path=""):
    """Generate every expansion of ``word`` where ``*`` makes the previous letter optional.

    For each optional letter the variant without it comes first, and earlier
    letters vary slowest, e.g.
    ``list(combs("he*lp*")) == ['hl', 'hlp', 'hel', 'help']``.
    A ``*`` that has no letter directly before it is ignored.

    The expansion is iterative, so the word length is not limited by the
    interpreter's recursion depth (the recursive original nested one
    generator per character).

    Args:
        word: pattern, optionally containing ``*`` wildcards.
        n: index in ``word`` at which expansion starts.
        path: prefix prepended to every generated string.

    Yields:
        str: ``path`` followed by one expansion of ``word[n:]``.
    """
    from itertools import product

    tail = word[n:]
    choices = []
    for i, ch in enumerate(tail):
        if ch == '*':
            continue  # wildcards never appear in the output
        if tail[i + 1:i + 2] == '*':
            choices.append(("", ch))  # optional: without first, then with
        else:
            choices.append((ch,))
    for picks in product(*choices):
        yield path + "".join(picks)
Test
# combs() is a generator; here it yields 'dogandcats' and 'dogsandcats'.
patterns = combs("dogs*andcats")
# Build the Aho-Corasick structure that recognizes any of the patterns.
aho = Aho_Corasick(patterns)
print(aho.pattern_found("dogandcats")) # Output: True
print(aho.pattern_found("dogsandcats")) # Output: True

Categories

Resources