Cannot enumerate all arithmetic expressions by recursion - python

I want to generate all possible expressions involving +, -, *, and / from a given ordered set of numbers:
class Node:
    """Expression-tree node: either a leaf value or an operator with two children."""

    def __init__(self, data):
        self.data = data
        self.left = self.right = None

    def is_leaf(self):
        """Return True when the node has no left child.

        A node without a left child is asserted to have no right child either.
        """
        if self.left is not None:
            return False
        assert self.right is None
        return True

    def __repr__(self):
        """Render a leaf as ``repr(data)`` and any other node as ``(<left><data><right>)``."""
        if not self.is_leaf():
            return '(%s%s%s)' % (self.left, self.data, self.right)
        return repr(self.data)
def enumerate_trees(numbers):
    """Yield expression trees built over ``numbers`` with the operands kept in order.

    A single number yields one leaf. Otherwise the sequence is split at every
    position and left/right subtrees are combined under each of + - * /.

    NOTE: the two sub-generators are created once per split point and are
    iterated directly. The right-hand generator is therefore consumed during
    the first pass of the inner loop and produces nothing for later left
    subtrees.
    """
    if len(numbers) == 1:
        yield Node(numbers[0])
        return
    for split in range(1, len(numbers)):
        lefts = enumerate_trees(numbers[:split])
        rights = enumerate_trees(numbers[split:])
        for lhs in lefts:
            for rhs in rights:
                for symbol in ('+', '-', '*', '/'):
                    parent = Node(symbol)
                    parent.left, parent.right = lhs, rhs
                    yield parent
if __name__ == '__main__':
    # Print every generated expression for the sample operands.
    for tree in enumerate_trees([5, 7, 10, 1]):
        # [1:-1] drops the outermost pair of parentheses added by Node.__repr__.
        print(repr(tree)[1:-1])
The output is:
5+(7+(10+1))
5-(7+(10+1))
5*(7+(10+1))
5/(7+(10+1))
5+(7-(10+1))
5-(7-(10+1))
5*(7-(10+1))
5/(7-(10+1))
5+(7*(10+1))
5-(7*(10+1))
5*(7*(10+1))
5/(7*(10+1))
5+(7/(10+1))
5-(7/(10+1))
5*(7/(10+1))
5/(7/(10+1))
5+(7+(10-1))
5-(7+(10-1))
5*(7+(10-1))
5/(7+(10-1))
5+(7-(10-1))
5-(7-(10-1))
5*(7-(10-1))
5/(7-(10-1))
5+(7*(10-1))
5-(7*(10-1))
5*(7*(10-1))
5/(7*(10-1))
5+(7/(10-1))
5-(7/(10-1))
5*(7/(10-1))
5/(7/(10-1))
5+(7+(10*1))
5-(7+(10*1))
5*(7+(10*1))
5/(7+(10*1))
5+(7-(10*1))
5-(7-(10*1))
5*(7-(10*1))
5/(7-(10*1))
5+(7*(10*1))
5-(7*(10*1))
5*(7*(10*1))
5/(7*(10*1))
5+(7/(10*1))
5-(7/(10*1))
5*(7/(10*1))
5/(7/(10*1))
5+(7+(10/1))
5-(7+(10/1))
5*(7+(10/1))
5/(7+(10/1))
5+(7-(10/1))
5-(7-(10/1))
5*(7-(10/1))
5/(7-(10/1))
5+(7*(10/1))
5-(7*(10/1))
5*(7*(10/1))
5/(7*(10/1))
5+(7/(10/1))
5-(7/(10/1))
5*(7/(10/1))
5/(7/(10/1))
5+((7+10)+1)
5-((7+10)+1)
5*((7+10)+1)
5/((7+10)+1)
5+((7+10)-1)
5-((7+10)-1)
5*((7+10)-1)
5/((7+10)-1)
5+((7+10)*1)
5-((7+10)*1)
5*((7+10)*1)
5/((7+10)*1)
5+((7+10)/1)
5-((7+10)/1)
5*((7+10)/1)
5/((7+10)/1)
(5+7)+(10+1)
(5+7)-(10+1)
(5+7)*(10+1)
(5+7)/(10+1)
(5+7)+(10-1)
(5+7)-(10-1)
(5+7)*(10-1)
(5+7)/(10-1)
(5+7)+(10*1)
(5+7)-(10*1)
(5+7)*(10*1)
(5+7)/(10*1)
(5+7)+(10/1)
(5+7)-(10/1)
(5+7)*(10/1)
(5+7)/(10/1)
(5+(7+10))+1
(5+(7+10))-1
(5+(7+10))*1
(5+(7+10))/1
There are at least two problems I can see from the output:
Some trees are not reached, for example (((5 7) 10) 1).
For a certain tree, it is possible that not all the expressions are covered. For example for the tree ((5 (7 10)) 1), only
(5+(7+10))+1
(5+(7+10))-1
(5+(7+10))*1
(5+(7+10))/1
are reached.
What is the reason? Thanks.

Your recursive calls look like this:
left_subtrees = enumerate_trees(numbers[:i])
right_subtrees = enumerate_trees(numbers[i:])
for left in left_subtrees:
for right in right_subtrees:
#...
enumerate_trees returns a generator object, which can only be iterated over once. So the for loop over right_subtrees will only work the first time, and give no results on the next iterations of the outer for loop.
To fix this, you can either put the recursive calls directly in the for statements so that they are executed each time, or you can use list(enumerate_trees(...)) to copy the results into a list.

Related

Find matrix row elements in a binary tree

I am trying to write a function which given a binary search tree T of integers and a rectangular matrix M of integers, verify if there exists a row of M whose values belong to T.
This is my code:
# Matrix under test; each inner list is one row.
M = [
    [1, 2, 3],
    [4, 5, 6]
]
class Tree:
    """Binary tree node holding a value (``root``) and two optional subtrees."""

    def __init__(self, root=None, left=None, right=None):
        self.root = root
        self.left = left
        # Fixed: this attribute was misspelled ``rigth``, so reading
        # ``tree.right`` raised AttributeError.
        self.right = right
def FindRowInTree(T, M):
    """Intended to report whether some row of matrix ``M`` is contained in tree ``T``.

    NOTE(review): as written this can only return False. The empty-tree base
    case returns False, and returning True requires both recursive calls to
    have returned True first.
    """
    if T is None:
        return False
    else:
        for r in M:
            for e in r:
                # Needs a match at this node AND a True result from both subtrees.
                if e == T.root and FindRowInTree(T.left, M) is True and FindRowInTree(T.right, M) is True:
                    return True
        # NOTE(review): original indentation was lost; wherever this line sits,
        # the value of the expression is discarded (there is no ``return``).
        FindRowInTree(T.left, M) and FindRowInTree(T.right,M)
        return False
# Build a three-node tree (4 with children 5 and 6) and run the search against M.
t = Tree(4, Tree(5, None, None), Tree(6, None, None))
x = FindRowInTree(t, M)
print(x)
It always returns False.
What would I need to change to make it work properly?
Break the problem into pieces. First write a function to find a single value in the tree:
class Tree:
    """Binary tree node that supports the ``in`` operator over the whole subtree."""

    def __init__(self, root=None, left=None, right=None):
        self.root, self.left, self.right = root, left, right

    def __contains__(self, value):
        """Return a truthy result when ``value`` is stored at this node or below it."""
        found_here = self.root == value
        if found_here:
            return found_here
        # Left subtree is searched before the right one; missing children are skipped.
        for subtree in (self.left, self.right):
            if subtree is not None and value in subtree:
                return True
        return False
Note that with an ordered binary tree, you could make this function more efficient by having it only check left or right depending on how the value you're looking for compares to the root value; that's what a "binary search" is. Since your tree is unordered, though, you just need to search both children at each node, meaning you're traversing the entire tree.
In any case, once you have a function that looks up a single value, all you need to do is call it in a loop:
def find_row_in_tree(tree, matrix):
    """Return True if every element of at least one row of ``matrix`` is ``in tree``."""
    for row in matrix:
        if all(item in tree for item in row):
            return True
    return False
If you're trying to do this in a more efficient way, an unordered binary tree is not doing you any favors; I'd just write a utility function to convert it to something more useful, like a set:
def tree_to_set(tree):
    """Collect the ``root`` value of every node reachable from ``tree`` into a set.

    ``None`` stands for an empty tree and yields an empty set.
    """
    values = set()
    pending = [tree]
    while pending:
        node = pending.pop()
        if node is None:
            continue
        values.add(node.root)
        pending.extend((node.left, node.right))
    return values
def find_row_in_tree(tree, matrix):
    """Return True if the tree's value set is a superset of at least one matrix row."""
    # Flatten the tree once so each row check is a plain set comparison.
    members = tree_to_set(tree)
    for row in matrix:
        if members.issuperset(row):
            return True
    return False

Printing a binary tree in the specific format

def str_tree(atree, indent_char='.', indent_delta=2):
    """Return a sideways, multi-line text rendering of a binary tree.

    The right subtree is emitted first and the left subtree last, each node's
    ``value`` on its own line. A line is prefixed with ``indent_char`` repeated
    once per indentation unit; indentation grows by ``indent_delta`` per level.

    Returns '' for an empty tree (``None``).
    """
    def str_tree_1(indent, atree):
        # Identity check instead of ``== None``: never invokes a node's __eq__.
        if atree is None:
            return ''
        return ''.join((
            str_tree_1(indent + indent_delta, atree.right),
            indent * indent_char + str(atree.value) + '\n',
            str_tree_1(indent + indent_delta, atree.left),
        ))
    return str_tree_1(0, atree)
def build_balanced_bst(l):
    """Intended to build a balanced BST from the sorted list ``l``.

    NOTE(review): as written, no tree node is ever constructed. The function
    returns None for an empty list, returns the (empty) list ``d`` when
    ``mid < 1``, and otherwise falls off the end, implicitly returning None.
    """
    d = []
    if len(l) == 0:
        return None
    else:
        mid = (len(l)-1)//2
        if mid >= 1:
            # Both recursive results are collected in ``d`` but never returned.
            d.append(build_balanced_bst(l[:mid]))
            d.append(build_balanced_bst(l[mid:]))
        else:
            return d
The build_balanced_bst(l) function takes in a list of unique values that are sorted in increasing order. It returns a reference to the root of a well-balanced binary search tree. For example, calling build_balanced_bst(list(irange(1,10))) returns a binary search tree of height 3 that would print as:
......10
....9
..8
......7
....6
5
......4
....3
..2
....1
The str_tree function prints what the build_balanced_bst function returns
I am working on the build_balanced_bst(l) function to make it apply to the str_tree function. I used the middle value in the list as the root’s value.
But when I call the function as the way below:
# NOTE(review): irange is not defined in this snippet; presumably an inclusive
# range helper -- confirm.
l = list(irange(1,10))
t = build_balanced_bst(l)
print('Tree is\n',str_tree(t),sep='')
it doesn't print anything. Can someone help me to fix my build_balanced_bst(l) function?
Keeping the str_tree method as it is, here's the remaining code.
class Node:
    """A tree node: one value plus optional left and right children."""

    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left, self.right = left, right
def build_balanced_bst(lt):
    """Build a balanced binary search tree from the sorted list ``lt``.

    The middle element becomes the root; the elements before it form the left
    subtree and the elements after it form the right subtree, recursively.
    Returns None for an empty list.
    """
    size = len(lt)
    if size == 0:
        return None
    # For an even length this picks the lower of the two middle positions.
    middle = (size - 1) // 2
    return Node(
        lt[middle],
        build_balanced_bst(lt[:middle]),
        build_balanced_bst(lt[middle + 1:]),
    )
# Build the tree for the values 1..10 and print its sideways rendering.
ordered_list = list(range(1,11))
bst=build_balanced_bst(ordered_list)
bst_repr = str_tree(bst)
print(bst_repr)
The output comes out as follows:
......10
....9
..8
......7
....6
5
......4
....3
..2
....1

Python: Create a Binary search Tree using a list

The objective of my code is to get each separate word from a txt file and put it into a list, and then make a binary search tree from that list to count the frequency of each word, printing each word in alphabetical order along with its frequency. Each word in the file can only contain letters, numbers, -, or '. The part that I am unable to do with my beginner programming knowledge is to make the Binary Search Tree using the list I have (I am only able to insert the whole list in one Node instead of putting each word in a Node to make the tree). The code I have so far is this:
def read_words(filename):
    """Read ``filename`` and return its words as a list of lower-case strings.

    A word may contain only the letters a-z, digits, ``-`` and ``'``. Any other
    non-whitespace character is dropped; whitespace (including line breaks)
    separates words.

    Fixes over the previous version:
    * characters were removed from the list while it was being iterated, which
      skipped the character after every removal;
    * newlines were deleted, gluing the last word of a line to the first word
      of the next one;
    * the file handle was never closed.
    """
    allowed = set("abcdefghijklmnopqrstuvwxyz'-")
    kept = []
    with open(filename, "r") as openfile:
        for line in openfile:
            for ch in line.lower():
                if ch in allowed or ch.isdigit():
                    kept.append(ch)
                elif ch.isspace():
                    # Normalise every kind of whitespace to a plain separator.
                    kept.append(' ')
    return "".join(kept).split()
class BinaryTree:
    """Binary search tree that stores each distinct value once with an occurrence count."""
    class _Node:
        """One tree node: a value, its count, and left/right child links."""
        def __init__(self, value, left=None, right=None):
            self._left = left
            self._right = right
            self._value = value
            self._count = 1  # a freshly created node stands for one occurrence
    def __init__(self):
        self.root = None
    def isEmpty(self):
        """Return True when the tree has no root node."""
        return self.root == None
    def insert(self, value) :
        """Insert ``value``, or increment its count if an equal value is already stored."""
        if self.isEmpty() :
            self.root = self._Node(value)
            return
        parent = None
        pointer = self.root
        # Walk down until an equal value is found or the walk falls off the
        # tree; ``parent`` ends up as the last node visited.
        while (pointer != None) :
            if value == pointer._value:
                pointer._count += 1
                return
            elif value < pointer._value:
                parent = pointer
                pointer = pointer._left
            else :
                parent = pointer
                pointer = pointer._right
        # Attach the new node below the last node visited.
        if (value <= parent._value) :
            parent._left = self._Node(value)
        else :
            parent._right = self._Node(value)
    def printTree(self):
        # NOTE(review): _left/_right hold _Node objects, and _Node defines no
        # printTree method, so the recursive calls below cannot succeed on a
        # tree with more than one node.
        pointer = self.root
        if pointer._left is not None:
            pointer._left.printTree()
        print(str(pointer._value) + " " + str(pointer._count))
        if pointer._right is not None:
            pointer._right.printTree()
    def createTree(self,words):
        if len(words) > 0:
            for word in words:
                # Each iteration inserts into a brand-new, immediately discarded tree.
                BinaryTree().insert(word)
            # Returns yet another new, empty tree; ``self`` is never modified.
            return BinaryTree()
        else:
            return None
    def search(self,tree, word):
        # NOTE(review): reads value/count/left/right, whereas _Node stores
        # _value/_count/_left/_right; there is also no exit for a missing word.
        node = tree
        depth = 0
        count = 0
        while True:
            print(node.value)
            depth += 1
            if node.value == word:
                count = node.count
                break
            elif word < node.value:
                node = node.left
            elif word > node.value:
                node = node.right
        return depth, count
def main():
    """Read the words of sample.txt, feed them to a BinaryTree and print it."""
    words = read_words('sample.txt')
    b = BinaryTree()
    # Passes the whole list as a single value, so one node holds the entire list.
    b.insert(words)
    # The tree returned by createTree is not kept.
    b.createTree(words)
    b.printTree()
Since you're a beginner, I'd advise implementing the tree methods with recursion instead of iteration, since this results in a simpler implementation. While recursion might seem a difficult concept at first, it is often the easiest approach.
Here's a draft implementation of a binary tree which uses recursion for insertion, searching and printing the tree, it should support the functionality you need.
class Node(object):
    """Tree node storing a value, how often it was inserted, and two child links."""

    def __init__(self, value):
        self.count = 1
        self.left = self.right = None
        self.value = value

    def __str__(self):
        """Return the node as ``value: <value>, count: <count>``."""
        template = 'value: {0}, count: {1}'
        return template.format(self.value, self.count)
def insert(root, value):
    """Insert ``value`` into the tree rooted at ``root`` and return the tree's root.

    An empty tree (falsy ``root``) yields a new single node. A value equal to
    an existing node's value increments that node's ``count``. Smaller values
    go to the left, everything else to the right.
    """
    if not root:
        return Node(value)
    current = root
    while True:
        if current.value == value:
            current.count += 1
            break
        side = 'left' if value < current.value else 'right'
        child = getattr(current, side)
        if not child:
            # Free slot found: hang the new node here.
            setattr(current, side, Node(value))
            break
        current = child
    return root
def create(seq):
    """Build a tree by inserting every item of ``seq`` in order; return its root.

    Returns None when ``seq`` is empty.
    """
    tree = None
    for item in seq:
        tree = insert(tree, item)
    return tree
def search(root, word, depth=1):
    """Look up ``word`` and return ``(depth, count)`` for the node that holds it.

    ``depth`` is the level at which the search starts (1 for the root by
    default). When the word is not found the result is ``(0, 0)``.
    """
    node = root
    while node:
        if node.value == word:
            return depth, node.count
        node = node.left if word < node.value else node.right
        depth += 1
    return 0, 0
def print_tree(root):
    """Print every node of the tree in order (left subtree, node, right subtree).

    Each node is printed on its own line via ``str(node)``. An empty tree
    (falsy ``root``) prints nothing.
    """
    if root:
        print_tree(root.left)
        # Function-call form: valid on Python 3 and, with a single argument,
        # prints the same thing on Python 2.
        print(root)
        print_tree(root.right)
# Demo: build a tree from a small word list, print it, then look up each word.
src = ['foo', 'bar', 'foobar', 'bar', 'barfoo']
tree = create(src)
print_tree(tree)
for word in src:
    # Function-call form of print: required on Python 3, same output on Python 2.
    print('search {0}, result: {1}'.format(word, search(tree, word)))
# Output
# value: bar, count: 2
# value: barfoo, count: 1
# value: foo, count: 1
# value: foobar, count: 1
# search foo, result: (1, 1)
# search bar, result: (2, 2)
# search foobar, result: (2, 1)
# search bar, result: (2, 2)
# search barfoo, result: (3, 1)
To answer your direct question, the reason why you are placing all of the words into a single node is because of the following statement inside of main():
b.insert(words)
The insert function creates a Node and sets the value of the node to the item you pass in. Instead, you need to create a node for each item in the list, which is what your createTree() function does. The preceding b.insert is not necessary.
Removing that line makes your tree become correctly formed, but reveals a fundamental problem with the design of your data structure, namely the printTree() method. This method seems designed to traverse the tree and recursively call itself on any child. In your initial version this function worked, because there the tree was mal-formed with only a single node of the whole list (and the print function simply printed that value since right and left were empty).
However, with a correctly formed tree the printTree() function now tries to invoke itself on the left and right descendants. The descendants, however, are of type _Node, not of type BinaryTree, and there is no method printTree() declared for _Node objects.
You can salvage your code and solve this new error in one of two ways. First you can implement your BinaryTree.printTree() function as _Node.printTree(). You can't do a straight copy and paste, but the logic of the function won't have to change much. Or you could leave the method where it is at, but wrap each _left or _right node inside of a new BinaryTree so that they would have the necessary printTree() method. Doing this would leave the method where it is at, but you will still have to implement some kind of helper traversal method inside of _Node.
Finally, you could change all of your _Node objects to be _BinaryTree objects instead.
The semantic difference between a node and a tree is one of scope. A node should only be aware of itself, its direct children (left and right), and possibly its parent. A tree on the other hand can be aware of any of its descendents, no matter how far removed. This is accomplished by treating any child node as its own tree. Even a leaf, without any children at all can be thought of as a tree with a depth of 0. This behavior is what lets a tree work recursively. Your code is mixing the two together.

In order BST traversal: find

I am trying to find the kth smallest element of binary search tree and I have problems using recursion. I understand how to print the tree inorder/postorder etc. but I fail to return the rank of the element. Can someone point where I am making a mistake? In general, I am having hard time understanding recursion in trees.
Edit: this is an exercise, so I am not looking for using built-in functions. I have another solution where I keep track of number of left and right children as I insert nodes and that code is working fine. I am wondering if it is possible to do this using inorder traversal because it seems to be a simpler solution.
class BinaryTreeNode:
    """Plain binary tree node: a payload plus optional left and right children."""

    def __init__(self, data, left=None, right=None):
        self.left, self.right = left, right
        self.data = data
def traverseInOrder(root,order):
    """Walk the tree in order, printing each node's data followed by ``order``.

    NOTE(review): ``order`` is an ordinary argument, so the ``order+1`` passed
    to the left subtree is not visible after that call returns. The number
    printed for a node is the count of left-steps taken to reach it, not its
    position in the in-order sequence.
    """
    if root == None:
        return
    traverseInOrder(root.left,order+1)
    print root.data,
    print order
    traverseInOrder(root.right,order)
"""
a
/ \
b c
/ \ / \
d e f g
/ \
h i
"""
# Build the tree from the diagram bottom-up: leaves first, root ``a`` last.
h = BinaryTreeNode("h")
i = BinaryTreeNode("i")
d = BinaryTreeNode("d", h, i)
e = BinaryTreeNode("e")
f = BinaryTreeNode("f")
g = BinaryTreeNode("g")
b = BinaryTreeNode("b", d, e)
c = BinaryTreeNode("c", f, g)
a = BinaryTreeNode("a", b, c)
# traverseInOrder has no return value, so this also prints None after the walk.
print traverseInOrder(a,0)
If this is an academic exercise, make traverseInOrder (or similar method tailored to the purpose) return the number of children it visited. From there things get simpler.
If this isn't academic, have a look at http://stromberg.dnsalias.org/~dstromberg/datastructures/ - the dictionary-like objects are all trees, and support iterators - so finding the nth is a matter of zip(tree, range(n)).
You could find the smallest element in the binary search tree first. Then, starting from that element, call a method that gives you the next element k - 1 times.
For find_smallest_node method, note that you can traverse all the nodes "in-order" until reach to smallest. But that approach takes O(n) time.
However, you do not need a recursion to find the smallest node, because in BST smallest node is simply the left most node, so you can traverse the nodes until finding a node that has no left child and it takes O(log n) time:
class BST(object):
    """Binary search tree fragment: smallest-node and k-th smallest lookups.

    Expects ``self.root`` to be the root node (or None) and nodes to expose
    ``data`` and ``left``.
    """

    def find_smallest_node(self):
        """Return the node with the smallest data along the left spine, or None if empty."""
        node = self.root
        smallest = node
        # Follow left links to the end; identity checks replace ``== None``.
        while node is not None:
            if node.data <= smallest.data:
                smallest = node
            node = node.left
        return smallest

    def find_k_smallest(self, k):
        """Return the data of the k-th smallest element (k = 1 is the minimum).

        Returns None for an empty tree. Advances from the minimum with
        ``get_next`` k - 1 times.
        """
        smallest = self.find_smallest_node()
        if smallest is None:
            return None
        data = smallest.data
        for _ in range(k - 1):
            data = self.get_next(data)
        return data

    def get_next (self, key):
        # Implementation is elided in the original snippet.
        ...
It just requires to keep the parent of the nodes when inserting them to the tree.
class Node(object):
    """Tree node that also keeps a link to its parent."""

    def __init__(self, data, left=None, right=None, parent=None):
        self.data = data
        self.left, self.right = left, right
        self.parent = parent
An implementation of the bst class with the above methods and also def get_next (self, key) function is here. The upper folder contains the test cases for it and it worked.

Python: Optimizing, or at least getting fresh ideas for a tree generator

I have written a program that generates random expressions and then uses genetic techniques to select for fitness.
The following part of the program generates the random expression and stores it in a tree structure.
As this can get called billions of times during a run, I thought it should be optimized for time.
I'm new to programming and I work (play) by myself so, as much as I search on the internet for
ideas, I'd like some input as I feel like I'm doing this in isolation.
The bottlenecks seem to be Node.__init__() (22% of total time) and random.choice() (14% of total time).
import random
def printTreeIndented(data, level=0):
    '''Print the tree sideways: right subtree first, then the node, then the left subtree.

    Each node's ``cargo`` is printed on its own line, prefixed by ``level``
    spaces plus one more space. ``None`` prints nothing.
    '''
    # Identity check instead of ``== None``.
    if data is None:
        return
    printTreeIndented(data.right, level+1)
    # Function-call form of print: valid on Python 3, same output on Python 2.
    print(' '*level + ' '+ str(data.cargo))#+ ' '+ str(data.seq)+ ' '+ str(data.branch)
    printTreeIndented(data.left, level+1)
# Global constants used as defaults and tuning knobs by Tree.build_nodes().
Depth = 5
# A non-root node becomes terminal when random.random() > Ratio, so a larger
# Ratio makes termination less likely.
Ratio = .6
# Terminal values; 'x' is listed several times, which makes it more likely to be picked.
Atoms = ['1.0','2.0','3.0','4.0','5.0','6.0','7.0','8.0','9.0','x','x','x','x']
# Operators, mapped as operator symbol -> number of arguments.
Operators = {'+': 2, '-': 2, '*': 2, '/': 2, '**': 2}
class KeySeq:
    '''Iterator that produces sequential integers, used as keys in Tree.thedict.

    The first value returned is ``data + 1`` (1 with the default start).
    '''
    def __init__(self, data = 0):
        self.data = data

    def __iter__(self):
        return self

    def next(self):
        '''Advance the counter and return the new value.'''
        self.data = self.data + 1
        return self.data

    # Python 3 looks up __next__ for the iterator protocol; the alias keeps
    # existing ``.next()`` callers working unchanged.
    __next__ = next
KS = KeySeq()
class Node(object):
    '''Expression-tree node: a cargo value, child links and bookkeeping fields.'''

    def __init__(self, cargo, left=None, right=None):
        super(Node, self).__init__()
        self.cargo = cargo
        self.left, self.right = left, right
        # Bookkeeping fields start out unset.
        self.parent = self.branch = None
        self.isRoot = False
        self.seq = 0
class Tree(object):
    '''Randomly generated expression tree plus a dict indexing every node by key.'''
    def __init__(self):
        self.thedict = {} #provides access to the nodes for further mutation and
        # crossbreeding.
        #When the Tree is instantiated, it comes filled with data.
        self.data = self.build_nodes()
        # Uncomment the following lines to see the data and a crude graphic of the tree.
        # print 'data: '
        # for v in self.thedict.itervalues():
        # print v.cargo,
        # print
        # print
        # printTreeIndented(self.data)
    def build_nodes (self, depth = Depth, entry = 1, pparent = None,
                     bbranch = None):
        '''Recursively build a random subtree and return its root node.

        depth   -- remaining levels; a value <= 0 forces a terminal node.
        entry   -- truthy only for the outermost call, which always creates
                   an operator node and marks it as root.
        pparent -- stored on the new node as ``parent``.
        bbranch -- stored on the new node as ``branch``.

        Every node created gets the next key from KS and is registered in
        self.thedict under that key.
        '''
        r = float()  # immediately overwritten by the next line
        r = random.random()
        #If r > Ratio, it forces a terminal node regardless of
        #the value of depth.
        #If entry = 1, then it's the root node and we don't want
        # a tree with just a value in the root node.
        if (depth <= 0) or ((r > Ratio) and (not (entry))):
            '''
            Add a terminal node.
            '''
            this_atom = (random.choice(Atoms))
            this_atom = str(this_atom)
            this_node = Node(this_atom)
            this_node.parent = pparent
            this_node.branch = bbranch
            this_node.seq = KS.next()
            self.thedict[this_node.seq] = this_node
            return this_node
        else:
            '''
            Add a node that has branches.
            '''
            this_operator = (random.choice(Operators.keys()))
            this_node = Node(this_operator)
            if entry:
                this_node.isRoot = True
            this_node.parent = pparent
            this_node.branch = bbranch
            this_node.seq = KS.next()
            self.thedict[this_node.seq] = this_node
            # Branch once per operator argument;
            # only two arguments (left/right) are handled for now.
            for i in range(Operators[this_operator]):
                # depth is decremented on every pass, so the right branch is
                # built with one level less than the left branch.
                depth =(depth - 1)
                if i == 0:
                    this_node.left = (self.build_nodes(entry = 0, depth =(depth),
                                                       pparent = this_node, bbranch = 'left'))
                else:
                    this_node.right = (self.build_nodes(entry = 0, depth =(depth),
                                                        pparent = this_node, bbranch = 'right'))
            return this_node
def Main():
    '''Build 100000 random trees one after another and return the last one.'''
    for _ in range(100000):
        latest = Tree()
    return latest
if __name__ == '__main__':
    # Keep the tree returned by Main() in a module-level name.
    rresult = Main()
Below, I've summarized some of the more obvious optimization efforts, without really touching the algorithm much. All timings are done with Python 2.6.4 on a Linux x86-64 system.
Initial time: 8.3s
Low-Hanging Fruits
jellybean already pointed some out. Just fixing those already improves the runtime a little bit. Replacing the repeated calls to Operators.keys() by using the same list again and again also saves some time.
Time: 6.6s
Using itertools.count
Pointed out by Dave Kirby, simply using itertools.count also saves you some time:
from itertools import count
# Start at 1 so the keys match KeySeq, whose first value was 1
# (a bare count() would start at 0).
KS = count(1)
Time: 6.2s
Improving the Constructor
Since you're not setting all attributes of Node in the ctor, you can just move the attribute declarations into the class body:
class Node(object):
    """Node whose bookkeeping defaults live on the class, not on each instance.

    Only ``cargo`` is stored per instance at construction time; the other
    attributes are found on the class until an instance assigns its own value.
    """
    left = right = parent = branch = None
    isRoot = False
    seq = 0

    def __init__(self, cargo):
        self.cargo = cargo
This does not change the semantics of the class as far as you're concerned, since all values used in the class body are immutable (False, None, 0), if you need other values, read this answer on class attributes first.
Time: 5.2s
Using namedtuple
In your code, you're not changing the expression tree any more, so you might as well use an object that is immutable. Node also does not have any behavior, so using a namedtuple is a good option. This does have an implication though, since the parent member had to be dropped for now. Judging from the fact that you might introduce operators with more than two arguments, you would have to replace left/right with a list of children anyway, which is mutable again and would allow creating the parent node before all the children.
from collections import namedtuple
Node = namedtuple("Node", ["cargo", "left", "right", "branch", "seq", "isRoot"])
# ...
def build_nodes (self, depth = Depth, entry = 1, pparent = None,
                 bbranch = None):
    """Build a random subtree out of immutable namedtuple nodes and return its root.

    Children are built inside the Node(...) call, so a node receives its key
    from KS only after both of its subtrees have been created. ``pparent`` is
    accepted but unused, because this Node type has no parent field.
    """
    r = random.random()
    if (depth <= 0) or ((r > Ratio) and (not (entry))):
        # Terminal node: no children, never the root.
        this_node = Node(
            random.choice(Atoms), None, None, bbranch, KS.next(), False)
        self.thedict[this_node.seq] = this_node
        return this_node
    else:
        this_operator = random.choice(OpKeys)
        # The left branch gets depth - 1 and the right branch depth - 2.
        this_node = Node(
            this_operator,
            self.build_nodes(entry = 0, depth = depth - 1,
                             pparent = None, bbranch = 'left'),
            self.build_nodes(entry = 0, depth = depth - 2,
                             pparent = None, bbranch = 'right'),
            bbranch,
            KS.next(),
            bool(entry))
        self.thedict[this_node.seq] = this_node
        return this_node
I've kept the original behavior of the operand loop, that decrements the depth at each iteration. I'm not sure this is wanted behavior, but changing it increases runtime and therefore makes comparison impossible.
Final time: 4.1s
Where to go from here
If you want to have support for operators with more than two operands and/or support for the parent attribute, use something along the lines of the following code:
from collections import namedtuple
Node = namedtuple("Node", ["cargo", "args", "parent", "branch", "seq", "isRoot"])
def build_nodes (self, depth = Depth, entry = 1, pparent = None,
                 bbranch = None):
    """Build a random subtree whose nodes keep their children in an ``args`` list.

    The operator node is created first with an empty ``args`` list, so it can
    be passed to its children as ``pparent``; the list is then filled in place.
    """
    r = random.random()
    if (depth <= 0) or ((r > Ratio) and (not (entry))):
        # Terminal node: ``args`` is None and the node is never the root.
        this_node = Node(
            random.choice(Atoms), None, pparent, bbranch, KS.next(), False)
        self.thedict[this_node.seq] = this_node
        return this_node
    else:
        this_operator = random.choice(OpKeys)
        this_node = Node(
            this_operator, [], pparent, bbranch,
            KS.next(), bool(entry))
        # One child per operator argument; child i is built with
        # depth - (i + 1) and records its position i as ``branch``.
        this_node.args.extend(
            self.build_nodes(entry = 0, depth = depth - (i + 1),
                             pparent = this_node, bbranch = i)
            for i in range(Operators[this_operator]))
        self.thedict[this_node.seq] = this_node
        return this_node
This code also decreases the depth with the operator position.
You can omit lots of braces in your code, that's one of Python's benefits. E.g. when putting braces around conditions, like
if (depth <= 0) or ((r > Ratio) and (not (entry))):
just write
if depth <= 0 or (r > Ratio and not entry):
And I think there are a couple of redundant calls, e.g.
this_atom = str(this_atom)
(this_atom will already be a string, and building strings is always expensive, so just omit this line)
or the call to the object constructor
object.__init__(self)
which isn't necessary, either.
As for the Node.__init__ method being the "bottleneck": I guess spending most of your time there cannot be avoided, since when constructing trees like this there's not much else you'll be doing but creating new Nodes.
You can replace the KeySeq generator with itertools.count which does exactly the same thing but is implemented in C.
I don't see any way of speeding up the Node constructor. The call to random.choice you could optimise by inlining the code - cut & paste it from the source for the random module. This will eliminate a function call, which are relatively expensive in Python.
You could speed it up by running under psyco, which is a kind of JIT optimiser. However this only works for 32 bit Intel builds of Python. Alternatively you could use cython - this converts python(ish) code into C, which can be compiled into a Python C module. I say pythonish since there some things that cannot be converted, and you can add C data type annotations to make the generated code more efficient.

Categories

Resources