Funcparserlib.lexer.Spec ImportError: cannot import name 'Spec' - python

For learning purposes, I'm trying to convert a Chef interpreter project to python 3.4 and trying to wrangle the libraries involved into their newest versions, but when it comes to funcparserlib I'm a little over my head.
Here's the Chef script:
from pprint import pprint
from collections import namedtuple
import re
import logging
import funcparserlib.parser as p
from funcparserlib.lexer import make_tokenizer
from funcparserlib.lexer import Spec
from funcparserlib.contrib.lexer import space, newline
from funcparserlib.contrib.common import sometok, unarg
from common import *
log = logging.getLogger('preserve.chefparser')
#log.addHandler(logging.StreamHandler())
#log.setLevel(logging.DEBUG)
pos = 0
# order matters
# Token specs used to tokenize a single method instruction.
# Each keyword below becomes a Spec whose name is the first lower-cased word
# of its pattern, e.g. 'Set aside' -> 'set', 'contents of the' -> 'contents'.
instruction_spec = [
Spec(x.lower().split()[0], x) for x in [
'Take', 'Put', 'Fold', 'Add', 'Remove', 'Combine', 'Divide', 'Stir', 'Mix', 'Clean', 'Pour', 'Set aside', 'Refrigerate', 'from', 'the', 'for', 'contents of the', 'until', 'refrigerator', 'minute', 'minutes', 'hour', 'hours', 'well'
]
]
# These four are inserted at index 0, so they end up ahead of every keyword
# above (final order: liquefy, add_dry, into, to, take, ...).
# NOTE(review): presumably so that 'Add dry ingredients' is tried before the
# plain 'Add' keyword — confirm against the tokenizer in use.
instruction_spec.insert(0, Spec('to', r'to'))
instruction_spec.insert(0, Spec('into', r'into'))
instruction_spec.insert(0, Spec('add_dry', 'Add dry ingredients'))
instruction_spec.insert(0, Spec('liquefy', 'Liquefy|Liquify'))
# Multi-word keywords and the generic fall-back patterns go at the end.
instruction_spec.append(Spec('serve_with', r'Serve with'))
instruction_spec.append(Spec('bowl', 'mixing bowl'))
instruction_spec.append(Spec('dish', 'baking dish'))
# `space` is the ready-made whitespace spec imported from funcparserlib.contrib.lexer.
instruction_spec.append(space)
instruction_spec.append(Spec('string', '[A-Za-z]+'))
# 'ordinal' is listed before 'number'; both start with digits.
instruction_spec.append(Spec('ordinal', '[0-9]+(st|nd|rd|th)'))
instruction_spec.append(Spec('number', '[0-9]+'))
# Token specs for the recipe as a whole (title, ingredients, method header...).
# Order matters: the catch-all 'string' spec must stay last.
tokens = [
    Spec('ingredients_start', 'Ingredients'),
    Spec('method_start', r'^Method', re.MULTILINE),
    # ``(?:es)?`` makes the plural suffix optional as a unit.  The previous
    # ``[es]?`` was a character class (one optional "e" or "s"), so the
    # plurals "pinches" and "dashes" could never match.
    Spec('dry_measure', r' g | kg | pinch(?:es)? '),
    Spec('liquid_measure', r' ml | l | dash(?:es)? '),
    Spec('mix_measure', r'cup[s]?|teaspoon[s]?|tablespoon[s]?'),
    Spec('measure_type', 'heaped|level'),
    # TODO hours minutes
    Spec('cooking_time', r'Cooking time:'),
    # TODO gas mark
    Spec('oven', r'Pre\-heat oven to'),
    # Accept the correct spelling "Celsius" as well as the historical
    # misspelling "Celcius" that this spec used to require.
    Spec('oven_temp', r'degrees Cel[cs]ius'),
    # serve is treated separate here as it is
    # not necessary for it to appear
    # following 'Method.'
    # But it is treated as just another
    # instruction by the interpreter
    Spec('serve', r'^Serves', re.MULTILINE),
    Spec('number', '[0-9]+'),
    space,
    Spec('period', r'\.'),
    Spec('string', r'[^\.\r\n]+'),
]
def tokenize_minus_whitespace(token_list, input):
    """Tokenize ``input`` with ``token_list`` and drop whitespace tokens.

    Args:
        token_list: token specs accepted by ``make_tokenizer``.
        input: the text to tokenize.

    Returns:
        A list of the produced tokens whose ``type`` is not ``'space'``.
    """
    tokenizer = make_tokenizer(token_list)
    return [tok for tok in tokenizer(input) if tok.type != 'space']
def tokenize_instruction(spec):
    """Tokenize the text of one instruction using ``instruction_spec``.

    Whitespace tokens are removed from the result.
    """
    return tokenize_minus_whitespace(instruction_spec, spec)
def tokenize(input):
    """Tokenize a whole recipe using the module-level ``tokens`` specs.

    Whitespace tokens are removed from the result.
    """
    return tokenize_minus_whitespace(tokens, input)
def parse_instruction(spec):
    """Parse the text of one method instruction.

    The text is tokenized with ``tokenize_instruction`` and matched against
    the alternatives below.  The parsed tuple ``x`` is turned into
    ``Instruction(name, args)`` where ``name`` is ``x[0]`` lower-cased with
    spaces replaced by underscores and ``args`` is ``x[1:]``.

    Args:
        spec: the instruction text (without its trailing period).

    Returns:
        Whatever ``instruction.parse(...)`` returns for the token list.
    """
    string = p.oneplus(sometok('string')) >> (lambda x: ' '.join(x))
    ordinal = sometok('ordinal')
    bowl = sometok('bowl')
    the = sometok('the')
    dish = sometok('dish')
    to = sometok('to')
    into = sometok('into')
    concat = lambda parts: ' '.join(parts)
    # One or more word groups joined by single spaces.  Built once and reused;
    # the original repeated this expression in every rule.
    words = p.oneplus(string) >> concat

    take_i = sometok('take') + words + sometok('from') + sometok('refrigerator')
    put_i = sometok('put') + p.skip(p.maybe(the)) + words + p.skip(into) + p.maybe(ordinal|the) + bowl
    liquefy_1 = sometok('liquefy') + sometok('contents') + p.maybe(ordinal) + bowl
    liquefy_2 = sometok('liquefy') + words
    liquefy_i = liquefy_1 | liquefy_2
    pour_i = sometok('pour') + sometok('contents') + p.maybe(ordinal) + bowl + sometok('into') + the + p.maybe(ordinal) + dish
    fold_i = sometok('fold') + p.skip(p.maybe(the)) + words + into + p.maybe(ordinal|the) + bowl
    # cleanup repetition
    add_i = sometok('add') + words + p.maybe(to + p.maybe(ordinal|the) + bowl)
    remove_i = sometok('remove') + words + p.maybe(sometok('from') + p.maybe(ordinal|the) + bowl)
    combine_i = sometok('combine') + words + p.maybe(into + p.maybe(ordinal|the) + bowl)
    divide_i = sometok('divide') + words + p.maybe(into + p.maybe(ordinal|the) + bowl)
    add_dry_i = sometok('add_dry') + p.maybe(to + p.maybe(ordinal|the) + bowl)
    stir_1 = sometok('stir') + p.maybe(the + p.maybe(ordinal|the) + bowl) + sometok('for') + sometok('number') + (sometok('minute')|sometok('minutes'))
    stir_2 = sometok('stir') + words + into + the + p.maybe(ordinal) + bowl
    stir_i = stir_1 | stir_2
    mix_i = sometok('mix') + p.maybe(the + p.maybe(ordinal) + bowl) + sometok('well')
    clean_i = sometok('clean') + p.maybe(ordinal|the) + bowl
    # Loop statements start with an arbitrary word, so they are tagged
    # explicitly instead of being named after their first token.
    loop_start_i = (sometok('string') + p.maybe(the) + words) >> (lambda x: ('loop_start', x))
    loop_end_i = (sometok('string') + p.maybe(p.maybe(the) + words) + sometok('until') + string) >> (lambda x: ('loop_end', x))
    set_aside_i = sometok('set') >> (lambda x: (x, None))
    serve_with_i = sometok('serve_with') + words
    refrigerate_i = sometok('refrigerate') + p.maybe(sometok('for') + sometok('number') + (sometok('hour')|sometok('hours')))

    instruction = ( take_i
                  | put_i
                  | liquefy_i
                  | pour_i
                  | add_i
                  | fold_i
                  | remove_i
                  | combine_i
                  | divide_i
                  | add_dry_i
                  | stir_i
                  | mix_i
                  | clean_i
                  | loop_end_i    # -| ORDER matters
                  | loop_start_i  # -|
                  | set_aside_i
                  | serve_with_i
                  | refrigerate_i
                  ) >> (lambda x: Instruction(x[0].lower().replace(' ', '_'), x[1:]))
    return instruction.parse(tokenize_instruction(spec))
def parse(input):
    """Parse the text of one or more recipes.

    The text is tokenized with ``tokenize`` and matched against
    ``oneplus(recipe)``.  A recipe is a header followed by the optional
    ingredients, cooking-time, oven, method and serves sections, in that
    order.

    Args:
        input: the complete recipe source text.

    Returns:
        Whatever ``main_parser.parse(...)`` returns for the token list.
    """
    period = sometok('period')
    string = p.oneplus(sometok('string')) >> (lambda x: ' '.join(x))
    number = sometok('number')

    # NOTE: ``+`` binds tighter than ``>>``, so in the unparenthesized rules
    # below the whole sequence is what gets passed to the constructor.
    title = string + p.skip(period) >> RecipeTitle
    ingredients_start = sometok('ingredients_start') + p.skip(period) >> IngredientStart
    dry_measure = p.maybe(sometok('measure_type')) + sometok('dry_measure')
    liquid_measure = sometok('liquid_measure')
    mix_measure = sometok('mix_measure')
    # is this valid ? 'g of butter', unit w/o initial_value
    ingredient = (p.maybe(number)
                  + p.maybe(dry_measure
                            | liquid_measure
                            | mix_measure)
                  + string >> unarg(Ingredient)
                  )
    ingredients = p.many(ingredient)
    cooking_time = (p.skip(sometok('cooking_time'))
                    + (number
                       >> unarg(CookingTime))
                    + p.skip(sometok('period'))
                    )
    oven_temp = (p.skip(sometok('oven'))
                 + p.many(number)
                 + p.skip(sometok('oven_temp'))
                 >> unarg(Oven)
                 )
    method_start = sometok('method_start') + p.skip(period)
    comment = p.skip(p.many(string|period))
    header = title + p.maybe(comment)
    # Each sentence of the method is re-parsed with the instruction grammar.
    instruction = (string
                   + p.skip(period)
                   ) >> parse_instruction
    instructions = p.many(instruction)
    program = (method_start + instructions) >> unarg(MethodStart)
    serves = (sometok('serve') + number >> (lambda x: Serve('serve', x[1])) ) + p.skip(period)
    ingredients_section = (ingredients_start + ingredients) >> unarg(IngredientSection)
    recipe = ( header
             + p.maybe(ingredients_section)
             + p.maybe(cooking_time)
             + p.maybe(oven_temp)
             + p.maybe(program)
             + p.maybe(serves)
             ) >> RecipeNode
    main_parser = p.oneplus(recipe)
    return main_parser.parse(tokenize(input))
Running the script fails:
ImportError: cannot import name 'Spec'
The version of funcparserlib.lexer that I have is:
#Snipped some licence. Hint it's MIT.
__all__ = ['make_tokenizer', 'Token', 'LexerError']
import re
class LexerError(Exception):
    """Raised when no token spec matches the input at the current position.

    Attributes:
        place: ``(line, pos)`` tuple locating the failure.
        msg: text shown in the message (the tokenizer passes the offending
            source line).
    """

    def __init__(self, place, msg):
        self.place = place
        self.msg = msg

    def __str__(self):
        s = 'cannot tokenize data'
        line, pos = self.place
        return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)
class Token(object):
    """A lexical token: a type name, the matched text and optional positions.

    ``start`` and ``end`` are ``(line, pos)`` tuples or ``None``.
    """

    def __init__(self, type, value, start=None, end=None):
        self.type = type
        self.value = value
        self.start = start
        self.end = end

    def __repr__(self):
        return 'Token(%r, %r)' % (self.type, self.value)

    def __eq__(self, other):
        # FIXME: Case sensitivity is assumed here
        return self.type == other.type and self.value == other.value

    def _pos_str(self):
        """Return ``'sl,sp-el,ep:'`` or ``''`` when a position is missing."""
        if self.start is None or self.end is None:
            return ''
        else:
            sl, sp = self.start
            el, ep = self.end
            return '%d,%d-%d,%d:' % (sl, sp, el, ep)

    def __str__(self):
        s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
        return s.strip()

    # The decorator had been garbled into the comment "#property".
    @property
    def name(self):
        """Alias for ``value``."""
        return self.value

    def pformat(self):
        """Return a column-aligned ``position type 'value'`` string."""
        return "%s %s '%s'" % (self._pos_str().ljust(20),
                               self.type.ljust(14),
                               self.value)
def make_tokenizer(specs):
    """[(str, (str, int?))] -> (str -> Iterable(Token))

    Build a tokenizer from ``specs``, a list of ``(name, args)`` pairs where
    ``args`` is the argument tuple for ``re.compile`` (pattern and optional
    flags).  Specs are tried in order and the first one that matches at the
    current offset wins.

    The returned function takes a string and lazily yields ``Token`` objects
    with 1-based start positions.

    Raises:
        LexerError: (from the generator) when no spec matches at the current
            offset.
    """
    compiled = [(name, re.compile(*args)) for name, args in specs]

    def match_specs(specs, text, i, position):
        line, pos = position
        for tok_type, regexp in specs:
            m = regexp.match(text, i)
            if m is None:
                continue
            value = m.group()
            if not value:
                # A zero-width match would not advance the offset and the
                # tokenizer would loop forever; treat it as "no match".
                continue
            nls = value.count('\n')
            n_line = line + nls
            if nls == 0:
                n_pos = pos + len(value)
            else:
                # Column restarts after the last newline in the token.
                n_pos = len(value) - value.rfind('\n') - 1
            return Token(tok_type, value, (line, pos + 1), (n_line, n_pos))
        errline = text.splitlines()[line - 1]
        raise LexerError((line, pos + 1), errline)

    def f(text):
        length = len(text)
        line, pos = 1, 0
        i = 0
        while i < length:
            t = match_specs(compiled, text, i, (line, pos))
            yield t
            line, pos = t.end
            i += len(t.value)

    return f
# This is an example of a token spec. See also [this article][1] for a
# discussion of searching for multiline comments using regexps
# (including `*?`).
#
# [1]: http://ostermiller.org/findcomment.html
# Each entry is (token_type, re_compile_args): make_tokenizer() unpacks the
# second element into re.compile(), so it is a tuple holding the pattern and,
# optionally, regex flags.  Several entries may share one token type.
_example_token_specs = [
('COMMENT', (r'\(\*(.|[\r\n])*?\*\)', re.MULTILINE)),
('COMMENT', (r'\{(.|[\r\n])*?\}', re.MULTILINE)),
('COMMENT', (r'//.*',)),
('NL', (r'[\r\n]+',)),
('SPACE', (r'[ \t\r\n]+',)),
('NAME', (r'[A-Za-z_][A-Za-z_0-9]*',)),
# REAL is listed before INT; both begin with a run of digits.
('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*',)),
('INT', (r'[0-9]+',)),
('INT', (r'\$[0-9A-Fa-f]+',)),
('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/#\^]',)),
('STRING', (r"'([^']|(''))*'",)),
('CHAR', (r'#[0-9]+',)),
('CHAR', (r'#\$[0-9A-Fa-f]+',)),
]
#tokenize = make_tokenizer(_example_token_specs)
And I can sure see why it can't import Spec! There's no Spec there! What's the best way to go about this, guys? Is there a simple "find-replace" that I can do to move forward with this project? Trudging through the repos I could find online (and, confusingly, there are several) wasn't much help to me, but maybe I missed something.

You don't need the Spec class. In the current version of funcparserlib you set up a tokenizer by declaring a plain list of tuples, each of the form (name, (regex, optional flags)), and passing it to make_tokenizer.
See the example in the lexer module:
_example_token_specs = [
('COMMENT', (r'\(\*(.|[\r\n])*?\*\)', re.MULTILINE)),
('COMMENT', (r'\{(.|[\r\n])*?\}', re.MULTILINE)),
('COMMENT', (r'//.*',)),
('NL', (r'[\r\n]+',)),
('SPACE', (r'[ \t\r\n]+',)),
('NAME', (r'[A-Za-z_][A-Za-z_0-9]*',)),
('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*',)),
('INT', (r'[0-9]+',)),
('INT', (r'\$[0-9A-Fa-f]+',)),
('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/#\^]',)),
('STRING', (r"'([^']|(''))*'",)),
('CHAR', (r'#[0-9]+',)),
('CHAR', (r'#\$[0-9A-Fa-f]+',)),
]
The Spec class is obsolete, according to the funcparserlib source.

Related

Python self defined function with None argument

I want to define my own function as below:
# NOTE: this signature is a SyntaxError — parameters without defaults
# (OtherArgument_1, OtherArgument_2) cannot follow the defaulted ADD2.
def myown(df, ADD1, ADD2 = None, OtherArgument_1, OtherArgument_2):
# tmp is the same object as df (no copy), so df itself gets the new column.
tmp = df
# NOTE: 'ADD1' and 'ADD2' are string literals here, so the columns read are
# literally named "ADD1"/"ADD2"; the ADD1/ADD2 arguments are not used.
tmp['NEWADD'] = (tmp['ADD1'] + ' ' + tmp['ADD2']).str.strip()
return tmp
I know this is incorrect so I can add if statement in the function.
# NOTE: this signature is a SyntaxError — parameters without defaults
# (OtherArgument_1, OtherArgument_2) cannot follow the defaulted ADD2.
def myown(df, ADD1, ADD2 = None, OtherArgument_1, OtherArgument_2):
# tmp is the same object as df (no copy), so df itself gets the new column.
tmp = df
# Without a second column name, NEWADD is just the stripped ADD1 column.
if ADD2 == None:
tmp['NEWADD'] = tmp[ADD1].str.strip()
else:
# Otherwise join the two columns with one space and strip the result.
tmp['NEWADD'] = (tmp[ADD1] + ' ' + tmp[ADD2]).str.strip()
# NOTE: this version has no return statement.
However, if I don't know in advance how many ADD inputs there will be, how can I modify this?
For example, there may be 5 ADD columns to combine this time and only 3 next time. It is impractical to rewrite the function each time like this:
# Fixed-arity variant: joins exactly five columns with single spaces.
def myown(df, ADD1, ADD2, ADD3, ADD4, ADD5, OtherArgument_1, OtherArgument_2):
# tmp is the same object as df (no copy), so df itself gets the new column.
tmp = df
tmp['NEWADD'] = (tmp[ADD1] + ' ' + tmp[ADD2] + ' ' + tmp[ADD3] + ' ' + tmp[ADD4] + ' ' + tmp[ADD5]).str.strip()
# NOTE: this version has no return statement.
You can accomplish this by using loops and lists like this:
def myown(df, add_args, OtherArgument_1, OtherArgument_2):
    """Combine any number of string columns into a new ``NEWADD`` column.

    The columns named in ``add_args`` are joined with a single space, in the
    given order, and the combined value is stripped of leading and trailing
    whitespace.

    Args:
        df: the DataFrame to update; it is modified in place.
        add_args: sequence of column names to combine.
        OtherArgument_1: unused here; kept for interface compatibility.
        OtherArgument_2: unused here; kept for interface compatibility.

    Returns:
        The same DataFrame object, with the ``NEWADD`` column set.
    """
    tmp = df
    columns = list(add_args)
    if not columns:
        # Nothing to combine: keep the original result of an empty string.
        tmp['NEWADD'] = ''
        return tmp
    combined = tmp[columns[0]]
    for name in columns[1:]:
        # The original appended '' here, which glued the values together.
        combined = combined + ' ' + tmp[name]
    tmp['NEWADD'] = combined.str.strip()
    return tmp
Your add_args parameter must be a list, which looks like this:
add_args = [ADD1, ADD2, ADDn]

Accumulate conditions during recursion on classification tree

I have the following function which produces code from a sci-kit learn classification tree:
def mxTreeToCode(tree, feature_names, mx_name = 'mxTree', rm_file = False):
    """Append Python source for a fitted decision tree to ``tree.py``.

    The generated function is named ``mx_name`` and takes one argument ``x``.
    It tests ``x[feature] <= threshold`` at each split, and each leaf returns
    the text accumulated in the module-level ``col_name``.

    NOTE: this is the version from the question.  ``col_name`` is reset at
    every leaf, so the branches visited after the first leaf lose the
    conditions of their ancestors — the behaviour the question asks about.
    It also requires ``col_name`` to exist at module level before the call.

    Args:
        tree: fitted estimator exposing ``tree_``.
        feature_names: names indexed by the values in ``tree_.feature``.
        mx_name: name of the generated function.
        rm_file: when true, delete an existing ``./tree.py`` first.
    """
    # Remove pre-existent file
    if rm_file:
        import os
        try:
            os.remove('./tree.py')
        except OSError:
            pass
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    # ``with`` closes the file even if writing fails part-way.
    with open('tree.py', 'a') as file:
        file.write('def ' + mx_name + '(x):'+ '\n')
        #col_name = ''

        def recurse(node, depth):
            global col_name
            indent = " " * depth
            if tree_.feature[node] != _tree.TREE_UNDEFINED:
                name = feature_name[node]
                threshold = tree_.threshold[node]
                file.write(indent +"if x['"+ name + "'] <= " + str(threshold) + ':' + '\n')
                col_name += "'"+name + '_' + '<=' + str(threshold) +"'"
                recurse(tree_.children_left[node], depth + 1)
                file.write(indent + "else: # if x['"+ name +"'] > " + str(threshold) + '\n')
                col_name += "'"+name + '_' + '>' + str(threshold) +"'"
                recurse(tree_.children_right[node], depth + 1)
            else:
                file.write(indent + 'return '+str(col_name) + '\n')
                #print(col_name)
                col_name = ""

        recurse(0, 1)
With this I obtain the following output on file 'tree.py' for a given classification tree:
def mxTree(x):
if x['V1'] <= 0.5:
if x['V2'] <= 0.5:
return 'V1_<=0.5''V2_<=0.5'
else: # if x['V2'] > 0.5
return 'V2_>0.5'
else: # if x['V1'] > 0.5
return 'V1_>0.5'
While I can cumulate the conditions on the IF side and return the addition of conditions, I fail to do the accumulation when the IF and ELSE (left/right side of the tree node) follows:
def mxTree(x):
if x['V1'] <= 0.5:
if x['V2'] <= 0.5:
return 'V1_<=0.5''V2_<=0.5'
else: # if x['V2'] > 0.5
return 'V1_<=0.5''V2_>0.5' # 'V1<=0.5' must be added
else: # if x['V1'] > 0.5
return 'V1_>0.5'
I would appreciate any suggestion.
Since the left/right side of each node are recursed at the same time, I just created an additional variable which saves the output for each side. Finally I concatenate to variable col_name:
col_name = ""
names_list={}
def mxTreeToCode(tree, feature_names, mx_name = 'mxTree', rm_file = False):
    """Append Python source for a fitted decision tree to ``tree.py``.

    The generated function is named ``mx_name`` and takes one argument ``x``.
    It tests ``x[feature] <= threshold`` at each split, and each leaf returns
    the quoted conditions of every split on the path from the root.

    The path prefix is passed down the recursion as an argument, so each
    branch sees exactly its ancestors' conditions.  No module-level state is
    read or written.

    Args:
        tree: fitted estimator exposing ``tree_``.
        feature_names: names indexed by the values in ``tree_.feature``.
        mx_name: name of the generated function.
        rm_file: when true, delete an existing ``./tree.py`` first.
    """
    # Remove pre-existent file
    if rm_file:
        import os
        try:
            os.remove('./tree.py')
        except OSError:
            pass
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    # ``with`` closes the file even if writing fails part-way.
    with open('tree.py', 'a') as file:
        file.write('def ' + mx_name + '(x):'+ '\n')

        def recurse(node, depth, prefix):
            indent = " " * depth
            if tree_.feature[node] == _tree.TREE_UNDEFINED:
                # Leaf: emit the conditions collected along the path.
                file.write(indent + 'return ' + prefix + '\n')
                return
            name = feature_name[node]
            threshold = tree_.threshold[node]
            file.write(indent +"if x['"+ name + "'] <= " + str(threshold) + ':' + '\n')
            recurse(tree_.children_left[node], depth + 1,
                    prefix + "'"+name + '_' + '<=' + str(threshold) +"'")
            file.write(indent + "else: # if x['"+ name +"'] > " + str(threshold) + '\n')
            recurse(tree_.children_right[node], depth + 1,
                    prefix + "'"+name + '_' + '>' + str(threshold) +"'")

        recurse(0, 1, "")
I wonder if there are other working approaches.

print binary tree level by level in python

I want to print my binary tree in the following manner:
10
6 12
5 7 11 13
I have written the code for inserting nodes, but I have not been able to write the code for printing the tree, so please help with this. My code is:
class Node:
    """A binary-tree node with links to its children and its parent."""

    def __init__(self,data):
        self.data=data
        self.left=None
        self.right=None
        self.parent=None


class binarytree:
    """An unbalanced binary search tree that ignores duplicate values.

    Attributes:
        root: the root ``Node`` or ``None`` when the tree is empty.
        size: number of nodes stored in the tree.
    """

    def __init__(self):
        self.root=None
        self.size=0

    def insert(self,data):
        """Insert ``data``; a value already in the tree is left untouched."""
        if self.root is None:
            self.root = Node(data)
            self.size += 1
            return
        current = self.root
        while True:
            if data < current.data:
                side = 'left'
            elif data > current.data:
                side = 'right'
            else:
                # Duplicate: nothing to insert.
                return
            child = getattr(current, side)
            if child:
                current = child
                continue
            new = Node(data)
            # Previously ``parent`` was never set and ``size`` never updated.
            new.parent = current
            setattr(current, side, new)
            self.size += 1
            return
b=binarytree()
Here's my attempt, using recursion, and keeping track of the size of each node and the size of children.
class BstNode:
    """Binary search tree node that can draw its subtree as ASCII art."""

    def __init__(self, key):
        self.key = key
        self.right = None
        self.left = None

    def insert(self, key):
        """Insert ``key`` below this node; duplicates are ignored."""
        if self.key == key:
            return
        elif self.key < key:
            if self.right is None:
                self.right = BstNode(key)
            else:
                self.right.insert(key)
        else:  # self.key > key
            if self.left is None:
                self.left = BstNode(key)
            else:
                self.left.insert(key)

    def display(self):
        """Print the drawing of this subtree, one text line per row."""
        lines, *_ = self._display_aux()
        for line in lines:
            print(line)

    def _display_aux(self):
        """Returns list of strings, width, height, and horizontal coordinate of the root."""
        # No child.
        if self.right is None and self.left is None:
            line = '%s' % self.key
            width = len(line)
            height = 1
            middle = width // 2
            return [line], width, height, middle

        # Only left child.
        if self.right is None:
            lines, n, p, x = self.left._display_aux()
            s = '%s' % self.key
            u = len(s)
            first_line = (x + 1) * ' ' + (n - x - 1) * '_' + s
            second_line = x * ' ' + '/' + (n - x - 1 + u) * ' '
            shifted_lines = [line + u * ' ' for line in lines]
            return [first_line, second_line] + shifted_lines, n + u, p + 2, n + u // 2

        # Only right child.
        if self.left is None:
            lines, n, p, x = self.right._display_aux()
            s = '%s' % self.key
            u = len(s)
            first_line = s + x * '_' + (n - x) * ' '
            second_line = (u + x) * ' ' + '\\' + (n - x - 1) * ' '
            shifted_lines = [u * ' ' + line for line in lines]
            return [first_line, second_line] + shifted_lines, n + u, p + 2, u // 2

        # Two children.
        left, n, p, x = self.left._display_aux()
        right, m, q, y = self.right._display_aux()
        s = '%s' % self.key
        u = len(s)
        first_line = (x + 1) * ' ' + (n - x - 1) * '_' + s + y * '_' + (m - y) * ' '
        second_line = x * ' ' + '/' + (n - x - 1 + u + y) * ' ' + '\\' + (m - y - 1) * ' '
        # Pad the shorter side with blank rows so both blocks zip completely.
        if p < q:
            left += [n * ' '] * (q - p)
        elif q < p:
            right += [m * ' '] * (p - q)
        zipped_lines = zip(left, right)
        lines = [first_line, second_line] + [a + u * ' ' + b for a, b in zipped_lines]
        return lines, n + m + u, max(p, q) + 2, n + u // 2
import random
b = BstNode(50)
for _ in range(50):
b.insert(random.randint(0, 100))
b.display()
Example output:
__50_________________________________________
/ \
________________________43_ ________________________99
/ \ /
_9_ 48 ____________67_____________________
/ \ / \
3 11_________ 54___ ______96_
/ \ \ \ / \
0 8 ____26___________ 61___ ________88___ 97
/ \ / \ / \
14_ __42 56 64_ 75_____ 92_
/ \ / / \ / \ / \
13 16_ 33_ 63 65_ 72 81_ 90 94
\ / \ \ / \
25 __31 41 66 80 87
/ /
28_ 76
\
29
class Node(object):
    """A binary-tree node holding ``value`` and optional children."""

    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right


def printTree(node, level=0):
    """Print the tree in order, one node per line.

    The left subtree is printed first, then the node, then the right
    subtree.  Each node is prefixed with four spaces per ``level`` and
    the marker ``'-> '``.
    """
    if node is not None:
        printTree(node.left, level + 1)
        print(' ' * 4 * level + '-> ' + str(node.value))
        printTree(node.right, level + 1)
t = Node(1, Node(2, Node(4, Node(7)),Node(9)), Node(3, Node(5), Node(6)))
printTree(t)
output:
-> 7
-> 4
-> 2
-> 9
-> 1
-> 5
-> 3
-> 6
What you're looking for is breadth-first traversal, which lets you traverse a tree level by level. Basically, you use a queue to keep track of the nodes you need to visit, adding children to the back of the queue as you go (as opposed to adding them to the front of a stack). Get that working first.
After you do that, you can work out how many levels the tree has and use that to estimate whitespace. For a complete (fully balanced) tree this is floor(log2(node_count)) + 1; an unbalanced tree can be deeper, so in general take the depth from the traversal itself. If you want to get the whitespace exactly right, you can use other data structures to keep track of how many spaces you need per level. A smart estimation using number of nodes and levels should be enough, though.
I am leaving here a stand-alone version of @J. V.'s code. If anyone wants to grab their own binary tree and pretty-print it, pass the root node and you are good to go.
If necessary, change val, left and right parameters according to your node definition.
def print_tree(root, val="val", left="left", right="right"):
    """Pretty-print any binary tree as ASCII art.

    Args:
        root: the root node, or ``None`` for an empty tree (prints nothing).
        val: name of the node attribute holding the value to show.
        left: name of the node attribute holding the left child.
        right: name of the node attribute holding the right child.
    """
    def display(node):
        """Returns list of strings, width, height, and horizontal coordinate of the root."""
        label = '%s' % getattr(node, val)
        u = len(label)
        left_child = getattr(node, left)
        right_child = getattr(node, right)

        # No child.
        if right_child is None and left_child is None:
            return [label], u, 1, u // 2

        # Only left child.
        if right_child is None:
            lines, n, p, x = display(left_child)
            first_line = (x + 1) * ' ' + (n - x - 1) * '_' + label
            second_line = x * ' ' + '/' + (n - x - 1 + u) * ' '
            shifted_lines = [line + u * ' ' for line in lines]
            return [first_line, second_line] + shifted_lines, n + u, p + 2, n + u // 2

        # Only right child.
        if left_child is None:
            lines, n, p, x = display(right_child)
            first_line = label + x * '_' + (n - x) * ' '
            second_line = (u + x) * ' ' + '\\' + (n - x - 1) * ' '
            shifted_lines = [u * ' ' + line for line in lines]
            return [first_line, second_line] + shifted_lines, n + u, p + 2, u // 2

        # Two children.
        left_lines, n, p, x = display(left_child)
        right_lines, m, q, y = display(right_child)
        first_line = (x + 1) * ' ' + (n - x - 1) * '_' + label + y * '_' + (m - y) * ' '
        second_line = x * ' ' + '/' + (n - x - 1 + u + y) * ' ' + '\\' + (m - y - 1) * ' '
        # Pad the shorter side with blank rows so both blocks zip completely.
        if p < q:
            left_lines += [n * ' '] * (q - p)
        elif q < p:
            right_lines += [m * ' '] * (p - q)
        merged = [a + u * ' ' + b for a, b in zip(left_lines, right_lines)]
        return [first_line, second_line] + merged, n + m + u, max(p, q) + 2, n + u // 2

    if root is None:
        # An empty tree previously crashed inside display().
        return
    lines, *_ = display(root)
    for line in lines:
        print(line)
print_tree(root)
__7
/ \
___10_ 3
/ \
_19 13
/ \
9 8_
/ \ \
4 0 12
Simple solution with no recursion
def PrintTree(root):
    """Print a binary tree level by level with connector lines.

    Nodes need ``val``, ``left`` and ``right`` attributes.  The root is put
    at column ``2 ** (height + 1)`` and the horizontal offset of the
    children halves on every level.  Two lines are printed per level: the
    connectors, then the values.
    """
    from collections import deque

    def height(node):
        return 1 + max(height(node.left), height(node.right)) if node else -1

    nlevels = height(root)
    width = pow(2, nlevels + 1)

    # Breadth-first walk recording (node, column, alignment) per level.
    # A deque gives O(1) pops from the front, unlike list.pop(0).
    queue = deque([(root, 0, width, 'c')])
    levels = []
    while queue:
        node, level, x, align = queue.popleft()
        if node:
            if len(levels) <= level:
                levels.append([])
            levels[level].append((node, x, align))
            seg = width // (pow(2, level + 1))
            queue.append((node.left, level + 1, x - seg, 'l'))
            queue.append((node.right, level + 1, x + seg, 'r'))

    for i, row in enumerate(levels):
        pre = 0
        preline = 0
        linestr = ''
        pstr = ''
        seg = width // (pow(2, i + 1))
        for node, x, align in row:
            valstr = str(node.val)
            if align == 'r':
                linestr += ' ' * (x - preline - 1 - seg - seg // 2) + '¯' * (seg + seg // 2) + '\\'
                preline = x
            if align == 'l':
                linestr += ' ' * (x - preline - 1) + '/' + '¯' * (seg + seg // 2)
                preline = x + seg + seg // 2
            # correct the position according to the number size
            pstr += ' ' * (x - pre - len(valstr)) + valstr
            pre = x
        print(linestr)
        print(pstr)
Sample output
1
/¯¯¯¯¯¯ ¯¯¯¯¯¯\
2 3
/¯¯¯ ¯¯¯\ /¯¯¯ ¯¯¯\
4 5 6 7
/¯ ¯\ /¯ /¯
8 9 10 12
I enhanced Prashant Shukla answer to print the nodes on the same level in the same line without spaces.
class Node(object):
    """A binary-tree node; ``str(node)`` is the string form of its value."""

    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right

    def __str__(self):
        return str(self.value)


def traverse(root):
    """Print the tree breadth-first, one level per line.

    The values of a level are separated by single spaces.  An empty tree
    (``root`` is ``None``) prints nothing; previously it printed ``None``
    and then raised ``AttributeError``.
    """
    if root is None:
        return
    current_level = [root]
    while current_level:
        print(' '.join(str(node) for node in current_level))
        next_level = []
        for node in current_level:
            if node.left:
                next_level.append(node.left)
            if node.right:
                next_level.append(node.right)
        current_level = next_level
t = Node(1, Node(2, Node(4, Node(7)), Node(9)), Node(3, Node(5), Node(6)))
traverse(t)
Just use this small method of print2DTree:
class bst:
    """A binary search tree node."""

    def __init__(self, value):
        self.value = value
        self.right = None
        self.left = None


def insert(root, key):
    """Insert ``key`` into the subtree at ``root`` and return its root.

    Keys equal to a node's value go to its right subtree.  Passing
    ``None`` as ``root`` creates and returns a new single-node tree.
    """
    if not root:
        return bst(key)
    if key >= root.value:
        root.right = insert(root.right, key)
    else:
        root.left = insert(root.left, key)
    return root


def insert_values(root, values):
    """Insert every item of ``values`` in order and return the tree root."""
    for value in values:
        root = insert(root, value)
    return root
def print2DTree(root, space=0, LEVEL_SPACE = 5):
    """Print the tree rotated 90 degrees, right subtree at the top.

    Each node is written as ``|value|<``, preceded by one space per column
    of depth beyond the first level.

    Args:
        root: current node (needs ``value``, ``left``, ``right``) or ``None``.
        space: indentation accumulated so far; callers normally leave it 0.
        LEVEL_SPACE: number of columns added per tree level.
    """
    if root is None:
        return
    space += LEVEL_SPACE
    # LEVEL_SPACE must be forwarded; otherwise a custom value only applied
    # to the root and every deeper level fell back to the default.
    print2DTree(root.right, space, LEVEL_SPACE)
    # print() # neighbor space
    for i in range(LEVEL_SPACE, space):
        print(end = " ")
    print("|" + str(root.value) + "|<")
    print2DTree(root.left, space, LEVEL_SPACE)
root = insert_values(None, [8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15])
print2DTree(root)
Results:
code Explanation:
by using the BFS get the lists of list contains elements of each level
number of white spaces at any level = (max number of element in tree)//2^level
maximum number of elements of h height tree = 2^h -1; considering root level height as 1
print the value and white spaces
You can find my Repl.it link here: print-bst-tree
def bfs(node,level=0,res=None):
    """Collect the node values of a tree grouped by level.

    A missing child is recorded as ``" "`` on its level, so the level below
    the deepest nodes consists only of placeholders.

    Args:
        node: current node (needs ``value``, ``left``, ``right``) or ``None``.
        level: depth of ``node``; callers normally leave it 0.
        res: list of per-level lists to extend; a fresh list is created when
            omitted.  The former ``res=[]`` default was shared between
            calls, so a second call kept the levels of the first.

    Returns:
        The list of per-level lists.
    """
    if res is None:
        res = []
    entry = node.value if node else " "
    if level < len(res):
        res[level].append(entry)
    else:
        res.append([entry])
    if not node:
        return res
    bfs(node.left, level + 1, res)
    bfs(node.right, level + 1, res)
    return res


def printTree(node):
    """Print the tree level by level, spacing values by the tree height.

    An empty tree (``node`` is ``None``) prints nothing.
    """
    if node is None:
        return
    treeArray = bfs(node)
    h = len(treeArray)
    whiteSpaces = (2**h)-1

    def printSpaces(n):
        print(" " * n, end="")

    for level in treeArray:
        whiteSpaces = whiteSpaces//2
        for i, x in enumerate(level):
            if i == 0:
                printSpaces(whiteSpaces)
            print(x, end="")
            printSpaces(1+2*whiteSpaces)
        print()
#driver Code
printTree(root)
#output
class magictree:
    """A tree node with any number of children.

    Attributes:
        parent: the parent node, or ``None`` for the root.
        level: depth of this node (0 for the root).
        attr: list of values attached to this node.
        rows: list of child ``magictree`` nodes, in insertion order.
    """

    def __init__(self, parent=None):
        self.parent = parent
        self.level = 0 if parent is None else parent.level + 1
        self.attr = []
        self.rows = []

    def add(self, value):
        """Create a child holding ``value`` and return the new child."""
        tr = magictree(self)
        tr.attr.append(value)
        self.rows.append(tr)
        return tr

    def printtree(self):
        """Print every descendant depth-first, one tab per level.

        The node itself is not printed, only its descendants.
        """
        def printrows(rows):
            for child in rows:
                print("{}{}".format(child.level * "\t", child.attr))
                printrows(child.rows)

        printrows(self.rows)
tree = magictree()
group = tree.add("company_1")
group.add("emp_1")
group.add("emp_2")
emp_3 = group.add("emp_3")
group = tree.add("company_2")
group.add("emp_5")
group.add("emp_6")
group.add("emp_7")
emp_3.add("pencil")
emp_3.add("pan")
emp_3.add("scotch")
tree.printtree()
result:
['company_1']
['emp_1']
['emp_2']
['emp_3']
['pencil']
['pan']
['scotch']
['company_2']
['emp_5']
['emp_6']
['emp_7']
As I came to this question from Google (and I bet many others did too), here is a general tree in which each node can have any number of children (so not strictly a binary tree), with a print function (__str__, which is called by str(object_var) and print(object_var)).
Code:
from typing import Union, Any
# General tree node: `data` is the payload, `children` an ordered list of
# child Nodes.
class Node:
def __init__(self, data: Any):
self.data: Any = data
self.children: list = []
# Append a new child node wrapping `data`.
def insert(self, data: Any):
self.children.append(Node(data))
# Render this node and its descendants as text: line 0 is this node's data
# and line k+1 collects line k of every child's rendering.
# `top` is True only for the outermost call; recursive calls pass top=False.
# NOTE(review): indentation was lost in this listing, so how the loops below
# nest is ambiguous — confirm against the original before re-indenting.
def __str__(self, top: bool=True) -> str:
lines: list = []
lines.append(str(self.data))
for child in self.children:
for index, data in enumerate(child.__str__(top=False).split("\n")):
data = str(data)
space_after_line = " " * index
# A line for this depth already exists: append to it, separated by a space.
if len(lines)-1 > index:
lines[index+1] += " " + data
if top:
lines[index+1] += space_after_line
else:
# First entry at this depth: start a new line.
if top:
lines.append(data + space_after_line)
else:
lines.append(data)
# Pad each line (except the first and last) to the length of the line below.
for line_number in range(1, len(lines) - 1):
if len(lines[line_number + 1]) > len(lines[line_number]):
lines[line_number] += " " * (len(lines[line_number + 1]) - len(lines[line_number]))
# Centre this node's own label over the longest line.
lines[0] = " " * int((len(max(lines, key=len)) - len(str(self.data))) / 2) + lines[0]
return '\n'.join(lines)
# True when this node has at least one child.
def hasChildren(self) -> bool:
return bool(self.children)
# Index or slice into the children, so root[0][1] walks down the tree.
def __getitem__(self, pos: Union[int, slice]):
return self.children[pos]
And then a demo:
# Demo
root = Node("Languages Good For")
root.insert("Serverside Web Development")
root.insert("Clientside Web Development")
root.insert("For Speed")
root.insert("Game Development")
root[0].insert("Python")
root[0].insert("NodeJS")
root[0].insert("Ruby")
root[0].insert("PHP")
root[1].insert("CSS + HTML + Javascript")
root[1].insert("Typescript")
root[1].insert("SASS")
root[2].insert("C")
root[2].insert("C++")
root[2].insert("Java")
root[2].insert("C#")
root[3].insert("C#")
root[3].insert("C++")
root[0][0].insert("Flask")
root[0][0].insert("Django")
root[0][1].insert("Express")
root[0][2].insert("Ruby on Rails")
root[0][0][0].insert(1.1)
root[0][0][0].insert(2.1)
print(root)
This is part of my own implementation of BST. The ugly part of this problem is that you have to know the space that your children occupies before you can print out yourself. Because you can have very big numbers like 217348746327642386478832541267836128736..., but also small numbers like 10, so if you have a parent-children relationship between these two, then it can potentially overlap with your other child. Therefore, we need to first go through the children, make sure we get how much space they are having, then we use that information to construct ourself.
# Build a multi-line text drawing of the tree rooted at self.head.
# NOTE(review): this is a method of a BST class that is not shown here; it
# relies on self.getHeight(), self.head and on nodes having key / l / r / p
# attributes — confirm against the full class.
def __str__(self):
h = self.getHeight()
# 2*h - 1 rows: keys are written on row curRow, the "/" and "\" connectors
# on curRow + 1, and the children on curRow + 2.
rowsStrs = ["" for i in range(2 * h - 1)]
# return of helper is [leftLen, curLen, rightLen] where
# leftLen = children length of left side
# curLen = length of keyStr + length of "_" from both left side and right side
# rightLen = children length of right side.
# But the point of helper is to construct rowsStrs so we get the representation
# of this BST.
def helper(node, curRow, curCol):
if(not node): return [0, 0, 0]
keyStr = str(node.key)
keyStrLen = len(keyStr)
# The left subtree is drawn first, so its width is known before the key is placed.
l = helper(node.l, curRow + 2, curCol)
rowsStrs[curRow] += (curCol -len(rowsStrs[curRow]) + l[0] + l[1] + 1) * " " + keyStr
if(keyStrLen < l[2] and (node.r or (node.p and node.p.l == node))):
rowsStrs[curRow] += (l[2] - keyStrLen) * "_"
# l[1] is non-zero only when a left child exists; draw its connector.
if(l[1]):
rowsStrs[curRow + 1] += (len(rowsStrs[curRow + 2]) - len(rowsStrs[curRow + 1])) * " " + "/"
# The right subtree starts one column after the text written so far on this row.
r = helper(node.r, curRow + 2, len(rowsStrs[curRow]) + 1)
rowsStrs[curRow] += r[0] * "_"
# r[1] is non-zero only when a right child exists; draw its connector.
if(r[1]):
rowsStrs[curRow + 1] += (len(rowsStrs[curRow]) - len(rowsStrs[curRow + 1])) * " " + "\\"
return [l[0] + l[1] + 1, max(l[2] - keyStrLen, 0) + keyStrLen + r[0], r[1] + r[2] + 1]
helper(self.head, 0, 0)
res = "\n".join(rowsStrs)
#print("\n\n\nStart of BST:****************************************")
#print(res)
#print("End of BST:****************************************")
#print("BST height: ", h, ", BST size: ", self.size)
return res
Here's some examples of running this:
[26883404633, 10850198033, 89739221773, 65799970852, 6118714998, 31883432186, 84275473611, 25958013736, 92141734773, 91725885198, 131191476, 81453208197, 41559969292, 90704113213, 6886252839]
26883404633___________________________________________
/ \
10850198033__ 89739221773___________________________
/ \ / \
6118714998_ 25958013736 65799970852_______________ 92141734773
/ \ / \ /
131191476 6886252839 31883432186_ 84275473611 91725885198
\ / /
41559969292 81453208197 90704113213
Another example:
['rtqejfxpwmggfro', 'viwmdmpedzwvvxalr', 'mvvjmkdcdpcfb', 'ykqehfqbpcjfd', 'iuuujkmdcle', 'nzjbyuvlodahlpozxsc', 'wdjtqoygcgbt', 'aejduciizj', 'gzcllygjekujzcovv', 'naeivrsrfhzzfuirq', 'lwhcjbmcfmrsnwflezxx', 'gjdxphkpfmr', 'nartcxpqqongr', 'pzstcbohbrb', 'ykcvidwmouiuz']
rtqejfxpwmggfro____________________
/ \
mvvjmkdcdpcfb_____________________________ viwmdmpedzwvvxalr_______________
/ \ \
iuuujkmdcle_________ nzjbyuvlodahlpozxsc_ ykqehfqbpcjfd
/ \ / \ /
aejduciizj_____________ lwhcjbmcfmrsnwflezxx naeivrsrfhzzfuirq_ pzstcbohbrb wdjtqoygcgbt_
\ \ \
gzcllygjekujzcovv nartcxpqqongr ykcvidwmouiuz
/
gjdxphkpfmr
Here's a 2-pass solution with no recursion for general binary trees where each node has a value that "fits" within the allotted space (values closer to the root have more room to spare). (Pass 0 computes the tree height).
'''
0: 0
1: 1 2
2: 3 4 5 6
3: 7 8 9 a b c d e
h: 4
N: 2**4 - 1 <--| 2**0 + 2**1 + 2**2 + 2**3
'''
import math
def t2_lvl(i):
    """Map the global (breadth-first) index ``i`` to its tree level.

    Index 0 is the root (level 0), 1-2 are level 1, 3-6 level 2, and so on.
    Non-positive indices map to level 0.

    Uses integer ``bit_length`` rather than ``int(math.log2(i + 1))``, which
    rounds up for large indices just below a power of two.
    """
    return (i + 1).bit_length() - 1 if 0 < i else 0


def t2_i2base(i):
    """Map the global index ``i`` to the index of element 0 of its level."""
    return (1 << t2_lvl(i)) - 1


def t2_l2base(l):
    """Map the level ``l`` to the global index of element 0 of that level."""
    return (1 << l) - 1
class Tree2:  # #meta a 2-tree is a tree with at most 2 sons per dad
    """Binary-tree node: value ``v``, left child ``l``, right child ``r``."""

    def __init__(self, v=None):
        self.v = v
        self.l = None
        self.r = None

    def __str__(self):
        return f'{self.v}'
def t2_show(tree:Tree2):
    """Print a binary tree level by level, positioning values with ANSI escapes.

    Pass 0 walks the tree breadth-first, recording ``(node, global_index)``
    pairs and the tree height. Pass 1 prints every node at a column derived
    from its level and its offset inside that level. Node values are printed
    with the ``x`` (hex) format, so they must be integers.

    Does nothing when ``tree`` is falsy.
    """
    if not tree: return

    # pass 0: breadth-first walk. A cursor over the single growing list
    # replaces the former second queue drained with list.pop(0), which was
    # O(n) per pop and duplicated every entry.
    nodes = [(tree, 0)]
    h = 0  # highest level seen so far
    head = 0
    while head < len(nodes):
        n, i = nodes[head]
        head += 1
        h = max(h, t2_lvl(i))
        if n.l: nodes.append((n.l, 2*i+1))
        if n.r: nodes.append((n.r, 2*i+2))
    h += 1  # number of levels
    N = 2**h - 1  # number of slots in a perfect tree of this height
    W = 1  # printed width of one element

    # pass 1: print
    print(f'\n\x1b[31m{h} \x1b[32m{len(nodes)}\x1b[0m')
    print(f'{0:1x}\x1b[91m:\x1b[0m',end='')
    prev_lvl = 0  # level of the previously printed node
    for n, i in nodes:
        l = t2_lvl(i)  # level
        b = (1<<l)-1  # global index of the first slot of this level
        s0 = (N // (2**(l+1)))
        s1 = (N // (2**(l+0)))
        s = 3+1 + s0 + (i-b)*(s1+1)  # absolute 1-based column from the start of the line
        w = int(2**(h-l-2))  # number of '-' drawn on each side of the element
        if l != prev_lvl:
            # new level: go to the next line and print its label
            print(f'\n{l:1x}\x1b[91m:\x1b[0m',end='')
            prev_lvl = l
        # `\x1b[XG` is the ANSI escape that moves the cursor to column X
        print(f"\x1b[{s-w}G{w*'-'}\x1b[1G", end='')
        print(f"\x1b[{s}G{n.v:1x}\x1b[1G", end='')
        print(f"\x1b[{s+W}G{w*'-'}\x1b[1G", end='')
    print()
And an example:
# Build a small, unbalanced sample tree by hand (root value 0) ...
tree = Tree2(0)
tree.l = Tree2(1)
tree.r = Tree2(2)
tree.l.l = Tree2(3)
tree.r.l = Tree2(4)
tree.r.r = Tree2(5)
tree.l.l.l = Tree2(3)
tree.r.l.l = Tree2(6)
tree.r.l.r = Tree2(7)
tree.l.l.l.l = Tree2(3)
tree.r.l.l.l = Tree2(8)
tree.r.l.l.r = Tree2(9)
# ... and render it to the terminal.
t2_show(tree)
Output:
5 12
0: --------0--------
1: ----1---- ----2----
2: --3-- --4-- --5--
3: -3- -6- -7-
4: 3 8 9
Another output example:
7 127
0: --------------------------------0--------------------------------
1: ----------------1---------------- ----------------2----------------
2: --------3-------- --------4-------- --------5-------- --------6--------
3: ----7---- ----8---- ----9---- ----a---- ----b---- ----c---- ----d---- ----e----
4: --f-- --0-- --1-- --2-- --3-- --4-- --5-- --6-- --7-- --8-- --9-- --a-- --b-- --c-- --d-- --e--
5: -f- -0- -1- -2- -3- -4- -5- -6- -7- -8- -9- -a- -b- -c- -d- -e- -f- -0- -1- -2- -3- -4- -5- -6- -7- -8- -9- -a- -b- -c- -d- -e-
6: f 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e
Record Each Level Separately using Breadth First Approach
You can use a breadth first traversal and record node values in a dictionary using level as key. This helps next when you want to print each level in a new line. If you maintain a count of nodes processed, you can find current node's level (since it's a binary tree) using -
level = math.ceil(math.log(count + 1, 2) - 1)
Sample Code
Here's my code using the above method (along with some helpful variables like point_span & line_space which you can modify as you like). I used my custom Queue class, but you can also use a list for maintaining queue.
def pretty_print(self):
    """Print the tree one level per line, values centred in equal-width cells.

    Nodes are visited breadth-first and grouped by level. The level is
    derived from the running node count, which is only exact when every level
    above the last is full. An empty tree prints nothing.
    """
    if not self.root:
        # Without this guard the print loop below looked up data[0] on an
        # empty dict and raised KeyError.
        return
    # NOTE(review): assumes Queue.dequeue() returns a falsy value once the
    # queue is empty -- confirm against the Queue implementation.
    q, current, count, level, data = Queue(), self.root, 1, 0, {}
    while current:
        # count is the 1-based breadth-first position, so the level is
        # floor(log2(count)). bit_length keeps this in exact integer
        # arithmetic instead of ceil(log(count + 1, 2) - 1) on floats.
        level = count.bit_length() - 1
        data.setdefault(level, []).append(current.value)
        count += 1
        if current.left:
            q.enqueue(current.left)
        if current.right:
            q.enqueue(current.right)
        current = q.dequeue()
    # point_span: width reserved per slot on the deepest level;
    # line_space: extra newlines appended after every printed level.
    point_span, line_space = 8, 4
    line_width = point_span * 2 ** level
    for depth in range(level + 1):
        values = data[depth]
        cell = line_width // len(values)
        row = ''.join(str(value).center(cell) for value in values)
        print(row + '\n' * line_space)
And here's how the output looks:
A similar question has been answered here; it may help. The following code prints the tree in this format:
>>>
1
2 3
4 5 6
7
>>>
Code for this is as below :
class Node(object):
    """Binary tree node holding a value and optional left/right children."""

    def __init__(self, value, left=None, right=None):
        self.value, self.left, self.right = value, left, right
def traverse(rootnode, indent=' '):
    """Print the tree breadth-first, one level per line.

    ``indent`` is the padding string that is halved before every level and
    prefixed to each value on that level, so deeper levels are indented less.
    The default keeps the original literal; pass a longer run of spaces to
    get a visible pyramid. A ``None`` root prints nothing.

    Written with single-argument ``print(...)`` calls and ``//`` so that it
    runs on both Python 2 and Python 3 and produces the same text.
    """
    thislevel = [rootnode] if rootnode is not None else []
    a = indent
    while thislevel:
        # Integer division: len(a)/2 is a float on Python 3 and cannot be
        # used as a slice bound.
        a = a[:len(a) // 2]
        print(' '.join(a + str(n.value) for n in thislevel))
        nextlevel = []
        for n in thislevel:
            if n.left: nextlevel.append(n.left)
            if n.right: nextlevel.append(n.right)
        thislevel = nextlevel
# Sample tree: root 1; left subtree 2 -> (4 -> 7, 9); right subtree 3 -> (5, 6).
t = Node(1, Node(2, Node(4, Node(7)),Node(9)), Node(3, Node(5), Node(6)))
traverse(t)
Edited code gives result in this format :
>>>
1
2 3
4 9 5 6
7
>>>
This is just a trick to get what you want; there may be a proper method for it, so I suggest you dig into it further.

Taylor polynomial calculation

I am currently doing a python exercise for my University studies. I am very stuck at this task:
The Taylor polynomial of degree N for the exponential function e^x is given by:
$$p(x) = \sum_{k=0}^{N} \frac{x^k}{k!}$$
Make a program that (i) imports class Polynomial (found below), (ii) reads x and a series of N values from the command line, (iii) creates a Polynomial instance representing the Taylor polynomial, and (iv) prints the values of p(x) for the given N values as well as the exact value e^x. Try the program out with x = 0.5, 3, 10 and N = 2, 5, 10, 15, 25.
Polynomial.py
import numpy
class Polynomial:
    """Polynomial ``c[0] + c[1]*x + c[2]*x**2 + ...``.

    ``coeff`` is the coefficient sequence, lowest power first. It is stored
    as given (not copied), so a list passed in is shared with the caller.
    """

    def __init__(self, coefficients):
        self.coeff = coefficients

    def __call__(self, x):
        """Evaluate the polynomial at ``x``; an empty polynomial evaluates to 0."""
        return sum(c * x**i for i, c in enumerate(self.coeff))

    def __add__(self, other):
        """Return a new Polynomial that is the sum of ``self`` and ``other``."""
        # Start with the longest coefficient sequence and add in the other.
        if len(self.coeff) > len(other.coeff):
            longer, shorter = self.coeff, other.coeff
        else:
            longer, shorter = other.coeff, self.coeff
        # list(...) is a real copy. ``longer[:]`` is only a view when the
        # coefficients are a numpy array (as produced by __mul__), which made
        # the addition overwrite one of its operands.
        result_coeff = list(longer)
        for i, c in enumerate(shorter):
            result_coeff[i] += c
        return Polynomial(result_coeff)

    def __mul__(self, other):
        """Return the product as a new Polynomial with numpy-array coefficients."""
        c = self.coeff
        d = other.coeff
        M = len(c) - 1
        N = len(d) - 1
        result_coeff = numpy.zeros(M+N+1)
        for i in range(0, M+1):
            for j in range(0, N+1):
                result_coeff[i+j] += c[i]*d[j]
        return Polynomial(result_coeff)

    def differentiate(self):
        """Differentiate this polynomial in-place."""
        derived = [i * c for i, c in enumerate(self.coeff)][1:]
        if isinstance(self.coeff, list):
            # Mutate the existing list so references held by callers see
            # the change, exactly as before.
            self.coeff[:] = derived
        else:
            # numpy arrays cannot shrink in place (``del arr[-1]`` raises),
            # so rebind to the new list instead.
            self.coeff = derived

    def derivative(self):
        """Copy this polynomial and return its derivative."""
        dpdx = Polynomial(list(self.coeff))  # make a real copy
        dpdx.differentiate()
        return dpdx

    def __str__(self):
        """Return a readable form; zero terms are skipped."""
        s = ''.join(' + %g*x^%d' % (c, i)
                    for i, c in enumerate(self.coeff) if c != 0)
        # Fix layout
        s = s.replace('+ -', '- ')
        s = s.replace('x^0', '1')
        s = s.replace(' 1*', ' ')
        # Only 'x^1' followed by a space is shortened: a blanket replace of
        # 'x^1' would also turn x^100 into x00.
        s = s.replace('x^1 ', 'x ')
        if s[0:3] == ' + ':  # remove initial +
            s = s[3:]
        if s[0:3] == ' - ':  # fix spaces for initial -
            s = '-' + s[3:]
        return s

    def simplestr(self):
        """Return every term, including zero ones, without layout fixes."""
        return ''.join(' + %g*x^%d' % (c, i) for i, c in enumerate(self.coeff))
def _test():
    """Print a few sums, products and derivatives as a manual smoke test.

    Each line is formatted into one string and passed to ``print(...)`` as a
    single argument, so the text is the same on Python 2 and Python 3.
    """
    p1 = Polynomial([1, -1])
    p2 = Polynomial([0, 1, 0, 0, -6, -1])
    p3 = p1 + p2
    print('%s  +  %s  =  %s' % (p1, p2, p3))
    p4 = p1*p2
    print('%s  *  %s  =  %s' % (p1, p2, p4))
    print('p2(3) = %s' % p2(3))
    p5 = p2.derivative()
    print('d/dx %s  =  %s' % (p2, p5))
    # Capture the left-hand side before p2 is differentiated in place, as
    # the original two-part print did.
    lhs = 'd/dx %s' % p2
    p2.differentiate()
    print('%s  =  %s' % (lhs, p5))
    p4 = p2.derivative()
    print('d/dx %s  =  %s' % (p2, p4))
# Run the smoke test only when this file is executed as a script, not on import.
if __name__ == '__main__':
    _test()
Now I'm really stuck at this, and I would love to get an explanation! I am supposed to write my code in a separate file. I'm thinking about making an instance of the Polynomial class, and sending in the list in argv[2:], but that doesn't seem to be working. Do I have to make a def to calculate the Taylor polynomial for the different values of N before sending it in to the Polynomial class?
Any help is great, thanks in advance :)
Not quite finished, but this answers your main question I believe. Put class Polynomial in poly.py and import it.
from poly import Polynomial as p
from math import exp,factorial
def get_input(n):
    ''' get n numbers from stdin

    Prompts n times and returns the entries as a list of floats. The values
    used to be returned as raw strings, which made Polynomial addition
    concatenate text instead of adding numbers.

    Raises ValueError if an entry is not a valid number.
    '''
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    entered = []
    for _ in range(n):
        print('input number ')
        entered.append(float(read_line()))
    return entered
def some_input():
    """Return two fixed coefficient lists, usable in place of typed input."""
    first = [2, 3, 4]
    second = [4, 3, 2]
    return [first, second]
# get input from cmd line
n = 3
a = get_input(n)
b = get_input(n)
#a,b = some_input()
ap = p(a)
bp = p(b)
# Every line is printed as one pre-formatted string so the text is the same
# on Python 2 and Python 3.
print('entered :  %s %s' % (a, b))
c = ap+bp
print('a + b =  %s' % c)
print(exp(3))
x = ap
print(x)
# Accumulate x + x**2 + x**3 + x**4 as polynomials. The accumulator is called
# `total` so it does not shadow the builtin sum().
total = p([0])
for k in range(1,5):
    el = x
    # Multiply up to x**k, printing each intermediate power.
    for j in range(1,k):
        el = el * x
        print('el:  %s' % el)
    if el is not None and total is not None:
        total = total + el
        print('sum  %s' % total)
output
entered : [2, 3, 4] [4, 3, 2]
a + b = 6*1 + 6*x + 6*x^2
20.0855369232
2*1 + 3*x + 4*x^2
sum 2*1 + 3*x + 4*x^2
el: 4*1 + 12*x + 25*x^2 + 24*x^3 + 16*x^4
sum 6*1 + 15*x + 29*x^2 + 24*x^3 + 16*x^4
el: 4*1 + 12*x + 25*x^2 + 24*x^3 + 16*x^4
el: 8*1 + 36*x + 102*x^2 + 171*x^3 + 204*x^4 + 144*x^5 + 64*x^6
sum 14*1 + 51*x + 131*x^2 + 195*x^3 + 220*x^4 + 144*x^5 + 64*x^6
el: 4*1 + 12*x + 25*x^2 + 24*x^3 + 16*x^4
el: 8*1 + 36*x + 102*x^2 + 171*x^3 + 204*x^4 + 144*x^5 + 64*x^6
el: 16*1 + 96*x + 344*x^2 + 792*x^3 + 1329*x^4 + 1584*x^5 + 1376*x^6 + 768*x^7 + 256*x^8
sum 30*1 + 147*x + 475*x^2 + 987*x^3 + 1549*x^4 + 1728*x^5 + 1440*x^6 + 768*x^7 + 256*x^8
I solved the task in the following way, though I'm not sure if it answers question (iv).
The output just compares the exact value of e**x to the calculated value from module Polynomial.
from math import factorial, exp
from Polynomial import *
from sys import *
# Reads x and N from the command line on the form [filename.py, x-value, N-value].
# float()/int() are used instead of eval(): argv is external text and must be
# parsed, never executed.
x = float(argv[1])
N = int(argv[2])
# Coefficients 1/k! for k = 0..N inclusive: a degree-N polynomial has N + 1 terms
# (range(N) stopped one term short).
list_coeff = [1./factorial(i) for i in range(N + 1)]
print(list_coeff)
# Creating an instance of class Polynomial
p1 = Polynomial(list_coeff)
print('Calculated value of e**%f = %f ' % (x, p1(x)))
print('Exact value of e**%f = %f' % (x, exp(x)))
"""Test Execution
Terminal > python Polynomial_exp.py 0.5 5
[1.0, 1.0, 0.5, 0.16666666666666666, 0.041666666666666664, 0.008333333333333333]
Calculated value of e**0.500000 = 1.648698
Exact value of e**0.500000 = 1.648721
"""

avoiding code duplication in Python code redux

This is a followup to an earlier question. I got some good suggestions for that, so I thought I would try my luck again.
from itertools import takewhile
# K is not defined in this snippet; it has to exist before this point.
# illuminacond is the takewhile predicate: it keeps lines until one whose first
# comma-separated field is '[Controls]', and, when K is given, only while
# fewer than K lines have been written (i is the counter defined below).
if K is None:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]'
else:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]' and i < K
af=open('a')
bf=open('b', 'w')
cf=open('c', 'w')
i = 0  # number of lines written to b so far
# The two loops below are identical except that the first also copies each
# accepted input line to c.
if K is None:
    for line in takewhile(illuminacond, af):
        line_split=line.split(',')
        pid=line_split[1][0:3]  # first three characters of the second field
        out = line_split[1] + ',' + line_split[2] + ',' + line_split[3][1] + line_split[3][3] + ',' \
            + line_split[15] + ',' + line_split[9] + ',' + line_split[10]
        # NOTE(review): 'cnv' is tested twice; presumably a third prefix was intended.
        if pid!='cnv' and pid!='hCV' and pid!='cnv':
            i = i+1
            bf.write(out.strip('"')+'\n')
            cf.write(line)
else:
    for line in takewhile(illuminacond, af):
        line_split=line.split(',')
        pid=line_split[1][0:3]
        out = line_split[1] + ',' + line_split[2] + ',' + line_split[3][1] + line_split[3][3] + ',' \
            + line_split[15] + ',' + line_split[9] + ',' + line_split[10]
        if pid!='cnv' and pid!='hCV' and pid!='cnv':
            i = i+1
            bf.write(out.strip('"')+'\n')
Is it possible to compactify this code? If I have some stuff in common in two loops like this,
one obvious possibility is to just factor out the common code, but here, eww.
The annoying thing is that the only difference here is the writing to c.
Brief summary of code: If K is not None, then loop over K lines of a and write to both b and c. Otherwise, loop over all of a and just write to b.
Why not use only one loop, but including the condition inside that loop? Also, you can get rid of the redundancy in that lambda, I think.
from itertools import takewhile
k_is_none = K is None
def illuminacond(x):
    """takewhile predicate: True while the line ``x`` should still be consumed.

    Stops at the first line whose leading comma-separated field is
    '[Controls]' and, when K is set, once the module-level counter ``i`` has
    reached K.
    """
    if x.split(',')[0] == '[Controls]':
        return False
    if k_is_none:
        return True
    return i < K
i = 0  # number of lines written to b so far; read by illuminacond
# The with-statement closes all three files even if a line is malformed;
# they were previously left open.
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        pid = line_split[1][0:3]
        out = ','.join([line_split[1], line_split[2],
                        line_split[3][1] + line_split[3][3],
                        line_split[15], line_split[9], line_split[10]])
        # The original compared against 'cnv' twice; the duplicate is dropped.
        if pid not in ('cnv', 'hCV'):
            i = i+1
            bf.write(out.strip('"')+'\n')
            if k_is_none:
                cf.write(line)
One check, one loop, no classes, psyco-optimizable.
from itertools import takewhile
# One check up front selects both the stop condition and the per-line action,
# so the loop below needs no further test on K.
if K is None:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]'
    def action(cf, line): cf.write(line)
else:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]' and i < K
    def action(cf, line): pass
i = 0  # number of lines written to b so far; read by illuminacond
# The with-statement closes all three files even if a line is malformed;
# they were previously left open.
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        pid = line_split[1][0:3]
        out = ','.join([line_split[1], line_split[2],
                        line_split[3][1] + line_split[3][3],
                        line_split[15], line_split[9], line_split[10]])
        # The original compared against 'cnv' twice; the duplicate is dropped.
        if pid not in ('cnv', 'hCV'):
            i = i+1
            bf.write(out.strip('"')+'\n')
            action(cf, line)
Why not just:
from itertools import takewhile
# K is None means "no line limit"; otherwise stop once i lines have been written.
illuminacond = lambda x: x.split(',')[0] != '[Controls]' and (K is None or i<K)
# The counter must exist before the loop: it was never initialised, so
# `i = i+1` (and `i<K` when K is set) raised NameError.
i = 0
# The with-statement closes all three files even if a line is malformed.
with open('a') as af, open('b', 'w') as bf, open('c', 'w') as cf:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        pid = line_split[1][0:3]
        out = ','.join([line_split[1], line_split[2],
                        line_split[3][1] + line_split[3][3],
                        line_split[15], line_split[9], line_split[10]])
        # The original compared against 'cnv' twice; the duplicate is dropped.
        if pid not in ('cnv', 'hCV'):
            i = i+1
            bf.write(out.strip('"')+'\n')
            if K is None:
                cf.write(line)
How about this (second class based version)?
from itertools import takewhile
class Foo:
    """Filter lines of file 'a' into file 'b' (and into 'c' when K is None).

    Reading stops at the first line whose leading comma-separated field is
    '[Controls]' or, when ``K`` is given, after ``K`` lines have been written.
    The output files are opened by the constructor; call ``close()`` when
    finished with the instance.
    """

    def __init__(self, K = None):
        self.bf = open('b', 'w')
        self.cf = open('c', 'w')
        self.count = 0  # number of lines written so far
        self.K = K

    def Go(self):
        """Process file 'a' line by line until the stop condition is hit."""
        # with-statement: the input file used to be opened inline and never closed.
        with open('a') as source:
            for self.line in takewhile(self.Lamda(), source):
                self.SplitLine()
                if self.IsValidPid():
                    self.WriteLineToFiles()

    def close(self):
        """Close both output files."""
        self.bf.close()
        self.cf.close()

    def SplitLine(self):
        """Split the current line on commas into ``self.lineSplit``."""
        self.lineSplit = self.line.split(',')

    def Lamda(self):
        """Return the takewhile predicate matching the configured ``K``."""
        if self.K is None:
            return lambda x: x.split(',')[0] != '[Controls]'
        # self.count is read on every call, so the limit tracks lines written.
        return lambda x: x.split(',')[0] != '[Controls]' and self.count < self.K

    def IsValidPid(self):
        """Return True unless the second field starts with 'cnv' or 'hCV'."""
        pid = self.lineSplit[1][0:3]
        # The original compared against 'cnv' twice; the duplicate is dropped.
        return pid not in ('cnv', 'hCV')

    def WriteLineToFiles(self):
        """Write the reformatted line to b and, when K is None, the raw line to c."""
        self.count += 1
        self.bf.write(self.ParseLine())
        if self.K is None:
            self.cf.write(self.line)

    def ParseLine(self):
        """Build the output record from fields 1, 2, 3 (chars 1 and 3), 15, 9, 10."""
        fields = self.lineSplit
        record = ','.join([fields[1], fields[2], fields[3][1] + fields[3][3],
                           fields[15], fields[9], fields[10]])
        return record.strip('"') + '\n'
# Run with the default K=None: no line limit, accepted lines are also copied to c.
Foo().Go()
Original version:
from itertools import takewhile
# takewhile predicate: keep lines until the '[Controls]' marker and, when a
# limit K is given, only while the counter i is still below it.
if K is not None:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]' and i < K
else:
    illuminacond = lambda x: x.split(',')[0] != '[Controls]'
def Parse(line):
    """Build the output record from an already-split line.

    Joins fields 1, 2, characters 1 and 3 of field 3, then fields 15, 9 and 10
    with commas, strips surrounding double quotes and appends a newline.
    """
    first, second = line[1], line[2]
    pair = line[3][1] + line[3][3]
    pieces = (first, second, pair, line[15], line[9], line[10])
    return ','.join(pieces).strip('"') + '\n'
def IsValidPid(line_split):
    """Return True unless the second field starts with 'cnv' or 'hCV'."""
    prefix = line_split[1][:3]
    return prefix not in ('cnv', 'hCV')
# Output files used by WriteLineToFiles; opened for writing at module level.
bf=open('b', 'w')
cf=open('c', 'w')
def WriteLineToFiles(line, line_split):
    """Write the reformatted record to b and, when K is None, the raw line to c."""
    record = Parse(line_split)
    bf.write(record)
    if K is not None:
        return
    cf.write(line)
i = 0  # number of accepted lines so far; read by illuminacond when K is set
# with-statement: the input file used to be opened inline and never closed.
with open('a') as af:
    for line in takewhile(illuminacond, af):
        line_split = line.split(',')
        if IsValidPid(line_split):
            WriteLineToFiles(line, line_split)
            i += 1

Categories

Resources