Tiny Language compiler using Python and regex

Hello Stack Overflow users,
I hope you're having a good day.
I'm writing a tiny language compiler for my homework and tried using regex, but the output is weird.
First of all, I get an identifier called 't' which is not used anywhere in my input.
Second, it doesn't separate the identifier 'x' from the semicolon.
Thanks in advance for your help.
Here is my input:
read x; {input an integer }
if 0 < x then { don’t compute if x <= 0 }
fact := 1;
repeat
fact := fact * x;
x := x - 1
until x = 0;
write fact { output factorial of x }
end
And here is my code using regex:
# -*- coding: utf-8 -*-
"""
Created on Wed May 13 04:11:06 2020
@author: PC
"""
import re

class OwnCompiler(object):
    def __init__(self, file):
        self.file = open(file, "r").readlines()
        self.symbols = {
            "+": "PLUS_OP",
            "-": "MINUS_OP",
            "*": "MUL_OP",
            "/": "DIV_OP",
            "=": "EQUAL_OP",
            "<": "LESS_OP",
            ">": "GREATER_OP",
            "(": "LEFT_PARENTHESIS",
            ")": "RIGHT_PARENTHESIS",
            ":=": "ASSIGN",
            ";": "SEMICOLON",
        }
        self.commentPattern = re.compile(r".*({\n*\s*.*\s*})")
        self.reservePattern = re.compile(r"\s*(read|write|if|then|else|end|repeat|until)+\s*(.*)(then)*")
        self.symbolPattern = re.compile(r".*(\+|\*|-|/|=|<|>|\(|\)|;)")
        self.identifierSymbol = re.compile(r".*(\w+)\s+(:=)\s+(.*)")

    def compileOutput(self):
        self.fileWrite = open("output.txt", "w")
        self.fileWrite.write("Type Token\n==================\n")
        for i in self.file:
            print(i)
            self.getComment(i)
            self.getReserveWord(i)
            self.getIdentify(i)
        self.fileWrite.close()  # end

    def getComment(self, text):
        try:
            self.fileWrite.write("COMMENT " + self.commentPattern.match(text).group(1) + "\n")
        except:
            print("NO_COMMENT")

    def getReserveWord(self, text):
        self.Compiled = self.reservePattern.match(text)
        try:
            self.fileWrite.write("RESERVE_WORD " + self.Compiled.group(1) + "\n")
            self.getSymbols(self.Compiled.group(2))
            try:
                self.fileWrite.write("RESERVE_WORD " + self.Compiled.group(3) + "\n")
            except:
                print("NO_RESERVE_WORD2")
        except:
            print("NO_RESERVE_WORD")

    def getSymbols(self, text):
        self.Compiled = self.symbolPattern.match(text)
        self.GOT_TOKEN = self.getTokensSymbols(self.Compiled.group())
        try:
            self.fileWrite.write(self.GOT_TOKEN + " " + self.Compiled.group() + "\n")
        except:
            print("NO_SYMBOLS")

    def getIdentify(self, text):
        self.Compiled = self.identifierSymbol.match(text)
        try:
            self.fileWrite.write("IDENTIFIER " + self.Compiled.group(1) + "\n")
            self.getSymbols(text)
            for i in self.Compiled.group(3):
                if i == " ":
                    continue
                if self.isNumber(i):
                    self.fileWrite.write("NUMBER ")
                else:
                    self.fileWrite.write("WORD ")
            self.fileWrite.write(self.Compiled.group(3) + "\n")
        except:
            print("NO_IDENTIFIRES")

    def getTokensSymbols(self, symbol):
        try:
            return self.symbols[symbol]
        except:
            print("NOT_DEFINED_IN_SYMBOL_DICT")
            return "UNKNOWN"

    def isNumber(self, text):
        try:
            int(text)
            return True
        except:
            return False

if __name__ == "__main__":
    instance = OwnCompiler("input.txt")
    instance.compileOutput()
And here is my output
Type Token
==================
COMMENT { Sample program in TINY language – computes factorial }
COMMENT {input an integer }
RESERVE_WORD read
UNKNOWN x;
COMMENT { don’t compute if x <= 0 }
RESERVE_WORD if
UNKNOWN 0 < x then { don’t compute if x <=
IDENTIFIER t
UNKNOWN fact := 1;
RESERVE_WORD repeat
IDENTIFIER t
UNKNOWN fact := fact * x;
IDENTIFIER x
UNKNOWN x := x -
RESERVE_WORD until
UNKNOWN x = 0;
COMMENT { output factorial of x }
RESERVE_WORD write
RESERVE_WORD end
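
A quick note on where the stray 't' likely comes from (my reading of the regex, not something confirmed in the thread): in r".*(\w+)\s+(:=)\s+(.*)" the greedy .* swallows as much as it can and gives back only one character, so group(1) captures just the last letter of the identifier:

import re

# The greedy .* eats 'fac', leaving group(1) with only 't':
m = re.match(r".*(\w+)\s+(:=)\s+(.*)", "fact := 1;")
print(m.groups())  # ('t', ':=', '1;')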

If you are going to parse a language you need a 'lexer' that returns individual tokens, ignoring whitespace and comments. Along these lines, just as an example:
import re, collections

class Lexer(object):
    WHITESPACE = r'(?P<WHITESPACE>\s+)'
    COMMENT = r'(?P<COMMENT>{[^}]*})'
    READ = r'(?P<READ>\bread\b)'
    WRITE = r'(?P<WRITE>\bwrite\b)'
    IF = r'(?P<IF>\bif\b)'
    THEN = r'(?P<THEN>\bthen\b)'
    ELSE = r'(?P<ELSE>\belse\b)'
    END = r'(?P<END>\bend\b)'
    REPEAT = r'(?P<REPEAT>\brepeat\b)'
    UNTIL = r'(?P<UNTIL>\buntil\b)'
    OPERATOR = r'(?P<OPERATOR>(?:[+*/=<>-]|:=))'
    LPAREN = r'(?P<LPAREN>\()'
    RPAREN = r'(?P<RPAREN>\))'
    IDENTIFIER = r'(?P<IDENTIFIER>[a-z]+)'
    INTEGER = r'(?P<INTEGER>\d+)'
    SEMICOLON = r'(?P<SEMICOLON>;)'

    regex = re.compile('|'.join([
        WHITESPACE,
        COMMENT,
        READ,
        WRITE,
        IF,
        THEN,
        ELSE,
        END,
        REPEAT,
        UNTIL,
        OPERATOR,
        LPAREN,
        RPAREN,
        IDENTIFIER,
        INTEGER,
        SEMICOLON,
    ]))

    def __init__(self, file):
        def generate_tokens(text):
            Token = collections.namedtuple('Token', ['type', 'value'])
            scanner = Lexer.regex.finditer(text)
            last_end = 0
            for m in scanner:
                start = m.start()
                end = m.end()
                if start != last_end:
                    # skipped over text to find the next token implies that
                    # there was unrecognizable text, i.e. an "error token"
                    error_text = text[last_end:start]
                    yield Token('ERROR', error_text)
                last_end = end
                token = Token(m.lastgroup, m.group())
                if token.type != 'WHITESPACE' and token.type != 'COMMENT':
                    yield token
            yield Token('EOF', '<end-of-file>')

        with open(file, "r") as f:
            text = f.read()
        self._token_generator = generate_tokens(text)

    def next_token(self):
        # if you call this past the "EOF" token you will get a StopIteration exception
        return self._token_generator.__next__()

lexer = Lexer('input.txt')
while True:
    token = lexer.next_token()
    print(token)
    if token.type == 'EOF':
        break
Prints:
Token(type='READ', value='read')
Token(type='IDENTIFIER', value='x')
Token(type='SEMICOLON', value=';')
Token(type='IF', value='if')
Token(type='INTEGER', value='0')
Token(type='OPERATOR', value='<')
Token(type='IDENTIFIER', value='x')
Token(type='THEN', value='then')
Token(type='IDENTIFIER', value='fact')
Token(type='OPERATOR', value=':=')
Token(type='INTEGER', value='1')
Token(type='SEMICOLON', value=';')
Token(type='REPEAT', value='repeat')
Token(type='IDENTIFIER', value='fact')
Token(type='OPERATOR', value=':=')
Token(type='IDENTIFIER', value='fact')
Token(type='OPERATOR', value='*')
Token(type='IDENTIFIER', value='x')
Token(type='SEMICOLON', value=';')
Token(type='IDENTIFIER', value='x')
Token(type='OPERATOR', value=':=')
Token(type='IDENTIFIER', value='x')
Token(type='OPERATOR', value='-')
Token(type='INTEGER', value='1')
Token(type='UNTIL', value='until')
Token(type='IDENTIFIER', value='x')
Token(type='OPERATOR', value='=')
Token(type='INTEGER', value='0')
Token(type='SEMICOLON', value=';')
Token(type='WRITE', value='write')
Token(type='IDENTIFIER', value='fact')
Token(type='END', value='end')
Token(type='EOF', value='<end-of-file>')
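Two details in the sketch above are worth calling out: the keyword patterns are joined into the alternation ahead of IDENTIFIER, so finditer tries them first, and the \b word boundaries stop a keyword from matching inside a longer name. A quick check, reusing the Lexer class (the input 'ready' is my own made-up example):

# 'ready' is one IDENTIFIER, not the keyword 'read' followed by 'y':
m = Lexer.regex.match('ready')
print(m.lastgroup, m.group())  # IDENTIFIER ready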

Related

How to replace and insert a new node into an AST using esprima for Python?

I am having trouble trying to replace and insert a new node into the AST using esprima for Python. There is an example on GitHub, but it replaces all the nodes with the same node that I created; I just want one of them changed while keeping the rest of the tree intact.
from __future__ import print_function

import json
import esprima
from jscodegen_py import jscodegen

# Build a CallExpression expression statement manually:
# callee = esprima.nodes.Identifier("alert")
# args = [esprima.nodes.Literal("other alert", "'other alert'")]
# call = esprima.nodes.CallExpression(callee, args)
# other_alert = esprima.nodes.ExpressionStatement(call)

generator = jscodegen.CodeGenerator(indent=2)

def js2ast(js: str):
    return esprima.parseScript(js)

def ast2js(ast: dict):
    return generator.generate(ast)

# Add a few expression statements using `parse()`:
af = {'Lg': {'RawString': 'var Lg = function(WN5, AN5) {\n return WN5 > AN5;\n };', 'RawValue': 'WN5 > AN5', 'operator': '>'}}

accessory_function_expression_statements = {}
for name in af:
    accessory_function_expression_statements[name] = esprima.parse(af[name]['RawValue']).body[0]

class MyVisitor(esprima.NodeVisitor):
    def transform_CallExpression(self, node, metadata):
        # If the callee is an `alert()`, change it to `console.log()`:
        if node.callee.name == 'Lg':
            new_node_arguments = []
            for item in node.arguments:
                new_node_arguments.append(esprima.parse(generator.generate_expression(item.toDict(), 0)).body[0])
            new_node = accessory_function_expression_statements['Lg'].expression
            new_node.left = new_node_arguments[0].expression
            new_node.right = new_node_arguments[1].expression
            print(f'new_node: {new_node}')
            return self.transform_Object(new_node, metadata)  # every time this is called it will walk down the tree from the beginning

visitor = MyVisitor()
tree = esprima.parse("""
if (Lg(GP5["length"], 5)) {
    var kP5 = window["parseInt"](GP5[5], 10);
    lP5 = window["isNaN"](kP5) || dK(hA(1), kP5) ? window["Number"]["MAX_VALUE"] : kP5,
    lP5 = kP5;
    var abc = Boolean(Lg(Jj, 21))
}
""", delegate=visitor)
print(ast2js(tree.toDict()))
But this code gives the following result:
if (Jj > 21) {
    var kP5 = window["parseInt"](GP5[5], 10);
    lP5 = window["isNaN"](kP5) || dK(hA(1), kP5) ? window["Number"]["MAX_VALUE"] : kP5, lP5 = kP5;
    var abc = Boolean(Jj > 21);
}
As you can see, it replaces both Lg(...) calls with '(Jj > 21)', but I want it like this:
if (GP5.length > 5) {
    var kP5 = window["parseInt"](GP5[5], 10);
    lP5 = window["isNaN"](kP5) || dK(hA(1), kP5) ? window["Number"]["MAX_VALUE"] : kP5, lP5 = kP5;
    var abc = Boolean(Jj > 21);
}
How can I do this using esprima in Python?
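One guess, since the thread shows no accepted fix: accessory_function_expression_statements['Lg'].expression returns the same node object on every call, so every replacement site aliases a single node and the last mutation wins, which would explain both conditions ending up as 'Jj > 21'. A possible fix (my untested sketch; it deep-copies the template per call site and, as a simplification, reuses the argument nodes directly):

import copy

class MyVisitor(esprima.NodeVisitor):
    def transform_CallExpression(self, node, metadata):
        if node.callee.name == 'Lg':
            # Hypothetical fix: copy the template so each call site gets
            # its own node instead of all sharing one mutable object.
            new_node = copy.deepcopy(
                accessory_function_expression_statements['Lg'].expression)
            new_node.left = node.arguments[0]
            new_node.right = node.arguments[1]
            return new_node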

PyParsing: parse if not a keyword

I am trying to parse a file as follows:
testp.txt
title = Test Suite A;
timeout = 10000
exp_delay = 500;
log = TRUE;
sect
{
    type = typeA;
    name = "HelloWorld";
    output_log = "c:\test\out.log";
};
sect
{
    name = "GoodbyeAll";
    type = typeB;
    comm1_req = 0xDEADBEEF;
    comm1_resp = (int, 1234366);
};
The file first contains a section with parameters and then some sects. I can parse a file containing just parameters, and I can parse a file containing just sects, but I can't parse both.
from pyparsing import *
from pathlib import Path

command_req = Word(alphanums)
command_resp = "(" + delimitedList(Word(alphanums)) + ")"
kW = Word(alphas+'_', alphanums+'_') | command_req | command_resp
keyName = ~Literal("sect") + Word(alphas+'_', alphanums+'_') + FollowedBy("=")
keyValue = dblQuotedString.setParseAction(removeQuotes) | OneOrMore(kW, stopOn=LineEnd())
param = dictOf(keyName, Suppress("=") + keyValue + Optional(Suppress(";")))
node = Group(Literal("sect") + Literal("{") + OneOrMore(param) + Literal("};"))
final = OneOrMore(node) | OneOrMore(param)

param.setDebug()
p = Path(__file__).with_name("testp.txt")
with open(p) as f:
    try:
        x = final.parseFile(f, parseAll=True)
        print(x)
        print("...")
        dx = x.asDict()
        print(dx)
    except ParseException as pe:
        print(pe)
The issue I have is that param matches against sect, so it then expects a =. I tried putting ~Literal("sect") in keyName, but that just leads to another error:
Exception raised:Found unwanted token, "sect", found '\n' (at char 188), (line:4, col:56)
Expected end of text, found 's' (at char 190), (line:6, col:1)
How do I get it to use one parse method for sect and another (param) if not sect?
My final goal would be to have the whole lot in a Dict with the global params and sects included.
EDIT
Think I've figured it out:
This line...
final = OneOrMore(node) | OneOrMore(param)
...should be:
final = ZeroOrMore(param) + ZeroOrMore(node)
But I wonder if there is a more structured way (as I'd ultimately like a dict)?
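One possibility, as an untested sketch rather than a confirmed answer: keep the ZeroOrMore ordering from the edit, suppress the punctuation, and attach results names so asDict() has keys to hang things on (param is the same dictOf as in the question; 'globals' and 'sects' are my own labels):

node = Group(Suppress("sect") + Suppress("{") + OneOrMore(param) + Suppress("};"))
final = ZeroOrMore(param)("globals") + ZeroOrMore(node)("sects")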

Avoiding repetition of if statement

I prepared functions and sorted data for this task
(it's actually AoC day 4; a quick explanation follows to make it clear).
I have already sorted the data into this 'structure':
byr:1991
eyr:2022
hcl:#341e13
iyr:2016
pid:729933757
hgt:167cm
ecl:gry

hcl:231d64
cid:124
ecl:gmt
eyr:2039
hgt:189in
pid:#9c3ea1

ecl:#1f58f9
pid:#758e59
iyr:2022
hcl:z
byr:2016
hgt:68
eyr:1933

[and so on, +250 packages (by package I mean a set of byr, ecl, eyr, ... lines; packages are separated by a blank line).]
and prepared this code:
def check_fields(list):
    comparison_list = ['byr', 'iyr', 'eyr',
                       'hgt', 'hcl', 'ecl',
                       'pid']
    statement = True
    for i in comparison_list:
        statement = statement and (i in list)
    return statement

def check_byr_iyr_eyr(line):
    prefix, value = line.split(':')
    cases = {'byr': {'min': 1920, 'max': 2002},
             'iyr': {'min': 2010, 'max': 2020},
             'eyr': {'min': 2020, 'max': 2030}}
    return cases[prefix]['min'] <= int(value) <= cases[prefix]['max']

def check_hgt(line):
    unit = line[len(line)-2] + line[len(line)-1]
    value = line[line.index(':')+1: -2]
    cases = {'cm': {'min': 150, 'max': 193},
             'in': {'min': 59, 'max': 76}}
    return cases[unit]['min'] <= int(value) <= cases[unit]['max']

def check_hcl(line):
    statement = True
    if line[line.index(':')+1] != '#' or len(line[line.index(':')+2:]) != 6:
        return False
    else:
        string = line[line.index('#')+1:]
        for i in string:
            statement = statement and (97 <= ord(i) <= 102 or 48 <= ord(i) <= 57)
        return statement

def check_ecl(line):
    comparison_list = ['amb', 'blu', 'brn',
                       'gry', 'grn', 'hzl',
                       'oth']
    if line[line.index(':') + 1:] in comparison_list:
        return True
    return False

def check_pid(line):
    if len(line[line.index(':')+1:]) != 9:
        return False
    try:
        int(line[line.index(':')+1:])
        return True
    except:
        return False

line_list = []
valid_passports = 0
with open('results.txt', 'r') as f:
    for line in f:
        if line != '\n':
            ''' add line to line_list '''
            pass
        else:
            '''
            check lines from line_list
            using above declared functions
            if every line is ok:
                valid_passports += 1
            '''
I have to check that every package contains every key except cid, and then check that every value for each key is proper.
byr (Birth Year) - four digits; at least 1920 and at most 2002.
iyr (Issue Year) - four digits; at least 2010 and at most 2020.
eyr (Expiration Year) - four digits; at least 2020 and at most 2030.
hgt (Height) - a number followed by either cm or in:
If cm, the number must be at least 150 and at most 193.
If in, the number must be at least 59 and at most 76.
hcl (Hair Color) - a # followed by exactly six characters 0-9 or a-f.
ecl (Eye Color) - exactly one of: amb blu brn gry grn hzl oth.
pid (Passport ID) - a nine-digit number, including leading zeroes.
cid (Country ID) - ignored, missing or not.
(the rules above are enforced by the functions declared earlier)
And the question/problem is: how can I avoid repeating an if statement while checking every line added to line_list (this refers to the part with the multi-line comment with the pseudo code)? I mean, I could do it like
if line[0:3] == "byr":
    check_byr(line)
# and so on; many if statements checking the first 3 letters to pick the proper function
but that doesn't seem like a proper and elegant solution. Maybe you could give me hints on how to deal with that, or another idea for solving the problem in a different way that I didn't use.
Please help, thanks.
Can't you have a mapping from prefix to target function?
Something like
line = # ...
prefix = # ... either "hgt" or "pid" or other

def check_hgt(line):
    pass

def check_pid(line):
    pass

# ... other checker functions

checker_functions_pool = {"hgt": check_hgt, "pid": check_pid}
checker_function = checker_functions_pool[prefix]
checker_function(line)
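Tying that back to the loop skeleton in the question, one sketch (mine, untested; it assumes the check_fields and check_* functions from the question and simply skips cid):

checker_functions_pool = {
    "byr": check_byr_iyr_eyr, "iyr": check_byr_iyr_eyr,
    "eyr": check_byr_iyr_eyr, "hgt": check_hgt,
    "hcl": check_hcl, "ecl": check_ecl, "pid": check_pid,
}

line_list = []
valid_passports = 0
with open('results.txt') as f:
    for line in f:
        line = line.strip()
        if line:
            line_list.append(line)
            continue
        # Blank line: one package is complete, validate it.
        keys = [l[:3] for l in line_list]
        if check_fields(keys) and all(
                checker_functions_pool[l[:3]](l)
                for l in line_list if l[:3] in checker_functions_pool):
            valid_passports += 1
        line_list = []
# If the file does not end with a blank line, the last package
# still needs the same check once more after the loop.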
@viGor207, this is another way to approach it (part 2 example):
import re

passports = [
    dict(
        line.split(':')
        for line
        in pas.split()
    )
    for pas
    in open('input').read().split('\n\n')
]

required = {'byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid'}

def valid(pas):
    return bool(
        1920 <= int(pas['byr']) <= 2002 and
        2010 <= int(pas['iyr']) <= 2020 <= int(pas['eyr']) <= 2030 and
        re.fullmatch(r'[0-9]{2,3}(cm|in)', pas['hgt']) and
        (
            (pas['hgt'][-2:] == 'cm' and 150 <= int(pas['hgt'][:-2]) <= 193) or
            (pas['hgt'][-2:] == 'in' and 59 <= int(pas['hgt'][:-2]) <= 76)
        ) and
        re.fullmatch(r'#[0-9a-f]{6}', pas['hcl']) and
        pas['ecl'] in {'amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth'} and
        re.fullmatch(r'[0-9]{9}', pas['pid'])
    )

print(
    sum(
        all(r in pas for r in required) and valid(pas)
        for pas
        in passports
    )
)
To make it complete, here is the part one:
passports = [
    dict(
        line.split(':')
        for line
        in pas.split()
    )
    for pas
    in open('input').read().split('\n\n')
]

required = {'byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid'}

print(
    sum(
        all(r in pas for r in required)
        for pas in passports
    )
)
The task states:
Each passport is represented as a sequence of key:value pairs
separated by spaces or newlines. Passports are separated by blank
lines.
a sequence of key:value pairs is screaming use a list of dicts in Python, but your method could be used.
You could use a dict that maps field names to the function that checks that field's line, field_to_checker. I took your example parsed input as a list of your parsed lines, added a checker for cid that just returns True, and created the following code snippet:
def check_cid(line):
    return True

field_to_checker = {
    'byr': check_byr_iyr_eyr,
    'cid': check_cid,
    'ecl': check_ecl,
    'eyr': check_byr_iyr_eyr,
    'hcl': check_hcl,
    'hgt': check_hgt,
    'iyr': check_byr_iyr_eyr,
    'pid': check_pid,
}

line_data = """byr:1991
eyr:2022
hcl:#341e13
iyr:2016
pid:729933757
hgt:167cm
ecl:gry

hcl:231d64
cid:124
ecl:gmt
eyr:2039
hgt:189in
pid:#9c3ea1

ecl:#1f58f9
pid:#758e59
iyr:2022
hcl:z
byr:2016
hgt:68
eyr:1933""".split('\n')

valid_passports = 0
ok_passport = True  # Accumulating over all fields of one passport
for line in line_data + ['\n']:  # Add empty line to force processing last passport
    line = line.rstrip()
    if line:  # Not blank line
        if ok_passport:  # One False value in a passport will prevail
            key = line[:3]
            ok_passport = (key in field_to_checker
                           and field_to_checker[key](line))
    else:  # Blank line, end of passport record
        if ok_passport:
            valid_passports += 1
        ok_passport = True
In the for line in line_data + ['\n'] loop, the count of valid_passports is only updated when a blank line ends a passport record. The last passport needs a blank line after it to be counted properly, hence the extra blank line appended to line_data.
The above is untested, but should give you tips on how to extend what you have started with.
I would suggest placing the dictionary values in variables as early as possible to make the logic simpler to write and read.
That should allow you to make single-line conditions that are legible:
data = \
"""byr:1991
eyr:2022
hcl:#341e13
iyr:2016
pid:729933757
hgt:167cm
ecl:gry

hcl:231d64
cid:124
ecl:gmt
eyr:2039
hgt:189in
pid:#9c3ea1

ecl:#1f58f9
pid:#758e59
iyr:2022
hcl:z
byr:2016
hgt:68
eyr:1933"""

...

# iterator to get packages
def getPackages(d):
    package = dict()
    for line in d:
        if line:
            field, value = line.split(":", 1)
            package[field] = value
        else:
            yield package.copy()
            package.clear()
    if package:
        yield package

fields = ['byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid']
for package in getPackages(data.split("\n")):
    values = [package.get(f, "") for f in fields]
    byr, iyr, eyr, hgt, hcl, ecl, pid = values
    isValid = "" not in values[:-1] \
          and int(byr) in range(1920, 2002+1) \
          and int(iyr) in range(2010, 2020+1) \
          and int(eyr) in range(2020, 2030+1) \
          and int(hgt[:-2]) in {"cm": range(150, 193+1), "in": range(59, 76+1)}.get(hgt[-2:], []) \
          and hcl.startswith("#") and len(hcl) == 7 \
          and all(97 <= ord(i) <= 102 or 48 <= ord(i) <= 57 for i in hcl[1:]) \
          and ecl in {'amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth'} \
          and (pid == "" or pid.isdigit() and len(pid) == 9)
    print(pid, isValid)

"""
729933757 True
#9c3ea1 False
#758e59 False
"""

Reupload: How do I convert a PHP file to a Python dictionary?

After going over the fora, I did not find anything that could solve this issue properly. I want to convert a file written in PHP to a Python dictionary. In this case the file is a converted TrueType font file.
<?php
$type = 'TrueType';
$name = 'Calibri';
$desc = array('Ascent'=>750,'Descent'=>-250,'CapHeight'=>632,'Flags'=>32,'FontBBox'=>'[-503 -313 1240 1026]','ItalicAngle'=>0,'StemV'=>70,'MissingWidth'=>507);
$up = -113;
$ut = 65;
$cw = array(
chr(0)=>507,chr(1)=>507,chr(2)=>507,chr(3)=>507,chr(4)=>507,chr(5)=>507,chr(6)=>507,chr(7)=>507,chr(8)=>507,chr(9)=>507,chr(10)=>507,chr(11)=>507,chr(12)=>507,chr(13)=>507,chr(14)=>507,chr(15)=>507,chr(16)=>507,chr(17)=>507,chr(18)=>507,chr(19)=>507,chr(20)=>507,chr(21)=>507,
chr(22)=>507,chr(23)=>507,chr(24)=>507,chr(25)=>507,chr(26)=>507,chr(27)=>507,chr(28)=>507,chr(29)=>507,chr(30)=>507,chr(31)=>507,' '=>226,'!'=>326,'"'=>401,'#'=>498,'$'=>507,'%'=>715,'&'=>682,'\''=>221,'('=>303,')'=>303,'*'=>498,'+'=>498,
','=>250,'-'=>306,'.'=>252,'/'=>386,'0'=>507,'1'=>507,'2'=>507,'3'=>507,'4'=>507,'5'=>507,'6'=>507,'7'=>507,'8'=>507,'9'=>507,':'=>268,';'=>268,'<'=>498,'='=>498,'>'=>498,'?'=>463,'@'=>894,'A'=>579,
'B'=>544,'C'=>533,'D'=>615,'E'=>488,'F'=>459,'G'=>631,'H'=>623,'I'=>252,'J'=>319,'K'=>520,'L'=>420,'M'=>855,'N'=>646,'O'=>662,'P'=>517,'Q'=>673,'R'=>543,'S'=>459,'T'=>487,'U'=>642,'V'=>567,'W'=>890,
'X'=>519,'Y'=>487,'Z'=>468,'['=>307,'\\'=>386,']'=>307,'^'=>498,'_'=>498,'`'=>291,'a'=>479,'b'=>525,'c'=>423,'d'=>525,'e'=>498,'f'=>305,'g'=>471,'h'=>525,'i'=>229,'j'=>239,'k'=>455,'l'=>229,'m'=>799,
'n'=>525,'o'=>527,'p'=>525,'q'=>525,'r'=>349,'s'=>391,'t'=>335,'u'=>525,'v'=>452,'w'=>715,'x'=>433,'y'=>453,'z'=>395,'{'=>314,'|'=>460,'}'=>314,'~'=>498,chr(127)=>507,chr(128)=>507,chr(129)=>507,chr(130)=>250,chr(131)=>305,
chr(132)=>418,chr(133)=>690,chr(134)=>498,chr(135)=>498,chr(136)=>395,chr(137)=>1038,chr(138)=>459,chr(139)=>339,chr(140)=>867,chr(141)=>507,chr(142)=>468,chr(143)=>507,chr(144)=>507,chr(145)=>250,chr(146)=>250,chr(147)=>418,chr(148)=>418,chr(149)=>498,chr(150)=>498,chr(151)=>905,chr(152)=>450,chr(153)=>705,
chr(154)=>391,chr(155)=>339,chr(156)=>850,chr(157)=>507,chr(158)=>395,chr(159)=>487,chr(160)=>226,chr(161)=>326,chr(162)=>498,chr(163)=>507,chr(164)=>498,chr(165)=>507,chr(166)=>498,chr(167)=>498,chr(168)=>393,chr(169)=>834,chr(170)=>402,chr(171)=>512,chr(172)=>498,chr(173)=>306,chr(174)=>507,chr(175)=>394,
chr(176)=>339,chr(177)=>498,chr(178)=>336,chr(179)=>334,chr(180)=>292,chr(181)=>550,chr(182)=>586,chr(183)=>252,chr(184)=>307,chr(185)=>246,chr(186)=>422,chr(187)=>512,chr(188)=>636,chr(189)=>671,chr(190)=>675,chr(191)=>463,chr(192)=>579,chr(193)=>579,chr(194)=>579,chr(195)=>579,chr(196)=>579,chr(197)=>579,
chr(198)=>763,chr(199)=>533,chr(200)=>488,chr(201)=>488,chr(202)=>488,chr(203)=>488,chr(204)=>252,chr(205)=>252,chr(206)=>252,chr(207)=>252,chr(208)=>625,chr(209)=>646,chr(210)=>662,chr(211)=>662,chr(212)=>662,chr(213)=>662,chr(214)=>662,chr(215)=>498,chr(216)=>664,chr(217)=>642,chr(218)=>642,chr(219)=>642,
chr(220)=>642,chr(221)=>487,chr(222)=>517,chr(223)=>527,chr(224)=>479,chr(225)=>479,chr(226)=>479,chr(227)=>479,chr(228)=>479,chr(229)=>479,chr(230)=>773,chr(231)=>423,chr(232)=>498,chr(233)=>498,chr(234)=>498,chr(235)=>498,chr(236)=>229,chr(237)=>229,chr(238)=>229,chr(239)=>229,chr(240)=>525,chr(241)=>525,
chr(242)=>527,chr(243)=>527,chr(244)=>527,chr(245)=>527,chr(246)=>527,chr(247)=>498,chr(248)=>529,chr(249)=>525,chr(250)=>525,chr(251)=>525,chr(252)=>525,chr(253)=>453,chr(254)=>525,chr(255)=>453);
$enc = 'cp1252';
$uv = array(0=>array(0,128),128=>8364,130=>8218,131=>402,132=>8222,133=>8230,134=>array(8224,2),136=>710,137=>8240,138=>352,139=>8249,140=>338,142=>381,145=>array(8216,2),147=>array(8220,2),149=>8226,150=>array(8211,2),152=>732,153=>8482,154=>353,155=>8250,156=>339,158=>382,159=>376,160=>array(160,96));
$file = 'calibri.z';
$originalsize = 77252;
$subsetted = true;
?>
to:
font = {"type": "TrueType",
        "name": "Calibri",
        "desc": {"Ascent": 750, etc...},
        etc......
        }
I thank all in advance!
P.S. I reuploaded this question (my previous one was closed) to share my solution in case someone else needs it.
The solution I found was just writing the parsing myself:
import re
import regex  # third-party module; supports the variable-length lookbehind used below

def parse_php(fontfile):
    font_dict = {}
    for item in php_chunks(fontfile):
        key, attr = item.split(" = ")
        attr = attr.replace("\t", "").strip()
        attr = re.sub("^(.*);", r"\1", attr)
        # re.split("[,](?!'=>)", data["cw"])
        if re.match("'(.*)'", attr):
            attr = re.sub("'(.*)'", r"\1", attr)
        try:
            attr = eval(attr)
            font_dict[key.replace("$", "").strip()] = attr
        except:
            if "array" in attr:
                if re.match(r"^array\(", attr):
                    attr_dict = {}
                    attr = re.sub(r"array\((.*)\)", r"\1", attr)
                    attr = regex.split(r"(?<!array\(\d*)[,](?!'=>)", attr)
                    for row in attr:
                        dict_key, dict_item = row.strip().split("=>")
                        try:
                            attr_dict[str(eval(dict_key))] = eval(dict_item)
                        except:
                            attr_dict[str(eval(dict_key))] = dict_item
                    font_dict[key.replace("$", "").strip()] = attr_dict
            else:
                font_dict[key.replace("$", "").strip()] = attr
    return font_dict

def php_chunks(raw):
    raw = raw.read()
    chunk = ""
    for idx, line in enumerate(raw.splitlines()):
        if line.startswith("$"):
            if idx != 1:
                yield chunk
            chunk = line
        else:
            chunk = "".join([chunk, line])
    if chunk:
        yield chunk  # yield the last chunk as well
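For completeness, a usage sketch (my own; the post does not show the call, and the file name calibri.php is hypothetical):

with open('calibri.php') as fontfile:
    font = parse_php(fontfile)

print(font['name'])            # 'Calibri'
print(font['desc']['Ascent'])  # 750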

How to split a file into multiple files based on a repeated string?

I have a file and I want to split it into different files based on the string "async". The output I get is a little messy: I tried to use the word "async" as a key to divide the file, but the generated files have the first line of one function followed by the body of the function below it. For example, the file is:
'use strict';
const shim = require('fabric-shim');
const util = require('util');

let Chaincode = class {
    async Init(stub) {
        let ret = stub.getFunctionAndParameters();
        console.info(ret);
        console.info('=========== Instantiated Marbles Chaincode ===========');
        return shim.success();
    }

    async Invoke(stub) {
        console.info('Transaction ID: ' + stub.getTxID());
        console.info(util.format('Args: %j', stub.getArgs()));
        let ret = stub.getFunctionAndParameters();
        console.info(ret);
        let method = this[ret.fcn];
        if (!method) {
            console.log('no function of name:' + ret.fcn + ' found');
            throw new Error('Received unknown function ' + ret.fcn + ' invocation');
        }
        try {
            let payload = await method(stub, ret.params, this);
            return shim.success(payload);
        } catch (err) {
            console.log(err);
            return shim.error(err);
        }
    }

    async initMarble(stub, args, thisClass) {
        if (args.length != 4) {
            throw new Error('Incorrect number of arguments. Expecting 4');
        }
        // ==== Input sanitation ====
        console.info('--- start init marble ---')
        if (args[0].lenth <= 0) {
            throw new Error('1st argument must be a non-empty string');
        }
        if (args[1].lenth <= 0) {
            throw new Error('2nd argument must be a non-empty string');
        }
        if (args[2].lenth <= 0) {
            throw new Error('3rd argument must be a non-empty string');
        }
        if (args[3].lenth <= 0) {
            throw new Error('4th argument must be a non-empty string');
        }
        let marbleName = args[0];
        let color = args[1].toLowerCase();
        let owner = args[3].toLowerCase();
        let size = parseInt(args[2]);
        if (typeof size !== 'number') {
            throw new Error('3rd argument must be a numeric string');
        }
        let marbleState = await stub.getState(marbleName);
        if (marbleState.toString()) {
            throw new Error('This marble already exists: ' + marbleName);
        }
        // ==== Create marble object and marshal to JSON ====
        let marble = {};
        marble.docType = 'marble';
        marble.name = marbleName;
        marble.color = color;
        marble.size = size;
        marble.owner = owner;
        await stub.putState(marbleName, Buffer.from(JSON.stringify(marble)));
        let indexName = 'color~name'
        let colorNameIndexKey = await stub.createCompositeKey(indexName, [marble.color, marble.name]);
        console.info(colorNameIndexKey);
        console.info('- end init marble');
    }
I tried this:
import re
import os

filetype = '.js'
result = ''
count = 0
start = 0
name = 'functions'
matchedLine = ''
stringToMatch = 'async'

with open('myjson.js', 'r') as f:
    for x in f.read().split("\n"):
        if stringToMatch in x:
            if (start == 1):
                with open(name + str(count) + '.js', 'w') as opf:
                    matchedLine = x
                    opf.write(matchedLine + '\n' + result)
                    opf.close()
                result = ''
                print(count)
                count += 1
                matchedLine = ''
            else:
                start = 1
        else:
            if (result == ''):
                result = x
            else:
                result = result + '\n' + x
but the output is a little bit messy.
function0.js:
async Invoke(stub) {
'use strict';
const shim = require('fabric-shim');
const util = require('util');
let Chaincode = class {
let ret = stub.getFunctionAndParameters();
console.info(ret);
console.info('=========== Instantiated Marbles Chaincode ===========');
return shim.success();
}
function1.js:
async initMarble(stub, args, thisClass) {
console.info('Transaction ID: ' + stub.getTxID());
console.info(util.format('Args: %j', stub.getArgs()));
let ret = stub.getFunctionAndParameters();
console.info(ret);
let method = this[ret.fcn];
if (!method) {
console.log('no function of name:' + ret.fcn + ' found');
throw new Error('Received unknown function ' + ret.fcn + ' invocation');
}
try {
let payload = await method(stub, ret.params, this);
return shim.success(payload);
} catch (err) {
console.log(err);
return shim.error(err);
}
}
There must be many ways to do this. Here is one:
import re

class Writer:
    def __init__(self):
        self._num = 0
        self._fh = None

    def close(self):
        if self._fh:
            self._fh.close()

    def start_file(self):
        self.close()
        self._fh = open("file{}.js".format(self._num), "w")
        self._num += 1

    def write(self, data):
        if self._fh:
            self._fh.write(data)

writer = Writer()
with open('myjson.js') as f:
    for line in f:
        if re.match(' *async ', line):
            writer.start_file()
        writer.write(line)
writer.close()
If your goal is to separate all the sections that have async code into individual files, one method you might try is to count the curly brackets, open versus closed. To do this, you would keep a variable that increments for every { and decrements for each }, e.g. (not optimized/pretty, just explaining):
brackets = 0
buffer = ""
found_async = False
for line_of_code in code:
    if "async" in line_of_code:
        found_async = True
    if "{" in line_of_code:
        brackets += 1
    if "}" in line_of_code:
        brackets -= 1
    buffer += line_of_code
    if brackets == 0:
        write_buffer_to_file_here
        buffer = ""
As a concept, this will probably not work as is, but should give you an idea of what I'm trying to say.
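To make that concept concrete, here is a runnable sketch of the same idea (mine, not the answerer's: it starts buffering at each async line, tracks brace depth per line, and writes one funcN.js file per balanced block; the input name myjson.js is taken from the question):

def split_async_blocks(lines):
    depth = 0
    count = 0
    buffer = []
    capturing = False
    for line in lines:
        if not capturing and "async" in line:
            capturing = True  # an async method header starts a new block
        if capturing:
            buffer.append(line)
            depth += line.count("{") - line.count("}")
            if depth == 0:  # braces balanced: the method is complete
                with open("func{}.js".format(count), "w") as out:
                    out.writelines(buffer)
                count += 1
                buffer = []
                capturing = False

with open('myjson.js') as f:
    split_async_blocks(f)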
