Related
My goal is to run through all the *.py files in a directory and look at each call to a specific function test_func. This function has some optional parameters and I need to audit when the function is called with the optional parameters. My thought is to use the ast library (specifically ast.walk()).
I suppose this is a static analysis problem.
# function definition
def test_func(
name: str,
*,
user: Optional['User'] = None,
request: Optional[WebRequest] = None,
**kwargs
) -> bool:
pass
# somewhere in another file ...
test_func('name0')
test_func('name1', request=request)
test_func('name1')
test_func('name2', user=user)
# figure out something like below:
# name0 is never given any optional parameters
# name1 is sometimes given request
# name2 is always given user
Here is a POC :
import typing
from typing import Optional
class User: pass
class WebRequest: pass
# function definition
def test_func(
name: str,
*,
user: Optional['User'] = None,
request: Optional[WebRequest] = None,
**kwargs
) -> bool:
pass
# somewhere in another file ...
test_func('name0')
test_func('name1', request=WebRequest())
test_func('name1')
test_func('name2', user=User())
# figure out something like below:
# name0 is never given any optional parameters
# name1 is sometimes given request
# name2 is always given user
import ast
import collections
import sys


def collect_call_kwargs(py_code, func_name="test_func"):
    """Group the keyword-argument names of every ``func_name(...)`` call.

    Parameters:
        py_code: Python source text to analyse (it is parsed, never executed).
        func_name: name of the audited function; both ``func_name(...)`` and
            ``something.func_name(...)`` calls are matched.

    Returns:
        A ``defaultdict(list)`` mapping the literal string passed as first
        positional argument to a list with one tuple of keyword names per
        call, in source order.

    Calls whose first positional argument is missing or is not a string
    literal cannot be grouped statically; they are reported on stderr and
    skipped instead of aborting the whole audit.
    """
    calls = collections.defaultdict(list)
    for node in ast.walk(ast.parse(py_code)):
        if not isinstance(node, ast.Call):
            continue
        callee = node.func
        if isinstance(callee, ast.Name):
            callee_name = callee.id
        elif isinstance(callee, ast.Attribute):
            callee_name = callee.attr
        else:
            # e.g. `f()()` or `handlers[0]()`: the callee has no static name.
            continue
        if callee_name != func_name:
            continue
        first_arg = node.args[0] if node.args else None
        if not (isinstance(first_arg, ast.Constant)
                and isinstance(first_arg.value, str)):
            print(
                f"line {node.lineno}: skipping {func_name}() call without "
                f"a literal string as first argument",
                file=sys.stderr,
            )
            continue
        calls[first_arg.value].append(
            tuple(keyword.arg for keyword in node.keywords)
        )
    return calls


# The proof of concept audits its own source file.
with open(__file__, "rt") as py_file:
    py_code = py_file.read()

each_call_kwargs_names_by_arg0_value: typing.Dict[str, typing.List[typing.Tuple[str, ...]]] = collect_call_kwargs(py_code)

for arg0_value, each_call_kwargs_names in each_call_kwargs_names_by_arg0_value.items():
    # NEVER: no call passes keywords; ALWAYS: every call does; else SOMETIMES.
    if all(len(call_args) == 0 for call_args in each_call_kwargs_names):
        frequency = "NEVER"
    elif all(len(call_args) != 0 for call_args in each_call_kwargs_names):
        frequency = "ALWAYS"
    else:
        frequency = "SOMETIMES"
    print(f"{arg0_value!r} {frequency}: {each_call_kwargs_names}")
# Output :
# 'name0' NEVER: [()]
# 'name1' SOMETIMES: [('request',), ()]
# 'name2' ALWAYS: [('user',)]
You can use a recursive generator function to traverse an ast of your Python code:
import ast
def get_calls(d, f=('test_func',)):
    """Recursively yield ``(first_arg, keyword_names)`` for audited calls.

    Parameters:
        d: an ``ast`` node (or any other value found in a node field; values
            that are not AST nodes simply yield nothing).
        f: collection of function names to look for. Both plain calls
            (``test_func(...)``) and attribute calls (``mod.test_func(...)``)
            are matched.

    Yields:
        ``(first_arg, keyword_names)`` where ``first_arg`` is the value of the
        first positional argument when it is a literal, otherwise ``None``
        (no positional argument, or a non-literal expression), and
        ``keyword_names`` is the list of keyword names used in that call.
    """
    if isinstance(d, ast.Call):
        callee = d.func
        # Only Name and Attribute callees carry a static name; anything else
        # (e.g. `f()()`) is skipped rather than raising AttributeError.
        if isinstance(callee, ast.Name):
            callee_name = callee.id
        elif isinstance(callee, ast.Attribute):
            callee_name = callee.attr
        else:
            callee_name = None
        if callee_name in f:
            first = d.args[0] if d.args else None
            first_value = first.value if isinstance(first, ast.Constant) else None
            yield first_value, [kw.arg for kw in d.keywords]
    # Descend into every field; list-valued fields hold several children.
    for field in getattr(d, '_fields', ()):
        value = getattr(d, field)
        children = value if isinstance(value, list) else [value]
        for child in children:
            yield from get_calls(child, f=f)
Putting it all together:
import os, collections

# Map each first-argument value to the keyword-name lists of all its calls,
# gathered from every *.py file in the current working directory.
d = collections.defaultdict(list)
for filename in os.listdir(os.getcwd()):
    if filename.endswith('.py'):
        # Use a separate name for the handle so the loop variable is not shadowed.
        with open(filename) as source_file:
            for a, b in get_calls(ast.parse(source_file.read())):
                d[a].append(b)

# verdict: 'never' when no call passes keywords, 'always' when every call
# does, otherwise 'sometimes'. params: every distinct keyword name seen, in
# first-seen order (not just the first keyword of each call).
r = {
    a: {
        'verdict': 'never' if not any(b) else 'always' if all(b) else 'sometimes',
        'params': list(dict.fromkeys(kw for call_kws in b for kw in call_kws)),
    }
    for a, b in d.items()
}
Output:
{'name0': {'verdict': 'never', 'params': []},
'name1': {'verdict': 'sometimes', 'params': ['request']},
'name2': {'verdict': 'always', 'params': ['user']}}
After going through the forums, I did not find anything that solves this problem properly. I want to convert a file written in PHP into a Python dictionary. In this case the file is a converted TrueType font file.
<?php
$type = 'TrueType';
$name = 'Calibri';
$desc = array('Ascent'=>750,'Descent'=>-250,'CapHeight'=>632,'Flags'=>32,'FontBBox'=>'[-503 -313 1240 1026]','ItalicAngle'=>0,'StemV'=>70,'MissingWidth'=>507);
$up = -113;
$ut = 65;
$cw = array(
chr(0)=>507,chr(1)=>507,chr(2)=>507,chr(3)=>507,chr(4)=>507,chr(5)=>507,chr(6)=>507,chr(7)=>507,chr(8)=>507,chr(9)=>507,chr(10)=>507,chr(11)=>507,chr(12)=>507,chr(13)=>507,chr(14)=>507,chr(15)=>507,chr(16)=>507,chr(17)=>507,chr(18)=>507,chr(19)=>507,chr(20)=>507,chr(21)=>507,
chr(22)=>507,chr(23)=>507,chr(24)=>507,chr(25)=>507,chr(26)=>507,chr(27)=>507,chr(28)=>507,chr(29)=>507,chr(30)=>507,chr(31)=>507,' '=>226,'!'=>326,'"'=>401,'#'=>498,'$'=>507,'%'=>715,'&'=>682,'\''=>221,'('=>303,')'=>303,'*'=>498,'+'=>498,
','=>250,'-'=>306,'.'=>252,'/'=>386,'0'=>507,'1'=>507,'2'=>507,'3'=>507,'4'=>507,'5'=>507,'6'=>507,'7'=>507,'8'=>507,'9'=>507,':'=>268,';'=>268,'<'=>498,'='=>498,'>'=>498,'?'=>463,'@'=>894,'A'=>579,
'B'=>544,'C'=>533,'D'=>615,'E'=>488,'F'=>459,'G'=>631,'H'=>623,'I'=>252,'J'=>319,'K'=>520,'L'=>420,'M'=>855,'N'=>646,'O'=>662,'P'=>517,'Q'=>673,'R'=>543,'S'=>459,'T'=>487,'U'=>642,'V'=>567,'W'=>890,
'X'=>519,'Y'=>487,'Z'=>468,'['=>307,'\\'=>386,']'=>307,'^'=>498,'_'=>498,'`'=>291,'a'=>479,'b'=>525,'c'=>423,'d'=>525,'e'=>498,'f'=>305,'g'=>471,'h'=>525,'i'=>229,'j'=>239,'k'=>455,'l'=>229,'m'=>799,
'n'=>525,'o'=>527,'p'=>525,'q'=>525,'r'=>349,'s'=>391,'t'=>335,'u'=>525,'v'=>452,'w'=>715,'x'=>433,'y'=>453,'z'=>395,'{'=>314,'|'=>460,'}'=>314,'~'=>498,chr(127)=>507,chr(128)=>507,chr(129)=>507,chr(130)=>250,chr(131)=>305,
chr(132)=>418,chr(133)=>690,chr(134)=>498,chr(135)=>498,chr(136)=>395,chr(137)=>1038,chr(138)=>459,chr(139)=>339,chr(140)=>867,chr(141)=>507,chr(142)=>468,chr(143)=>507,chr(144)=>507,chr(145)=>250,chr(146)=>250,chr(147)=>418,chr(148)=>418,chr(149)=>498,chr(150)=>498,chr(151)=>905,chr(152)=>450,chr(153)=>705,
chr(154)=>391,chr(155)=>339,chr(156)=>850,chr(157)=>507,chr(158)=>395,chr(159)=>487,chr(160)=>226,chr(161)=>326,chr(162)=>498,chr(163)=>507,chr(164)=>498,chr(165)=>507,chr(166)=>498,chr(167)=>498,chr(168)=>393,chr(169)=>834,chr(170)=>402,chr(171)=>512,chr(172)=>498,chr(173)=>306,chr(174)=>507,chr(175)=>394,
chr(176)=>339,chr(177)=>498,chr(178)=>336,chr(179)=>334,chr(180)=>292,chr(181)=>550,chr(182)=>586,chr(183)=>252,chr(184)=>307,chr(185)=>246,chr(186)=>422,chr(187)=>512,chr(188)=>636,chr(189)=>671,chr(190)=>675,chr(191)=>463,chr(192)=>579,chr(193)=>579,chr(194)=>579,chr(195)=>579,chr(196)=>579,chr(197)=>579,
chr(198)=>763,chr(199)=>533,chr(200)=>488,chr(201)=>488,chr(202)=>488,chr(203)=>488,chr(204)=>252,chr(205)=>252,chr(206)=>252,chr(207)=>252,chr(208)=>625,chr(209)=>646,chr(210)=>662,chr(211)=>662,chr(212)=>662,chr(213)=>662,chr(214)=>662,chr(215)=>498,chr(216)=>664,chr(217)=>642,chr(218)=>642,chr(219)=>642,
chr(220)=>642,chr(221)=>487,chr(222)=>517,chr(223)=>527,chr(224)=>479,chr(225)=>479,chr(226)=>479,chr(227)=>479,chr(228)=>479,chr(229)=>479,chr(230)=>773,chr(231)=>423,chr(232)=>498,chr(233)=>498,chr(234)=>498,chr(235)=>498,chr(236)=>229,chr(237)=>229,chr(238)=>229,chr(239)=>229,chr(240)=>525,chr(241)=>525,
chr(242)=>527,chr(243)=>527,chr(244)=>527,chr(245)=>527,chr(246)=>527,chr(247)=>498,chr(248)=>529,chr(249)=>525,chr(250)=>525,chr(251)=>525,chr(252)=>525,chr(253)=>453,chr(254)=>525,chr(255)=>453);
$enc = 'cp1252';
$uv = array(0=>array(0,128),128=>8364,130=>8218,131=>402,132=>8222,133=>8230,134=>array(8224,2),136=>710,137=>8240,138=>352,139=>8249,140=>338,142=>381,145=>array(8216,2),147=>array(8220,2),149=>8226,150=>array(8211,2),152=>732,153=>8482,154=>353,155=>8250,156=>339,158=>382,159=>376,160=>array(160,96));
$file = 'calibri.z';
$originalsize = 77252;
$subsetted = true;
?>
to:
font = {"type":"TrueType",
"name":"Calibri",
"desc":{"Ascent":750,etc...},
etc......
}
I thank all in advance!
P.S. I reuploaded this question (my previous was closed) to share my solution in case someone else needs it.
The solution i found was just writing the parsing myself:
import re
import regex
def parse_php(fontfile):
    """Parse a PHP font-definition file into a Python dictionary.

    Parameters:
        fontfile: an open text file object; it is handed to ``php_chunks``,
            which yields one ``$name = value;`` statement per item.

    Returns:
        A dict keyed by the PHP variable names (without ``$``). Scalars that
        evaluate as Python expressions are stored as Python values,
        ``array(k=>v, ...)`` becomes a dict with string keys, and anything
        else is kept as the raw string.

    SECURITY: values are evaluated with ``eval``. The evaluation namespace is
    reduced to ``chr`` (needed for ``chr(N)`` array keys) with no builtins,
    which also stops quoted strings such as ``'max'`` from resolving to
    Python builtins, but this is not a sandbox -- only parse trusted files.
    """
    font_dict = {}
    eval_env = {"__builtins__": {}, "chr": chr}
    for item in php_chunks(fontfile):
        key, attr = item.split(" = ")
        name = key.replace("$", "").strip()
        attr = attr.replace("\t", "").strip()
        # Drop the trailing statement terminator.
        attr = re.sub(r"^(.*);", r"\1", attr)
        # Strip the quotes of a single-quoted PHP string.
        if re.match(r"'(.*)'", attr):
            attr = re.sub(r"'(.*)'", r"\1", attr)
        try:
            font_dict[name] = eval(attr, eval_env)
        except Exception:
            # Not a Python-evaluable scalar: either a PHP array or plain text.
            if "array" in attr:
                if re.match(r"^array\(", attr):
                    attr_dict = {}
                    attr = re.sub(r"array\((.*)\)", r"\1", attr)
                    # Split on commas that separate entries: not the ones
                    # inside a nested array(N,...) and not a quoted ',' key.
                    rows = regex.split(r"(?<!array\(\d*)[,](?!'=>)", attr)
                    for row in rows:
                        dict_key, dict_item = row.strip().split("=>")
                        entry_key = str(eval(dict_key, eval_env))
                        try:
                            attr_dict[entry_key] = eval(dict_item, eval_env)
                        except Exception:
                            # e.g. a nested array(...): keep the raw text.
                            attr_dict[entry_key] = dict_item
                    font_dict[name] = attr_dict
            else:
                font_dict[name] = attr
    return font_dict
def php_chunks(raw):
    """Yield each top-level ``$name = ...;`` statement of a PHP file.

    Parameters:
        raw: an open text file object (anything with ``read()``).

    Yields:
        One string per statement. A statement starts on a line beginning with
        ``$``; following lines are appended to it without a separator until
        the next ``$`` line. The ``<?php`` / ``?>`` tag lines and anything
        before the first statement are ignored, and the last statement of the
        file is yielded as well.
    """
    chunk = None
    for line in raw.read().splitlines():
        if line.strip() in ("<?php", "?>"):
            continue
        if line.startswith("$"):
            if chunk is not None:
                yield chunk
            chunk = line
        elif chunk is not None:
            chunk += line
    # Flush the statement still being accumulated at end of file.
    if chunk is not None:
        yield chunk
I've written a script to pipe data from the Kustomer API into our database, and although it works fine it's a bit messy, so I was wondering whether there's a more elegant solution. I'm defining the row of results I'm pushing through as a dictionary and then pushing it to MySQL, but the messy part comes when some of these values aren't always available in the JSON.
This has resulted in a try / except statements for each data point that may or may not be missing.
Is there a better way of doing this? Code below.
try:
record_data = {
'id': record['id'],
'created_at': str(datetime.strptime(record['attributes']['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7],
'last_activity_at': str(datetime.strptime(record['attributes']['lastActivityAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7],
'first_marked_done': None,
'last_marked_done': None,
'assigned_team': record['attributes']['assignedTeams'][0] if record['attributes']['assignedTeams'] != [] else None,
'conversation_type': None,
'conversation_category': None,
'conversation_subcategory': None,
'message_count': record['attributes']['messageCount'],
'note_count': record['attributes']['noteCount'],
'satisfaction': record['attributes']['satisfaction'],
'status': None,
'email': 1 if len(list(filter(lambda x: x == 'email', record['attributes']['channels']))) > 0 else 0,
'chat': 1 if len(list(filter(lambda x: x == 'chat', record['attributes']['channels']))) > 0 else 0,
'priority': record['attributes']['priority'],
'direction': 'outbound' if record['attributes']['direction'] == 'out' else 'in',
'nlp_score': None,
'nlp_sentiment': None,
'waiting_for': None,
'sla_breach': None,
'sla_status': None,
'breached_sla': None,
'breached_at': None
}
try:
record_data['status'] = record['attributes']['status']
except KeyError:
pass
try:
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
record_data['conversation_category'] = str(record['attributes']['custom']['categoryTree']).split('.')[0]
record_data['conversation_subcategory'] = str(record['attributes']['custom']['categoryTree']).split('.')[1] if len(str(record['attributes']['custom']['categoryTree']).split('.')) > 1 else None
except KeyError:
pass
try:
record_data['waiting_for'] = record['attributes']['custom']['typeStr']
except KeyError:
pass
try:
record_data['first_marked_done'] = str(datetime.strptime(record['attributes']['firstDone']['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7]
record_data['last_marked_done'] = str(datetime.strptime(record['attributes']['lastDone']['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7]
except KeyError:
pass
try:
record_data['sla_breach'] = 0 if record['attributes']['sla']['breached'] is False else 1
record_data['sla_status'] = record['attributes']['sla']['status']
if record_data['sla_breach'] == 1:
try:
record_data['breached_sla'] = record['attributes']['sla']['breach']['metric']
record_data['breached_at'] = record['attributes']['sla']['breach']['at']
except KeyError:
for m in record['attributes']['sla']['metrics']:
try:
if record['attributes']['sla']['metrics'][m]['breachAt'] == record['attributes']['sla']['summary']['firstBreachAt']:
record_data['breached_sla'] = m
record_data['breached_at'] = str(datetime.strptime(record['attributes']['sla']['summary']['firstBreachAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7]
except KeyError:
pass
except KeyError:
record_data['sla_breach'] = 0
print(record_data)
self.db.insert_update(KustomerConversations(**record_data))
except KeyError:
pass
First you should try, where possible, to use dict.get with a default value specified. Next you can consider contextmanager to make your code significantly cleaner. Consider this:
try:
record_data['status'] = record['attributes']['status']
except KeyError:
pass
try:
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
except KeyError:
pass
try:
record_data['waiting_for'] = record['attributes']['custom']['typeStr']
except KeyError:
pass
try:
record_data['first_marked_done'] = record['attributes']['firstDone']['createdAt']
except KeyError:
pass
Now rewritten, you can ensure consistent error handling without repeating logic:
from contextlib import contextmanager
@contextmanager
def error_handling():
    """Context manager that silently ignores a ``KeyError`` raised in its body.

    Any other exception propagates unchanged. The ``@contextmanager``
    decorator is what turns this generator into something usable in a
    ``with`` statement.
    """
    try:
        yield
    except KeyError:
        pass
with error_handling():
record_data['status'] = record['attributes']['status']
with error_handling():
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
with error_handling():
record_data['waiting_for'] = record['attributes']['custom']['typeStr']
with error_handling():
record_data['first_marked_done'] = record['attributes']['firstDone']['createdAt']
You can define an arbitrary number of functions like error_handling for various rules you wish to apply.
You can use a function that returns an element from nested dicts and doesn't raise an exception if the element doesn't exist.
Like this quick draft:
def get_nested_dict_value(src_dict, *nested_keys, **kwargs):
    """
    Get value of some nested dict by series of keys with default value.
    Example:
    instead of:
    x = data['a']['b']['c']['d']
    use
    x = get_nested_dict_value(data, 'a', 'b', 'c', 'd')
    or, if you need some non-None default value, add default=xxx kwarg:
    x = get_nested_dict_value(data, 'a', 'b', 'c', 'd', default=0)

    The default is returned as soon as a key is missing or an intermediate
    value cannot be indexed (for example it is None or a number). List
    levels can be traversed with integer indexes. With no keys at all,
    src_dict itself is returned.
    """
    default = kwargs.get("default", None)
    pointer = src_dict
    for key in nested_keys:
        try:
            pointer = pointer[key]
        except (KeyError, IndexError, TypeError):
            # Missing key/index, or the current level is not subscriptable.
            return default
    return pointer
So, instead of:
try:
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
except Exception:
pass
You just type:
record_data['conversation_type'] = get_nested_dict_value(record, 'attributes', 'custom', 'typeStr')
The different naming conventions on the input and output sides make it hard to beat the clarity of explicit assignments. Preserving the exact semantics of your version (e.g., that it doesn't assign conversation_category in the absence of a typeStr even if categoryTree is available) excludes certain choices (like making a data structure to loop over with a try/except on each access); you might be able to do better with more assumptions about your input data.
Nonetheless, in addition to the dict.get already mentioned, you can use builtins (any, or, and dict) and introduce a helper function and a few temporary variables to make the code much more readable:
def ptime(s):
    """Convert a ``YYYY-MM-DDTHH:MM:SS.fffZ`` timestamp to ``YYYY-MM-DD HH:MM:SS``.

    The fractional seconds are dropped by formatting explicitly. Slicing
    ``str(datetime)[:-7]`` only works when the microseconds are non-zero;
    with ``.000`` the string has no fractional part and the slice eats most
    of the time instead.
    """
    parsed = datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ')
    return parsed.strftime('%Y-%m-%d %H:%M:%S')
# Build one row from `record` and store it. A KeyError on any mandatory field
# (outer try) drops the whole record; optional groups are filled in by the
# inner try blocks and keep their defaults when a key is missing.
try:
    attr=record['attributes']
    cust=attr.get('custom',{}) # defer KeyErrors into the below
    record_data = dict(
        id = record['id'],
        created_at = ptime(attr['createdAt']),
        last_activity_at = ptime(attr['lastActivityAt']),
        first_marked_done = None,
        last_marked_done = None,
        # An empty team list must give None; indexing [] would raise an
        # IndexError that the KeyError handlers below do not catch.
        assigned_team = (attr['assignedTeams'] or [None])[0],
        conversation_type = None,
        conversation_category = None,
        conversation_subcategory = None,
        message_count = attr['messageCount'],
        note_count = attr['noteCount'],
        satisfaction = attr['satisfaction'],
        status = attr.get('status'),
        email = int(any(x == 'email' for x in attr['channels'])),
        chat = int(any(x == 'chat' for x in attr['channels'])),
        priority = attr['priority'],
        direction = 'outbound' if attr['direction'] == 'out' else 'in',
        nlp_score = None,
        nlp_sentiment = None,
        waiting_for = cust.get('typeStr'),
        sla_breach = 0,
        sla_status = None,
        breached_sla = None,
        breached_at = None
    )
    # The category fields are only set when typeStr is present as well.
    try:
        record_data['conversation_type'] = cust['typeStr']
        cat=str(cust['categoryTree']).split('.')
        record_data['conversation_category'] = cat[0]
        record_data['conversation_subcategory'] = cat[1] if len(cat) > 1 else None
    except KeyError: pass
    try:
        record_data['first_marked_done'] = ptime(attr['firstDone']['createdAt'])
        record_data['last_marked_done'] = ptime(attr['lastDone']['createdAt'])
    except KeyError: pass
    try:
        sla=attr['sla']
        record_data['sla_breach'] = 0 if sla['breached'] is False else 1
        record_data['sla_status'] = sla['status']
        if record_data['sla_breach'] == 1:
            try:
                record_data['breached_sla'] = sla['breach']['metric']
                record_data['breached_at'] = sla['breach']['at']
            except KeyError:
                # No explicit breach entry: look for the metric whose breach
                # time equals the summary's first breach.
                for m,v in sla['metrics'].items():
                    try:
                        v=v['breachAt']
                        if v == sla['summary']['firstBreachAt']:
                            record_data['breached_sla'] = m
                            record_data['breached_at'] = ptime(v)
                    except KeyError: pass
    except KeyError:
        # Incomplete SLA data counts as "not breached", as in the original
        # version of this code.
        record_data['sla_breach'] = 0
    print(record_data)
    self.db.insert_update(KustomerConversations(**record_data))
except KeyError: pass
While you might have a policy against it, in this case I recommend writing the remaining except KeyError: pass clauses on one line each: it helps the visual bracketing of the tentative code.
I want to convert the betacode in an existing .tex-File to normal greek letters.
For example: I want to replace:
\bcode{lo/gos}
with simple:
λόγος
And so on for all other glyphs. Fortunately there seems to be a python-script that is supposed to do just that. But, being completely inexperienced I simply don’t know how to run it.
Here is the code of the Python script:
# beta2unicode.py
#
# Version 2004-11-23
#
# James Tauber
# http://jtauber.com/
#
# You are free to redistribute this, but please inform me of any errors
#
# USAGE:
#
# trie = beta2unicodeTrie()
# beta = "LO/GOS\n";
# unicode, remainder = trie.convert(beta)
#
# - to get final sigma, string must end in \n
# - remainder will contain rest of beta if not all can be converted
class Trie:
    """Character trie mapping key strings to values, with longest-prefix lookup.

    Each node is a two-item list ``[value, children]`` where ``value`` is
    ``None`` for nodes that only exist as part of a longer key and
    ``children`` maps one character to the next node.
    """

    def __init__(self):
        self.root = [None, {}]

    def add(self, key, value):
        """Store *value* under *key*, creating intermediate nodes as needed."""
        curr_node = self.root
        for ch in key:
            curr_node = curr_node[1].setdefault(ch, [None, {}])
        curr_node[0] = value

    def find(self, key):
        """Return the value stored under exactly *key*, or ``None``."""
        curr_node = self.root
        for ch in key:
            try:
                curr_node = curr_node[1][ch]
            except KeyError:
                return None
        return curr_node[0]

    def findp(self, key):
        """Return ``(value, remainder)`` for the longest stored prefix of *key*.

        ``remainder`` is the part of *key* after that prefix. When no prefix
        of *key* has a value, ``(None, key)`` is returned with *key* intact,
        so no input is ever consumed without producing a value.
        """
        curr_node = self.root
        best_value = curr_node[0]
        best_len = 0
        for depth, ch in enumerate(key, 1):
            try:
                curr_node = curr_node[1][ch]
            except KeyError:
                break
            # Remember the deepest node that actually carries a value; deeper
            # nodes without one are only stepping stones of longer keys.
            if curr_node[0] is not None:
                best_value = curr_node[0]
                best_len = depth
        return (best_value, key[best_len:])

    def convert(self, keystring):
        """Translate *keystring* by repeatedly replacing its longest prefix.

        Returns ``(converted, remainder)``; ``remainder`` is empty when the
        whole string was converted, otherwise it is the unconverted rest
        starting at the first position where no key matches.
        """
        pieces = []
        key = keystring
        while key:
            value, rest = self.findp(key)
            # `value is None` (not falsiness): an empty-string mapping is a
            # valid conversion. `rest == key` guards against no progress.
            if value is None or rest == key:
                return ("".join(pieces), key)
            pieces.append(value)
            key = rest
        return ("".join(pieces), key)
def beta2unicodeTrie():
    """Build and return the Trie that maps uppercase Beta Code to Unicode Greek.

    Keys are Beta Code sequences (``*`` marks a capital, ``)`` / ``(`` the
    smooth / rough breathing, ``/`` ``\\`` ``=`` the acute / grave /
    circumflex accent, ``|`` the iota subscript, ``+`` the diaeresis).
    ``S`` followed by a newline or by one of the listed punctuation marks
    gives final sigma. A bare ``#`` line marks a combination that is not
    mapped.
    """
    t = Trie()
    t.add("*A", u"\u0391")
    t.add("*B", u"\u0392")
    t.add("*G", u"\u0393")
    t.add("*D", u"\u0394")
    t.add("*E", u"\u0395")
    t.add("*Z", u"\u0396")
    t.add("*H", u"\u0397")
    t.add("*Q", u"\u0398")
    t.add("*I", u"\u0399")
    t.add("*K", u"\u039A")
    t.add("*L", u"\u039B")
    t.add("*M", u"\u039C")
    t.add("*N", u"\u039D")
    t.add("*C", u"\u039E")
    t.add("*O", u"\u039F")
    t.add("*P", u"\u03A0")
    t.add("*R", u"\u03A1")
    t.add("*S", u"\u03A3")
    t.add("*T", u"\u03A4")
    t.add("*U", u"\u03A5")
    t.add("*F", u"\u03A6")
    t.add("*X", u"\u03A7")
    t.add("*Y", u"\u03A8")
    t.add("*W", u"\u03A9")
    t.add("A", u"\u03B1")
    t.add("B", u"\u03B2")
    t.add("G", u"\u03B3")
    t.add("D", u"\u03B4")
    t.add("E", u"\u03B5")
    t.add("Z", u"\u03B6")
    t.add("H", u"\u03B7")
    t.add("Q", u"\u03B8")
    t.add("I", u"\u03B9")
    t.add("K", u"\u03BA")
    t.add("L", u"\u03BB")
    t.add("M", u"\u03BC")
    t.add("N", u"\u03BD")
    t.add("C", u"\u03BE")
    t.add("O", u"\u03BF")
    t.add("P", u"\u03C0")
    t.add("R", u"\u03C1")
    # Final sigma: S at end of input (newline) or before punctuation.
    t.add("S\n", u"\u03C2")
    t.add("S,", u"\u03C2,")
    t.add("S.", u"\u03C2.")
    t.add("S:", u"\u03C2:")
    t.add("S;", u"\u03C2;")
    t.add("S]", u"\u03C2]")
    t.add("S#", u"\u03C2#")
    t.add("S_", u"\u03C2_")
    t.add("S", u"\u03C3")
    t.add("T", u"\u03C4")
    t.add("U", u"\u03C5")
    t.add("F", u"\u03C6")
    t.add("X", u"\u03C7")
    t.add("Y", u"\u03C8")
    t.add("W", u"\u03C9")
    t.add("I+", u"\u03CA")
    t.add("U+", u"\u03CB")
    t.add("A)", u"\u1F00")
    t.add("A(", u"\u1F01")
    t.add("A)\\", u"\u1F02")
    t.add("A(\\", u"\u1F03")
    t.add("A)/", u"\u1F04")
    t.add("A(/", u"\u1F05")
    t.add("E)", u"\u1F10")
    t.add("E(", u"\u1F11")
    t.add("E)\\", u"\u1F12")
    t.add("E(\\", u"\u1F13")
    t.add("E)/", u"\u1F14")
    t.add("E(/", u"\u1F15")
    t.add("H)", u"\u1F20")
    t.add("H(", u"\u1F21")
    t.add("H)\\", u"\u1F22")
    t.add("H(\\", u"\u1F23")
    t.add("H)/", u"\u1F24")
    t.add("H(/", u"\u1F25")
    t.add("I)", u"\u1F30")
    t.add("I(", u"\u1F31")
    t.add("I)\\", u"\u1F32")
    t.add("I(\\", u"\u1F33")
    t.add("I)/", u"\u1F34")
    t.add("I(/", u"\u1F35")
    t.add("O)", u"\u1F40")
    t.add("O(", u"\u1F41")
    t.add("O)\\", u"\u1F42")
    t.add("O(\\", u"\u1F43")
    t.add("O)/", u"\u1F44")
    t.add("O(/", u"\u1F45")
    t.add("U)", u"\u1F50")
    t.add("U(", u"\u1F51")
    t.add("U)\\", u"\u1F52")
    t.add("U(\\", u"\u1F53")
    t.add("U)/", u"\u1F54")
    t.add("U(/", u"\u1F55")
    t.add("W)", u"\u1F60")
    t.add("W(", u"\u1F61")
    t.add("W)\\", u"\u1F62")
    t.add("W(\\", u"\u1F63")
    t.add("W)/", u"\u1F64")
    t.add("W(/", u"\u1F65")
    t.add("A)=", u"\u1F06")
    t.add("A(=", u"\u1F07")
    t.add("H)=", u"\u1F26")
    t.add("H(=", u"\u1F27")
    t.add("I)=", u"\u1F36")
    t.add("I(=", u"\u1F37")
    t.add("U)=", u"\u1F56")
    t.add("U(=", u"\u1F57")
    t.add("W)=", u"\u1F66")
    t.add("W(=", u"\u1F67")
    t.add("*A)", u"\u1F08")
    t.add("*)A", u"\u1F08")
    t.add("*A(", u"\u1F09")
    t.add("*(A", u"\u1F09")
    #
    t.add("*(\\A", u"\u1F0B")
    t.add("*A)/", u"\u1F0C")
    t.add("*)/A", u"\u1F0C")
    # Capital + rough breathing + acute lives at ...D in each Greek Extended
    # row; ...F is the rough breathing + circumflex form.
    t.add("*A(/", u"\u1F0D")
    t.add("*(/A", u"\u1F0D")
    t.add("*E)", u"\u1F18")
    t.add("*)E", u"\u1F18")
    t.add("*E(", u"\u1F19")
    t.add("*(E", u"\u1F19")
    #
    t.add("*(\\E", u"\u1F1B")
    t.add("*E)/", u"\u1F1C")
    t.add("*)/E", u"\u1F1C")
    t.add("*E(/", u"\u1F1D")
    t.add("*(/E", u"\u1F1D")
    t.add("*H)", u"\u1F28")
    t.add("*)H", u"\u1F28")
    t.add("*H(", u"\u1F29")
    t.add("*(H", u"\u1F29")
    t.add("*H)\\", u"\u1F2A")
    t.add(")\\*H", u"\u1F2A")
    t.add("*)\\H", u"\u1F2A")
    #
    t.add("*H)/", u"\u1F2C")
    t.add("*)/H", u"\u1F2C")
    #
    t.add("*)=H", u"\u1F2E")
    t.add("(/*H", u"\u1F2D")
    t.add("*(/H", u"\u1F2D")
    t.add("*I)", u"\u1F38")
    t.add("*)I", u"\u1F38")
    t.add("*I(", u"\u1F39")
    t.add("*(I", u"\u1F39")
    #
    #
    t.add("*I)/", u"\u1F3C")
    t.add("*)/I", u"\u1F3C")
    #
    #
    t.add("*I(/", u"\u1F3D")
    t.add("*(/I", u"\u1F3D")
    #
    t.add("*O)", u"\u1F48")
    t.add("*)O", u"\u1F48")
    t.add("*O(", u"\u1F49")
    t.add("*(O", u"\u1F49")
    #
    #
    t.add("*(\\O", u"\u1F4B")
    t.add("*O)/", u"\u1F4C")
    t.add("*)/O", u"\u1F4C")
    t.add("*O(/", u"\u1F4D")
    t.add("*(/O", u"\u1F4D")
    #
    t.add("*U(", u"\u1F59")
    t.add("*(U", u"\u1F59")
    #
    t.add("*(/U", u"\u1F5D")
    #
    t.add("*(=U", u"\u1F5F")
    t.add("*W)", u"\u1F68")
    t.add("*W(", u"\u1F69")
    t.add("*(W", u"\u1F69")
    #
    #
    t.add("*W)/", u"\u1F6C")
    t.add("*)/W", u"\u1F6C")
    t.add("*W(/", u"\u1F6D")
    t.add("*(/W", u"\u1F6D")
    t.add("*A)=", u"\u1F0E")
    t.add("*)=A", u"\u1F0E")
    t.add("*A(=", u"\u1F0F")
    t.add("*W)=", u"\u1F6E")
    t.add("*)=W", u"\u1F6E")
    t.add("*W(=", u"\u1F6F")
    t.add("*(=W", u"\u1F6F")
    t.add("A\\", u"\u1F70")
    t.add("A/", u"\u1F71")
    t.add("E\\", u"\u1F72")
    t.add("E/", u"\u1F73")
    t.add("H\\", u"\u1F74")
    t.add("H/", u"\u1F75")
    t.add("I\\", u"\u1F76")
    t.add("I/", u"\u1F77")
    t.add("O\\", u"\u1F78")
    t.add("O/", u"\u1F79")
    t.add("U\\", u"\u1F7A")
    t.add("U/", u"\u1F7B")
    t.add("W\\", u"\u1F7C")
    t.add("W/", u"\u1F7D")
    t.add("A)/|", u"\u1F84")
    t.add("A(/|", u"\u1F85")
    t.add("H)|", u"\u1F90")
    t.add("H(|", u"\u1F91")
    t.add("H)/|", u"\u1F94")
    t.add("H)=|", u"\u1F96")
    t.add("H(=|", u"\u1F97")
    t.add("W)|", u"\u1FA0")
    t.add("W(=|", u"\u1FA7")
    t.add("A=", u"\u1FB6")
    t.add("H=", u"\u1FC6")
    t.add("I=", u"\u1FD6")
    t.add("U=", u"\u1FE6")
    t.add("W=", u"\u1FF6")
    t.add("I\\+", u"\u1FD2")
    t.add("I/+", u"\u1FD3")
    t.add("I+/", u"\u1FD3")
    t.add("U\\+", u"\u1FE2")
    t.add("U/+", u"\u1FE3")
    t.add("A|", u"\u1FB3")
    t.add("A/|", u"\u1FB4")
    t.add("H|", u"\u1FC3")
    t.add("H/|", u"\u1FC4")
    t.add("W|", u"\u1FF3")
    t.add("W|/", u"\u1FF4")
    t.add("W/|", u"\u1FF4")
    t.add("A=|", u"\u1FB7")
    t.add("H=|", u"\u1FC7")
    t.add("W=|", u"\u1FF7")
    # Rho with rough breathing (dasia) is U+1FE5; U+1FE4 is the smooth one.
    t.add("R(", u"\u1FE5")
    t.add("*R(", u"\u1FEC")
    t.add("*(R", u"\u1FEC")
    # t.add("~", u"~")
    # t.add("-", u"-")
    # t.add("(null)", u"(null)")
    # t.add("&", "&")
    t.add("0", u"0")
    t.add("1", u"1")
    t.add("2", u"2")
    t.add("3", u"3")
    t.add("4", u"4")
    t.add("5", u"5")
    t.add("6", u"6")
    t.add("7", u"7")
    t.add("8", u"8")
    t.add("9", u"9")
    t.add("#", u"#")
    t.add("$", u"$")
    t.add(" ", u" ")
    t.add(".", u".")
    t.add(",", u",")
    t.add("'", u"'")
    t.add(":", u":")
    t.add(";", u";")
    t.add("_", u"_")
    t.add("[", u"[")
    t.add("]", u"]")
    # A newline converts to nothing, so it can serve as an end-of-word marker.
    t.add("\n", u"")
    return t
t = beta2unicodeTrie()
import sys
for line in file(sys.argv[1]):
a, b = t.convert(line)
if b:
print a.encode("utf-8"), b
raise Exception
print a.encode("utf-8")
And here is a little .tex-file with which it should work.
\documentclass[12pt]{scrbook}
\usepackage[polutonikogreek, ngerman]{babel}
\usepackage[ngerman]{betababel}
\usepackage{fontspec}
%\defaultfontfeatures{Ligatures=TeX}
%\newfontfeature{Microtype}{protrusion=default;expansion=default;}
\begin{document}
\bcode{lo/gos}
\end{document}
In case the script does not work: would it be possible to convert all the strings within the \bcode-Makro with something like regex? For example the "o/" to the ό and so on? What would be the weapon of choice here?
Do I have python installed?
Try python -V at a shell prompt. Your code is Python 2 code, so you will need a Python 2 interpreter.
I need to install Python
Most straight forward way if you don't need a complex environment (and you don't for this problem) is just to go to python.org. Don't forget you need python 2.
Running the program
Generally it will be as simple as:
python beta2unicode.py myfile.tex-file
And to capture the output:
python beta2unicode.py myfile.tex-file > myfile.not-tex-file
Does the script work?
Almost. You will need to replace the code at the end of the script that starts the same way this does, with this:
import io
import re
import sys

t = beta2unicodeTrie()

# Matches one \bcode{...} macro call; the braces' content is the Beta Code.
BCODE = re.compile(r'\\bcode{[^}]*}')

# io.open yields text lines on both Python 2 and Python 3.
with io.open(sys.argv[1], encoding="utf-8") as tex_file:
    for line in tex_file:
        for match in BCODE.findall(line):
            bcode = match[7:-1]  # strip the leading "\bcode{" and trailing "}"
            # The trailing "\n" marks the end of the word: the table only
            # produces a final sigma for "S\n" (or S before punctuation).
            a, b = t.convert(bcode.upper() + "\n")
            if b:
                raise IOError("failed conversion '%s' in '%s'" % (b.rstrip("\n"), line))
            line = line.replace(match, a)
        out = line.rstrip()
        if sys.version_info[0] < 3:
            # Python 2's print needs encoded bytes when stdout is redirected.
            out = out.encode("utf-8")
        print(out)
Results
\documentclass[12pt]{scrbook}
\usepackage[polutonikogreek, ngerman]{babel}
\usepackage[ngerman]{betababel}
\usepackage{fontspec}
%\defaultfontfeatures{Ligatures=TeX}
%\newfontfeature{Microtype}{protrusion=default;expansion=default;}
\begin{document}
λόγοσ
\end{document}
I am writing a YAML file using https://pypi.python.org/pypi/ruamel.yaml
The code is like this:
import ruamel.yaml
from ruamel.yaml.comments import CommentedSeq
d = {}
for m in ['B1', 'B2', 'B3']:
d2 = {}
for f in ['A1', 'A2', 'A3']:
d2[f] = CommentedSeq(['test', 'test2'])
if f != 'A2':
d2[f].fa.set_flow_style()
d[m] = d2
with open('test.yml', "w") as f:
ruamel.yaml.dump(
d, f, Dumper=ruamel.yaml.RoundTripDumper,
default_flow_style=False, width=50, indent=8)
I just want to add comment at the top like:
# Data for Class A
Before the YAML data.
Within your with block, you can write anything you want to the file. Since you just need a comment at the top, add a call to f.write() before you call ruamel:
with open('test.yml', "w") as f:
f.write('# Data for Class A\n')
ruamel.yaml.dump(
d, f, Dumper=ruamel.yaml.RoundTripDumper,
default_flow_style=False, width=50, indent=8)
That is possible in principle, because you can round-trip such "start-of-file" comments, but it is not nicely supported in the current ruamel.yaml 0.10 and certainly not when "starting from scratch" (i.e. not changing an existing file). At the bottom is an easy and relatively nice solution, but I would first like to present an ugly workaround and a step-by-step explanation of how to get this done.
Ugly:
The ugly way to do this is to just add the comment to the file before you write the YAML data to it. That is insert:
f.write('# Data for Class A\n')
just before ruamel.yaml.dump(...)
Step by step:
To insert the comment on the data structure, so that the above hack is not necessary, you first
need to make sure your d data is a CommentedMap type. You can then
compare that d variable with one that has the comment, by loading the commented YAML back into c:
import ruamel.yaml
from ruamel.yaml.comments import Comment, CommentedSeq, CommentedMap
d = CommentedMap() # <<<<< most important
for m in ['B1', 'B2', 'B3']:
d2 = {}
for f in ['A1', 'A2', 'A3']:
d2[f] = CommentedSeq(['test', 'test2'])
if f != 'A2':
d2[f].fa.set_flow_style()
d[m] = d2
yaml_str = ruamel.yaml.dump(d, Dumper=ruamel.yaml.RoundTripDumper,
default_flow_style=False, width=50, indent=8)
assert not hasattr(d, Comment.attrib) # no attribute on the CommentedMap
comment = 'Data for Class A'
commented_yaml_str = '# ' + comment + '\n' + yaml_str
c = ruamel.yaml.load(commented_yaml_str, Loader=ruamel.yaml.RoundTripLoader)
assert hasattr(c, Comment.attrib) # c has the attribute
print c.ca # and this is what it looks like
print d.ca # accessing comment attribute creates it empty
assert hasattr(d, Comment.attrib) # now the CommentedMap has the attribute
This prints:
Comment(comment=[None, [CommentToken(value=u'# Data for Class A\n')]],
items={})
Comment(comment=None,
items={})
A Comment has an attribute comment that needs to be set to a 2 element list that consist of the EOL comment (always only one) and a list of preceding line comments (in the form of CommentTokens)
To create a CommentToken you need a (fake) StartMark that tells which column it starts:
from ruamel.yaml.error import StreamMark
start_mark = StreamMark(None, None, None, 0, None, None) # column 0
Now you can create the token:
from ruamel.yaml.tokens import CommentToken
ct = CommentToken('# ' + comment + '\n', start_mark, None)
Assign the token as the first element of the preceding list on your CommentedMap:
d.ca.comment = [None, [ct]]
print d.ca # in case you want to check
gives you:
Comment(comment=[None, [CommentToken(value='# Data for Class A\n')]],
items={})
And finally:
print ruamel.yaml.dump(d, Dumper=ruamel.yaml.RoundTripDumper)
gives:
# Data for Class A
B1:
A1: [test, test2]
A3: [test, test2]
A2:
- test
- test2
B2:
A1: [test, test2]
A3: [test, test2]
A2:
- test
- test2
B3:
A1: [test, test2]
A3: [test, test2]
A2:
- test
- test2
Of course you don't need to create the c object, that is just for illustration.
What you should use:
To make the whole exercise somewhat easier you can just forget about the details and patch in the following method to CommentedBase once:
from ruamel.yaml.comments import CommentedBase
def set_start_comment(self, comment, indent=0):
    """overwrites any preceding comment lines on an object
    expects comment to be without `#` and possibly have multiple lines

    comment: the comment text; one trailing newline, if present, is ignored
    indent: column at which the comment lines start
    """
    from ruamel.yaml.error import StreamMark
    from ruamel.yaml.tokens import CommentToken
    # self.ca.comment is a two-element list: [EOL comment, preceding lines].
    # Install a fresh list of preceding lines so existing ones are replaced.
    pre_comments = []
    if self.ca.comment is None:
        self.ca.comment = [None, pre_comments]
    else:
        self.ca.comment[1] = pre_comments
    if comment.endswith('\n'):
        comment = comment[:-1]  # strip final newline if there
    start_mark = StreamMark(None, None, None, indent, None, None)
    for com in comment.split('\n'):
        pre_comments.append(CommentToken('# ' + com + '\n', start_mark, None))
if not hasattr(CommentedBase, 'set_start_comment'): # in case it is there
CommentedBase.set_start_comment = set_start_comment
and then just do:
d.set_start_comment('Data for Class A')