Locale-aware string formatting using .format() syntax - python

Is there a (general) way to do locale-aware string formatting in Python using the .format() {:} syntax? I know of locale.format_string(), but this only accepts the old % syntax. {:n} exists, but only works as a replacement of {:d}, not for the other formats.
My current approach is below, which I expect will break for most non-trivial cases.
import locale
import string
class LocaleFormatter(string.Formatter):
def format_field(self, value, format_spec):
if format_spec[-1] not in 'eEfFgGdiouxXcrs': # types copied from locale._percent_re
return super().format_field(value, format_spec)
grouping = ',' in format_spec or '_' in format_spec
format_spec = '%' + format_spec.replace(',', '').replace('_', '')
return locale.format_string(format_spec, value, grouping)
locale.setlocale(locale.LC_ALL, '')
fmt = LocaleFormatter()
fmt.format("Length: {:,.2f} mm, width: {:,.2f} mm", 1234.56, 7890) # expected format is 1.234,56 for most locales

You can achieve what you want converting the float to decimal and setting the precision as well as manually adding leading spaces if needed:
import decimal
import locale
import re
import string
class LocaleFormatter(string.Formatter):
def format_field(self, value, format_spec):
if format_spec[-1] not in 'eEfFgGdiouxXcrs': # types copied from locale._percent_re
return super().format_field(value, format_spec)
grouping = ',' in format_spec or '_' in format_spec
prec_re = re.match(r',?(?P<spec>(?P<width>\d+)?(.(?P<precision>\d+))?)?[eEfFgGdiouxXcrs]', format_spec)
if prec_re is not None and prec_re.group('spec') is not None:
space_len = prec_re.group('width')
after_dot = prec_re.group('precision')
if after_dot is not None:
pre_dot_value_len = len(str(int(value)))
ctx = decimal.Context(prec=int(after_dot) + pre_dot_value_len)
# prec turned out to be the length of the decimal repr, not precision
value = ctx.create_decimal(value)
if space_len is not None:
after_dot = 0 if after_dot is None else int(after_dot)
pre_dot = len(str(value))
how_many = pre_dot - after_dot - 1 # -1 for the dot character
if how_many > 0:
format_spec = how_many * ' ' + format_spec
format_spec = '%' + format_spec.replace(',', '').replace('_', '')
return locale.format_string(format_spec, value, grouping)
locale.setlocale(locale.LC_ALL, 'DE-DE')
fmt = LocaleFormatter()
res = fmt.format("Length: {:,.2f} mm, width: {:,2f} mm", 1234.567878, 7890) # expected format is 1.234,56 for most locales
print(res)
Which results in:
Length: 1.234,57 mm, width: 7.890,000000 mm
Please note that the value you suggested as the correct one after formatting is not properly rounded. The one above - is.

Related

How can I parse Python's triple-quote f-strings?

I have this code that parses and processes normal "f-string" template strings (See the usage part below for an example):
from string import Formatter
import sys
_conversions = {'a': ascii, 'r': repr, 's': str}
def z(template, locals_=None):
if locals_ is None:
previous_frame = sys._getframe(1)
previous_frame_locals = previous_frame.f_locals
locals_ = previous_frame_locals
# locals_ = globals()
result = []
parts = Formatter().parse(template)
for part in parts:
literal_text, field_name, format_spec, conversion = part
if literal_text:
result.append(literal_text)
if not field_name:
continue
value = eval(field_name, locals_) #.__format__()
if conversion:
value = _conversions[conversion](value)
if format_spec:
value = format(value, format_spec)
else:
value = str(value)
result.append(value)
res = ''.join(result)
return res
Usage:
a = 'World'
b = 10
z('Hello {a} --- {a:^30} --- {67+b} --- {a!r}')
# "Hello World --- World --- 77 --- 'World'"
But it doesn't work if the template string is something like this:
z('''
echo monkey {z("curl -s https://www.poemist.com/api/v1/randompoems | jq --raw-output '.[0].content'")} end | sed -e 's/monkey/start/'
echo --------------
''')
It gives this error:
File "<string>", line 1
z("curl -s https
^
SyntaxError: EOL while scanning string literal
I am willing to even copy code from Python's source code to get this to work, if it's not possible normally.
Thanks to the tip by #ForceBru, I finished this. The following code parses and processes source tripe-quote f-strings: (Ignore the process parts)
_conversions = {'a': ascii, 'r': repr, 's': str}
def zstring(self, template, locals_=None, getframe=1):
if locals_ is None:
previous_frame = sys._getframe(getframe)
previous_frame_locals = previous_frame.f_locals
locals_ = previous_frame_locals
def asteval(astNode):
if astNode is not None:
return eval(compile(ast.Expression(astNode), filename='<string>', mode='eval'), locals_)
else:
return None
def eatFormat(format_spec, code):
res = False
if format_spec:
flags = format_spec.split(':')
res = code in flags
format_spec = list(filter(lambda a: a != code,flags))
return ':'.join(format_spec), res
p = ast.parse(f"f'''{template}'''")
result = []
parts = p.body[0].value.values
for part in parts:
typ = type(part)
if typ is ast.Str:
result.append(part.s)
elif typ is ast.FormattedValue:
# print(part.__dict__)
value = asteval(part.value)
conversion = part.conversion
if conversion >= 0:
# parser doesn't support custom conversions
conversion = chr(conversion)
value = self._conversions[conversion](value)
format_spec = asteval(part.format_spec) or ''
# print(f"orig format: {format_spec}")
format_spec, fmt_eval = eatFormat(format_spec, 'e')
format_spec, fmt_bool = eatFormat(format_spec, 'bool')
# print(f"format: {format_spec}")
if format_spec:
value = format(value, format_spec)
if fmt_bool:
value = boolsh(value)
value = str(value)
if not fmt_eval:
value = self.zsh_quote(value)
result.append(value)
cmd = ''.join(result)
return cmd

Code conversion from Python to Lua almost completed

I found a Python script that I'm trying to convert to Lua. I believe I have it just about converted, but the code isn't quite working properly, so I need assistance as I do not know Python at all, and can only guess at the intentions. This is merely a color converter to convert RGB color to xterm 256. The table is quite huge, so I've truncated it for ease of reading.
Python code:
import sys, re
CLUT = [ # color look-up table
# 8-bit, RGB hex
# Primary 3-bit (8 colors). Unique representation!
('00', '000000'),
('01', '800000'),
('02', '008000'),
('03', '808000'),
('04', '000080'),
('05', '800080'),
('06', '008080'),
('07', 'c0c0c0'),
]
def _str2hex(hexstr):
return int(hexstr, 16)
def _strip_hash(rgb):
# Strip leading `#` if exists.
if rgb.startswith('#'):
rgb = rgb.lstrip('#')
return rgb
def _create_dicts():
short2rgb_dict = dict(CLUT)
rgb2short_dict = {}
for k, v in short2rgb_dict.items():
rgb2short_dict[v] = k
return rgb2short_dict, short2rgb_dict
def short2rgb(short):
return SHORT2RGB_DICT[short]
def print_all():
""" Print all 256 xterm color codes.
"""
for short, rgb in CLUT:
sys.stdout.write('\033[48;5;%sm%s:%s' % (short, short, rgb))
sys.stdout.write("\033[0m ")
sys.stdout.write('\033[38;5;%sm%s:%s' % (short, short, rgb))
sys.stdout.write("\033[0m\n")
print "Printed all codes."
print "You can translate a hex or 0-255 code by providing an argument."
def rgb2short(rgb):
""" Find the closest xterm-256 approximation to the given RGB value.
#param rgb: Hex code representing an RGB value, eg, 'abcdef'
#returns: String between 0 and 255, compatible with xterm.
>>> rgb2short('123456')
('23', '005f5f')
>>> rgb2short('ffffff')
('231', 'ffffff')
>>> rgb2short('0DADD6') # vimeo logo
('38', '00afd7')
"""
rgb = _strip_hash(rgb)
incs = (0x00, 0x5f, 0x87, 0xaf, 0xd7, 0xff)
# Break 6-char RGB code into 3 integer vals.
parts = [ int(h, 16) for h in re.split(r'(..)(..)(..)', rgb)[1:4] ]
res = []
for part in parts:
i = 0
while i < len(incs)-1:
s, b = incs[i], incs[i+1] # smaller, bigger
if s <= part <= b:
s1 = abs(s - part)
b1 = abs(b - part)
if s1 < b1: closest = s
else: closest = b
res.append(closest)
break
i += 1
#print '***', res
res = ''.join([ ('%02.x' % i) for i in res ])
equiv = RGB2SHORT_DICT[ res ]
#print '***', res, equiv
return equiv, res
RGB2SHORT_DICT, SHORT2RGB_DICT = _create_dicts()
#---------------------------------------------------------------------
if __name__ == '__main__':
import doctest
doctest.testmod()
if len(sys.argv) == 1:
print_all()
raise SystemExit
arg = sys.argv[1]
if len(arg) < 4 and int(arg) < 256:
rgb = short2rgb(arg)
sys.stdout.write('xterm color \033[38;5;%sm%s\033[0m -> RGB exact \033[38;5;%sm%s\033[0m' % (arg, arg, arg, rgb))
sys.stdout.write("\033[0m\n")
else:
short, rgb = rgb2short(arg)
sys.stdout.write('RGB %s -> xterm color approx \033[38;5;%sm%s (%s)' % (arg, short, short, rgb))
sys.stdout.write("\033[0m\n")
And my nearly complete translated Lua code:
CLUT = {
-- Primary 3-bit (8 colors). Unique representation!
['00'] = '000000',
['01'] = '800000',
['02'] = '008000',
['03'] = '808000',
['04'] = '000080',
['05'] = '800080',
['06'] = '008080',
['07'] = 'c0c0c0',
}
function _str2hex(hexstr)
return tonumber(hexstr, 16)
end
function _strip_hash(rgb)
-- Strip leading # if exists
return rgb:gsub("^#", "")
end
function _create_dicts()
short2rgb_dict = CLUT
rgb2short_dict = {}
for k,v in pairs(short2rgb_dict) do
rgb2short_dict[v] = k
end
return rgb2short_dict, short2rgb_dict
end
function short2rgb(short)
return short2rgb_dict[short]
end
function rgb2short(rgb)
-- Find closest xterm-256 approximation to the given RGB value
_create_dicts()
rgb = _strip_hash(rgb)
local res = ""
local equiv = ""
local incs = {"0x00", "0x5f", "0x87", "0xaf", "0xd7", "0xff"}
for part in string.gmatch(rgb, "(..)") do
part = tonumber(part, 16)
i = 1
while i < #incs - 1 do
s, b = tonumber(incs[i]), tonumber(incs[i+1])
if s <= part and part <= b then
s1 = math.abs(s - part)
b1 = math.abs(b - part)
end
if s1 < b1 then
closest = s
else
closest = b
res = res .. closest
break
end
i = i + 1
end
end
equiv = rgb2short_dict[res]
return equiv, res
end
I realize that I'm missing the printing portion of the code, but I wasn't sure if that was at all relevant, and I know some of the code I've translated is not correct at all, as the script would be working otherwise. The failures I get are with the rgb2short function with it not returning the proper equiv and res values. How far off am I with my revision? What changes do I need to make to make it absolutely work?
I wound up figuring it out on my own after some hardcore trial and error. The function rgb2short should have been:
function rgb2short(rgb)
-- Find closest xterm-256 approximation to the given RGB value
_create_dicts()
rgb = _strip_hash(rgb)
local res = ""
local equiv = ""
local incs = {"0x00", "0x5f", "0x87", "0xaf", "0xd7", "0xff"}
for part in string.gmatch(rgb, "(..)") do
part = tonumber(part, 16)
i = 1
while i < #incs-1 do
s, b = tonumber(incs[i]), tonumber(incs[i+1])
if s <= part and part <= b then
s1 = math.abs(s - part)
b1 = math.abs(b - part)
--break
--end
if s1 < b1 then
closest = s
else
closest = b
end
res = res .. string.format("%02x", closest)
break
end
i = i + 1
end
end
equiv = rgb2short_dict[res]
return equiv, res
end

How to print actual unicode characters in python dict or list [duplicate]

If you have a string as below, with unicode chars, you can print it, and get the unescaped version:
>>> s = "äåö"
>>> s
'\xc3\xa4\xc3\xa5\xc3\xb6'
>>> print s
äåö
but if we have a list containing the string above and print it:
>>> s = ['äåö']
>>> s
['\xc3\xa4\xc3\xa5\xc3\xb6']
>>> print s
['\xc3\xa4\xc3\xa5\xc3\xb6']
You still get escaped character sequences. How do you go about to get the content of the list unescaped, is it possible? Like this:
>>> print s
['äåö']
Also, if the strings are of the unicode type, how do you go about doing the same as above?
>>> s = u'åäö'
>>> s
u'\xe5\xe4\xf6'
>>> print s
åäö
>>> s = [u'åäö']
>>> s
[u'\xe5\xe4\xf6']
>>> print s
[u'\xe5\xe4\xf6']
When you print a string, you get the output of the __str__ method of the object - in this case the string without quotes. The __str__ method of a list is different, it creates a string containing the opening and closing [] and the string produced by the __repr__ method of each object contained within. What you're seeing is the difference between __str__ and __repr__.
You can build your own string instead:
print '[' + ','.join("'" + str(x) + "'" for x in s) + ']'
This version should work on both Unicode and byte strings in Python 2:
print u'[' + u','.join(u"'" + unicode(x) + u"'" for x in s) + u']'
Is this satisfactory?
>>> s = ['äåö', 'äå']
>>> print "\n".join(s)
äåö
äå
>>> print ", ".join(s)
äåö, äå
>>> s = [u'åäö']
>>> print ",".join(s)
åäö
In Python 2.x the default is what you're experiencing:
>>> s = ['äåö']
>>> s
['\xc3\xa4\xc3\xa5\xc3\xb6']
In Python 3, however, it displays properly:
>>> s = ['äåö']
>>> s
['äåö']
Another solution
s = ['äåö', 'äå']
encodedlist=', '.join(map(unicode, s))
print(u'[{}]'.format(encodedlist).encode('UTF-8'))
gives
[äåö, äå]
One can use this wrapper class:
#!/usr/bin/python
# -*- coding: utf-8 -*-
class ReprToStrString(str):
def __repr__(self):
return "'" + self.__str__() + "'"
class ReprToStr(object):
def __init__(self, printable):
if isinstance(printable, str):
self._printable = ReprToStrString(printable)
elif isinstance(printable, list):
self._printable = list([ReprToStr(item) for item in printable])
elif isinstance(printable, dict):
self._printable = dict(
[(ReprToStr(key), ReprToStr(value)) for (key, value) in printable.items()])
else:
self._printable = printable
def __repr__(self):
return self._printable.__repr__()
russian1 = ['Валенки', 'Матрёшка']
print russian1
# Output:
# ['\xd0\x92\xd0\xb0\xd0\xbb\xd0\xb5\xd0\xbd\xd0\xba\xd0\xb8', '\xd0\x9c\xd0\xb0\xd1\x82\xd1\x80\xd1\x91\xd1\x88\xd0\xba\xd0\xb0']
print ReprToStr(russian1)
# Output:
# ['Валенки', 'Матрёшка']
russian2 = {'Валенки': 145, 'Матрёшка': 100500}
print russian2
# Output:
# {'\xd0\x92\xd0\xb0\xd0\xbb\xd0\xb5\xd0\xbd\xd0\xba\xd0\xb8': 145, '\xd0\x9c\xd0\xb0\xd1\x82\xd1\x80\xd1\x91\xd1\x88\xd0\xba\xd0\xb0': 100500}
print ReprToStr(russian2)
# Output:
# {'Матрёшка': 100500, 'Валенки': 145}

Python : How to translate?

the program is when user input"8#15#23###23#1#19###9#20"
output should be "HOW WAS IT"
However,it could not work to show space(###).
enter code here
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
def from_abstract(s):
result = ''
for word in s.split('*'):
result = result +ABSTRACT_SHIFTED.get(word)
return result
This would do the trick:
#!/usr/bin/env python
InputString = "8#15#23###23#1#19###9#20"
InputString = InputString.replace("###", "##")
InputString = InputString.split("#")
DecodedMessage = ""
for NumericRepresentation in InputString:
if NumericRepresentation == "":
NumericRepresentation = " "
DecodedMessage += NumericRepresentation
continue
else:
DecodedMessage += chr(int(NumericRepresentation) + 64)
print(DecodedMessage)
Prints:
HOW WAS IT
you can also use a regex
import re
replacer ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
reversed = {value:key for key,value in replacer.items()}
# Reversed because regex is greedy and it will match 1 before 15
target = '8#15#23###23#1#19###9#20'
pattern = '|'.join(map(lambda x: x + '+', list(reversed.keys())[::-1]))
repl = lambda x: reversed[x.group(0)]
print(re.sub(pattern, string=target, repl=repl))
And prints:
HOW WAS IT
With a couple minimal changes to your code it works.
1) split on '#', not '*'
2) retrieve ' ' by default if a match isn't found
3) use '##' instead of '###'
def from_abstract(s):
result = ''
for word in s.replace('###','##').split('#'):
result = result +ABSTRACT_SHIFTED.get(word," ")
return result
Swap the key-value pairs of ABSTRACT and use simple split + join on input
ip = "8#15#23###23#1#19###9#20"
ABSTRACT = dict((v,k) for k,v in ABSTRACT.items())
''.join(ABSTRACT.get(i,' ') for i in ip.split('#')).replace(' ', ' ')
#'HOW WAS IT'
The biggest challenge here is that "#" is used as a token separator and as the space character, you have to know the context to tell which you've got at any given time, and that makes it difficult to simply split the string. So write a simple parser. This one will accept anything as the first character in a token and then grab everything until it sees the next "#".
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
user_input = "8#15#23###23#1#19###9#20"
def from_abstract(s):
result = []
while s:
print 'try', s
# tokens are terminated with #
idx = s.find("#")
# ...except at end of line
if idx == -1:
idx = len(s) - 1
token = s[:idx]
s = s[idx+1:]
result.append(ABSTRACT_SHIFTED.get(token, ' '))
return ''.join(result)
print from_abstract(user_input)

Converting exponential to float

This is my code, trying to convert the second field of the line from exponential into float.
outputrrd = processrrd.communicate()
(output, error) = outputrrd
output_lines = output.split('\n')
for line in output_lines:
m = re.search(r"(.*): ", line)
if m != None:
felder = line.split(': ')
epoch = felder[0].strip(':')
utc = epoch2normal(epoch).strip("\n")
#print felder[1]
data = float(felder[1])
float_data = data * 10000000
print float_data
resultslist.append( utc + ' ' + hostname + ' ' + float_data)
But, the program stops with this error:
File "/opt/omd/scripts/python/livestatus/rrdfetch-convert.py", line 156, in <module>
data = float(felder[1])
ValueError: invalid literal for float(): 6,0865000000e-01
Does anyone know the reason?
The easy way is replace! One simple example:
value=str('6,0865000000e-01')
value2=value.replace(',', '.')
float(value2)
0.60865000000000002
The reason is the use of comma in 6,0865000000e-01. This won't work because float() is not locale-aware. See PEP 331 for details.
Try locale.atof(), or replace the comma with a dot.
The float is correct, just use format to display it as you want, i.e.:
print(format(the_float, '.8f'))
I think it is useful to you:
def remove_exponent(value):
"""
>>>(Decimal('5E+3'))
Decimal('5000.00000000')
"""
decimal_places = 8
max_digits = 16
if isinstance(value, decimal.Decimal):
context = decimal.getcontext().copy()
context.prec = max_digits
return "{0:f}".format(value.quantize(decimal.Decimal(".1") ** decimal_places, context=context))
else:
return "%.*f" % (decimal_places, value)
Simply by casting string into float:
new_val = float('9.81E7')
This work for me, try it out.
def remove_exponent(value):
decial = value.split('e')
ret_val = format(((float(decial[0]))*(10**int(decial[1]))), '.8f')
return ret_val
I had a similar issue trying to convert from string in scientific/exponential notation to a float number (that can result also in exponential notation if too long)
num = '-8e-05'
def scientific_to_float(exponential):
split_word = 'e'
e_index = exponential.index('e')
base = float(exponential[:e_index])
exponent = float(exponential[e_index + 1:])
float_number = base * (10 ** exponent)
return float_number
scientific_to_float(num) # return -8e-05 float number

Categories

Resources