Python textwrap.dedent with comment does not work as expected - python

I'm using Python 3.6 on visual studio.
Below is python interactive log
>>> from textwrap import dedent
>>> dedent("\ta = 4\n")
'a = 4\n'
>>> dedent("\ta = 4\n#")
'\ta = 4\n#'
>>> dedent("\ta = 4\n\t#")
'a = 4\n#'
>>> dedent("\ta = 4\n\t\n#")
'\ta = 4\n\n#'
>>> dedent("\ta = 4\n\t\n\t#")
'a = 4\n\n#'
>>> dedent("\ta = 4\n\t#\n\t")
'a = 4\n#\n'
>>> dedent("\ta = 4\n\t\n#\t")
'\ta = 4\n\n#\t'
>>>
dedent properly removes the leading tab in the first case, but when I add a '#' line, the leading tab is no longer removed. Is this intended?

textwrap.dedent() assumes its argument is plain text, not Python source code. It doesn't interpret "#" as a comment.
These two examples will behave similarly:
>>> dedent("\ta = 4\n#")
'\ta = 4\n#'
>>> dedent("\ta = 4\nx")
'\ta = 4\nx'

Related

Python Case Insensitive Replace All of multiple strings

I want to replace all occurrences of a set of strings in a text line. I came up with this approach, but I am sure there is a better way of doing this:
# Map each replacement word to a compiled, case-insensitive pattern for the
# word it should replace. re.escape keeps the search word literal.
myDict = {}
test = re.compile(re.escape('pig'), re.IGNORECASE)
myDict['car'] = test
test = re.compile(re.escape('horse'), re.IGNORECASE)
myDict['airplane'] = test
test = re.compile(re.escape('cow'), re.IGNORECASE)
myDict['bus'] = test
mystring = 'I have this Pig and that pig with a hOrse and coW'
# Apply each substitution in turn; every pass rewrites the whole string.
for key, regex_obj in myDict.items():
    mystring = regex_obj.sub(key, mystring)
# The parenthesised form prints the same text on Python 2 and Python 3.
print(mystring)
I have this car and that car with a airplane and bus
Based on @Paul Rooney's answer below, ideally I would do this:
def init_regex():
    """Build the table of case-insensitive replacement patterns.

    Returns:
        A dict mapping each replacement word to a compiled pattern that
        matches, ignoring case, the word it should replace.
    """
    rd = {'pig': 'car', 'horse': 'airplane', 'cow': 'bus'}
    # re.escape keeps each search word literal even if it contains regex
    # metacharacters; items() works on both Python 2 and Python 3.
    return {
        value: re.compile(re.escape(key), re.IGNORECASE)
        for key, value in rd.items()
    }
def strrep(mystring, patternDict):
    """Return ``mystring`` with every pattern in ``patternDict`` substituted.

    Args:
        mystring: The text to transform.
        patternDict: Mapping of replacement text to a compiled pattern.

    Returns:
        The text after all substitutions have been applied in turn.
    """
    # Each key is passed to sub() as the replacement, so the usual regex
    # replacement-template rules (backslash escapes) apply to it.
    for replacement, regex_obj in patternDict.items():
        mystring = regex_obj.sub(replacement, mystring)
    return mystring
Try
import itertools
import re
mystring = 'I have this Pig and that pig with a hOrse and coW'
rd = {'pig': 'car', 'horse':'airplane', 'cow':'bus'}
# Compiled patterns, keyed by the search string they were built from, so each
# pattern is compiled at most once across calls.
cachedict = {}


def strrep(orig, repdict):
    """Replace, ignoring case, every key of ``repdict`` found in ``orig``.

    Args:
        orig: The text to transform.
        repdict: Mapping of search string to replacement text.

    Returns:
        The text after all substitutions have been applied in turn.
    """
    # items() works on both Python 2 and Python 3.
    for k, v in repdict.items():
        pattern = cachedict.get(k)
        if pattern is None:
            # NOTE: the key is compiled as a regular expression as-is (it is
            # not escaped), so regex metacharacters in a key are significant.
            pattern = cachedict[k] = re.compile(k, re.IGNORECASE)
        orig = pattern.sub(v, orig)
    return orig
print strrep(mystring, rd)
This answer was initially written for python2, but for python 3 you would use repdict.items instead of repdict.iteritems.

pattern to dictionary of lists Python

I have a file like this
module1 instance1(.wire1 (connectionwire1), .wire2 (connectionwire2),.... ,wire100 (connectionwire100)) ; module 2 instance 2(.wire1 (newconnectionwire1), .wire2 (newconnectionwire2),.... ,wire99 (newconnectionwire99))
The wires are repeated across modules. There can be many modules.
I want to build a dictionary like this (not every wire in 2nd module is a duplicate).
{wire1: [(module1, instance1, connectionwire1), (module2, instance2, newconnectionwire1)], wire2: [(module1, instance1, connectionwire2), (module2, instance2, newconnectionwire2)], ..., wire99: [(module2, instance2, newconnectionwire99)]}
I am splitting the string on ; then splitting on , and then ( to get wire and connectionwire strings . I am not sure how to fill the data structure though so the wire is the key and module, instancename and connection are elements.
Goal- get this datastructure- [ wire: (module, instance, connectionwire) ]
# Read the whole input and collect the position of every ';'.
# NOTE(review): `file` and `find_pos` are not defined in this snippet;
# find_pos presumably yields the indices of ';' in the text — confirm.
filedata=file.read()
realindex=list(find_pos(filedata,';'))
tempindex=0
for l in realindex:
# Text of one module statement: from the previous split position up to this ';'.
module=filedata[tempindex:l]
# First whitespace-separated token is taken as the module name.
modulename=module.split()[0]
openbracketindex=module.find("(")
# NOTE(review): closebracketindex is assigned but never used in the visible code.
closebracketindex=module.strip("\n").find(");")
# Second token before the first "(" is taken as the instance name.
instancename=module[:openbracketindex].split()[1]
tempindex=l
# NOTE(review): `l` is an index into filedata, but it is used here to slice
# `module`, which is itself a slice of filedata — verify this is intended.
tempwires=module[openbracketindex:l+1]
#got to split wires on commas
for tempw in tempwires.split(","):
wires=tempw
# NOTE(review): listofwires is not defined in the visible code.
listofwires.append(wires)
Using the re module.
import re
from collections import defaultdict
# Sample input. The last wire ("wire99") has no leading dot, so wire_pattern
# below does not pick it up.
s = "module1 instance1(.wire1 (connectionwire1), .wire2 (connectionwire2), .wire100 (connectionwire100)) ; module2 instance2(.wire1 (newconnectionwire1), .wire2 (newconnectionwire2), wire99 (newconnectionwire99))"
# wire name -> list of (module, instance, connection) tuples
d = defaultdict(list)
# module name, instance name, then everything up to the next ';'
module_pattern = r'(\w+)\s(\w+)\(([^;]+)'
mod_rex = re.compile(module_pattern)
# ".wire (connection" : wire name, then the text inside the parentheses
wire_pattern = r'\.(\w+)\s\(([^\)]+)'
wire_rex = re.compile(wire_pattern)
for mod_match in mod_rex.finditer(s):
    module, instance, wires = mod_match.groups()
    # Separate name for the inner match so the outer one is not shadowed.
    for wire_match in wire_rex.finditer(wires):
        wire, connection = wire_match.groups()
        d[wire].append((module, instance, connection))
for k, v in d.items():
    # %-formatting gives the same "key : value" line on Python 2 and 3.
    print('%s : %s' % (k, v))
Produces
wire1 : [('module1', 'instance1', 'connectionwire1'), ('module2', 'instance2', 'newconnectionwire1')]
wire2 : [('module1', 'instance1', 'connectionwire2'), ('module2', 'instance2', 'newconnectionwire2')]
wire100 : [('module1', 'instance1', 'connectionwire100')]
Answer provided by wwii using re is correct. I'm sharing an example of how you can solve your problem using pyparsing module which makes parsing human readable and easy to do.
from pyparsing import Word, alphanums, Optional, ZeroOrMore, Literal, Group, OneOrMore
from collections import defaultdict
# Sample input: two module instantiations separated by ';'.
s = 'module1 instance1(.wire1 (connectionwire1), .wire2 (connectionwire2), .wire100 (connectionwire100)) ; module2 instance2(.wire1 (newconnectionwire1), .wire2 (newconnectionwire2), .wire99 (newconnectionwire99))'
# Every identifier is a run of alphanumeric characters.
connection = Word(alphanums)
wire = Word(alphanums)
module = Word(alphanums)
instance = Word(alphanums)
# Punctuation is matched but suppressed from the parse results.
dot = Literal(".").suppress()
comma = Literal(",").suppress()
lparen = Literal("(").suppress()
rparen = Literal(")").suppress()
semicolon = Literal(";").suppress()
# ".wire (connection)" optionally followed by a comma
wire_connection = Group(dot + wire("wire") + lparen + connection("connection") + rparen + Optional(comma))
wire_connections = Group(OneOrMore(wire_connection))
# "module instance( <wire connections> )" optionally followed by ';'
module_instance = Group(module("module") + instance("instance") + lparen + ZeroOrMore(wire_connections("wire_connections")) + rparen + Optional(semicolon))
module_instances = OneOrMore(module_instance)
results = module_instances.parseString(s)
# create a dict: wire name -> list of (module, instance, connection)
d = defaultdict(list)
for r in results:
    m = r['module']
    i = r['instance']
    for wc in r['wire_connections']:
        w = wc['wire']
        c = wc['connection']
        d[w].append((m, i, c))
print(d)
Output:
defaultdict(<type 'list'>, {'wire1': [('module1', 'instance1', 'connectionwire1'), ('module2', 'instance2', 'newconnectionwire1')], 'wire2': [('module1', 'instance1', 'connectionwire2'), ('module2', 'instance2', 'newconnectionwire2')], 'wire100': [('module1', 'instance1', 'connectionwire100')], 'wire99': [('module2', 'instance2', 'newconnectionwire99')]})

How to replace text in curly brackets with another text based on comparisons using Python Regex

I am quite new to regular expressions. I have a string that looks like this:
str = "abc/def/([default], [testing])"
and a dictionary
dict = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
and using Python RE, I want str in this form, after comparisons of each element in dict to str:
str = "abc/def/(2.7, 2.1)"
Any help how to do it using Python RE?
P.S. This is not part of any assignment; it is part of my project at work, and I have spent many hours trying to figure out a solution, but in vain.
import re
st = "abc/def/([default], [testing], [something])"
dic = {'abc/def/[default]': '2.7',
       'abc/def/[testing]': '2.1',
       'bcd/xed/[something]': '3.1'}
# Raw strings keep the backslashes for the regex engine.
prefix_regex = r"^[\w*/]*"   # leading path, e.g. "abc/def/"
tag_regex = r"\[\w*\]"       # one bracketed tag, e.g. "[default]"
prefix = re.findall(prefix_regex, st)[0]
tags = re.findall(tag_regex, st)
for key, value in dic.items():
    key_prefix = re.findall(prefix_regex, key)[0]
    key_tag = re.findall(tag_regex, key)[0]
    # Replace a tag only when the dictionary key has the same prefix as the
    # string and its tag actually occurs in the string.
    if prefix == key_prefix and key_tag in tags:
        st = st.replace(key_tag, value)
print(st)
OUTPUT:
abc/def/(2.7, 2.1, [something])
Here is a solution using re module.
Hypotheses :
there is a dictionary whose keys are composed of a prefix and a variable part, the variable part is enclosed in brackets ([])
the values are strings by which the variable parts are to be replaced in the string
the string is composed by a prefix, a (, a list of variable parts and a )
the variable parts in the string are enclosed in []
the variable parts in the string are separated by a comma followed by optional spaces
Python code :
import re
class splitter:
    """Replace the bracketed identifiers of a string using a lookup dict.

    The input is expected to look like ``prefix([a], [b], ...)``; each
    ``prefix + "[x]"`` is looked up in the dictionary given at construction.
    """

    # Everything before the first "(" is the prefix.
    pref = re.compile(r"[^(]+")
    # One bracketed identifier, e.g. "[default]".
    iden = re.compile(r"\[[^]]*\]")

    def __init__(self, d):
        """Store ``d``, the mapping of ``prefix + "[name]"`` to replacement."""
        self.d = d

    def split(self, s):
        """Split ``s`` into its prefix and its bracketed identifiers.

        Returns:
            A ``(prefix, identifiers)`` tuple, or ``None`` when ``s`` has no
            prefix (it is empty or starts with "(").
        """
        m = self.pref.match(s)
        if m is None:
            return None
        # Only search for identifiers after the prefix.
        return m.group(0), self.iden.findall(s, m.end())

    def convert(self, s):
        """Return ``s`` with every bracketed identifier replaced.

        Raises:
            ValueError: if ``s`` has no prefix to look identifiers up under.
            KeyError: if ``prefix + identifier`` is not in the dictionary.
        """
        parts = self.split(s)
        if parts is None:
            # Fail with a clear message instead of a TypeError from
            # unpacking None.
            raise ValueError("no prefix found in %r" % (s,))
        p, elts = parts
        return p + "(" + ", ".join(self.d[p + elt] for elt in elts) + ")"
Usage :
s = "abc/def/([default], [testing])"
d = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
sp = splitter(d)
print(sp.convert(s))
output :
abc/def/(2.7, 2.1)
Regex is probably not required here. Hope this helps
# NOTE: `str` and `dict` here are the question's variables (the input string
# and the lookup dictionary), not the builtins they shadow.
# Split the prefix from the parenthesised list at "/(".
lhs,rhs = str.split("/(")
# Drop the closing ")" and separate the names; the two-name unpacking means
# this only works when the list holds exactly two entries.
rhs1,rhs2 = rhs.strip(")").split(", ")
# Restore the trailing "/" so that lhs + name matches the dictionary keys.
lhs+="/"
print "{0}({1},{2})".format(lhs,dict[lhs+rhs1],dict[lhs+rhs2])
output
abc/def/(2.7,2.1)

Python - how to parse this with regex correctly? its parsing all the E.164 but except the local format

It's working for 0032, 32 and +32, but not for 0487365060 (which is also a valid form).
to_user = "0032487365060"
# Goal: accept the 0032..., 32..., +32... and local 0487365060 forms.
# As written, the pattern requires a leading "+" or "00" before the digits
# and captures the digits that follow it.
match = re.search(r'^(?:\+|00)(\d+)$', to_user)
# This sample starts with neither "+" nor "00".
to_user = "32487365060"
match = re.search(r'^(?:\+|00)(\d+)$', to_user)
to_user = "+32487365060"
match = re.search(r'^(?:\+|00)(\d+)$', to_user)
Not working:
to_user = "0487365060"
match = re.search(r'^(?:\+|00)(\d+)$', to_user)
Your last example doesn't work because it does not start with either + or 00. If that is optional you need to mark it as such:
r'^(?:\+|00)?(\d+)$'
Note that neither does your second example match; it doesn't start with + or 00 either.
Demo:
>>> import re
>>> samples = ('0032487365060', '32487365060', '+32487365060', '0487365060')
>>> pattern = re.compile(r'^(?:\+|00)?(\d+)$')
>>> for sample in samples:
... match = pattern.search(sample)
... if match is not None:
... print 'matched:', match.group(1)
... else:
... print 'Sample {} did not match'.format(sample)
...
matched: 32487365060
matched: 32487365060
matched: 32487365060
matched: 0487365060
Taking account of the question AND the comment, and in the absence of more info (particularly on the length of the sequence of digits that must follow the 32 part, and whether it is always 32 or may be another sequence), what I finally understand you want can be obtained with:
import re
# Any of the accepted Belgian prefixes (+32, 0032, 32 or a single local 0)
# followed by exactly nine digits. Compiled once, outside the loop; the raw
# string keeps the backslashes for the regex engine.
belgian_number = re.compile(r'^(?:\+32|0032|32|0)(\d{9})$')
for to_user in ("0032487365060",
                "32487365060",
                "+32487365060",
                "0487365060"):
    # Normalise every accepted form to "32" + the nine-digit number.
    m = belgian_number.sub(r'32\1', to_user)
    print(m)
Something like this, based on @eyquem's method: strip the leading + or 00 from any international number, and for Belgian numbers in local format default to 32 followed by the number:
import re
# A "+", "00", "32" or single "0" prefix followed by exactly nine digits is
# treated as a Belgian number. Compiled once, outside the loop; raw strings
# keep the backslashes for the regex engine.
belgian_number = re.compile(r'^(?:\+|00|32|0)(\d{9})$')
# A leading "+" or "00" international prefix in front of the digits.
international_prefix = re.compile(r'^(?:\+|00)(\d+)$')
for to_user in (# Belgium
                "0032487365060",
                "32487365060",
                "+32487365060",
                "0487365060",
                # USA
                "0012127773456",
                "12127773456",
                "+12127773456",
                # UK
                "004412345678",
                "4412345678",
                "+4412345678"):
    # Belgian forms become "32" + the nine-digit number.
    m = belgian_number.sub(r'32\1', to_user)
    # Drop any remaining "+", then strip a leading "00".
    m = m.replace("+", "")
    m = international_prefix.sub(r'\1', m)
    print(m)
Output:
32487365060
32487365060
32487365060
32487365060
12127773456
12127773456
12127773456
4412345678
4412345678
4412345678
Execution Successful!
Why not use the phonenumbers library?
>>> phonenumbers.parse("0487365060", "BE")
PhoneNumber(country_code=32, national_number=487365060, extension=None, italian_leading_zero=None, number_of_leading_zeros=None, country_code_source=0, preferred_domestic_carrier_code=None)
and the other three are OK too
>>> phonenumbers.parse("0032487365060", "BE")
PhoneNumber(country_code=32, national_number=487365060, extension=None, italian_leading_zero=None, number_of_leading_zeros=None, country_code_source=0, preferred_domestic_carrier_code=None)
>>> phonenumbers.parse("+320487365060", "BE")
PhoneNumber(country_code=32, national_number=487365060, extension=None, italian_leading_zero=None, number_of_leading_zeros=None, country_code_source=0, preferred_domestic_carrier_code=None)
>>> phonenumbers.parse("320487365060", "BE")
PhoneNumber(country_code=32, national_number=487365060, extension=None, italian_leading_zero=None, number_of_leading_zeros=None, country_code_source=0, preferred_domestic_carrier_code=None)

Displaying prices

I'm getting prices in different currencies and want to display Brazilian R$
My formatting doesn't work and the display looks like this:
Price: 1.15..000.,00 R$
For good flexibility I've stored the price as a string: price=db.StringProperty(verbose_name="price")
I tried to implement my own filter and it didn't work:
{{ ad.price|separate }} R$
def separate(n, sep='.'):
    """Insert ``sep`` between each group of three digits of a number.

    Only the leading run of digits is grouped. An optional leading sign and
    everything after the digits (a decimal part, or the remainder of an
    already formatted value such as "115.000,00") is kept unchanged, so
    separators are never inserted between non-digit characters.

    Args:
        n: The value to format; it is converted with ``str()``.
        sep: The thousands separator to insert.

    Returns:
        The formatted string.
    """
    text = str(n)
    sign = ''
    if text[:1] in ('+', '-'):
        sign, text = text[0], text[1:]
    # Find where the leading run of digits ends.
    end = 0
    while end < len(text) and text[end] in '0123456789':
        end += 1
    digits, rest = text[:end], text[end:]
    # Peel off groups of three from the right-hand side.
    groups = []
    while len(digits) > 3:
        groups.append(digits[-3:])
        digits = digits[:-3]
    groups.append(digits)
    groups.reverse()
    return sign + sep.join(groups) + rest
Can you help me? Should I just remove the filter? Should I enforce some regex to the input instead? A link to my site is http://www.koolbusiness.com/servead/4252196
UPDATE: I'm considering using something like one of these filters:
import locale
locale.setlocale(locale.LC_ALL, '')
def currency(value):
    """Format ``value`` as a currency string for the user's default locale.

    Args:
        value: The amount, as a number or a numeric string.

    Returns:
        The amount formatted by ``locale.currency`` with digit grouping.

    Raises:
        ValueError: if ``value`` is not numeric, or if the active locale
            cannot format currency (this is the case for the "C" locale).
    """
    # Activates the user's default locale on every call; note that this
    # changes process-wide state.
    locale.setlocale(locale.LC_ALL, '')
    # The value may arrive as a string, and locale.currency needs a number,
    # so coerce it the same way currencyWithoutUsingLocale does.
    return locale.currency(float(value), grouping=True)
register.filter(currency)
def currencyWithoutUsingLocale(value):
    """Format ``value`` as a currency string without using the locale module.

    The symbol and separators are read from ``settings`` when available,
    and otherwise default to "$", "," and ".".

    Args:
        value: The amount, as a number or a numeric string.

    Returns:
        symbol + optional "-" + grouped integer part + decimal separator +
        two decimal digits.
    """
    value = float(value)
    symbol = '$'
    # try to use settings if set
    try:
        symbol = settings.CURRENCY_SYMBOL
    except AttributeError:
        pass
    try:
        thousand_sep = settings.THOUSAND_SEPARATOR
        decimal_sep = settings.DECIMAL_SEPARATOR
    except AttributeError:
        # If either separator is missing, both fall back to the defaults.
        thousand_sep = ','
        decimal_sep = '.'
    # Round once and split the rounded text, so the integer and decimal
    # parts always agree (1.999 becomes 2.00, not 1.00).
    text = "%0.2f" % abs(value)
    intstr, fraction = text.split('.')
    # Add the sign separately so it never forms a group of its own, and
    # omit it when the amount rounds to zero.
    sign = '-' if value < 0 and float(text) > 0 else ''
    groups = []
    while len(intstr) > 3:
        groups.append(intstr[-3:])
        intstr = intstr[:-3]
    groups.append(intstr)
    groups.reverse()
    intpart = thousand_sep.join(groups)
    return "%s%s%s%s%s" % (symbol, sign, intpart, decimal_sep, fraction)
register.filter(currencyWithoutUsingLocale)
Storing the price as a string is the first problem. It should be a Decimal. If you look at the Python standard library documentation for Decimal, you will see this http://docs.python.org/library/decimal.html#recipes
That moneyfmt recipe should do what you want
Currency formatting is a feature of locale.
http://docs.python.org/library/locale.html#locale.currency
You can use locale._override_localeconv dict to set your own overwrites:
>>> import locale
>>> locale.setlocale(locale.LC_MONETARY, 'pt_BR.UTF-8')
>>> locale.currency(1234.56)
'1234,56 R$'
>>> locale._override_localeconv.update({'p_cs_precedes': 1, 'n_cs_precedes': 1})
>>> locale.currency(1234.56)
'R$ 1234,56'
You can use this locale._override_localeconv.update({'p_cs_precedes': 1, 'n_cs_precedes': 1}) line right after the import locale, if you want.

Categories

Resources