Regular expression for this function - python

I want to simplify this function, using regular expressions.
the sample input can be
text =' At&T, " < I am > , At&T so < < & & '
My code:
def replaceentity(text):
    """Escape raw ``&``, ``<`` and ``>`` in *text* without double-escaping.

    Steps:
      1. Normalise the numeric entities ``&#60;`` / ``&#62;`` to ``&lt;`` /
         ``&gt;``.
      2. Swap every whitespace-delimited word that already *is* a named
         entity for a unique placeholder so escaping cannot touch it.
      3. Escape the remaining text, then put the protected entities back.

    The result is printed (as the original did) and also returned so callers
    can use it.

    NOTE(review): the entity literals below were reconstructed. In the
    published snippet they had been decoded to their raw characters, which
    made both dicts no-ops and left the ``"`` entry as an unterminated
    string. Confirm against the original source.
    """
    import uuid
    try:  # Python 3 (cgi.escape was removed in 3.8)
        from html import escape as html_escape
    except ImportError:  # Python 2
        from cgi import escape as html_escape

    invalid_chars_map = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;'}
    replace_values = {'&#60;': '&lt;', '&#62;': '&gt;'}
    entities = set(invalid_chars_map.values())
    replaced_dict = {}

    for key, value in replace_values.items():
        text = text.replace(key, value)
    print("after replace >>>>>> " + text)

    for word in text.split():
        if word in entities:
            print(word)
            uid = str(uuid.uuid4())
            # Replaces every occurrence, so a repeated entity only needs
            # its first placeholder; later ones simply match nothing.
            text = text.replace(word, uid)
            replaced_dict[uid] = word

    # quote=False mirrors cgi.escape's default: quotes are left alone.
    text = html_escape(text, False)

    for uid, entity in replaced_dict.items():
        text = text.replace(uid, entity)
    print(text)
    return text

Is this what you want?
>>> from cgi import escape
>>> escaped = escape("""'At&T, " < I am > , At&T so < &lt """)
>>> escaped
'\'At&T, " < I am > , At&T so &#60; &lt '

Related

How to get hex value according to skinid

I want to find and get hex value keyword from SkinId input.
Download example file for read by myself : Example file
import re, os
os.system('color 4')
def find_keyword(skinId, path="infos/116_JingKe_actorinfo.bytes"):
    """Print and return the ``JTPri ... LOD<n>`` / ``JTPri ... Show<n>`` records of a skin.

    Args:
        skinId: Skin identifier; a record is kept when it contains the bytes
            ``116_JingKe/<skinId>``.
        path: Binary file to scan. Defaults to the path the original
            hard-coded.

    Returns:
        The matching records as a list of ``bytes``, in file order.
    """
    with open(path, "rb") as f:
        byte_string = f.read()

    key_word = f"116_JingKe/{skinId}".encode('utf-8')
    start_keyword = b'JTPri'
    # A record runs from the start keyword to the first "LOD" or "Show"
    # plus any trailing digits. The previous pattern looked ahead for
    # LOD|Show but then demanded a literal LOD, so Show records were never
    # terminated and got swallowed into the next LOD record.
    pattern = re.compile(
        re.escape(start_keyword) + rb".*?(?:LOD|Show)\d*", re.DOTALL
    )

    matches = [m for m in pattern.findall(byte_string) if key_word in m]
    for match in matches:
        print(match)
    return matches
# Interactive prompt: keep asking for skin ids until the user types 'x'.
while True:
    skinId = input(" >>> SKIN ID :")
    # Test the exit sentinel before searching, so 'x' itself is never
    # looked up as a skin id.
    if skinId == 'x':
        break
    find_keyword(skinId)
    # Windows console helpers: wait for a key press, then clear the screen.
    os.system('pause')
    os.system('cls')
Result : https://i.stack.imgur.com/zOGNL.png
Desired result :
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD1'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD2'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD3'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show1'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show2'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show3'

Find and replace in string re.insensitive

I have this code to find matches in a string. I'm using it in a search feature to mark the words that match the search term, and I need it to be case-insensitive. The issue is that it replaces each matched word with the search term itself, so the original casing of the text is lost.
val_to_searchp = "this Text string has alot of teXt"
word = "TEXT"
# \g<0> re-inserts whatever text actually matched, so the casing found in
# the input survives (inserting `word` itself forced every hit to "TEXT").
pal2rep = r":::\g<0>:::"
val_to_search = re.sub(re.escape(word), pal2rep, val_to_searchp, flags=re.IGNORECASE)
this will return
"this :::TEXT::: string has alot of :::TEXT:::"
I need it to return
"this :::Text::: string has alot of :::teXt:::"
I also tried the following, but it does not work very well:
# Wrap each case-insensitive occurrence of `word` in ":::" markers by
# slicing, keeping the matched text's own casing.
marker = ":::"
shift = 0  # characters inserted so far; match positions refer to the original
val_to_search = val_to_searchp
for m in re.finditer(re.escape(word), val_to_searchp, flags=re.IGNORECASE):
    inicio = m.start() + shift
    fim = m.end() + shift
    val_to_search = (
        val_to_search[:inicio]
        + marker
        + val_to_search[inicio:fim]
        + marker
        + val_to_search[fim:]
    )
    # Each hit adds one marker before and one after the match.
    shift += 2 * len(marker)
This is my actuall code
def findtext():
    """Mark every case-insensitive occurrence of the search term in the text.

    Reads the search term from ``findtext_inp`` and the text from
    ``respon_txt`` (presumably tkinter ``Text`` widgets; confirm), strips
    any markers left by a previous search, wraps each hit in a marker pair
    and writes the text back. The matched text is re-inserted as found, so
    its casing is preserved.
    """
    if len(str(findtext_inp.get('1.0', END)))>1:
        marker = html.unescape('⛔')
        val_to_searchp = str(respon_txt.get(1.0, END).replace(marker, "").strip())
        respon_txt.delete(1.0, END)
        word = str(findtext_inp.get('1.0', END).strip())
        if word:
            # A callable replacement keeps the matched casing and stops
            # re.sub from interpreting backslashes typed by the user.
            val_to_search = re.sub(
                re.escape(word),
                lambda m: marker + m.group(0) + marker,
                val_to_searchp,
                flags=re.IGNORECASE,
            )
        else:
            # Whitespace-only term: an empty pattern would match between
            # every character, so put the text back unmarked.
            val_to_search = val_to_searchp
        respon_txt.insert(1.0, val_to_search)
I'm sure there's a way to do this with RE but it's really trivial without the aid of that module.
val_to_searchp = "this Text string has alot of teXt\nThis also has a lot of text"
text = 'TEXT'
def func(s, txt):
    """Wrap whole words equal to *txt* (ignoring case) in ``:::`` markers.

    Each line of *s* is split on whitespace, matching words are wrapped,
    and the words are re-joined with single spaces. Line breaks are kept.
    """
    needle = txt.lower()
    marked_lines = []
    for line in s.split('\n'):
        words = [
            f':::{w}:::' if w.lower() == needle else w
            for w in line.split()
        ]
        marked_lines.append(' '.join(words))
    return '\n'.join(marked_lines)
print(func(val_to_searchp, text))
Output:
this :::Text::: string has alot of :::teXt:::
This also has a lot of :::text:::
This is a rewrite of my original answer. In the comments for that answer you will see that the OP has changed his mind about how this needs to work. This now (hopefully) complies with the altered specification:
val_to_searchp = '''{\"configurationKey\":[{\"key\":\"GetMaxKeys\",\"readonly\":true,\"value\":\"20\"}'''
text = 'GetMaxKeys'
def func(s, txt):
    """Wrap every case-insensitive occurrence of *txt* in *s* with ``:::``.

    Matching is on substrings, and the wrapped text keeps the casing it has
    in *s*. An empty *txt* returns *s* unchanged (it previously looped
    forever, because an empty needle is found at every offset).

    NOTE: offsets are computed on ``s.lower()``; for the few characters
    whose lowercase form has a different length they may not line up with
    *s*.
    """
    if not txt:
        return s
    sl = s.lower()
    txt = txt.lower()
    lt = len(txt)
    result = []
    offset = 0
    # Search from `offset` instead of re-slicing the haystack on each pass.
    while (i := sl.find(txt, offset)) >= 0:
        result.append(s[offset:i])
        result.append(f':::{s[i:i + lt]}:::')
        offset = i + lt
    result.append(s[offset:])
    return ''.join(result)
print(func(val_to_searchp, text))
Output:
{"configurationKey":[{"key":":::GetMaxKeys:::","readonly":true,"value":"20"}

python logic to extract pattern of strings in list

text = [('Automated', 'PROPN'), ('Unit', 'PROPN'), ('testing', 'NOUN'), ('design', 'NOUN'), ('and', 'CCONJ'), ('implementation', 'NOUN'), ('experience', 'NOUN')]
pattern =['NOUN', 'CCONJ', 'NOUN', 'NOUN']
#program should extract =>>>> design and implementation experience
#can be done using regex or list methods or string methods
'''
def get_matched(text,patterns): # patterns => list of cases
punctuations = ''',-'''
s = ""
for char in text:
if char not in punctuations:
s = s + char
if "/" in s : # replacing "/" with or because "/" is recognised as symbol in pos tags
s = s.replace('/', ' or ')
res = re.sub(' +', ' ', s)
doc = nlp(res)
words = []
for case in patterns:
matcher = Matcher(nlp.vocab)
matcher.add("matching_1",[case[1::]])
matches = matcher(doc)
for match_id,start,end in matches:
span = doc[start:end]
l = span.text.split()
if len(l) > 0 :
#print(l)
index = case[0]
try:
for i in range (len(index)):
temp = ""
for ind in index[i]:
temp = temp + l[ind] + " "
words.append(temp)
words.append(' '.join(l),text)
print("->>>>>>>>>>>",text)
except:
print(case,l)
return words
'''
I did it with spaCy matching, but I have to write the logic for it myself:
# For every case (tag pattern, index groups), find where the tag pattern
# occurs in pos_tags and print the words selected by each index group.
# NOTE(review): nesting was reconstructed from an unindented paste; confirm.
temp = "".join(pos_tags)
for i in cases:
    case = ''.join(i[0])
    # Cheap pre-filter on the concatenated tags. The old `find(...) > 0`
    # test wrongly skipped a pattern that starts at the very first tag.
    if case in temp:
        case_lenght = len(i[0])
        for ind in range(len(pos_tags) - (case_lenght - 1)):
            if pos_tags[ind:(case_lenght + ind)] == i[0]:
                # NOTE(review): extracted_sent is never read; the indices in
                # i[1] may have been meant to index it rather than `text`.
                extracted_sent = text[ind:ind + case_lenght]
                for index_group in i[1]:
                    word = ""
                    for index in index_group:
                        word = word + " " + text[index]
                    print(word)

Python : How to translate?

When the user inputs "8#15#23###23#1#19###9#20",
the output should be "HOW WAS IT".
However, my program fails to decode the space marker (###).
enter code here
# Letter -> numeric code. "###" encodes a space and "#" separates letters.
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
# Numeric code -> letter.
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}


def from_abstract(s):
    """Decode a ``#``-separated code string such as ``"8#15#23###9#20"``.

    Words are separated by the space marker ``###`` and letters within a
    word by ``#``.

    Raises:
        ValueError: if a code is not in the table. The original failed with
            an opaque ``TypeError`` from concatenating ``None``.
    """
    words = []
    # Split on the space marker first so "#" only ever separates letters.
    for word in s.split(ABSTRACT[" "]):
        letters = []
        for code in word.split('#'):
            if not code:
                continue  # empty input or stray separator
            if code not in ABSTRACT_SHIFTED:
                raise ValueError("unknown code: %r" % code)
            letters.append(ABSTRACT_SHIFTED[code])
        words.append(''.join(letters))
    return ' '.join(words)
This would do the trick:
#!/usr/bin/env python
# Decode a "#"-separated string of alphabet positions; "###" marks a space.
InputString = "8#15#23###23#1#19###9#20"
# Shrinking "###" to "##" leaves exactly one empty field per space.
InputString = InputString.replace("###", "##").split("#")

decoded_parts = []
for NumericRepresentation in InputString:
    if NumericRepresentation:
        # 1 -> 'A' (65), 2 -> 'B', ...
        decoded_parts.append(chr(64 + int(NumericRepresentation)))
    else:
        decoded_parts.append(" ")
DecodedMessage = "".join(decoded_parts)
print(DecodedMessage)
Prints:
HOW WAS IT
you can also use a regex
import re

# Letter -> numeric code. "###" encodes a space and "#" separates letters.
replacer ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
# Code -> letter. Named so it does not shadow the built-in reversed().
reversed_map = {value: key for key, value in replacer.items()}
target = '8#15#23###23#1#19###9#20'
# Alternation tries branches left to right, so longer codes go first
# ("15" before "1", "###" before "#"). Codes are escaped and matched
# exactly; the former `+` quantifiers could match strings such as "##" or
# "111" that are not keys, which raised KeyError.
pattern = '|'.join(
    re.escape(code) for code in sorted(reversed_map, key=len, reverse=True)
)


def repl(match):
    """Return the letter for the code that was matched."""
    return reversed_map[match.group(0)]


decoded = re.sub(pattern, repl, target)
print(decoded)
And prints:
HOW WAS IT
With a couple minimal changes to your code it works.
1) split on '#', not '*'
2) retrieve ' ' by default if a match isn't found
3) use '##' instead of '###'
def from_abstract(s):
    """Decode a ``#``-separated code string using ``ABSTRACT_SHIFTED``.

    ``###`` is first shortened to ``##`` so that splitting on ``#`` leaves
    one empty field per space. Fields missing from the table, including
    those empty ones, decode to a single space.
    """
    fields = s.replace('###', '##').split('#')
    return ''.join(ABSTRACT_SHIFTED.get(field, " ") for field in fields)
Swap the key-value pairs of ABSTRACT and use simple split + join on input
ip = "8#15#23###23#1#19###9#20"
# Swap to code -> letter. This rebinds ABSTRACT, so run it only once.
ABSTRACT = {v: k for k, v in ABSTRACT.items()}
# Splitting "###" on "#" yields two empty fields, i.e. two spaces, so
# collapse each double space into one. The published literal had lost its
# second space, which made this replace a no-op.
decoded = ''.join(ABSTRACT.get(i, ' ') for i in ip.split('#')).replace('  ', ' ')
print(decoded)
#'HOW WAS IT'
The biggest challenge here is that "#" is used as a token separator and as the space character, you have to know the context to tell which you've got at any given time, and that makes it difficult to simply split the string. So write a simple parser. This one will accept anything as the first character in a token and then grab everything until it sees the next "#".
# Letter -> numeric code. "###" encodes a space and "#" separates letters.
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
# Numeric code -> letter.
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
user_input = "8#15#23###23#1#19###9#20"


def from_abstract(s):
    """Decode *s* with a small left-to-right parser.

    At each step the remaining input starts with one of:
      * ``###``: a space;
      * ``#``: a letter separator, which is skipped;
      * a code running up to the next ``#`` or the end of the input.

    Codes missing from the table decode to a space, as before.
    """
    result = []
    while s:
        if s.startswith('###'):
            result.append(' ')
            s = s[3:]
        elif s.startswith('#'):
            s = s[1:]
        else:
            idx = s.find("#")
            if idx == -1:
                # Last token: take everything. The old `len(s) - 1` cut
                # off its final character, so "20" decoded as "B".
                idx = len(s)
            result.append(ABSTRACT_SHIFTED.get(s[:idx], ' '))
            s = s[idx:]
    return ''.join(result)


print(from_abstract(user_input))

substitute only some numbers by their decrement

I have this string: a9*a9 + a10*a10
and I would like to have: a9*a8 + a10*a9
I think re.sub() from Python should be useful, but I am not familiar with group() that I've seen in some examples. Any help would be appreciated.
here's another solution method:
import re

# Rewrite each product term "xN*xN" as "xN*x(N-1)".
s = 'a9*a9 + a10*a10 + a8*a8 + a255*a255 + b58*b58 + c58*c58'
# Drop spaces so that splitting on '+' gives clean terms.
string = re.sub('[ ]', '', s)
x = string.split('+')
pattern = re.compile(r'([a-z])([\d]+)')
ans = ''
for element in x:
    factor_count = len(element.split('*'))
    # The last variable found in the term supplies the letter and number.
    letter, num = pattern.findall(element)[-1]
    # The k-th factor (from 0) is the variable with its number lowered by k.
    term = '*'.join(letter + str(int(num) - k) for k in range(factor_count))
    ans += '+' + term
print(ans[1:])
Output :
a9*a8+a10*a9+a8*a7+a255*a254+b58*b57+c58*c57
Assuming the structure of the input is a\d*a\d + a\d*a\d + ... you can use a callback in the re.sub function:
import re

# Matches "aN*aM" with multi-digit N and M. The former single `\d` could
# not match "a10*a10" at all.
PAIR_PATTERN = re.compile(r'a(\d+)\*a(\d+)')


def decrement(match):
    """``re.sub`` callback: turn ``aN*aN`` into ``aN*a(N-1)``.

    Pairs whose two numbers differ are returned unchanged.
    """
    if match.group(1) != match.group(2):
        return match.group(0)
    return 'a{}*a{}'.format(match.group(1), str(int(match.group(2)) - 1))


result = PAIR_PATTERN.sub(decrement, 'a9*a9 + a10*a10 + a3*a7')
# a9*a8 + a10*a9 + a3*a7

Categories

Resources