substitute only some numbers by their decrement - python

I have this string: a9*a9 + a10*a10
and I would like to have: a9*a8 + a10*a9
I think re.sub() from Python should be useful, but I am not familiar with group() that I've seen in some examples. Any help would be appreciated.

here's another solution method:
import re
s = 'a9*a9 + a10*a10 + a8*a8 + a255*a255 + b58*b58 + c58*c58'
string = re.sub('[ ]', '', s) # removed whitespace from string (optional:only if you are not sure how many space you can get in string)
x = string.split('+')
pattern = re.compile(r'([a-z])([\d]+)')
ans = ''
for element in x:
for letter, num in re.findall(pattern, element):
st = ''
for i in range(len(element.split('*'))):
st = st + '*' + (letter+str(int(num)-i))
# print(str(letter) + str(int(num)-i))
ans = ans + '+' + st[1:]
print(ans[1:])
Output :
a9*a8+a10*a9+a8*a7+a255*a254+b58*b57+c58*c57

Assuming the structure of the input is a\d*a\d + a\d*a\d + ... you can use a callback in the re.sub function:
import re
def decrement(match):
if match.group(1) != match.group(2):
return match.group(0)
return 'a{}*a{}'.format(match.group(1), str(int(match.group(2)) - 1))
re.sub(r'a(\d)\*a(\d)', decrement, 'a3*a3 + a5*a5 + a3*a7')
# a3*a2 + a5*a4 + a3*a7

Related

How to get hex value according to skinid

I want to find and get hex value keyword from SkinId input.
Download example file for read by myself : Example file
import re, os
os.system('color 4')
def find_keyword(skinId):
with open("infos/116_JingKe_actorinfo.bytes", "rb") as f:
byte_string = f.read()
key_word = f"116_JingKe/{skinId}".encode('utf-8')
#key_word = b'TypeSystem.String'
matches = re.findall(key_word, byte_string)
start_keyword = b'JTPri'
end_keyword = b'LOD'
# Define the regular expression pattern to match the start and end keywords
#pattern = re.compile(start_keyword + b".*?" + end_keyword + b"\d*", re.DOTALL)
#pattern = re.compile(start_keyword + b".*?(?=LOD|Show\d)", re.DOTALL)
pattern = re.compile(start_keyword + b".*?(?=LOD|Show)" + end_keyword + b"\d*", re.DOTALL)
#pattern = re.compile(start_keyword + b".*?" + end_keyword, re.DOTALL)
# Find all occurrences of the pattern in the file contents
matches = pattern.findall(byte_string)
for match in matches:
if key_word in match:
print(match)
while True:
skinId = input(" >>> SKIN ID :")
find_keyword(skinId)
if skinId == 'x':
break
os.system('pause')
os.system('cls')
Result : https://i.stack.imgur.com/zOGNL.png
Desired result :
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD1'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD2'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD3'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show1
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show2'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show3'

Python : How to translate?

the program is when user input"8#15#23###23#1#19###9#20"
output should be "HOW WAS IT"
However,it could not work to show space(###).
enter code here
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
def from_abstract(s):
result = ''
for word in s.split('*'):
result = result +ABSTRACT_SHIFTED.get(word)
return result
This would do the trick:
#!/usr/bin/env python
InputString = "8#15#23###23#1#19###9#20"
InputString = InputString.replace("###", "##")
InputString = InputString.split("#")
DecodedMessage = ""
for NumericRepresentation in InputString:
if NumericRepresentation == "":
NumericRepresentation = " "
DecodedMessage += NumericRepresentation
continue
else:
DecodedMessage += chr(int(NumericRepresentation) + 64)
print(DecodedMessage)
Prints:
HOW WAS IT
you can also use a regex
import re
replacer ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
reversed = {value:key for key,value in replacer.items()}
# Reversed because regex is greedy and it will match 1 before 15
target = '8#15#23###23#1#19###9#20'
pattern = '|'.join(map(lambda x: x + '+', list(reversed.keys())[::-1]))
repl = lambda x: reversed[x.group(0)]
print(re.sub(pattern, string=target, repl=repl))
And prints:
HOW WAS IT
With a couple minimal changes to your code it works.
1) split on '#', not '*'
2) retrieve ' ' by default if a match isn't found
3) use '##' instead of '###'
def from_abstract(s):
result = ''
for word in s.replace('###','##').split('#'):
result = result +ABSTRACT_SHIFTED.get(word," ")
return result
Swap the key-value pairs of ABSTRACT and use simple split + join on input
ip = "8#15#23###23#1#19###9#20"
ABSTRACT = dict((v,k) for k,v in ABSTRACT.items())
''.join(ABSTRACT.get(i,' ') for i in ip.split('#')).replace(' ', ' ')
#'HOW WAS IT'
The biggest challenge here is that "#" is used as a token separator and as the space character, you have to know the context to tell which you've got at any given time, and that makes it difficult to simply split the string. So write a simple parser. This one will accept anything as the first character in a token and then grab everything until it sees the next "#".
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
user_input = "8#15#23###23#1#19###9#20"
def from_abstract(s):
result = []
while s:
print 'try', s
# tokens are terminated with #
idx = s.find("#")
# ...except at end of line
if idx == -1:
idx = len(s) - 1
token = s[:idx]
s = s[idx+1:]
result.append(ABSTRACT_SHIFTED.get(token, ' '))
return ''.join(result)
print from_abstract(user_input)

Python - surround instances of strings in a given list that are present in another string with HTML

I've written a function that surrounds a search term with a HTML element with given attributes. The idea is the resulting surrounded string is written to a log file later on with the search term highlighted.
def inject_html(needle, haystack, html_element="span", html_attrs={"class":"matched"}):
# Find all occurrences of a given string in some text
# Surround the occurrences with a HTML element and given HTML attributes
new_str = haystack
start_index = 0
while True:
try:
# Get the bounds
start = new_str.lower().index(needle.lower(), start_index)
end = start + len(needle)
# Needle is present, compose the HTML to inject
html_open = "<" + html_element + " " + " ".join(["%s=\"%s\""%(k,html_attrs[k]) for k in html_attrs]) + ">"
html_close = "</" + html_element + ">"
new_str = new_str[0:start] + html_open + new_str[start:end] + html_close + new_str[end:len(new_str)]
start_index = end + len(html_close) + len(html_open)
except ValueError as ex:
# String doesn't occur in text after index, break loop
break
return new_str
I want to open this up to accept an array of needles, locating and surrounding them with HTML in the haystack. I could easily do this by surrounding the code with another loop which iterates through the needles, locating and surrounding instances of the search term. Problem is, this doesn't protect from accidentally surrounding previously injected HTML code., e.g.
def inject_html(needles, haystack, html_element="span", html_attrs={"class":"matched"}):
# Find all occurrences of a given string in some text
# Surround the occurrences with a HTML element and given HTML attributes
new_str = haystack
for needle in needles:
start_index = 0
while True:
try:
# Get the bounds
start = new_str.lower().index(needle.lower(), start_index)
end = start + len(needle)
# Needle is present, compose the HTML to inject
html_open = "<" + html_element + " " + " ".join(["%s=\"%s\""%(k,html_attrs[k]) for k in html_attrs]) + ">"
html_close = "</" + html_element + ">"
new_str = new_str[0:start] + html_open + new_str[start:end] + html_close + new_str[end:len(new_str)]
start_index = end + len(html_close) + len(html_open)
except ValueError as ex:
# String doesn't occur in text after index, break loop
break
return new_str
search_strings = ["foo", "pan", "test"]
haystack = "Foobar"
print(inject_html(search_strings,haystack))
<s<span class="matched">pan</span> class="matched">Foo</span>bar
On the second iteration, the code searches for and surrounds the "pan" text from the "span" that was inserted in the previous iteration.
How would you recommend I change my original function to look for a list of needles without the risk of injecting HTML into undesired locations (such as within existing tags).
--- UPDATE ---
I got around this by maintaining a list of "immune" ranges (ones which have already been surrounded with HTML and therefore do not need to be checked again.
def inject_html(needles, haystack, html_element="span", html_attrs={"class":"matched"}):
# Find all occurrences of a given string in some text
# Surround the occurrences with a HTML element and given HTML attributes
immune = []
new_str = haystack
for needle in needles:
next_index = 0
while True:
try:
# Get the bounds
start = new_str.lower().index(needle.lower(), next_index)
end = start + len(needle)
if not any([(x[0] > start and x[0] < end) or (x[1] > start and x[1] < end) for x in immune]):
# Needle is present, compose the HTML to inject
html_open = "<" + html_element + " " + " ".join(["%s=\"%s\""%(k,html_attrs[k]) for k in html_attrs]) + ">"
html_close = "</" + html_element + ">"
new_str = new_str[0:start] + html_open + new_str[start:end] + html_close + new_str[end:len(new_str)]
next_index = end + len(html_close) + len(html_open)
# Add the highlighted range (and HTML code) to the list of immune ranges
immune.append([start, next_index])
except ValueError as ex:
# String doesn't occur in text after index, break loop
break
return new_str
It's not particularly Pythonic though, I'd be interested to see if anyone can come up with something cleaner.
I'd use something like this:
def inject_html(phrases, text_body, html_element_name="span", html_attrs={"class":"matched"}):
new_text_body = []
html_start_tag = "<" + html_element_name + " ".join(["%s=\"%s\""%(k,html_attrs[k]) for k in html_attrs]) + ">"
html_end_tag = "</" + html_element_name + ">"
text_body_lines = text_body.split("\n")
for line in text_body_lines:
for p in phrases:
if line.lower() == p.lower():
line = html_start_tag + p + html_end_tag
break
new_text_body.append(line)
return "\n".join(new_text_body)
It goes through line by line and replaces each line if the line is an exact match (case-insensitive).
ROUND TWO:
With the requirement that the match needs to be (1) case-insensitive and (2) matches multiple words/phrases on each line, I would use:
import re
def inject_html(phrases, text_body, html_element_name="span", html_attrs={"class": "matched"}):
html_start_tag = "<" + html_element_name + " " + " ".join(["%s=\"%s\"" % (k, html_attrs[k]) for k in html_attrs]) + ">"
html_end_tag = "</" + html_element_name + ">"
for p in phrases:
text_body = re.sub(r"({})".format(p), r"{}\1{}".format(html_start_tag, html_end_tag), text_body, flags=re.IGNORECASE)
return text_body
For each provided phrase p, this uses a case-insensitive re.sub() replacement to replace all instances of that phrase in the provided text. (p) matches the phrase via a regular expression group. \1 is a backfill operator that matches the found phrase, enclosing it in HTML tags.
text = """
Somewhat more than forty years ago, Mr Baillie Fraser published a
lively and instructive volume under the title _A Winter’s Journey
(Tatar) from Constantinople to Teheran. Political complications
had arisen between Russia and Turkey - an old story, of which we are
witnessing a new version at the present time. The English government
deemed it urgently necessary to send out instructions to our
representatives at Constantinople and Teheran.
"""
new = inject_html(["TEHERAN", "Constantinople"], text)
print(new)
> Somewhat more than forty years ago, Mr Baillie Fraser published a lively and instructive volume under the title _A Winter’s Journey (Tatar) from <span class="matched">Constantinople</span> to <span class="matched">Teheran</span>. Political complications had arisen between Russia and Turkey - an old story, of which we are witnessing a new version at the present time. The English government deemed it urgently necessary to send out instructions to our representatives at <span class="matched">Constantinople</span> and <span class="matched">Teheran</span>.

Python for loop - stripping line as you go

I am trying to strip a line of code so that only the comment at the end is saved. Because # signs can be included within "" marks, to do this I am trying to cycle through the line catching pairs of " marks so that it ignores any # marks within "" marks. When I use a code visualiser on my code below, after the second for loop it seems to go pack to processing s as if it has just stripped the first " mark. I can't see what I'm doing wrong here, because the print statement I have included on line 19 shows that s has been stripped to after the second ", but when the code returns to the top, it starts cycling again from after the first ". Any idea of what I am doing wrong here?
s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
quoteCount = 0
for char in s:
if quoteCount%2 == 0:
if char == '#':
s = s[s.index('#'):]
break
if char == '"':
quoteCount = quoteCount + 1
s = s[s.index('"'):]
s = s.lstrip('"')
for char in s:
if char == '"':
quoteCount = quoteCount + 1
s = s[s.index('"'):]
s = s.lstrip('"')
print(s)
break
print(s)
If I understand your question correctly you only want to keep the last comment (#lots of hash(#) symbols here).
To do this you don't need the nested for loop.
s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
quoteCount = 0
for char in s:
if quoteCount%2 == 0:
if char == '#':
s = s[s.index('#'):]
break
if char == '"':
quoteCount = quoteCount + 1
s = s[s.index('"'):]
s = s.lstrip('"')
print(s)
Easier to remove the quoted strings with a regular expression:
import re
s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
pattern = r'"[^"]*"'
s = re.sub(pattern, '', s)
print s[s.index('#'):]
Output:
#lots of hash(#) symbols here
Your code is overly complicated so I suggest you use an alternative method to finding the comment like the already mentioned regex one or the one I came up with.
s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
s = s[s.rfind('"') + 1:] # Get to the last quotation mark
if s.find('#') >= 0: # The first # sign should start the comment if there is one
s = s[s.find('#'):]
else:
s = '' # No comment was found
print(s)

.split(",") separating every character of a string

At some point of the program I ask it to take the user's text input and separate the text according to it's commas, and then I ",".join it again in a txt file. The idea is to have a list with all the comma separated information.
The problem is that, apparently, when I ",".join it, it separates every single character with commas, so if I've got the string info1,info2 it separates, getting info1 | info2, but then, when joining it back again it ends like i,n,f,o,1,,,i,n,f,o,2, which is highly unconfortable, since it get's the text back from the txt file to show it to the user later in the program. Can anyone help me with that?
categories = open('c:/digitalLibrary/' + connectedUser + '/category.txt', 'a')
categories.write(BookCategory + '\n')
categories.close()
categories = open('c:/digitalLibrary/' + connectedUser + '/category.txt', 'r')
categoryList = categories.readlines()
categories.close()
for category in BookCategory.split(','):
for readCategory in lastReadCategoriesList:
if readCategory.split(',')[0] == category.strip():
count = int(readCategory.split(',')[1])
count += 1
i = lastReadCategoriesList.index(readCategory)
lastReadCategoriesList[i] = category.strip() + "," + str(count).strip()
isThere = True
if not isThere:
lastReadCategoriesList.append(category.strip() + ",1")
isThere = False
lastReadCategories = open('c:/digitalLibrary/' + connectedUser + '/lastReadCategories.txt', 'w')
for category in lastReadCategoriesList:
if category.split(',')[0] != "" and category != "":
lastReadCategories.write(category + '\n')
lastReadCategories.close()
global finalList
finalList.append({"Title":BookTitle + '\n', "Author":AuthorName + '\n', "Borrowed":IsBorrowed + '\n', "Read":readList[len(readList)-1], "BeingRead":readingList[len(readingList)-1], "Category":BookCategory + '\n', "Collection":BookCollection + '\n', "Comments":BookComments + '\n'})
finalList = sorted(finalList, key=itemgetter('Title'))
for i in range(len(finalList)):
categoryList[i] = finalList[i]["Category"]
toAppend = (str(i + 1) + ".").ljust(7) + finalList[i]['Title'].strip()
s.append(toAppend)
categories = open('c:/digitalLibrary/' + connectedUser + '/category.txt', 'w')
for i in range(len(categoryList)):
categories.write(",".join(categoryList[i]))
categories.close()
You should pass ''.join() a list, you are passing in a single string instead.
Strings are sequences too, so ''.join() treats every character as a separate element instead:
>>> ','.join('Hello world')
'H,e,l,l,o, ,w,o,r,l,d'
>>> ','.join(['Hello', 'world'])
'Hello,world'

Categories

Resources