Python : How to translate? - python

The program should work like this: when the user inputs "8#15#23###23#1#19###9#20",
the output should be "HOW WAS IT".
However, it fails to decode the space ("###").
enter code here
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
# Question's attempt at decoding: look each token of `s` up in ABSTRACT_SHIFTED
# and concatenate the results.
def from_abstract(s):
result = ''
# NOTE(review): the split is on '*', but the sample input separates tokens
# with '#', so for that input the whole string stays a single token.
for word in s.split('*'):
# .get() without a default returns None for an unknown token, and
# str + None raises TypeError.
result = result +ABSTRACT_SHIFTED.get(word)
return result

This would do the trick:
#!/usr/bin/env python


def decode(encoded):
    """Decode a '#'-separated string of letter numbers (1 = A ... 26 = Z).

    Letters inside a word are separated by a single '#'; words are separated
    by '###', which decodes to one space.

    Raises ValueError if a non-empty token is not an integer.
    """
    if not encoded:
        return ""
    decoded = []
    # Collapse the three-hash space marker to two hashes so that splitting on
    # the single-hash separator leaves exactly one empty token per space.
    for token in encoded.replace("###", "##").split("#"):
        if token == "":
            decoded.append(" ")
        else:
            # ord('A') == 65, so letter number n maps to chr(n + 64).
            decoded.append(chr(int(token) + 64))
    return "".join(decoded)


InputString = "8#15#23###23#1#19###9#20"
DecodedMessage = decode(InputString)
print(DecodedMessage)
Prints:
HOW WAS IT

you can also use a regex
import re

replacer = {
    "A": "1", "B": "2", "C": "3", "D": "4", "E": "5", "F": "6", "G": "7",
    "H": "8", "I": "9", "J": "10", "K": "11", "L": "12", "M": "13",
    "N": "14", "O": "15", "P": "16", "Q": "17", "R": "18", "S": "19",
    "T": "20", "U": "21", "V": "22", "W": "23", "X": "24", "Y": "25",
    "Z": "26",
    " ": "###", "": "#",
}
# Encoded token -> decoded character (named `decoder` so the builtin
# `reversed` is not shadowed).
decoder = {value: key for key, value in replacer.items()}

# Regex alternation takes the first alternative that matches, so the longest
# tokens must come first: "###" before "#", and "15" before "1". Sorting by
# length makes this independent of dict ordering; re.escape keeps every token
# literal.
pattern = '|'.join(
    re.escape(token) for token in sorted(decoder, key=len, reverse=True)
)


def repl(match):
    """Return the decoded character for one matched token."""
    return decoder[match.group(0)]


def decode(text):
    """Replace every encoded token in *text* with its decoded character."""
    return re.sub(pattern, repl, text)


target = '8#15#23###23#1#19###9#20'
decoded = decode(target)
print(decoded)
And prints:
HOW WAS IT

With a couple minimal changes to your code it works.
1) split on '#', not '*'
2) retrieve ' ' by default if a match isn't found
3) use '##' instead of '###'
def from_abstract(s):
    """Decode a '#'-separated string of letter numbers using ABSTRACT_SHIFTED.

    '###' (the space marker) is first collapsed to '##' so that splitting on
    '#' yields exactly one empty token per space; any token missing from the
    table, including that empty token, decodes to a single space.
    """
    if not s:
        return ''
    tokens = s.replace('###', '##').split('#')
    return ''.join(ABSTRACT_SHIFTED.get(word, " ") for word in tokens)

Swap the key-value pairs of ABSTRACT and use simple split + join on input
ip = "8#15#23###23#1#19###9#20"
# Swap keys and values so the table maps encoded number -> letter.
ABSTRACT = dict((v, k) for k, v in ABSTRACT.items())
# Splitting on '#' turns each '###' into two empty tokens, and each empty
# token falls back to the default ' '. Collapse the resulting double space
# into the single space the message should contain.
decoded = ''.join(ABSTRACT.get(i, ' ') for i in ip.split('#')).replace('  ', ' ')
print(decoded)
#'HOW WAS IT'

The biggest challenge here is that "#" is used as a token separator and as the space character, you have to know the context to tell which you've got at any given time, and that makes it difficult to simply split the string. So write a simple parser. This one will accept anything as the first character in a token and then grab everything until it sees the next "#".
ABSTRACT = {
    "A": "1", "B": "2", "C": "3", "D": "4", "E": "5", "F": "6", "G": "7",
    "H": "8", "I": "9", "J": "10", "K": "11", "L": "12", "M": "13",
    "N": "14", "O": "15", "P": "16", "Q": "17", "R": "18", "S": "19",
    "T": "20", "U": "21", "V": "22", "W": "23", "X": "24", "Y": "25",
    "Z": "26",
    " ": "###", "": "#",
}
# Encoded token -> decoded character.
ABSTRACT_SHIFTED = {value: key for key, value in ABSTRACT.items()}

user_input = "8#15#23###23#1#19###9#20"


def from_abstract(s):
    """Decode *s*, a '#'-separated string of letter numbers, into text.

    Letters within a word are separated by a single '#'; '###' stands for one
    space between words. A token that is not in ABSTRACT_SHIFTED decodes to a
    space.
    """
    result = []
    while s:
        # A leading '###' is the space marker, not three separators.
        if s.startswith("###"):
            result.append(" ")
            s = s[3:]
            continue
        # tokens are terminated with #
        idx = s.find("#")
        # ...except at end of line, where the token runs to the very end
        if idx == -1:
            idx = len(s)
        token = s[:idx]
        result.append(ABSTRACT_SHIFTED.get(token, ' '))
        if s.startswith("###", idx):
            # Leave the space marker intact for the next iteration.
            s = s[idx:]
        else:
            # Skip the single '#' separator.
            s = s[idx + 1:]
    return ''.join(result)


print(from_abstract(user_input))

Related

Extract words between the word "for" and the opening parenthesis "(" in an email subject line. Email subject line is the input

The client's name is after the word "for" and before the opening parenthesis "(" that starts the proposal number. I need to extract the client name to use to look up the deal in a future step. What would be the easiest way to set this up? Using Zapier Extract Pattern or to Use Zapier Code in Python?
I have tried this and it did not work. It seemed promising though.
input_data
client = Reminder: Leruths has sent you a proposal for Business Name (#642931)
import regex
rgx = regex.compile(r'(?si)(?|{0}(.*?){1}|{1}(.*?)
{0})'.format('for', '('))
s1 = 'client'
for s in [s1]:
m = rgx.findall
for x in m:
print x.strip()
I have also tried this and it did not work.
start = mystring.find( 'for' )
end = mystring.find( '(' )
if start != -1 and end != -1:
result = mystring[start+1:end]
I am looking for Business Name to be returned in my example.
Fastest way:
start = client.find('for')
end = client.find('(')
result = client[start+4:end-1]
print(result)
With regex:
result = re.search(r' for (.*) [(]', client)
print(result.group(1))
There is probably a cleaner way to do this, but here is another solution without regex
client = "Reminder: Leruths has sent you a proposal for Business Name (#642931)"
cs = client.split(" ")
name = ""
append = False
for word in cs:
if "for" == word:
append = True
elif word.startswith("("):
append = False
if append is True and word != "for":
name += (word + " ")
name = name.strip()
print(name)
Another method:
client = "Reminder: Leruths has sent you a proposal for Business Name (#642931)"
cs = client.split(" ")
name = ""
forindex = cs.index("for")
for i in range(forindex+1, len(cs)):
if cs[i].startswith("("):
break
name += cs[i] + " "
name = name.strip()
print(name)
Running the code below gives:
Regex method took 2.3912417888641357 seconds
Search word by word method took 4.78193998336792 seconds
Search with list index method took 3.1756017208099365 seconds
String indexing method took 0.8496286869049072 seconds
Code to check the fastest to get the name over a million tries:
import re
import time
client = "Reminder: Leruths has sent you a proposal for Business Name (#642931)"
def withRegex(client):
    """Return the text between ' for ' and the first following ' ('.

    The group is non-greedy so that, like the word-based variants, it stops at
    the first opening parenthesis. Returns '' when the pattern is not found.
    """
    result = re.search(r' for (.*?) [(]', client)
    return result.group(1) if result else ''
def searchWordbyWord(client):
    """Return the words between the first 'for' and the next '(' word.

    Scans the space-separated words once: capturing starts after the first
    word equal to 'for' and stops at the first later word that begins with
    '('. A 'for' inside the name itself is kept. Returns '' when 'for' never
    appears.
    """
    collected = []
    capturing = False
    for word in client.split(" "):
        if capturing:
            if word.startswith("("):
                break
            collected.append(word)
        elif word == "for":
            capturing = True
    return " ".join(collected).strip()
def searchWithListIndex(client):
    """Return the words after the first 'for' up to the next '(' word.

    Locates 'for' with list.index and collects the following words until one
    begins with '('. Returns '' when 'for' is absent, matching
    searchWordbyWord, where the original raised ValueError.
    """
    cs = client.split(" ")
    try:
        start = cs.index("for") + 1
    except ValueError:
        return ""
    collected = []
    for word in cs[start:]:
        if word.startswith("("):
            break
        collected.append(word)
    return " ".join(collected).strip()
def stringIndexing(client):
    """Return the text between the word 'for' and the next '('.

    Searches for ' for ' (with surrounding spaces) so that 'for' inside a
    longer word such as 'information' is not mistaken for the keyword. The
    input is padded with one leading space so a subject starting with 'for '
    still matches. Returns '' when the keyword is absent; when no '(' follows,
    the rest of the string is returned.
    """
    marker = " for "
    padded = " " + client
    start = padded.find(marker)
    if start == -1:
        return ""
    start += len(marker)
    end = padded.find("(", start)
    if end == -1:
        end = len(padded)
    return padded[start:end].strip()
# Time each extraction strategy over one million calls on the same subject.
benchmarks = [
    ("Regex method", withRegex),
    ("Search word by word method", searchWordbyWord),
    ("Search with list index method", searchWithListIndex),
    ("String indexing method", stringIndexing),
]
for label, func in benchmarks:
    started = time.time()
    # range(1000000) is exactly one million iterations; the original
    # range(1, 1000000) ran one fewer.
    for _ in range(1000000):
        func(client)
    elapsed = time.time() - started
    print(label + " took " + str(elapsed) + " seconds")

How can I find the largest number from the given string in Python?

I have two strings: 'This is a test as146634546576 string 12312523' and 'This is a test as576 string 12344612523'
I want to print the largest number in each, i.e. 146634546576 and 12344612523 respectively. I have written the following code, but it prints only 146634546576 and 576, whereas the second result should be 12344612523 instead of 576!
# Question's attempt: scan `text` for runs of digits and return the largest
# one as an int.
def findLargestNumber(text):
# front holds the start index of the digit run currently being scanned,
# or -1 while outside a run.
front = -1
li = []
li1 = []
for i in range(len(text)):
if front == -1:
if text[i].isdigit():
front = i
else:
continue
else:
if text[i].isdigit():
continue
else:
# A run is recorded only when a following non-digit is seen, so a number
# that runs to the end of `text` is never appended. The slice [front:i+1]
# also includes that non-digit character.
li.append(int(text[front:i+1]))
front = -1
# NOTE(review): indentation was lost in this listing; if this return sits at
# function level (as the reported output suggests), nothing below it runs.
return max(li)
#print max(li)
# int(w) raises ValueError for any word that is not purely numeric.
for w in text.split():
li1.append(int(w))
return max(li1)
#print max(li1)
if max(li)>max(li1):
return max(li)
else:
return max(li1)
print findLargestNumber('This is a test as146634546576 string 12312523')
print findLargestNumber('This is a test as576 string 12344612523')
Use max() with re.findall:
import re
a = 'This is a test as576 string 12344612523'
print(max(map(int, re.findall(r'\d+', a))))
# 12344612523
import re
a = 'This is a test as146634546576 string 12312523'
b = 'This is a test as576 string 12344612523'
num_in_a = re.findall(r'[\d]+', a)
num_in_b = re.findall(r'[\d]+', b)
print(max(map(int, num_in_a)))
print(max(map(int, num_in_b)))
Output:
146634546576
12344612523
import re

# Compiled once at module level; matches every maximal run of digits.
pa = re.compile(r'(\d+)')


def findLargestNumber(text):
    """Return the largest integer written in *text*, or None if there is none.

    Every maximal run of digits counts as one number, whether it stands alone
    or is glued to letters (e.g. 'as576').
    """
    numbers = [int(x) for x in pa.findall(text)]
    return max(numbers) if numbers else None


print(findLargestNumber('This is a test as576 string 12344612523'))
print(findLargestNumber('This is a test as146634546576 string 12312523'))

substitute only some numbers by their decrement

I have this string: a9*a9 + a10*a10
and I would like to have: a9*a8 + a10*a9
I think re.sub() from Python should be useful, but I am not familiar with group() that I've seen in some examples. Any help would be appreciated.
here's another solution method:
import re
s = 'a9*a9 + a10*a10 + a8*a8 + a255*a255 + b58*b58 + c58*c58'
string = re.sub('[ ]', '', s) # removed whitespace from string (optional:only if you are not sure how many space you can get in string)
x = string.split('+')
pattern = re.compile(r'([a-z])([\d]+)')
ans = ''
for element in x:
for letter, num in re.findall(pattern, element):
st = ''
for i in range(len(element.split('*'))):
st = st + '*' + (letter+str(int(num)-i))
# print(str(letter) + str(int(num)-i))
ans = ans + '+' + st[1:]
print(ans[1:])
Output :
a9*a8+a10*a9+a8*a7+a255*a254+b58*b57+c58*c57
Assuming the structure of the input is a\d*a\d + a\d*a\d + ... you can use a callback in the re.sub function:
import re

# One product term "a<N>*a<M>"; \d+ so multi-digit indices such as a10*a10
# are matched as well.
PRODUCT_RE = re.compile(r'a(\d+)\*a(\d+)')


def decrement(match):
    """re.sub callback: turn 'aN*aN' into 'aN*a(N-1)'.

    Terms whose two indices differ are returned unchanged.
    """
    if match.group(1) != match.group(2):
        return match.group(0)
    return 'a{}*a{}'.format(match.group(1), int(match.group(2)) - 1)


result = PRODUCT_RE.sub(decrement, 'a3*a3 + a5*a5 + a3*a7')
print(result)
# a3*a2 + a5*a4 + a3*a7

Python for loop - stripping line as you go

I am trying to strip a line of code so that only the comment at the end is kept. Because # signs can appear inside "" marks, I am trying to cycle through the line catching pairs of " marks, so that any # marks within "" marks are ignored. When I step through my code below in a code visualiser, after the second for loop it seems to go back to processing s as if it had only just stripped the first " mark. I can't see what I'm doing wrong here: the print statement on line 19 shows that s has been stripped to just after the second ", but when control returns to the outer loop, it starts cycling again from after the first ". Any idea what I am doing wrong here?
s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
quoteCount = 0
for char in s:
if quoteCount%2 == 0:
if char == '#':
s = s[s.index('#'):]
break
if char == '"':
quoteCount = quoteCount + 1
s = s[s.index('"'):]
s = s.lstrip('"')
for char in s:
if char == '"':
quoteCount = quoteCount + 1
s = s[s.index('"'):]
s = s.lstrip('"')
print(s)
break
print(s)
If I understand your question correctly you only want to keep the last comment (#lots of hash(#) symbols here).
To do this you don't need the nested for loop.
s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
quoteCount = 0
for char in s:
if quoteCount%2 == 0:
if char == '#':
s = s[s.index('#'):]
break
if char == '"':
quoteCount = quoteCount + 1
s = s[s.index('"'):]
s = s.lstrip('"')
print(s)
Easier to remove the quoted strings with a regular expression:
import re

s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
# Drop every double-quoted string so that any '#' left over is outside quotes.
pattern = r'"[^"]*"'
s = re.sub(pattern, '', s)
# find() returns -1 instead of raising when the line has no comment.
hash_pos = s.find('#')
comment = s[hash_pos:] if hash_pos != -1 else ''
print(comment)
Output:
#lots of hash(#) symbols here
Your code is overly complicated so I suggest you use an alternative method to finding the comment like the already mentioned regex one or the one I came up with.
s = '("8# " + str" #9 " + line) #lots of hash(#) symbols here'
s = s[s.rfind('"') + 1:] # Get to the last quotation mark
if s.find('#') >= 0: # The first # sign should start the comment if there is one
s = s[s.find('#'):]
else:
s = '' # No comment was found
print(s)

Extract values from string

I want to extract certain values from a string in python.
snp_1_881627 AA=G;ALLELE=A;DAF_GLOBAL=0.473901;GENE_TRCOUNT_AFFECTED=1;GENE_TRCOUNT_TOTAL=1;SEVERE_GENE=ENSG00000188976;SEVERE_IMPACT=SYNONYMOUS_CODON;TR_AFFECTED=FULL;ANNOTATION_CLASS=REG_FEATURE,SYNONYMOUS_CODON,ACTIVE_CHROM,NC_TRANSCRIPT_VARIANT,NC_TRANSCRIPT_VARIANT;A_A_CHANGE=.,L,.,.,.;A_A_LENGTH=.,750,.,.,.;A_A_POS=.,615,.,.,.;CELL=GM12878,.,GM12878,.,.;CHROM_STATE=.,.,11,.,.;EXON_NUMBER=.,16/19,.,.,.;GENE_ID=.,ENSG00000188976,.,ENSG00000188976,ENSG00000188976;GENE_NAME=.,NOC2L,.,NOC2L,NOC2L;HGVS=.,c.1843N>T,.,n.3290N>T,n.699N>T;REG_ANNOTATION=H3K36me3,.,.,.,.;TR_BIOTYPE=.,PROTEIN_CODING,.,PROCESSED_TRANSCRIPT,PROCESSED_TRANSCRIPT;TR_ID=.,ENST00000327044,.,ENST00000477976,ENST00000483767;TR_LENGTH=.,2790,.,4201,1611;TR_POS=.,1893,.,3290,699;TR_STRAND=.,-1,.,-1,-1
Output:
GENE_ID GENE_NAME EXON_NUMBER SEVERE_IMPACT
snp_1_881627 ENSG00000188976 NOC2L 16/19 SYNONYMOUS_CODON
If the string has a value for each of those variables (GENE_ID, GENE_NAME, EXON_NUMBER), output it; otherwise output "NA" (the variable does not exist or has no value). In some cases these variables do not exist in the string.
Which string method should I use to accomplish this? Should I split my string before extracting any values? I have 10k rows, and I need to extract the values for each snp_*.
string=string.split(';')
P.S. I am a newbie in python
There are two general strategies for this - split and regex.
To use split, first split off the row label (snp_1_881627):
rowname, data = row.split()
Then, you can split data into the individual entries using the ; separator:
data = data.split(';')
Since you need to get the value of certain keys, we can turn it into a dictionary:
dataDictionary = {}
for entry in data:
entry = entry.split('=')
dataDictionary[entry[0]] = entry[1] if len(entry) > 1 else None
Then you can simply check if the keys are in dataDictionary, and if so grab their values.
Using split is nice in that it will index everything in the data string, making it easy to grab whichever ones you need.
If the ones you need will not change, then regex might be a better option:
>>> import re
>>> re.search('(?<=GENE_ID=)[^;]*', 'onevalue;GENE_ID=SOMETHING;othervalue').group()
'SOMETHING'
Here I'm using a "lookbehind" to match one of the keywords, then grabbing the value from the match using group(). Putting your keywords into a list, you could find all the values like this:
import re
...
keywords = ['GENE_ID', 'GENE_NAME', 'EXON_NUMBER', 'SEVERE_IMPACT']
desiredValues = {}
for keyword in keywords:
match = re.search('(?<={}=)[^;]*'.format(keyword), string_to_search)
desiredValues[keyword] = match.group() if match else DEFAULT_VALUE
I think this is going to be the solution you are looking for.
#input
user_in = 'snp_1_881627 AA=G;ALLELE=A;DAF_GLOBAL=0.473901;GENE_TRCOUNT_AFFECTED=1;GENE_TRCOUNT_TOTAL=1;SEVERE_GENE=ENSG00000188976;SEVERE_IMPACT=SYNONYMOUS_CODON;TR_AFFECTED=FULL;ANNOTATION_CLASS=REG_FEATURE,SYNONYMOUS_CODON,ACTIVE_CHROM,NC_TRANSCRIPT_VARIANT,NC_TRANSCRIPT_VARIANT;A_A_CHANGE=.,L,.,.,.;A_A_LENGTH=.,750,.,.,.;A_A_POS=.,615,.,.,.;CELL=GM12878,.,GM12878,.,.;CHROM_STATE=.,.,11,.,.;EXON_NUMBER=.,16/19,.,.,.;GENE_ID=.,ENSG00000188976,.,ENSG00000188976,ENSG00000188976;GENE_NAME=.,NOC2L,.,NOC2L,NOC2L;HGVS=.,c.1843N>T,.,n.3290N>T,n.699N>T;REG_ANNOTATION=H3K36me3,.,.,.,.;TR_BIOTYPE=.,PROTEIN_CODING,.,PROCESSED_TRANSCRIPT,PROCESSED_TRANSCRIPT;TR_ID=.,ENST00000327044,.,ENST00000477976,ENST00000483767;TR_LENGTH=.,2790,.,4201,1611;TR_POS=.,1893,.,3290,699;TR_STRAND=.,-1,.,-1,-1'

# Columns to report, in output order.
FIELDS = ("GENE_ID", "GENE_NAME", "EXON_NUMBER", "SEVERE_IMPACT")


def _first_value(raw):
    """Return the first entry of a comma list that is not the '.' placeholder, else 'NA'."""
    for item in raw.split(","):
        if item and item != ".":
            return item
    return "NA"


def extract_fields(row, fields=FIELDS):
    """Return the row label and the requested fields as one tab-separated line.

    *row* is '<label> <KEY=VALUE;KEY=VALUE;...>'. A value may be a comma list
    padded with '.' placeholders; the first real entry is reported. A field
    that is missing, or that holds only placeholders, is reported as 'NA'.
    """
    parts = row.split(None, 1)
    label = parts[0] if parts else ""
    data = parts[1] if len(parts) > 1 else ""
    info = {}
    for entry in data.split(";"):
        key, sep, value = entry.partition("=")
        if sep:
            info[key] = value
    columns = [label]
    columns.extend(_first_value(info.get(name, "")) for name in fields)
    return "\t".join(columns)


final_output = extract_fields(user_in)
print(final_output)

Categories

Resources