Python: get the range between two dotted numbers - python

I am trying to get the range of numbers between two dotted numbers, like 2.1.0 and 2.1.3.
My requirement is that the first two numbers need to be the same (so not 2.1.0 to 2.2.0)
What I want to get out is:
['2.1.0', '2.1.1', '2.1.2', '2.1.3']
Here is what I have tried, and it works, but I want to know if there is a better way to do it.
start = "2.1.0"
end = "2.1.3"
def get_dotted_range(start,end):
start_parts = start.split(".")
end_parts = end.split(".")
# ensure the versions have the same number of dotted sections
if len(start_parts) != len(end_parts):
return False
# ensure first 2 numbers are the same
for i in range(0,len(start_parts[:-1])):
if start_parts[i] != end_parts[i]:
# part is different betwen start and end!
return False
new_parts = []
# ensure last digit end is higher than start
if int(end_parts[-1]) >= int(start_parts[-1]):
# append the version to the return list
for i in range(int(start_parts[-1]),int(end_parts[-1]) + 1):
new_parts.append("%s.%s.%s" % (start_parts[0],start_parts[1],i))
else:
return False # end is lower than start
return new_parts

start = "2.1.0"
end = "2.1.3"
startFirst, startMiddle, startLast = map(int, start.split("."))
_, _, endLast = map(int, end.split("."))
dottedRange = [".".join(map(str, [startFirst, startMiddle, x]))
for x in range(startLast, 1+endLast)]

start = "2.1.0"
end = "2.1.3"
def get_dotted_range(start, end):
# break into number-pieces
start = start.split(".")
end = end .split(".")
# remove last number from each
x = int(start.pop())
y = int(end .pop())
# make sure start and end have the same number of sections
# and all but the last number is the same
if start != end:
return []
else:
head = ".".join(start) + "."
return [head + str(i) for i in range(x, y + 1)]
then
In [67]: get_dotted_range(start, end)
Out[67]: ['2.1.0', '2.1.1', '2.1.2', '2.1.3']

One way:
def get_dotted_range(start, end):
sparts = start.split('.')
eparts = end.split('.')
prefix = '.'.join(sparts[0:-1])
slast = int(sparts[-1])
elast = int(eparts[-1])
return [prefix + '.' + str(i) for i in range(slast, elast + 1)]
print(get_dotted_range('2.1.0', '2.1.3'))
print(get_dotted_range('2.1.9', '2.1.12'))
results in:
['2.1.0', '2.1.1', '2.1.2', '2.1.3']
['2.1.9', '2.1.10', '2.1.11', '2.1.12']

start = "2.1.0"
end = "2.1.3"
# split once to get last value
s_spl, e_spl = start.rsplit(".",1), end.rsplit(".",1)
# get first part of string to join up later
pre = s_spl[0]
# make sure first two parts are identical
if pre == e_spl[0]:
# loop in range from last element of start
# up to and including last element of end
out = ["{}.{}".format(pre, i) for i in range(int(s_spl[1]), int(e_spl[1]) + 1)]
print(out)
print(out)
['2.1.0', '2.1.1', '2.1.2', '2.1.3']
So in a function we would return a list or False:
def get_dotted_range(start,end):
s_spl, e_spl = start.rsplit(".", 1), end.rsplit(".", 1)
pre = s_spl[0]
if pre == e_spl[0]:
return ["{}.{}".format(pre, i) for i in range(int(s_spl[1]), int(e_spl[1])+1)]
return False
You should also consider the cases where a user enters incorrect data that cannot be cast to an int, the format is incorrect or they enter an empty string so you get an error indexing etc...
def get_dotted_range(start, end):
try:
s_spl, e_spl = start.rsplit(".", 1), end.rsplit(".", 1)
if s_spl[0] == e_spl[0]:
pre = s_spl[0]
return ["{}.{}".format(pre, i) for i in range(int(s_spl[1]), int(e_spl[1]) + 1)]
except ValueError as e:
return "{}: Digit expected".format(e)
except IndexError as e:
return "{}: Input format should be d.d.d ".format(e)
return False
There are other cases you may want to catch like when a user enters the start and end backwards which will end up returning an empty.

Related

Inserting a newline every 40 characters isn't working (but no errors)

I'm trying to insert a newline every 40 characters. I'm very new to Python so I apologize if my code is messed up.
def wrap(msg):
pos = 0
end = msg
for i in end:
pos += 1
if pos % 40 == 0:
print(pos)
end = end[:pos] + "\n" + end[:pos + 2]
return end
This code doesn't work and there isn't any errors. I'm not sure why this happens. It seems to just return the full string??
def wrap(msg):
res = ''
t = len(msg)//40
for i in range(t):
start = i*40
end = (i+1)*40
res += msg[start:end]+'\n'
res += msg[t*40:(t+1)*40]
return res

adding and iterating through linked list backwards to produce backwards sum

I am trying to add two linked lists backward such that they produce a third. I am running into an issue in which I am missing the middle value in my expected out output. I don't understand why this is happening.
Example: 342 + 465 = 807(expected output)
input =
[2]->[4]->[3],
[5]->[6]->[4]
expected = [7]->[0]->[8]
actual = [7]->[8]
carryOver = 0
current1 = l1
current2 = l2
result = None
resultC = None
while current1 is not None:
val = current1.val + current2.val+ carryOver
if val>= 10:
carryOver = val//10
else:
carryOver = 0
val = val%10
if result == None:
result = ListNode(val)
resultC = result
else:
resultC.next = ListNode(val)
current1 = current1.next
current2 = current2.next
if carryOver != 0:
resultC.next = ListNode(carryOver)
return result
This might be what you are looking for.
def add_list(l1,l2):
max_length=max(len(l1),len(l2))
l1,l2=[[0]]*(max_length-len(l1))+l1,[[0]]*(max_length-len(l2))+l2 # padding zeros
l1,l2=l1[::-1],l2[::-1]#temporary reversing of list
carry=0
d=[]
for x in zip(l1,l2):
tempsum=x[0][0]+x[1][0]+carry
carry=0
if tempsum>9:
carry,tempsum=tempsum//10,tempsum%10
d.append([tempsum])
if carry !=0:
d.append([carry])
return d[::-1]

How to turn a linear string into a trie?

I am using the make me a hanzi open-source chinese character dataset. As part of this dataset there are strings which provide the decomposition of chinese characters into their individual units (called radicals). I want to turn the strings describing the decomposition of characters into tries (so that I can use networkx to render the decompositions).
For example for this database entry:
{"character":"⺳","definition":"net, network","pinyin":[],"decomposition":"⿱冖八","radical":"⺳","matches":[[0],[0],[1],[1]]}
The decomposition for this character would be.
- Node(1, char='⿱')
- Node(2, char='冖') # an edge connects '⿱' to '冖'
- Node(3, char='八') # an edge connects '⿱' to '八'
So far, I have come up with a script to turn the string decompositions into dictionaries (but not graphs).
decomposition_types = {
'top-bottom': '⿱',
'left-right': '⿰',
'diagonal-corners': '⿻',
'over-under': '⿺',
'under-over': '⿹',
'over-under-reversed': '⿸',
'top-bottom-middle': '⿳',
'left-right-middle': '⿲',
'inside-outside': '⿴',
'outside-inside': '⿵',
'outside-inside2': '⿷',
'inside-outside2': '⿶'
# 'unknown': '?'
}
decomposition_types_reversed = dict(((value, key) for key, value in decomposition_types.items()))
file = []
if not os.path.isfile('data/dictionary.json'):
with open('data/dictionary.txt') as d:
for line in d:
file.append(json.loads(line))
for i, item in enumerate(file):
item['id'] = i + 1
json.dump(file, open('data/dictionary.json', 'w+'))
else:
file = json.load(open('data/dictionary.json'))
def is_parsed(blocks):
for block in blocks:
if not block['is_unit']:
return False
return True
def search(character, dictionary=file):
for hanzi in dictionary:
if hanzi['character'] == character:
return hanzi
return False
def parse(decomp):
if len(decomp) == 1:
return {"spacing": '?'}
blocks = []
n_loops = 0
for item in decomp:
blocks.append({"char": item, "is_spacing": item in decomposition_types_reversed, "is_unit": False})
while not is_parsed(blocks):
for i, item in enumerate(blocks):
if "is_spacing" in item:
if item['is_spacing']:
next_items = decomposition_types_reversed[item['char']].count('-') + 1
can_match = True
for x in blocks[i + 1:i + 1 + next_items]:
try:
if x['char'] in decomposition_types_reversed:
can_match = False
except KeyError:
pass
if can_match:
blocks[i] = {"spacing": item['char'],
"chars": [l['char'] if 'char' in l else l for l in
blocks[i + 1:i + 1 + next_items]],
"is_unit": True}
del blocks[i + 1:i + 1 + next_items]
n_loops += 1
if n_loops > 10:
print(decomp)
sys.exit()
return blocks

How would I find the nearest space per every 2000 characters and add everything before it to a variable? (Python)

I'm currently writing a program that posts messages with a 2,000 character limit. The string I'm using is usually around 10,000 characters. However, the method I'm using to divide it up can divide the string up in the middle of a word - how would I only have it divide at the nearest space (BEFORE) 2k characters?
Here's the current code:
text = str(post.selftext)
title = await client.send_message(message.author, str(post.title))
if len(text) > 1990:
amountsplit = math.ceil(len(text) / 1990)
atatime = math.floor(len(text) / amountsplit)
donetimes = 0
lastone = 0
for i in range(amountsplit):
todonow = int(donetimes + 1) * atatime
tmp = await client.send_message(message.author, str(text[lastone:todonow]))
lastone = todonow
donetimes += 1
U can use rfind() method of str:
from __future__ import print_function
def split_message(message, character_limit=2000):
messages = []
while len(message) > character_limit:
split_index = message[:character_limit].rfind(" ")
if split_index == -1:
# No space found, just split at the character limit
split_index = character_limit
else:
# Else space is found, split after the space
split_index += 1
messages.append(message[:split_index])
message = message[split_index:]
messages.append(message)
return messages
# Test code
test_string = "this is a test string to see if this works right"
test_string = "thisisateststringwithnospaces"
for character_limit in range(1, 10):
print ("limit", character_limit, ": ", split_message(test_string, character_limit))
you can take the characters and read it from the end (with a decremental loop for until find space) It should look like :
for i in range(len(text)-1, 0, -1):
if text[i]==' ':
break
print "found the last space !"
EDIT
VeryLongText="the very long text..[..]....here"
text = VeryLongText[0:1999]
for i in range(len(text)-1, 0, -1):
if text[i]==' ':
Var=text[0:i]
break
print Var
Var will be your text (the first 2000 characters of the long text) until the last space
Use a generator.
def messages_from_post(post, limit=2000):
while len(post) > limit:
try:
i = post.rindex(' ', 0, limit + 1)
except ValueError:
i = limit
message, post = map(str.lstrip, [post[:i], post[i:]])
yield message
yield post

Extract values from string

I want to extract certain values from a string in python.
snp_1_881627 AA=G;ALLELE=A;DAF_GLOBAL=0.473901;GENE_TRCOUNT_AFFECTED=1;GENE_TRCOUNT_TOTAL=1;SEVERE_GENE=ENSG00000188976;SEVERE_IMPACT=SYNONYMOUS_CODON;TR_AFFECTED=FULL;ANNOTATION_CLASS=REG_FEATURE,SYNONYMOUS_CODON,ACTIVE_CHROM,NC_TRANSCRIPT_VARIANT,NC_TRANSCRIPT_VARIANT;A_A_CHANGE=.,L,.,.,.;A_A_LENGTH=.,750,.,.,.;A_A_POS=.,615,.,.,.;CELL=GM12878,.,GM12878,.,.;CHROM_STATE=.,.,11,.,.;EXON_NUMBER=.,16/19,.,.,.;GENE_ID=.,ENSG00000188976,.,ENSG00000188976,ENSG00000188976;GENE_NAME=.,NOC2L,.,NOC2L,NOC2L;HGVS=.,c.1843N>T,.,n.3290N>T,n.699N>T;REG_ANNOTATION=H3K36me3,.,.,.,.;TR_BIOTYPE=.,PROTEIN_CODING,.,PROCESSED_TRANSCRIPT,PROCESSED_TRANSCRIPT;TR_ID=.,ENST00000327044,.,ENST00000477976,ENST00000483767;TR_LENGTH=.,2790,.,4201,1611;TR_POS=.,1893,.,3290,699;TR_STRAND=.,-1,.,-1,-1
Output:
GENE_ID GENE_NAME EXON_NUMBER SEVERE_IMPACT
snp_1_881627 ENSG00000188976 NOC2L 16/19 SYNONYMOUS_CODON
If the string has values for each of those variables(GENE_ID,GENE_NAME,EXON_NUMBER) existing then output, else "NA"(variables don't exist or their values don't exist).In some cases,these variables don't exist in the string.
Which string method should I use to accomplish this?Should I split my string before extracting any values?I have 10k rows to extract values for each snp_*
string=string.split(';')
P.S. I am a newbie in python
There are two general strategies for this - split and regex.
To use split, first split off the row label (snp_1_881627):
rowname, data = row.split()
Then, you can split data into the individual entries using the ; separator:
data = data.split(';')
Since you need to get the value of certain keys, we can turn it into a dictionary:
dataDictionary = {}
for entry in data:
entry = entry.split('=')
dataDictionary[entry[0]] = entry[1] if len(entry) > 1 else None
Then you can simply check if the keys are in dataDictionary, and if so grab their values.
Using split is nice in that it will index everything in the data string, making it easy to grab whichever ones you need.
If the ones you need will not change, then regex might be a better option:
>>> import re
>>> re.search('(?<=GENE_ID=)[^;]*', 'onevalue;GENE_ID=SOMETHING;othervalue').group()
'SOMETHING'
Here I'm using a "lookbehind" to match one of the keywords, then grabbing the value from the match using group(). Putting your keywords into a list, you could find all the values like this:
import re
...
keywords = ['GENE_ID', 'GENE_NAME', 'EXON_NUMBER', 'SEVERE_IMPACT']
desiredValues = {}
for keyword in keywords:
match = re.search('(?<={}=)[^;]*'.format(keyword), string_to_search)
desiredValues[keyword] = match.group() if match else DEFAULT_VALUE
I think this is going to be the solution you are looking for.
#input
user_in = 'snp_1_881627 AA=G;ALLELE=A;DAF_GLOBAL=0.473901;GENE_TRCOUNT_AFFECTED=1;GENE_TRCOUNT_TOTAL=1;SEVERE_GENE=ENSG00000188976;SEVERE_IMPACT=SYNONYMOUS_CODON;TR_AFFECTED=FULL;ANNOTATION_CLASS=REG_FEATURE,SYNONYMOUS_CODON,ACTIVE_CHROM,NC_TRANSCRIPT_VARIANT,NC_TRANSCRIPT_VARIANT;A_A_CHANGE=.,L,.,.,.;A_A_LENGTH=.,750,.,.,.;A_A_POS=.,615,.,.,.;CELL=GM12878,.,GM12878,.,.;CHROM_STATE=.,.,11,.,.;EXON_NUMBER=.,16/19,.,.,.;GENE_ID=.,ENSG00000188976,.,ENSG00000188976,ENSG00000188976;GENE_NAME=.,NOC2L,.,NOC2L,NOC2L;HGVS=.,c.1843N>T,.,n.3290N>T,n.699N>T;REG_ANNOTATION=H3K36me3,.,.,.,.;TR_BIOTYPE=.,PROTEIN_CODING,.,PROCESSED_TRANSCRIPT,PROCESSED_TRANSCRIPT;TR_ID=.,ENST00000327044,.,ENST00000477976,ENST00000483767;TR_LENGTH=.,2790,.,4201,1611;TR_POS=.,1893,.,3290,699;TR_STRAND=.,-1,.,-1,-1'
#set some empty vars
user_in = user_in.split(';')
final_output = ""
GENE_ID_FOUND = False
GENE_NAME_FOUND = False
EXON_NUMBER_FOUND = False
GENE_ID_OUTPUT = ''
GENE_NAME_OUTPUT = ''
EXON_NUMBER_OUTPUT = ''
SEVERE_IMPACT_OUTPUT = ''
for x in range(0, len(user_in)):
if x == 0:
first_line_count = 0
first_line_print = ''
while(user_in[0][first_line_count] != " "):
first_line_print += user_in[0][first_line_count]
first_line_count += 1
final_output += first_line_print + "\t"
else:
if user_in[x][0:11] == "SEVERE_GENE":
GENE_ID_OUTPUT += user_in[x][12:] + "\t"
GENE_ID_FOUND = True
if user_in[x][0:9] == "GENE_NAME":
GENE_NAME_OUTPUT += user_in[x][10:] + "\t"
GENE_NAME_FOUND = True
if user_in[x][0:11] == "EXON_NUMBER":
EXON_NUMBER_OUTPUT += user_in[x][12:] + "\t"
EXON_NUMBER_FOUND = True
if user_in[x][0:13] == "SEVERE_IMPACT":
SEVERE_IMPACT_OUTPUT += user_in[x][14:] + "\t"
if GENE_ID_FOUND == True:
final_output += GENE_ID_OUTPUT
else:
final_output += "NA"
if GENE_NAME_FOUND == True:
final_output += GENE_NAME_OUTPUT
else:
final_output += "NA"
if EXON_NUMBER_FOUND == True:
final_output += EXON_NUMBER_OUTPUT
else:
final_output += "NA"
final_output += SEVERE_IMPACT_OUTPUT
print(final_output)

Categories

Resources