How do I merge ranged elements in a list? - python

testlist = ["13A", "13B", "13C", "23D", "5D", "9B", "9C", "9D"]
What I want the list to be:
["13A-C", "23D", "5D", "9B-D"]
Bonus points if you can sort it (5,9,13,23).
For those interested, this is my current WIP script:
testlist = ["13A", "13B", "13C", "23D", "5D", "9B", "9C", "9D"]
newlist = []
lenlist = len(testlist)
for i in range(lenlist):
#check values of first
indexnum = testlist[i][:-1]
indexchar = testlist[i][-1]
if i == 0:
newlist.append(testlist[i])
if indexnum == (testlist[i-1][:-1]):
newlistvalue = (indexnum + (testlist[i-1][-1]) + "-" + (testlist[i][-1]))
if ((indexchar == "B") and ((testlist[i-1][-1]) == "A")) or ((indexchar == "D") and ((testlist[i-1][-1]) == "C")):
newlist.append(newlistvalue)
lastval = newlist[len(newlist)-1][-1]
lastval2 = newlist[(len(newlist)-2)]
#print(lastval2)
if (indexchar == "C") and (lastval == "B"):
newlistvalue = lastval2[:-1] + indexchar
#print(newlistvalue)
newlist.pop()
newlist.pop()
#print(newlistvalue)
newlist.append(newlistvalue)
else:
newlist.append(testlist[i])
print (newlist)
#print (newlistvalue)

First you'd need to create a dict of the numbers and letters, I assume there will only be one letter in each string. Then you need to sort it and format it. You can use the following:
from collections import defaultdict

# Group the trailing letters by their numeric prefix, e.g. "13" -> ["A", "B", "C"].
# Assumes every entry ends in exactly one letter.
pairs = defaultdict(list)
for s in testlist:
    pairs[s[:-1]].append(s[-1])

# Build one entry per number, ordered numerically rather than lexically.
# min()/max() give the range endpoints regardless of input order, and
# dict.fromkeys() collapses them when they are the same letter, so a lone
# letter is rendered as "5D" instead of "5D-D".
result = [
    f'{k}{"-".join(dict.fromkeys([min(v), max(v)]))}'
    for k, v in sorted(pairs.items(), key=lambda x: int(x[0]))
]
['5D', '9B-D', '13A-C', '23D']

On the assumption that each string in the list ends with exactly one letter, you could do this:
import re
testlist = ["13A", "13C", "13B", "23D", "5D", "9B", "9C", "9D"]
def seq(lst):
    """Render a list of letters as a range label.

    A single-element list yields that element unchanged; any longer list
    yields "<first>-<last>". The caller is expected to pass the letters
    already sorted, since only the two end elements are inspected.
    """
    return lst[0] if len(lst) == 1 else f'{lst[0]}-{lst[-1]}'
def key(e):
    """Sort key: the first run of digits in ``e`` as an integer.

    Used so that entries such as '5D' and '13A-C' order numerically
    (5 before 13) instead of lexically.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    return int(re.search(r'\d+', e)[0])
# Bucket the trailing letter of every entry under its integer prefix,
# e.g. 13 -> ['A', 'C', 'B'].
d = {}
for e in testlist:
    d.setdefault(int(e[:-1]), []).append(e[-1])

# Letters are sorted per bucket so that out-of-order input still produces the
# correct range endpoints; the finished labels are then ordered by number.
print(sorted((f'{k}{seq(sorted(v))}' for k, v in d.items()), key=key))
Output:
['5D', '9B-D', '13A-C', '23D']
Note:
Subtle change to OP's data to show that this code can handle out-of-sequence values

Related

I want to make a txt into a dict in python

so i have the following data:
Apples = 1
Bananas = 1
Box_Cashew =
{
Cashew = 1
}
Dragonsfruit = 2
Crate_box_epox=
{
box_epox =
{
epox = 2
}
}
and want to make a Dictionary from this txt, as it follows:
{'Apples':'1' , 'Bananas' : '1' , 'Box_Cashew' : {'Cashew':'1'} , 'Dragonsfruit' : '2', 'Crate_box_epox' : { 'box_epox' : {'epox':'2'}}}
I tried reading the file line by line with the code below, but I don't know what to do when I encounter a dict within a dict.
edit:
@PrestonM and @juanpa.arrivillaga
The text file:
unit=9023
state=1411
flags=
{
1NobelChemistry=yes
1NobelLiterature=yes
1NobelMedicine=yes
}
worldmarket=
{
worldmarket_pool=
{
ammunition=204.50766
}
}
The code:
text_file = open("teste.v2", "r")
lines = text_file.readlines()
d={}
for line in lines:
try:
(key1, val) = line.replace('\t','').replace('\n','').split('=')
d[str(key1)] = val
except:
pass
result:
>>>d
{'unit':'9023' , 'state':'1411' , 'flags':{},'1NobelChemistry':'yes' , '1NobelLiterature':'yes' , '1NobelMedicine':'yes','worldmarket':{},'worldmarket_pool':{},'ammunition':'204.50766'}
desired result:
>>>d
{'unit':'9023' , 'state':'1411' , 'flags':{ '1NobelChemistry':'yes' , '1NobelLiterature':'yes' , '1NobelMedicine':'yes'},'worldmarket':{'worldmarket_pool':{'ammunition':'204.50766'}}}
The following seems to work in my tests. I hope the comments and text in the exceptions makes it clear what's being done.
In your code, you're simply adding everything to the same dictionary, which cannot produce the result you're after. As soon as { is encountered, you want to start adding key/value pairs to a new dictionary, that's actually stored in the old dictionary. To accomplish this, the code below keeps track of these dictionaries in a list, adding one if necessary, and removing one from the list to get back to the previous dictionary.
# Stack of the dictionaries currently being filled. Index 0 is always the
# innermost (active) dictionary; the last element is the top-level result.
dictStack = [{}]
# Set when a "key=" line with no value was just read; the next non-empty line
# must then be "{", which opens the nested dictionary stored under that key.
currentKey = None
for l in lines:
    l = l.strip()  # Remove whitespace at start/end
    if not l:  # skip empty line
        continue
    if l == "{":
        if currentKey is None:
            raise Exception("Current key not set!")
        # Attach a fresh dict to the active one and make it the active dict.
        newDict = {}
        dictStack[0][currentKey] = newDict
        dictStack.insert(0, newDict)
        currentKey = None
    elif l == "}":
        if currentKey is not None:
            raise Exception("Current key is set, expecting {")
        if len(dictStack) == 1:
            raise Exception("Can't remove the final dict, there seems to be an extra '}'")
        # Done with the nested dict: go back to its parent.
        dictStack.pop(0)
    else:
        if currentKey is not None:
            raise Exception("Current key is set, expecting {")
        if "=" not in l:
            raise Exception("Expecting '=' in '{}'".format(l))
        # Split on the first "=" only, so a value that itself contains "="
        # does not break the two-name unpacking.
        key, value = l.split("=", 1)
        key, value = key.strip(), value.strip()  # remove whitespace
        if not value:
            currentKey = key
        else:
            dictStack[0][key] = value
if currentKey is not None:
    # Input ended right after a "key=" line without its "{ ... }" block.
    raise Exception("Current key is set, expecting {")
if len(dictStack) != 1:
    raise Exception("Still more than one dict in the stack")
result = dictStack[0]
Here is my solution which uses recursion:
import re
def text2dict(text):
    """Parse ``key=value`` text with brace-delimited nesting into a dict.

    Each non-blank line is either ``key=value``, ``key=`` (which must be
    followed by a line holding only ``{`` that opens a nested block), or a
    line holding only ``}`` that closes the current block. All values are
    kept as strings; nested blocks become nested dicts.

    Raises:
        ValueError: on a line without ``=``, a ``key=`` line that is not
            followed by ``{``, or unbalanced braces.
    """
    def parse(ls, i, nested):
        # Parse entries starting at index i. Always returns (dict, index),
        # where index is the position of the closing "}" for a nested block
        # or len(ls) for the top level.
        d = {}
        while i < len(ls):
            line = ls[i]
            if line == "}":
                if not nested:
                    raise ValueError("unexpected '}' at top level")
                return d, i
            m = re.match(r"(.*)=(.*)", line)
            if m is None:
                raise ValueError("expected 'key=value', got {!r}".format(line))
            k = m.group(1).strip()
            v = m.group(2).strip()
            if not v:
                # An empty value announces a nested block: the next line must
                # be "{", and the block's content starts on the line after it.
                if i + 1 >= len(ls) or ls[i + 1] != "{":
                    raise ValueError("expected '{{' after {!r}".format(line))
                v, i = parse(ls, i + 2, True)
            d[k] = v
            i += 1
        if nested:
            raise ValueError("missing closing '}'")
        return d, i

    # Blank lines (including the empty string left by a trailing newline)
    # carry no information and would otherwise fail the key=value match.
    stripped = [l.strip() for l in text.split("\n")]
    result, _ = parse([l for l in stripped if l], 0, False)
    return result
with open("file.txt") as f:
text = f.read()
print(text2dict(text))
def make_dict(text):
    """Print ``text`` rewritten as a Python dict-literal string and return it.

    ``text`` uses one ``key=value`` per line, with ``key=`` followed by a
    ``{ ... }`` block (braces on their own lines) for nested dictionaries.
    Keys and values are emitted as quoted strings. A comma is written after
    every value and after every closing brace that is followed by another
    entry, so the result is a valid dict literal.
    """
    # Remove all whitespace inside each line (spaces and tabs) and drop blank
    # lines so that look-ahead always sees the next meaningful line.
    rows = ["".join(raw.split()) for raw in text.splitlines()]
    rows = [row for row in rows if row]

    pieces = ["{"]
    for pos, row in enumerate(rows):
        # Treat end of input like a closing brace: nothing follows, no comma.
        following = rows[pos + 1] if pos + 1 < len(rows) else "}"
        separator = "" if following == "}" else ","
        if row == "{":
            pieces.append("{")
        elif row == "}":
            pieces.append("}" + separator)
        else:
            name, _, value = row.partition("=")
            if value:
                pieces.append(repr(name) + ":" + repr(value) + separator)
            else:
                # "key=" with no value: the nested "{ ... }" block comes next.
                pieces.append(repr(name) + ":")
    pieces.append("}")

    literal = "".join(pieces)
    print(literal)
    return literal
make_dict(text)
Result:
{'unit':'9023','state':'1411','flags':{'1NobelChemistry':'yes','1NobelLiterature':'yes','1NobelMedicine':'yes'}'worldmarket':{'worldmarket_pool':{'ammunition':'204.50766'}}}

TypeError: can only concatenate list (not "int") to list [Using a Dictionary]

Essentially, I am creating a count using a dictionary: every time it sees a "1" in the text file, it adds one to the array. However, I keep getting an error.
Letters = ["A","B,"C","D","E","F"]
d= {}
d["A"] = [0]
d["B"] = [0]
d["C"] = [0]
d["D"] = [0]
d["E"] = [0]
file = open('test1.txt','r')
for line in file:
line_array = line.strip("\n").split(",")
for x in range(5):
if line_array[x] == "1":
for y in Letters:
d[y][0] = d[y][0] + 1
BTW, the text file is formatted like this;
1,0,3,0,2
0,2,1,0,3
ETC
EDIT sorry, misworded
You never actually use your dictionary.
# One counter per column of the input file, keyed by the column's letter.
Letters = ["A", "B", "C", "D", "E"]
d = {key: 0 for key in Letters}
print(Letters)
# The with-block closes the file even if a line fails to parse.
with open('test1.txt', 'r') as file:
    for line in file:
        line_array = line.strip("\n").split(",")
        # Pair each column with its letter directly instead of scanning
        # Letters for the matching index; zip() also tolerates a blank or
        # short line instead of raising IndexError.
        for value, cell in zip(Letters, line_array):
            if cell == "1":
                d[value] = d[value] + 1
#print(candidatescores) # No idea where this comes from

Removing the last character of a tuple list inside a 'for' loop?

for start_index in range(0, len(bravo_uncode_message),q):
new_list.extend(bravo_uncode_message[start_index:start_index + q])
new_list.append('0')
I have not been able to delete the last 0 that I append to my tuple list. I have tried new_list.pop() and new_list[:-1]; both work, but they also delete 64 more strings that I had, and the number of strings is 128.
import binascii
response=raw_input("Decrypt a message or encrypt a message? D/E : ")
if response== "E":
message_to_encrypt=raw_input("Introduce message to encrypt: ")
key= raw_input("Insert key:")
abc=['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
if any(word in key for word in abc):
print 'Not a valid key...'
elif len(key)!= 7:
print 'Invalid key'
else:
key=key*len(message_to_encrypt)
binary_message = bin(int(binascii.hexlify(message_to_encrypt), 16))
print binary_message
binary_message= list(binary_message)
del binary_message[0]
print binary_message
del binary_message[0]
recovery_number = binary_message[0]
print binary_message
del binary_message[8 - 1::8]
print binary_message
binary_message = ''.join(map(str,binary_message))
print binary_message
print key
a = binary_message
b = key
xored_message= int(a,2) ^ int(b,2)
print bin(xored_message)[2:].zfill(len(a))
print message_to_encrypt
elif response== "D":
encrypted_message = raw_input("Introduce message to decrypt: ")
beta_uncode_message = [tuple(map(int, format(i, '07b'))) for i in range(128)]
g=len(encrypted_message)/7
n=0
for i in beta_uncode_message:
n=n+1
if n<129:
alpha_uncode_message=beta_uncode_message[n-1]*g
x=tuple(map(str,alpha_uncode_message))
alpha_uncode_message=x
a=encrypted_message
b=alpha_uncode_message
b=''.join(map(str,b))
charlie_uncode_message = int(a, 2) ^ int(b, 2)
bravo_uncode_message=bin(charlie_uncode_message)[2:].zfill(len(a))
new_list = []
q = 7
for start_index in range(0, len(bravo_uncode_message),q):
new_list.extend(bravo_uncode_message[start_index:start_index + q])
new_list.append('0')
bravo_uncode_message = new_list
bravo_uncode_message = ''.join(map(str, bravo_uncode_message))
binary_axis = '0b'
bravo_uncode_message = binary_axis + bravo_uncode_message
print bravo_uncode_message
k = int(bravo_uncode_message, 2)
uncode_message = binascii.unhexlify('%x' % k)
print uncode_message
if n>129:
break
else:
break
Above is my code. The first part is the where I have a problem with when using new_list.pop() and new_list[:-1] — bravo_uncode_message loses 64 strings.
UPDATE:
Convert the binary string to int:
new_list = int(new_list, 2);
Then run new_list[:-1]
Afterward, convert new_list back to binary:
new_list = bin(new_list)
I haven't tried this code, I hope it works.

Cant make a txt output at proper format

This is my code. The problem is that the output looks like this
2015-06-03 19:32:11.225085
{'2015-01-21-20:56:45.mp3': 1}{'negative': -2}{'2015-01-15-21:28:23.mp3': 1}
I want the output to be a single dictionary, like the one below, so that I can read it back as a dictionary, remove the keys of the first subset sum, then output a second one, and so on until no other subset sum exists.
2015-06-03 19:32:11.225085
{'2015-01-21-20:56:45.mp3': 1, 'negative': -2, '2015-01-15-21:28:23.mp3': 1}
Any ideas?
Thanks in advance.
import os, sys,re,gzip, pickle
from itertools import combinations
import json
from datetime import datetime
mp3folder = raw_input('Please copy paste the mp3s path:')
lowerin = input('Please enter your total playlist time in NEGATIVE seconds and hit ENTER:')
r = {}
drk = os.listdir(mp3folder)
drifiles = list(drk)
r = dict.fromkeys(drifiles, 0)
for key in r.keys():
print ('Please enter the duration of...')
print(key)
r[key] = input('in seconds and hit ENTER:')
r['negative'] = lowerin
d = {}
neg = 0
pos = 0
dates = datetime.now()
dates = str(dates)
f = open("dict.txt",'ab')
f.write('\n'+dates+'\n')
f.close()
for (w,v) in r.iteritems():
if v > 0: pos += v
else: neg += v
sums = [0] * (pos - neg + 1)
for (w,v) in r.iteritems():
s = sums[:]
if not s[v - neg]: s[v - neg] = (w,)
for (i, w2) in enumerate(sums):
if w2 and not s[i + v]:
s[i + v] = w2 + (w,)
sums = s
if s[-neg]:
for x in s[-neg]:
d = dict([(x, r[x])])
file('dict.txt','a'+'\n').write(repr(d))
break
f = open('dict.txt','r')
filedata = f.read()
f.close()
newdata = filedata.replace("}{",", ")
f = open('lexiko.txt','w')
f.write(newdata)
f.close()
di = eval(open("lexiko.txt").read())
print di
this will do it

Extract values from string

I want to extract certain values from a string in python.
snp_1_881627 AA=G;ALLELE=A;DAF_GLOBAL=0.473901;GENE_TRCOUNT_AFFECTED=1;GENE_TRCOUNT_TOTAL=1;SEVERE_GENE=ENSG00000188976;SEVERE_IMPACT=SYNONYMOUS_CODON;TR_AFFECTED=FULL;ANNOTATION_CLASS=REG_FEATURE,SYNONYMOUS_CODON,ACTIVE_CHROM,NC_TRANSCRIPT_VARIANT,NC_TRANSCRIPT_VARIANT;A_A_CHANGE=.,L,.,.,.;A_A_LENGTH=.,750,.,.,.;A_A_POS=.,615,.,.,.;CELL=GM12878,.,GM12878,.,.;CHROM_STATE=.,.,11,.,.;EXON_NUMBER=.,16/19,.,.,.;GENE_ID=.,ENSG00000188976,.,ENSG00000188976,ENSG00000188976;GENE_NAME=.,NOC2L,.,NOC2L,NOC2L;HGVS=.,c.1843N>T,.,n.3290N>T,n.699N>T;REG_ANNOTATION=H3K36me3,.,.,.,.;TR_BIOTYPE=.,PROTEIN_CODING,.,PROCESSED_TRANSCRIPT,PROCESSED_TRANSCRIPT;TR_ID=.,ENST00000327044,.,ENST00000477976,ENST00000483767;TR_LENGTH=.,2790,.,4201,1611;TR_POS=.,1893,.,3290,699;TR_STRAND=.,-1,.,-1,-1
Output:
GENE_ID GENE_NAME EXON_NUMBER SEVERE_IMPACT
snp_1_881627 ENSG00000188976 NOC2L 16/19 SYNONYMOUS_CODON
If the string has values for each of those variables (GENE_ID, GENE_NAME, EXON_NUMBER), output them; otherwise output "NA" (the variable doesn't exist or its value doesn't exist). In some cases, these variables don't exist in the string.
Which string method should I use to accomplish this? Should I split my string before extracting any values? I have 10k rows, one per snp_*, to extract values from.
string=string.split(';')
P.S. I am a newbie in python
There are two general strategies for this - split and regex.
To use split, first split off the row label (snp_1_881627):
rowname, data = row.split()
Then, you can split data into the individual entries using the ; separator:
data = data.split(';')
Since you need to get the value of certain keys, we can turn it into a dictionary:
# Map each KEY to its VALUE. partition() splits on the first "=" only, so a
# value that itself contains "=" is kept whole; an entry with no "=" at all
# is stored with the value None.
dataDictionary = {}
for entry in data:
    key, sep, value = entry.partition('=')
    dataDictionary[key] = value if sep else None
Then you can simply check if the keys are in dataDictionary, and if so grab their values.
Using split is nice in that it will index everything in the data string, making it easy to grab whichever ones you need.
If the ones you need will not change, then regex might be a better option:
>>> import re
>>> re.search('(?<=GENE_ID=)[^;]*', 'onevalue;GENE_ID=SOMETHING;othervalue').group()
'SOMETHING'
Here I'm using a "lookbehind" to match one of the keywords, then grabbing the value from the match using group(). Putting your keywords into a list, you could find all the values like this:
import re
...
keywords = ['GENE_ID', 'GENE_NAME', 'EXON_NUMBER', 'SEVERE_IMPACT']
desiredValues = {}
for keyword in keywords:
    # Require a field boundary (start of string, ';' or whitespace) before the
    # keyword so that it cannot match as the tail of a longer key, and escape
    # the keyword in case it ever contains regex metacharacters. The value is
    # everything up to the next ';'.
    pattern = r'(?:^|[;\s]){}=([^;]*)'.format(re.escape(keyword))
    match = re.search(pattern, string_to_search)
    desiredValues[keyword] = match.group(1) if match else DEFAULT_VALUE
I think this is going to be the solution you are looking for.
#input
user_in = 'snp_1_881627 AA=G;ALLELE=A;DAF_GLOBAL=0.473901;GENE_TRCOUNT_AFFECTED=1;GENE_TRCOUNT_TOTAL=1;SEVERE_GENE=ENSG00000188976;SEVERE_IMPACT=SYNONYMOUS_CODON;TR_AFFECTED=FULL;ANNOTATION_CLASS=REG_FEATURE,SYNONYMOUS_CODON,ACTIVE_CHROM,NC_TRANSCRIPT_VARIANT,NC_TRANSCRIPT_VARIANT;A_A_CHANGE=.,L,.,.,.;A_A_LENGTH=.,750,.,.,.;A_A_POS=.,615,.,.,.;CELL=GM12878,.,GM12878,.,.;CHROM_STATE=.,.,11,.,.;EXON_NUMBER=.,16/19,.,.,.;GENE_ID=.,ENSG00000188976,.,ENSG00000188976,ENSG00000188976;GENE_NAME=.,NOC2L,.,NOC2L,NOC2L;HGVS=.,c.1843N>T,.,n.3290N>T,n.699N>T;REG_ANNOTATION=H3K36me3,.,.,.,.;TR_BIOTYPE=.,PROTEIN_CODING,.,PROCESSED_TRANSCRIPT,PROCESSED_TRANSCRIPT;TR_ID=.,ENST00000327044,.,ENST00000477976,ENST00000483767;TR_LENGTH=.,2790,.,4201,1611;TR_POS=.,1893,.,3290,699;TR_STRAND=.,-1,.,-1,-1'
# Break the row into its ';'-separated chunks. The first chunk still carries
# the row label, separated from the first KEY=VALUE entry by a space.
user_in = user_in.split(';')
row_label, _, first_entry = user_in[0].partition(" ")

# Index every KEY=VALUE entry by its key (split on the first "=" only).
entries = {}
for entry in [first_entry] + user_in[1:]:
    name, _, value = entry.partition("=")
    entries[name] = value


def _first_value(name):
    """Return the first real value stored under ``name``, or "NA".

    Values are comma-separated lists in which "." marks an empty slot, so the
    first item that is neither empty nor "." is used. "NA" is returned when
    the key is absent or holds no real value.
    """
    for item in entries.get(name, "").split(","):
        if item and item != ".":
            return item
    return "NA"


# One tab-separated row: label, then the requested columns in order. Joining
# with "\t" keeps the columns aligned even when a column falls back to "NA".
final_output = "\t".join(
    [row_label]
    + [_first_value(name)
       for name in ("GENE_ID", "GENE_NAME", "EXON_NUMBER", "SEVERE_IMPACT")]
)
print(final_output)

Categories

Resources