How to find all occurrences of a non-contiguous substring in Python? - python

Premise:
I am currently working on the following problem:
http://rosalind.info/problems/sseq/
I must find the index combinations of all occurrences of a substring in a string, where the substring is not necessarily contiguous.
My testing parameters:
Main_String = ACGGTTAACGTGACGGTTAAGSSGSSTSSTSSASSA
Substring = GGTTAA
Non-contiguous means that an occurrence of the Substring in the Main_String may look like this:
GGSTTSAASSS, where the substring is GGTTAA and the main string is GGSTTSAASSS.
The substring, although cut by random characters (S in our case) is still to be found in the main string. As such, a possible answer would be (format:Letter((index + 1) in Main_String)) G(1)G(2)T(4)T(5)A(7)A(8) = 1, 2, 4, 5, 7, 8. That is easy enough to get for the first match. I need to get all possible variations though, so if we use my testing parameters from above, correct answers would be:
3, 4, 5, 6, 7, 8 and 3, 12, 17, 18, 19, 20 and 3, 15, 17, 18, 19, 20 and so on up to 21, 24, 27, 30, 33, 36.
Question:
I need an algorithm that can provide me with all possible variations of a non-contiguous substring in a given string.
Issue:
This is the code I have so far, which works to an extent, but does not return all possible variations, only some of them.
# NOTE(review): the indentation of this snippet was lost when it was pasted, so it does not
# run as shown. The comments below describe what the individual statements do; where the
# nesting matters they are marked as assumptions.

# Accumulators: the main sequence, a header counter, the subsequence to look for, and the
# list of index combinations collected so far.
dna = ''
counter = -1
dna_subseq = ''
dna_subseq_indexes = []
# Read the input file and split it into lines.
with open('Rosalind_dna.txt', 'r') as file:
data = file.read().split('\n')
for line in data:
# Blank lines are skipped.
if line == '':
continue
# A line containing 'Rosalind' bumps the counter while it is still below 1
# (it starts at -1, so this happens for at most two such lines).
if 'Rosalind' in line and counter < 1:
counter += 1
continue
# Other lines are appended to dna while counter < 1 ...
elif 'Rosalind' not in line and counter < 1:
dna += line
# ... and to dna_subseq once counter >= 1.
elif 'Rosalind' not in line and counter >= 1:
dna_subseq += line
result = 0
# Every character of the subsequence except the first one.
dna_subseq_minus_start = dna_subseq[1:]
# Looks for `base` in dna from position `start_parameter` and records its 1-based position
# in the module-level list dna_subseq_indexes_subcombo (assigned in the loop below).
def find_next(start_parameter, base):
# str.find returns -1 when `base` is not found at or after start_parameter.
result_func = dna.find(base, start_parameter)
# If that 1-based position is already recorded, retry one position further right.
if result_func + 1 in dna_subseq_indexes_subcombo:
# result_func + 1 == 0 means find() returned -1, i.e. there is no further match.
if result_func + 1 == 0:
return
find_next(start_parameter + 1, base)
else:
dna_subseq_indexes_subcombo.append(result_func + 1)
return
# Driver: visits every position of dna; positions holding the first character of the
# subsequence seed a new combination (presumably one combination per pass of the while loop).
for index, value in enumerate(dna):
global_start = index
result = 0
while result != -1:
dna_subseq_indexes_subcombo = []
if value == dna_subseq[0]:
# Positions are stored 1-based.
dna_subseq_indexes_subcombo.append(index + 1)
Flag = True
for base in dna_subseq_minus_start:
# `start` is reset to global_start only for the first remaining character.
if Flag:
start = global_start
Flag = False
result = dna.find(base, start)
if result + 1 in dna_subseq_indexes_subcombo:
find_next(start + 1, base)
else:
dna_subseq_indexes_subcombo.append(result + 1)
start += 1
dna_subseq_indexes.append(dna_subseq_indexes_subcombo)
global_start += 1
else:
break
# Keep only the combinations whose indices are already in ascending order.
final_result = []
for x in dna_subseq_indexes:
test = x.copy()
test.sort()
if test == x:
final_result.append(x)
else:
continue
print(final_result)

I'm not sure your algorithm could find all solutions even if it would be fixed. I tried this logic instead :
Find the leftmost occurrence of the subsequence in dna, then recursively generate all other variations by advancing the matched positions one at a time, working from the last position back to the first. This way the solutions are produced already sorted.
dna = 'ACGGTTAACGTGACGGTTAAGSSGSSTSSTSSASSA'
dna_len = len(dna)
dna_subseq = 'GGTTAA'  # must be non-empty
subseq_len = len(dna_subseq)
count = 0  # number of solutions found so far (incremented by find_one_solution)
print_mode = True  # Prints the solutions, set to False to collect them instead


def find_one_solution(prev_solution, subseq_start, dna_start):
    """Find a single solution starting from the previous one or from a null solution.

    The first ``subseq_start`` indices of ``prev_solution`` are kept as-is; the
    remaining characters of ``dna_subseq`` are then matched greedily (leftmost
    first) in ``dna``, scanning from index ``dna_start``.

    Args:
        prev_solution: list of zero-based indices into ``dna``; only its first
            ``subseq_start`` entries are used. It is not modified.
        subseq_start: position in ``dna_subseq`` from which matching restarts.
        dna_start: index in ``dna`` from which the scan starts.

    Returns:
        A new list of ``subseq_len`` zero-based indices, or ``None`` when the
        rest of the subsequence cannot be matched from ``dna_start``.

    Side effects:
        On success, increments the module-level ``count`` and, when
        ``print_mode`` is true, prints the solution.
    """
    global count  # the only module-level name this function rebinds
    searched = dna_subseq[subseq_start]
    coll = prev_solution[:subseq_start]
    subseq_idx = subseq_start
    for i in range(dna_start, len(dna)):
        if dna[i] == searched:
            coll.append(i)
            subseq_idx += 1
            if subseq_idx == subseq_len:
                break
            searched = dna_subseq[subseq_idx]
    if len(coll) < subseq_len:
        return None
    count += 1
    if print_mode:
        print(coll)
    return coll


def find_all_solutions(solutions, solution, subseq_start, limit):
    """Recursively enumerate every solution that follows ``solution``.

    Positions of the subsequence are advanced from the last one back to
    position ``limit``; positions before ``limit`` are never changed.

    Args:
        solutions: list that receives each new solution when ``print_mode``
            is false (it is left untouched when ``print_mode`` is true).
        solution: a complete solution (list of ``subseq_len`` indices) to
            start from.
        subseq_start: unused; kept so existing calls keep working.
        limit: leftmost subsequence position this call is allowed to advance.
    """
    for start in range(subseq_len - 1, limit - 1, -1):
        if start == subseq_len - 1:
            # Last element: keep moving it to its next match until none is left.
            while True:
                temp = find_one_solution(solution, start, solution[-1] + 1)
                if temp is None:
                    break
                solution = temp
                if not print_mode:
                    solutions.append(solution)
        else:
            # Other elements: move this element to its next match; the elements
            # after it are re-matched greedily by find_one_solution.
            temp = find_one_solution(solution, start, solution[start] + 1)
            if temp is None:
                # If the leftmost next match cannot be completed, no later one can.
                continue
            solution = temp
            if not print_mode:
                solutions.append(solution)
            # Restart from the end with the current position as the left limit.
            find_all_solutions(solutions, solution, subseq_len - 1, start)


def main():
    """Find every occurrence of ``dna_subseq`` in ``dna`` and report them.

    Prints each solution as it is found when ``print_mode`` is true, otherwise
    prints the collected list; the total count is printed in both cases.
    """
    all_solutions = []
    # Finds the initial (leftmost) solution, starting from a null solution.
    initial_solution = [0] * subseq_len
    initial_solution = find_one_solution(initial_solution, 0, initial_solution[0])
    if initial_solution is None:
        print("No solution found")
    else:
        if not print_mode:
            all_solutions.append(initial_solution)
        # Finds all other solutions
        find_all_solutions(all_solutions, initial_solution, subseq_len - 1, 0)
        if not print_mode:
            print(all_solutions)
    print("Total count:", count)


if __name__ == "__main__":
    main()
#289 solutions found : [[2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 12], [2, 3, 4, 5, 6, 18], [2, 3, 4, 5, 6, 19], [2, 3, 4, 5, 6, 32], [2, 3, 4, 5, 6, 35], [2, 3, 4, 5, 7, 12], [2, 3, 4, 5, 7, 18], [2, 3, 4, 5, 7, 19], [2, 3, 4, 5, 7, 32], [2, 3, 4, 5, 7, 35], [2, 3, 4, 5, 12, 18], [2, 3, 4, 5, 12, 19], [2, 3, 4, 5, 12, 32], [2, 3, 4, 5, 12, 35], [2, 3, 4, 5, 18, 19], [2, 3, 4, 5, 18, 32], [2, 3, 4, 5, 18, 35], [2, 3, 4, 5, 19, 32], [2, 3, 4, 5, 19, 35], [2, 3, 4, 5, 32, 35], [2, 3, 4, 10, 12, 18], [2, 3, 4, 10, 12, 19], [2, 3, 4, 10, 12, 32], [2, 3, 4, 10, 12, 35], [2, 3, 4, 10, 18, 19], [2, 3, 4, 10, 18, 32], [2, 3, 4, 10, 18, 35], [2, 3, 4, 10, 19, 32], [2, 3, 4, 10, 19, 35], [2, 3, 4, 10, 32, 35], [2, 3, 4, 16, 18, 19], [2, 3, 4, 16, 18, 32], [2, 3, 4, 16, 18, 35], [2, 3, 4, 16, 19, 32], [2, 3, 4, 16, 19, 35], [2, 3, 4, 16, 32, 35], [2, 3, 4, 17, 18, 19], [2, 3, 4, 17, 18, 32], [2, 3, 4, 17, 18, 35], [2, 3, 4, 17, 19, 32], [2, 3, 4, 17, 19, 35], [2, 3, 4, 17, 32, 35], [2, 3, 4, 26, 32, 35], [2, 3, 4, 29, 32, 35], [2, 3, 5, 10, 12, 18], [2, 3, 5, 10, 12, 19], [2, 3, 5, 10, 12, 32], [2, 3, 5, 10, 12, 35], [2, 3, 5, 10, 18, 19], [2, 3, 5, 10, 18, 32], [2, 3, 5, 10, 18, 35], [2, 3, 5, 10, 19, 32], [2, 3, 5, 10, 19, 35], [2, 3, 5, 10, 32, 35], [2, 3, 5, 16, 18, 19], [2, 3, 5, 16, 18, 32], [2, 3, 5, 16, 18, 35], [2, 3, 5, 16, 19, 32], [2, 3, 5, 16, 19, 35], [2, 3, 5, 16, 32, 35], [2, 3, 5, 17, 18, 19], [2, 3, 5, 17, 18, 32], [2, 3, 5, 17, 18, 35], [2, 3, 5, 17, 19, 32], [2, 3, 5, 17, 19, 35], [2, 3, 5, 17, 32, 35], [2, 3, 5, 26, 32, 35], [2, 3, 5, 29, 32, 35], [2, 3, 10, 16, 18, 19], [2, 3, 10, 16, 18, 32], [2, 3, 10, 16, 18, 35], [2, 3, 10, 16, 19, 32], [2, 3, 10, 16, 19, 35], [2, 3, 10, 16, 32, 35], [2, 3, 10, 17, 18, 19], [2, 3, 10, 17, 18, 32], [2, 3, 10, 17, 18, 35], [2, 3, 10, 17, 19, 32], [2, 3, 10, 17, 19, 35], [2, 3, 10, 17, 32, 35], [2, 3, 10, 26, 32, 35], [2, 3, 10, 29, 32, 35], [2, 3, 16, 17, 18, 19], [2, 3, 16, 17, 18, 32], [2, 3, 16, 17, 18, 35], [2, 3, 16, 
17, 19, 32], [2, 3, 16, 17, 19, 35], [2, 3, 16, 17, 32, 35], [2, 3, 16, 26, 32, 35], [2, 3, 16, 29, 32, 35], [2, 3, 17, 26, 32, 35], [2, 3, 17, 29, 32, 35], [2, 3, 26, 29, 32, 35], [2, 9, 10, 16, 18, 19], [2, 9, 10, 16, 18, 32], [2, 9, 10, 16, 18, 35], [2, 9, 10, 16, 19, 32], [2, 9, 10, 16, 19, 35], [2, 9, 10, 16, 32, 35], [2, 9, 10, 17, 18, 19], [2, 9, 10, 17, 18, 32], [2, 9, 10, 17, 18, 35], [2, 9, 10, 17, 19, 32], [2, 9, 10, 17, 19, 35], [2, 9, 10, 17, 32, 35], [2, 9, 10, 26, 32, 35], [2, 9, 10, 29, 32, 35], [2, 9, 16, 17, 18, 19], [2, 9, 16, 17, 18, 32], [2, 9, 16, 17, 18, 35], [2, 9, 16, 17, 19, 32], [2, 9, 16, 17, 19, 35], [2, 9, 16, 17, 32, 35], [2, 9, 16, 26, 32, 35], [2, 9, 16, 29, 32, 35], [2, 9, 17, 26, 32, 35], [2, 9, 17, 29, 32, 35], [2, 9, 26, 29, 32, 35], [2, 11, 16, 17, 18, 19], [2, 11, 16, 17, 18, 32], [2, 11, 16, 17, 18, 35], [2, 11, 16, 17, 19, 32], [2, 11, 16, 17, 19, 35], [2, 11, 16, 17, 32, 35], [2, 11, 16, 26, 32, 35], [2, 11, 16, 29, 32, 35], [2, 11, 17, 26, 32, 35], [2, 11, 17, 29, 32, 35], [2, 11, 26, 29, 32, 35], [2, 14, 16, 17, 18, 19], [2, 14, 16, 17, 18, 32], [2, 14, 16, 17, 18, 35], [2, 14, 16, 17, 19, 32], [2, 14, 16, 17, 19, 35], [2, 14, 16, 17, 32, 35], [2, 14, 16, 26, 32, 35], [2, 14, 16, 29, 32, 35], [2, 14, 17, 26, 32, 35], [2, 14, 17, 29, 32, 35], [2, 14, 26, 29, 32, 35], [2, 15, 16, 17, 18, 19], [2, 15, 16, 17, 18, 32], [2, 15, 16, 17, 18, 35], [2, 15, 16, 17, 19, 32], [2, 15, 16, 17, 19, 35], [2, 15, 16, 17, 32, 35], [2, 15, 16, 26, 32, 35], [2, 15, 16, 29, 32, 35], [2, 15, 17, 26, 32, 35], [2, 15, 17, 29, 32, 35], [2, 15, 26, 29, 32, 35], [2, 20, 26, 29, 32, 35], [2, 23, 26, 29, 32, 35], [3, 9, 10, 16, 18, 19], [3, 9, 10, 16, 18, 32], [3, 9, 10, 16, 18, 35], [3, 9, 10, 16, 19, 32], [3, 9, 10, 16, 19, 35], [3, 9, 10, 16, 32, 35], [3, 9, 10, 17, 18, 19], [3, 9, 10, 17, 18, 32], [3, 9, 10, 17, 18, 35], [3, 9, 10, 17, 19, 32], [3, 9, 10, 17, 19, 35], [3, 9, 10, 17, 32, 35], [3, 9, 10, 26, 32, 35], [3, 9, 10, 29, 32, 35], [3, 9, 
16, 17, 18, 19], [3, 9, 16, 17, 18, 32], [3, 9, 16, 17, 18, 35], [3, 9, 16, 17, 19, 32], [3, 9, 16, 17, 19, 35], [3, 9, 16, 17, 32, 35], [3, 9, 16, 26, 32, 35], [3, 9, 16, 29, 32, 35], [3, 9, 17, 26, 32, 35], [3, 9, 17, 29, 32, 35], [3, 9, 26, 29, 32, 35], [3, 11, 16, 17, 18, 19], [3, 11, 16, 17, 18, 32], [3, 11, 16, 17, 18, 35], [3, 11, 16, 17, 19, 32], [3, 11, 16, 17, 19, 35], [3, 11, 16, 17, 32, 35], [3, 11, 16, 26, 32, 35], [3, 11, 16, 29, 32, 35], [3, 11, 17, 26, 32, 35], [3, 11, 17, 29, 32, 35], [3, 11, 26, 29, 32, 35], [3, 14, 16, 17, 18, 19], [3, 14, 16, 17, 18, 32], [3, 14, 16, 17, 18, 35], [3, 14, 16, 17, 19, 32], [3, 14, 16, 17, 19, 35], [3, 14, 16, 17, 32, 35], [3, 14, 16, 26, 32, 35], [3, 14, 16, 29, 32, 35], [3, 14, 17, 26, 32, 35], [3, 14, 17, 29, 32, 35], [3, 14, 26, 29, 32, 35], [3, 15, 16, 17, 18, 19], [3, 15, 16, 17, 18, 32], [3, 15, 16, 17, 18, 35], [3, 15, 16, 17, 19, 32], [3, 15, 16, 17, 19, 35], [3, 15, 16, 17, 32, 35], [3, 15, 16, 26, 32, 35], [3, 15, 16, 29, 32, 35], [3, 15, 17, 26, 32, 35], [3, 15, 17, 29, 32, 35], [3, 15, 26, 29, 32, 35], [3, 20, 26, 29, 32, 35], [3, 23, 26, 29, 32, 35], [9, 11, 16, 17, 18, 19], [9, 11, 16, 17, 18, 32], [9, 11, 16, 17, 18, 35], [9, 11, 16, 17, 19, 32], [9, 11, 16, 17, 19, 35], [9, 11, 16, 17, 32, 35], [9, 11, 16, 26, 32, 35], [9, 11, 16, 29, 32, 35], [9, 11, 17, 26, 32, 35], [9, 11, 17, 29, 32, 35], [9, 11, 26, 29, 32, 35], [9, 14, 16, 17, 18, 19], [9, 14, 16, 17, 18, 32], [9, 14, 16, 17, 18, 35], [9, 14, 16, 17, 19, 32], [9, 14, 16, 17, 19, 35], [9, 14, 16, 17, 32, 35], [9, 14, 16, 26, 32, 35], [9, 14, 16, 29, 32, 35], [9, 14, 17, 26, 32, 35], [9, 14, 17, 29, 32, 35], [9, 14, 26, 29, 32, 35], [9, 15, 16, 17, 18, 19], [9, 15, 16, 17, 18, 32], [9, 15, 16, 17, 18, 35], [9, 15, 16, 17, 19, 32], [9, 15, 16, 17, 19, 35], [9, 15, 16, 17, 32, 35], [9, 15, 16, 26, 32, 35], [9, 15, 16, 29, 32, 35], [9, 15, 17, 26, 32, 35], [9, 15, 17, 29, 32, 35], [9, 15, 26, 29, 32, 35], [9, 20, 26, 29, 32, 35], [9, 23, 26, 29, 
32, 35], [11, 14, 16, 17, 18, 19], [11, 14, 16, 17, 18, 32], [11, 14, 16, 17, 18, 35], [11, 14, 16, 17, 19, 32], [11, 14, 16, 17, 19, 35], [11, 14, 16, 17, 32, 35], [11, 14, 16, 26, 32, 35], [11, 14, 16, 29, 32, 35], [11, 14, 17, 26, 32, 35], [11, 14, 17, 29, 32, 35], [11, 14, 26, 29, 32, 35], [11, 15, 16, 17, 18, 19], [11, 15, 16, 17, 18, 32], [11, 15, 16, 17, 18, 35], [11, 15, 16, 17, 19, 32], [11, 15, 16, 17, 19, 35], [11, 15, 16, 17, 32, 35], [11, 15, 16, 26, 32, 35], [11, 15, 16, 29, 32, 35], [11, 15, 17, 26, 32, 35], [11, 15, 17, 29, 32, 35], [11, 15, 26, 29, 32, 35], [11, 20, 26, 29, 32, 35], [11, 23, 26, 29, 32, 35], [14, 15, 16, 17, 18, 19], [14, 15, 16, 17, 18, 32], [14, 15, 16, 17, 18, 35], [14, 15, 16, 17, 19, 32], [14, 15, 16, 17, 19, 35], [14, 15, 16, 17, 32, 35], [14, 15, 16, 26, 32, 35], [14, 15, 16, 29, 32, 35], [14, 15, 17, 26, 32, 35], [14, 15, 17, 29, 32, 35], [14, 15, 26, 29, 32, 35], [14, 20, 26, 29, 32, 35], [14, 23, 26, 29, 32, 35], [15, 20, 26, 29, 32, 35], [15, 23, 26, 29, 32, 35], [20, 23, 26, 29, 32, 35]]
To test the validity of the algorithm, I used a simpler sequence from which all the solutions are easy to find manually :
dna = 'AAAAAAAA'
dna_subseq = 'AAAA'
Note that I worked with zero based indices because it's simpler, but you could easily add 1 to all results if you need it.

Related

how do I compare two lists in python by indexes

I have to compare two lists by index: if the element of list1 at a given index is 0, then all elements of the sublist of list2 at that same index should become 0.
list1 = [0, 1, 1, 0] # that's for example
list2 = [[15, 19, 13, 15, 30, 14, 14], [14, 22, 30, 19, 29, 17, 15], [19, 21, 11, 25, 23, 23, 30], [15, 15, 25, 18, 22, 24, 29], [24, 30, 30, 11, 25, 18, 27]]
output:
list2 = [[0, 0, 0, 0, 0, 0, 0], [14, 22, 30, 19, 29, 17, 15], [19, 21, 11, 25, 23, 23, 30], [0, 0, 0, 0, 0, 0, 0]]
list1 = [0, 1, 1, 0]
list2 = [[15, 19, 13, 15, 30, 14, 14], [14, 22, 30, 19, 29, 17, 15], [19, 21, 11, 25, 23, 23, 30], [15, 15, 25, 18, 22, 24, 29], [24, 30, 30, 11, 25, 18, 27]]

# Pair each flag in list1 with the row of list2 at the same position.
# zip() stops at the shorter input, so rows of list2 without a flag are dropped.
# Multiplying by the flag zeroes the row when the flag is 0 and keeps it when it is 1.
new_list = [[x * flag for x in row] for flag, row in zip(list1, list2)]
print(new_list)
This should also do:
list1 = [0, 1, 1, 0] # that's for example
list2 = [[15, 19, 13, 15, 30, 14, 14], [14, 22, 30, 19, 29, 17, 15], [19, 21, 11, 25, 23, 23, 30], [15, 15, 25, 18, 22, 24, 29], [24, 30, 30, 11, 25, 18, 27]]
# Walk both lists in lockstep: a 0 flag replaces the row at the same position with
# zeros of the same length, any other flag keeps the row unchanged.
# (Using the flag itself as an index into list2 would pick the wrong rows.)
result = [[0] * len(row) if flag == 0 else row for flag, row in zip(list1, list2)]
print(result)
My take without any if statements
list1 = [0, 1, 1, 0]
list2 = [[15, 19, 13, 15, 30, 14, 14], [14, 22, 30, 19, 29, 17, 15], [19, 21, 11, 25, 23, 23, 30], [15, 15, 25, 18, 22, 24, 29], [24, 30, 30, 11, 25, 18, 27]]
output_list = []
# Multiply every element of a row by the flag at the same position in list1:
# a 0 flag zeroes the row, a 1 flag leaves it as it is.
# zip() pairs the lists positionally and stops at the shorter one, so a list1
# longer than list2 cannot cause an IndexError.
for item, row in zip(list1, list2):
    output_list.append([element * item for element in row])
print(output_list)

New to Python - learning if statements

I would like to try and print Calendar in Matrix format for the whole year using the calendar.monthcalendar function
I'm trying to see if there is a way of incorporating either an if or a while loop so I don't have to run the code 12 times with a different month variable on the end?
So far I ran the below with a different variable to achieve the end result.
print(calendar.monthcalendar(2020,1))
I would like the end result to be a matrix of an entire year like below.
[[0, 0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11, 12], [13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26], [27, 28, 29, 30, 31, 0, 0]]
[[0, 0, 0, 0, 0, 1, 2], [3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16], [17, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 0]]
[[0, 0, 0, 0, 0, 0, 1], [2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22], [23, 24, 25, 26, 27, 28, 29], [30, 31, 0, 0, 0, 0, 0]]
[[0, 0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11, 12], [13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26], [27, 28, 29, 30, 0, 0, 0]]
[[0, 0, 0, 0, 1, 2, 3], [4, 5, 6, 7, 8, 9, 10], [11, 12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23, 24], [25, 26, 27, 28, 29, 30, 31]]
[[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14], [15, 16, 17, 18, 19, 20, 21], [22, 23, 24, 25, 26, 27, 28], [29, 30, 0, 0, 0, 0, 0]]
[[0, 0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11, 12], [13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26], [27, 28, 29, 30, 31, 0, 0]]
[[0, 0, 0, 0, 0, 1, 2], [3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16], [17, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 30], [31, 0, 0, 0, 0, 0, 0]]
[[0, 1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13], [14, 15, 16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26, 27], [28, 29, 30, 0, 0, 0, 0]]
[[0, 0, 0, 1, 2, 3, 4], [5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 18], [19, 20, 21,22, 23, 24, 25], [26, 27, 28, 29, 30, 31, 0]]
[[0, 0, 0, 0, 0, 0, 1], [2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22], [23, 24, 25, 26, 27, 28, 29], [30, 0, 0, 0, 0, 0, 0]]
[[0, 1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13], [14, 15, 16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26, 27], [28, 29, 30, 31, 0, 0, 0]]
You can use a for loop. In each iteration of the loop, the calendar.monthcalendar() function is called. The range() function assigns values to x, from 1 to 12 (13 excluded). Therefore x=1 to x=12 is placed into the calendar.monthcalendar() function 12 times.
import calendar

# range(1, 13) yields the month numbers 1..12 (13 is excluded), so
# calendar.monthcalendar() is called once per month of 2020.
for x in range(1, 13):
    print(calendar.monthcalendar(2020, x))
I would suggest a tutorial, book, or any learning resource because these are the basics of any language you need to grasp before actually starting programming.

Best way to split this list into smaller lists?

I've been trying to wrap my head around the best way to split this list of numbers up that are ordered but broken up in sections. Ex:
data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 29, 30, 31, 32, 33, 35, 36, 44, 45, 46, 47]
I'd like the output to be this..
sliced_data = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],[29, 30, 31, 32, 33],[35, 36],[44, 45, 46, 47]]
I've been trying a while loop that runs until the list is empty, but that isn't working too well.
Edit:
# NOTE(review): fragment from the question's edit. half_hour_blocks, next_number and
# skippers are defined outside this snippet, and its indentation was lost in the paste.
# Apparent intent: when the current value is not the expected next number, move a leading
# slice of half_hour_blocks into skippers, then expect the value after the current one.
for each_half_hour in half_hour_blocks:
if next_number != each_half_hour:
skippers.append(half_hour_blocks[:next_number])
del half_hour_blocks[:next_number]
next_number = each_half_hour + 1
# NOTE(review): assuming the `del` sits inside the loop, half_hour_blocks shrinks while it
# is being iterated over, and next_number (a value) is used as a slice length; both look
# like likely sources of the reported misbehaviour. Confirm against the asker's real code.
# Splits data into runs of consecutive integers with itertools.groupby.
# The key is "value minus running position": count() is created once as the lambda's
# default argument, so next(c) yields 0, 1, 2, ... across calls. Within a run of
# consecutive values that difference stays constant, so groupby puts them in one group.
>>> data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 29, 30, 31, 32, 33, 35, 36, 44, 45, 46, 47]
>>> from itertools import groupby, count
>>> [list(g) for k,g in groupby(data, key=lambda i, c=count():i-next(c))]
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [29, 30, 31, 32, 33], [35, 36], [44, 45, 46, 47]]
I don't see why a while-loop wouldn't work here, unless you're going for something more efficient or succinct.
Something like:
# Sample input from the question; replace it with your own ordered list of integers.
data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 29, 30, 31, 32, 33, 35, 36, 44, 45, 46, 47]

# Split data into runs of consecutive integers.
# Unlike a pop(0)-based loop this leaves data intact, works for an empty list
# (sliced_data is then []), and does not shadow the built-in `slice`.
sliced_data = []
for value in data:
    # Extend the current run only when this value directly follows its last
    # element; otherwise start a new run.
    if sliced_data and value == sliced_data[-1][-1] + 1:
        sliced_data[-1].append(value)
    else:
        sliced_data.append([value])

Move elements out of sublists into new list

I have a list that holds several sublist, each one of them with a given number of elements inside. I need to move all elements inside all sublists into another list, ie: remove the separation among elements imposed by the sublists.
This is an MWE of what I mean:
a = [[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]], [[17, 18, 19, 20], [21, 22, 23, 24]], [[25, 26, 27, 28], [26, 30, 31, 32], [33, 34, 35, 36]]]
# Flatten one level: collect every inner list of every sublist of `a` into `b`,
# keeping the original order.
b = []
for elem in a:
    for item in elem:
        b.append(item)
which results in:
[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24], [25, 26, 27, 28], [26, 30, 31, 32], [33, 34, 35, 36]]
I'm sure there's a more elegant and simpler way to do this in python.
Use itertools.chain.from_iterable:
>>> from itertools import chain
>>> list(chain.from_iterable(a))
[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24], [25, 26, 27, 28], [26, 30, 31, 32], [33, 34, 35, 36]]
Timing comparison:
Try this:
[item for sublist in a for item in sublist]

How to split a list into N random-but-min-sized chunks

For example: I want to split range(37) into n=5 chunks, with each chunk having
len(chunk) >= 4.
def divide(lst, min_size, split_size):
    """Yield ``split_size`` consecutive chunks of ``lst`` with random lengths.

    Every chunk has at least ``min_size`` elements, and the chunks concatenated
    in order reproduce ``lst``.

    Args:
        lst: a sized iterable (anything supporting ``len()`` and ``iter()``).
        min_size: minimum number of elements per chunk.
        split_size: number of chunks to produce.

    Yields:
        Lists of consecutive elements of ``lst``.

    Raises:
        ValueError: if ``split_size`` is less than 1 or ``lst`` is too short to
            give every chunk ``min_size`` elements. As this is a generator,
            the error is raised when iteration starts.
    """
    import random
    from itertools import islice

    size = len(lst)
    if split_size < 1:
        raise ValueError("split_size must be at least 1")
    if size < min_size * split_size:
        raise ValueError(
            "cannot split %d elements into %d chunks of at least %d"
            % (size, split_size, min_size)
        )
    it = iter(lst)
    # `remaining` is the number of chunks still to be cut after the current one.
    for remaining in range(split_size - 1, 0, -1):
        # Cap the random length so min_size elements are left for each later chunk.
        s = random.randint(min_size, size - min_size * remaining)
        yield list(islice(it, s))
        size -= s
    # The last chunk takes whatever is left (at least min_size by construction).
    yield list(it)
>>> list(divide(range(37), 4, 5))
[[0, 1, 2, 3], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], [23, 24, 25, 26, 27], [28, 29, 30, 31], [32, 33, 34, 35, 36]]
>>> list(divide(range(37), 4, 5))
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [17, 18, 19, 20, 21, 22], [23, 24, 25, 26], [27, 28, 29, 30, 31], [32, 33, 34, 35, 36]]
>>> list(divide(range(37), 4, 5))
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28], [29, 30, 31, 32], [33, 34, 35, 36]]
>>> list(divide(range(37), 4, 5))
[[0, 1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [17, 18, 19, 20, 21, 22, 23, 24], [25, 26, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36]]
>>>
For example you could initialy set each of n chunks size to 4 and then calculate: r = (m=37 mod n), if m>=20. And then just add 1 to the first chunk and decrease r, 1 to second chunk and decrease r....and repeat until r = 0. Then you have your chunks and you can fill them.
def divide(val, num=5, minSize=4):
    '''Divide the integer val into num chunk sizes, each at least minSize.

    The size of each chunk but the last is drawn at random between minSize and
    the ceiling of (remaining value / remaining chunks), which limits how large
    a single chunk can get; the last chunk receives whatever is left.

    Args:
        val: total (integer) amount to split.
        num: number of chunks.
        minSize: minimum size of every chunk.

    Returns:
        A list of num integers that sum to val.

    Raises:
        ValueError: if num is less than 1 or val is smaller than num * minSize
            (otherwise the last chunk could silently fall below minSize).
    '''
    import random

    if num < 1:
        raise ValueError("num must be at least 1")
    if val < num * minSize:
        raise ValueError(
            "cannot divide %d into %d chunks of at least %d" % (val, num, minSize)
        )
    chunks = []
    # `remaining` counts the chunks still to be sized, including the current one.
    for remaining in range(num, 1, -1):
        # Integer ceiling of val / remaining, without going through floats.
        maxSize = -(-val // remaining)
        newSize = random.randint(minSize, maxSize)
        val = val - newSize
        chunks.append(newSize)
    chunks.append(val)
    return chunks
Calling divide with different parameters:
>>> divide(37,5,4)
>>> [7, 5, 4, 10, 11]
>>> divide(37,5,4)
>>> [4, 5, 4, 10, 14]
>>> divide(50,6,5)
>>> [6, 8, 8, 5, 9, 14]

Categories

Resources