Find the year with the most number of people alive in Python - python
Given a list of people with their birth and end years (all between 1900 and 2000), find the year with the most number of people alive.
Here is my somewhat brute-force solution:
def most_populated(population, single=True):
    """Return the year with the most people alive.

    population: list of (birth_year, death_year) tuples.  The death year
    itself counts as a year alive (month is ignored).
    single: if True, return one maximal year (the first encountered);
    otherwise return a list of every year reaching the maximum count.
    """
    years = dict()
    for birth, death in population:
        # range end is death + 1 so the death year itself counts as alive
        for year in range(birth, death + 1):
            # first sighting of a year must count as 1, not 0
            years[year] = years.get(year, 0) + 1
    if single:
        return max(years, key=years.get)
    best = max(years.values())  # hoisted: max() per element was O(n^2)
    return [year for year, count in years.items() if count == best]
# Demo: single best year, then the full list of tied years.
# (Python 3 print() — the rest of the page uses py3-only features.)
print(most_populated([(1920, 1939), (1911, 1944),
                      (1920, 1955), (1938, 1939)]))
print(most_populated([(1920, 1939), (1911, 1944),
                      (1920, 1955), (1938, 1939), (1937, 1940)], False))
I'm trying to find a more efficient way to solve this problem in Python. Both readability and efficiency count. Moreover, for some reason my code won't print [1938, 1939] when it should.
Update
Input is a list of tuples, where first element of a tuple is a year when person was born, and second element of a tuple is the year of death.
Update 2
End year (2nd part of tuple) counts as well as a year of the person being alive (so If the person dies in Sept 1939 (we don't care about the month), he is actually alive in 1939, at least part of it). That should fix the 1939' missing in results.
Best solution?
While readability counts in favor of @joran-beasley, for bigger input the most efficient algorithm was provided by @njzk2. Thanks @hannes-ovrén for providing the analysis in an IPython notebook on Gist.
Another solution I just thought of:
Create 2 tables, birthdates and deathdates.
Accumulate birth dates and death dates in those tables.
Browse those tables to accumulate the number of alive people at the time.
Grand total complexity is O(n)
Implementation
from collections import Counter
def most_populated(population, single=True):
    """Year(s) with the most people alive, via birth/death delta counting.

    O(range + n): count births and deaths per year, then sweep once while
    keeping a running total of people alive.  The death year itself counts
    as alive, hence the ``+ 1`` on the death side.
    """
    # Materialize as lists: under Python 3 a map() iterator would be
    # exhausted by Counter() before min()/max() could read it.
    birth = [p[0] for p in population]
    death = [p[1] + 1 for p in population]
    b = Counter(birth)
    d = Counter(death)
    alive = 0
    years = {}
    for year in range(min(birth), max(death) + 1):
        # Counter returns 0 for missing keys, so no membership test needed.
        alive = alive + b[year] - d[year]
        years[year] = alive
    if single:
        return max(years, key=years.get)
    best = max(years.values())  # hoisted out of the comprehension
    return [year for year, count in years.items() if count == best]
Better
from collections import Counter
from itertools import accumulate
import operator
def most_populated(population, single=True):
    """Year(s) with most people alive, using a Counter of +1/-1 deltas.

    delta[y] = (births in y) - (deaths in y - 1); the running sum of the
    deltas is the number of people alive in each year.  The death year
    counts as alive (decrement lands on death + 1).
    """
    delta = Counter(x[0] for x in population)
    delta.subtract(Counter(x[1] + 1 for x in population))
    start, end = min(delta.keys()), max(delta.keys())
    # years[i] == number of people alive in year start + i
    years = list(accumulate(delta[year] for year in range(start, end)))
    if single:
        return max(enumerate(years), key=operator.itemgetter(1))[0] + start
    best = max(years)  # hoisted: was recomputed per element in the filter
    return [i + start for i, val in enumerate(years) if val == best]
>>> from collections import Counter
>>> from itertools import chain
>>> def most_pop(pop):
... pop_flat = chain.from_iterable(range(i,j+1) for i,j in pop)
... return Counter(pop_flat).most_common()
...
>>> most_pop([(1920, 1939), (1911, 1944), (1920, 1955), (1938, 1939)])[0]
I would go like this:
Sort persons by birth year (unborn list)
Starting from the first born
Put that person in the alive list
Using an insertion sort by date of death (the list stays sorted, so use a binary search)
Until you reach a person that was not born that year
Then, starting from the person in the alive list that dies first, remove it from the list.
Put the size of the alive list in a dict
Increment the year
Loop until the unborn and alive lists are empty
Complexity should be around O((m + n) * log(m)) (each year is considered only once, and each person only twice, multiplied by the insertion cost in the alive list)
Implementation
from bisect import insort
def most_populated(population, single=True):
    """Year(s) with the most people alive, via sorted birth/death sweeps.

    ``unborn`` is sorted by descending birth year so the next person to be
    born is always at the tail (O(1) pop).  ``alive`` holds *negated* death
    years kept sorted by insort, so the earliest remaining death is at the
    tail as well.  Roughly O((years + n) * log n).
    """
    years = dict()
    unborn = sorted(population, key=lambda x: -x[0])
    alive = []
    dead = []
    last_death = max(population, key=lambda x: x[1])[1]  # loop-invariant, hoisted
    for year in range(unborn[-1][0], last_death + 1):
        # everyone born this year joins the alive list
        while unborn and unborn[-1][0] == year:
            insort(alive, -unborn.pop()[1])
        # everyone who died *last* year leaves — the death year counts as alive
        while alive and alive[-1] == -(year - 1):
            dead.append(-alive.pop())
        years[year] = len(alive)
    if single:
        return max(years, key=years.get)
    best = max(years.values())  # .items() replaces py2-only .iteritems()
    return [year for year, count in years.items() if count == best]
We can also use numpy slicing, which is quite neat, and should also be quite efficient:
import numpy as np
from collections import namedtuple

# A person is a (birth, death) record; the death year counts as alive.
Person = namedtuple('Person', ('birth', 'death'))

people = [Person(1900, 2000), Person(1950, 1960), Person(1955, 1959)]

START_YEAR = 1900
END_YEAR = 2000

# One slot per calendar year; slot i is the head-count for START_YEAR + i.
people_alive = np.zeros(END_YEAR - START_YEAR + 1)
for birth, death in people:
    # Slice assignment bumps every year of this person's life at C speed;
    # the +1 makes the death year inclusive.
    people_alive[birth - START_YEAR:death - START_YEAR + 1] += 1

# Every index where the head-count peaks, mapped back to calendar years.
most_alive = np.flatnonzero(people_alive == people_alive.max()) + START_YEAR
EDIT It seems like the namedtuple adds a bit of overhead, so to speed up a bit more, remove the namedtuple and do
for birth, death in people: instead.
Just put the birth and death years into a dict. If it is birth, increase the value by 1. or vice versa.
Sort the dict by keys and iterate by reading the current number of the alive people.
Follow the 'maxAlive' an 'theYear' to get the first year with the highest number
# Sweep algorithm over objects with .birth/.death attributes: +1 at each
# birth year, -1 the year *after* each death (the death year itself counts
# as alive, per the problem statement — the original decremented at the
# death year and so undercounted it).
years = {}
for p in people:
    if p.birth in years:
        years[p.birth] += 1
    else:
        years[p.birth] = 1
    if p.death + 1 in years:
        years[p.death + 1] -= 1
    else:
        years[p.death + 1] = -1

# Walk the years chronologically, tracking the running head-count; keep the
# first year that attains the maximum.
alive = 0
maxAlive = 0
theYear = people[0].birth
for year in sorted(years):
    alive += years[year]
    if alive > maxAlive:
        maxAlive = alive
        theYear = year
Without importing anything, and using a class for readability, here's my solution. Let me know what you think! I also made a separate function for getMaxBirthYear in case you're at an interview and someone wants you to code that out rather than using built in functions (I used them :) )
class Person:
    """A person with a birth year and a death year (either may be None)."""

    def __init__(self, birth=None, death=None):
        self.birth = birth
        self.death = death
def getPopulationPeak(people):
    """Return (year, count) at which the living population of *people* peaks."""
    deltas = getDeltas(people, getMaxBirthYear(people))
    runningSum = 0
    maxSum = 0
    maxYear = 0
    # Sweep the delta years chronologically; the running sum is the number
    # of people alive, so record where it first reaches its maximum.
    for year in sorted(deltas.keys()):
        runningSum += deltas[year]
        if runningSum > maxSum:
            maxSum, maxYear = runningSum, year
    return maxYear, maxSum
def getMaxBirthYear(people):
    """Return the latest birth year among *people*."""
    return max(person.birth for person in people)
def getDeltas(people, maxBirthYear):
deltas = dict()
for person in people:
if person.birth in deltas.keys():
deltas[person.birth] += 1
else:
deltas[person.birth] = 1
if person.death + 1 in deltas.keys():
deltas[person.death + 1] -= 1
elif person.death + 1 not in deltas.keys() and person.death <= maxBirthYear: # We can skip deaths after the last birth year
deltas[person.death + 1] = -1
return deltas
# Demo data; getPopulationPeak(testPeople) returns (2000, 3): three people
# are alive in 2000 (born 1985, 2000 and 2000).
testPeople = [
Person(1750,1802),
Person(2000,2010),
Person(1645,1760),
Person(1985,2002),
Person(2000,2050),
Person(2005,2080),
]
print(getPopulationPeak(testPeople))
How about this one:
def max_pop(pop):
    """Return (year, head_count) for the year with the most people alive.

    Builds one (+1 at birth, -1 after death) event per person, sorts the
    events, and sweeps — O(n log n) in the number of people, independent of
    the year span.  Ties keep the earliest year; death years count as alive.
    """
    from itertools import chain  # local: keeps the snippet self-contained
    alive = 0
    best = (0, 0)  # (year, count) — renamed so builtin max() isn't shadowed
    events = chain.from_iterable(((b, 1), (d + 1, -1)) for b, d in pop)
    for year, step in sorted(events):
        alive += step
        if alive > best[1]:
            best = (year, alive)
    return best
It's not affected by the year span but is nlogn in the |pop| (unless you'd roll out a radix sort which would be ~ 10n for a thousand year span and should be faster for |pop|>1000 ). Can't have both. A very general solution would have to scan first and decide which algo to use based on measured year span and |pop|.
my answer
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
// Tallies how many people were alive in every year from startYear to stopYear
// and prints one line per year ("an" = "year", "numar" = "count" in Romanian).
public class AlogrimVarsta {
public static void main(String args[]) {
int startYear = 1890;
int stopYear = 2000;
// Hard-coded sample population: (birth year, death year) pairs.
List<Person> listPerson = new LinkedList<>();
listPerson.add(new Person(1910, 1940));
listPerson.add(new Person(1920, 1935));
listPerson.add(new Person(1900, 1950));
listPerson.add(new Person(1890, 1920));
listPerson.add(new Person(1890, 2000));
listPerson.add(new Person(1945, 2000));
// "mapPersoaneCareAuTrait" = "map of people who lived": year -> head-count.
// LinkedHashMap preserves insertion (chronological) order for the printout.
Map<Integer, Integer> mapPersoaneCareAuTrait = new LinkedHashMap<>();
for (int x = startYear; x <= stopYear; x++) {
mapPersoaneCareAuTrait.put(x, 0);
}
// O(years * people): for every year, count each person alive during it.
for (int x = startYear; x <= stopYear; x++) {
for (Person per : listPerson) {
int value = mapPersoaneCareAuTrait.get(x);
// Alive if born this year...
if (per.getBorn() == x) {
mapPersoaneCareAuTrait.put(x, value + 1);
continue;
}
// ...or died this year (the death year counts as alive)...
if (per.getDie() == x) {
mapPersoaneCareAuTrait.put(x, value + 1);
continue;
}
// ...or strictly between birth and death.
// NOTE(review): this condition is just (x > born && x < die) written
// obscurely via subtractions — a simplification candidate.
if ((per.getDie() - per.getBorn() > per.getDie() - x) && (per.getDie() - x > 0)) {
mapPersoaneCareAuTrait.put(x, value + 1);
continue;
}
}
}
for (Map.Entry<Integer, Integer> mapEntry : mapPersoaneCareAuTrait.entrySet()) {
System.out.println("an " + mapEntry.getKey() + " numar " + mapEntry.getValue());
}
}
// Immutable (birth, death) record with read-only accessors.
static class Person {
final private int born;
final private int die;
public Person(int pBorn, int pDie) {
die = pDie;
born = pBorn;
}
public int getBorn() {
return born;
}
public int getDie() {
return die;
}
}
}
l = [(1920, 1939), (1911, 1944), (1920, 1955), (1938, 1939)]

# Intersect every person's span of living years: what remains is the set of
# years during which *all* of them were alive (here: {1938, 1939}).
union = set()
for person in l:
    lifespan = set(range(min(person), max(person) + 1))
    if not union:
        union = lifespan
    union = lifespan & union
print(union)
I came across the following code, which does exactly what you need.
Let's say the range of years is 1900 - 2000
Steps of the algorithm
Construct an array X of 100 integers (all initialized to zero; 101 integers if the year 2000 is included).
For each of the N people, increment X[birth year - 1900] by one and decrement X[death year - 1900] by one.
Iterate through X, maintaining a sum of each element as you go. The year with the most people alive is 1900 plus the index where the sum is maximum.
Code (Python as requested)
def year_with_max_population(people):
    """Return the year in [1900, 2000] with the most people alive.

    people: objects with ``birth_year`` and ``death_year`` attributes, both
    within 1900-2000.  The death year itself counts as a year alive.
    """
    # Index i covers year 1900 + i.  102 slots: the original's 100-slot
    # list overflowed for a death year of 2000, and the decrement lands on
    # death + 1 so the death year counts as alive.
    population_changes = [0] * 102
    for person in people:
        population_changes[person.birth_year - 1900] += 1
        population_changes[person.death_year - 1900 + 1] -= 1
    max_population = 0
    max_population_index = 0
    population = 0
    # Prefix-sum sweep: running total is the head-count per year; keep the
    # first index where it peaks.
    for index, population_change in enumerate(population_changes):
        population += population_change
        if population > max_population:
            max_population = population
            max_population_index = index
    return 1900 + max_population_index
credit 'Brian Schmitz' here
Related
How can I optimize a code that is using xarray for better performance?
I'm trying to extract climate data from various .nc files I have but the process is taking extremely long, I suspect it has something to do with the fact that I'm trying to extract the data for every day of June, July, August for the next 79 years. But I'm a novice programmer and I realize there might've been a few oversights by me (efficiency wise) that might've resulted in a slightly better performance. This is the snippet def calculateTemp(coords, year, model): """ takes in all coordinates of a line between two grid stations and the year converts the year into date takes average of temperature of each day of the month of June for each coordinate and then takes average of all coordinates to find average temp for that line for the month of June """ print(year) # coords represents a list of different sets of coordinates between two grids temp3 = 0 # sum of all temps of all coordinates for i in range(0, len(coords)): temp2 = 0 counter = 0 # this loop represents that the 15 years data is being extracted for # each coordinate set and average of those 15 years is being taken for p in range(0, 15): temp1 = 0 # sum of all temps for one coordinate in all days of June, tuly, august if year+ p < 100: # this loop represents the months of jun, jul, aug for j in range(6, 9): # 30 days of each month for k in range(1, 31): if k < 10: # this if-else makes a string of date date = '20'+str(year+p)+'-0'+str(j)+'-0'+str(k) else: date = '20'+str(year+p)+'-0'+str(j)+'-'+str(k) # there are 3 variants of the climate model # for years upto 2040, between 2041-2070 # and between 2071 and 2099 # hence this if else block if year+p < 41: temp1 += model[0]['tasmax'].sel( lon=coords[i][1], lat=coords[i][0], time=date, method='nearest').data[0] elif year+p >= 41 and year+p <71: temp1 += model[1]['tasmax'].sel( lon=coords[i][1], lat=coords[i][0], time=date, method='nearest').data[0] else: temp1 += model[2]['tasmax'].sel( lon=coords[i][1], lat=coords[i][0], time=date, method='nearest').data[0] 
counter += 1 avg = temp1/(len(range(0,30))*len(range(6,9))) temp2 += avg temp3 += temp2/counter Tamb = temp3/len(coords) return Tamb Is there anyway I can increase the performance of this code and optimize it?
I just replaced the innermost loops k in range(1,31)and j in range(6,9)into a dict comprehension to generate all the dates and corresponding value from your model. Then simply averaged these values for every value of p and then for every coord in coords. Give this a shot. Dicts should make the processing faster. Also check if the averages are exactly how you are calculating them in your function. def build_date(year,p,j,k): return '20'+str(year+p)+'-0'+str(j)+'-0'+str(k) if k<10 else '20'+str(year+p)+'-0'+str(j)+'-'+str(k) def calculateTemp(coords, year, model): func2 = lambda x,date:model[x]['tasmax'].sel(lon=coords[i][1], lat=coords[i][0], time=date, method='nearest').data[0] print(year) out = {} for i in range(len(coords)): inner = {} for p in range(0,15): if year + p < 100: dates = {build_date(year,p,j,k):func2(0,build_date(year,p,j,k)) if year+p<41 \ else func2(1,build_date(year,p,j,k)) if (year+p >= 41 and year+p <71) \ else func2(2,build_date(year,p,j,k)) for j in range(6,9) \ for k in range(1,31) } inner[p] = sum([v for k,v in dates.items()])/len(dates) out[i] = inner coord_averages = {k : sum(v.values())/len(v) for k,v in out.items() } Tamb = sum([v for k,v in coord_averages.items()])/len(coord_averages) return Tamb
How to optimize an O(N*M) to be O(n**2)?
I am trying to solve USACO's Milking Cows problem. The problem statement is here: https://train.usaco.org/usacoprob2?S=milk2&a=n3lMlotUxJ1 Given a series of intervals in the form of a 2d array, I have to find the longest interval and the longest interval in which no milking was occurring. Ex. Given the array [[500,1200],[200,900],[100,1200]], the longest interval would be 1100 as there is continuous milking and the longest interval without milking would be 0 as there are no rest periods. I have tried looking at whether utilizing a dictionary would decrease run times but I haven't had much success. f = open('milk2.in', 'r') w = open('milk2.out', 'w') #getting the input farmers = int(f.readline().strip()) schedule = [] for i in range(farmers): schedule.append(f.readline().strip().split()) #schedule = data minvalue = 0 maxvalue = 0 #getting the minimums and maximums of the data for time in range(farmers): schedule[time][0] = int(schedule[time][0]) schedule[time][1] = int(schedule[time][1]) if (minvalue == 0): minvalue = schedule[time][0] if (maxvalue == 0): maxvalue = schedule[time][1] minvalue = min(schedule[time][0], minvalue) maxvalue = max(schedule[time][1], maxvalue) filled_thistime = 0 filled_max = 0 empty_max = 0 empty_thistime = 0 #goes through all the possible items in between the minimum and the maximum for point in range(minvalue, maxvalue): isfilled = False #goes through all the data for each point value in order to find the best values for check in range(farmers): if point >= schedule[check][0] and point < schedule[check][1]: filled_thistime += 1 empty_thistime = 0 isfilled = True break if isfilled == False: filled_thistime = 0 empty_thistime += 1 if (filled_max < filled_thistime) : filled_max = filled_thistime if (empty_max < empty_thistime) : empty_max = empty_thistime print(filled_max) print(empty_max) if (filled_max < filled_thistime): filled_max = filled_thistime w.write(str(filled_max) + " " + str(empty_max) + "\n") f.close() w.close() The program 
works fine, but I need to decrease the time it takes to run.
A less pretty but more efficient approach would be to solve this like a free list, though it is a bit more tricky since the ranges can overlap. This method only requires looping through the input list a single time. def insert(start, end): for existing in times: existing_start, existing_end = existing # New time is a subset of existing time if start >= existing_start and end <= existing_end: return # New time ends during existing time elif end >= existing_start and end <= existing_end: times.remove(existing) return insert(start, existing_end) # New time starts during existing time elif start >= existing_start and start <= existing_end: # existing[1] = max(existing_end, end) times.remove(existing) return insert(existing_start, end) # New time is superset of existing time elif start <= existing_start and end >= existing_end: times.remove(existing) return insert(start, end) times.append([start, end]) data = [ [500,1200], [200,900], [100,1200] ] times = [data[0]] for start, end in data[1:]: insert(start, end) longest_milk = 0 longest_gap = 0 for i, time in enumerate(times): duration = time[1] - time[0] if duration > longest_milk: longest_milk = duration if i != len(times) - 1 and times[i+1][0] - times[i][1] > longest_gap: longes_gap = times[i+1][0] - times[i][1] print(longest_milk, longest_gap)
As stated in the comments, if the input is sorted, the complexity could be O(n), if that's not the case we need to sort it first and the complexity is O(nlog n): lst = [ [300,1000], [700,1200], [1500,2100] ] from itertools import groupby longest_milking = 0 longest_idle = 0 l = sorted(lst, key=lambda k: k[0]) for v, g in groupby(zip(l[::1], l[1::1]), lambda k: k[1][0] <= k[0][1]): l = [*g][0] if v: mn, mx = min(i[0] for i in l), max(i[1] for i in l) if mx-mn > longest_milking: longest_milking = mx-mn else: mx = max((i2[0] - i1[1] for i1, i2 in zip(l[::1], l[1::1]))) if mx > longest_idle: longest_idle = mx # corner case, N=1 (only one interval) if len(lst) == 1: longest_milking = lst[0][1] - lst[0][0] print(longest_milking) print(longest_idle) Prints: 900 300 For input: lst = [ [500,1200], [200,900], [100,1200] ] Prints: 1100 0
Python: logic error in calculating a year (leap)
So I this code which is suppose to return a list with the closest leap year of a list of years. For example: calling the function with [1995 1750 2018] should return 1996 1748 2016 Which it does for that set of numbers. The problem I am having is that when a leap year is in the input for example 2008 it does not give me back the closest leap year to 2008. I get back 2008. Any suggestions as to how I can modify the code to make it work? code def is_leap(year): leap = False if year % 4 == 0: if year % 100 != 0 or year % 400 == 0: leap = True return leap major_b = [] major_f = [] newLst = [] def year_forward(yearBounds): for item in yearBounds: counter = 0 while not is_leap(item): item = item + 1 counter += 1 major_f.append([item, counter]) return major_f def year_backward(yearBounds): for item in yearBounds: counter = 0 while not is_leap(item): item = item - 1 counter -= 1 major_b.append([item,counter]) return major_b def findLastLeapYears(yearBounds): forward = year_forward(yearBounds) backward = year_backward(yearBounds) counter = 0 for item in forward: if abs(item[1]) < abs(backward[counter][1]): newLst.append (str(item[0])) counter+=1 elif abs(item[1]) == abs(backward[counter][1]): if item[0] < backward[counter][0]: newLst.append (str(item[0])) counter += 1 else: newLst.append (str(backward[counter][0])) counter += 1 else: newLst.append (str(backward[counter][0])) counter+=1 return newLst
I'd avoid trying to roll your own leap year detection code. Use calendar.isleap to determine whether a year is a leap year or not. Then go in a loop, like this: import calendar def find_nearest_leap(year): offset = 1 while True: if calendar.isleap(year - offset): return year - offset if calendar.isleap(year + offset): return year + offset offset += 1 To find the list of nearest leap years for a list of values, do this: nearest_leap_years = [find_nearest_leap(year) for year in years] Where years is the list of years you are interested in. I'm also assuming the nearest leap year isn't the year itself, which seems to be a constraint of the problem...
Creating histogram bins from Django queries
I'm trying to create bins with the count of prices to be used for a histogram. I want the bins to be 0-1000, 1000-2000, 2000-3000 and so forth. If I just do group by I get way to many different bins. The code I've written seems to end in a infinite loop (or at least the script is still running after an hour). I'm not sure how to do it correctly. Here is the code I wrote: from itertools import zip_longest def price_histogram(area_id, agency_id): # Get prices and total count for competitors query = HousePrice.objects.filter(area_id=area_id, cur_price__range=(1000,30000)).exclude(agency_id=agency_id) count = query.values('cur_price').annotate(count=Count('cur_price')).order_by('cur_price') total = query.count() # Get prices and total count for selected agency query_agency = HousePrice.objects.filter(area_id=area_id, agency_id=agency_id, cur_price__range=(1000,30000)) count_agency = query_agency.values('cur_price').annotate(count=Count('cur_price')).order_by('cur_price') total_agency = query_agency.count() # Make list for x and y values x_comp = [] y_comp = [] x_agency = [] y_agency = [] bin_start = 0 bin_end = 1000 _count_comp = 0 _count_agency = 0 for row_comp, row_agency in zip_longest(count, count_agency, fillvalue={}): while bin_start < int(row_comp['cur_price']) < bin_end: _count_comp += row_comp['count'] _count_agency += row_agency.get('count', 0) bin_start += 1000 bin_end += 1000 x_comp.append(str(bin_start) + "-" + str(bin_end) + " USD") x_agency.append(str(bin_start) + "-" + str(bin_end) + " USD") y_comp.append(_count_comp/total) y_agency.append(_count_agency/total_agency) return {'x_comp': x_comp, 'y_comp': y_comp, 'x_agency': x_agency, 'y_agency': y_agency} I'm using Python 3.5 and Django 1.10.
I'm a little late, but maybe the django-pivot library does what you want. from django_pivot.histogram import histogram query = HousePrice.objects.filter(area_id=area_id, cur_price__range=(1000,30000)).exclude(agency_id=agency_id hist = histogram(query, cur_price, bins=[1000:30000:1000])
Find longest sequence of 0's in the integer list
A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,0,0,0,0,0,-2,-3,-4,-5,0,0,0] Return initial and ending index of longest sequence of 0's in the list. As, longest sequence of 0's in above list is 0,0,0,0,0,0,0,0 so it should return 12,19 as starting and ending index.Please help with some one line python code. I tried : k = max(len(list(y)) for (c,y) in itertools.groupby(A) if c==0) print(k) which return 8 as the max length. Now, how to find start and end index of longest sequence?
you can first use enumerate to zip the item with index, and then itertools.groupby(list,operator.itemgetter(1)) to group by item, filter only 0s using list(y) for (x,y) in list if x == 0, and at last max(list, key=len) to get the longest sequence. import itertools,operator r = max((list(y) for (x,y) in itertools.groupby((enumerate(A)),operator.itemgetter(1)) if x == 0), key=len) print(r[0][0]) # prints 12 print(r[-1][0]) # prints 19
You can try this: A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,0,0,0,0,0,2,-3,-4,-5,0,0,0] count = 0 prev = 0 indexend = 0 for i in range(0,len(A)): if A[i] == 0: count += 1 else: if count > prev: prev = count indexend = i count = 0 print("The longest sequence of 0's is "+str(prev)) print("index start at: "+ str(indexend-prev)) print("index ends at: "+ str(indexend-1)) Output: The longest sequence of 0's ist 8 index start at: 12 index ends at: 19
A nice concise native python approach target = 0 A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,0,0,0,0,0,2,-3,-4,-5,0,0,0] def longest_seq(A, target): """ input list of elements, and target element, return longest sequence of target """ cnt, max_val = 0, 0 # running count, and max count for e in A: cnt = cnt + 1 if e == target else 0 # add to or reset running count max_val = max(cnt, max_val) # update max count return max_val
Now that you have the length, find that k-length sequence of 0's in the original list. Expanding the stuff you'll eventually work into one line: # k is given in your post k_zeros = [0]*k for i in range(len(A)-k): if A[i:i+k] == k_zeros: break # i is the start index; i+k-1 is the end Can you wrap this into a single statement now?
Ok, as one long disgusting line! "-".join([sorted([list(y) for c,y in itertools.groupby([str(v)+"_"+str(i) for i,v in enumerate(A)], lambda x: x.split("_")[0]) if c[0] == '0'],key=len)[-1][a].split("_")[1] for a in [0,-1]]) It keeps track of indices by turning [1,2,0...] into ["1_0","2_1","0_2",..] and then doing some splitting and parsing. Yes it's very ugly and you should go with one of the other answers but I wanted to share
This solution i submitted in Codility with 100 percent efficieny. class Solution { public int solution(int N) { int i = 0; int gap = 0; `bool startZeroCount = false; List<int> binaryArray = new List<int>(); while (N > 0) { binaryArray.Add(N % 2); N = N / 2; i++; } List<int> gapArr = new List<int>(); for (int j = i-1; j >= 0; j--) { if (binaryArray[j] == 1) { if(startZeroCount) { gapArr.Add(gap); gap = 0; } startZeroCount = true; } else if(binaryArray[j] == 0) { if (startZeroCount) gap++; } } gapArr.Sort(); if (gapArr.Count != 0) return gapArr[gapArr.Count - 1]; else return 0;enter code here } }
A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,2,-3,-4,-5,0,0,0,0] count = 0 prev = 0 indexend = 0 indexcount = 0 for i in range(0,len(A)): if A[i] == 0: count += 1 indexcount = i else: if count > prev: prev = count indexend = i count = 0 if count > prev: prev = count indexend = indexcount print("The longest sequence of 0's is "+str(prev)) print("index start at: "+ str(indexend-prev)) print("index ends at: "+ str(indexend-1)) To also consider if longest 0's sequecnces are at the end. Output The longest sequence of 0's is 4 index start at: 18 index ends at: 21
If you would like to completely avoid Python iteration you can do it with Numpy. E.g., for very long sequences, using for loops may be relatively slow. This method will use pre-compiled C for-loops under the hood. The disadvantage is that you have multiple for-loops here. Nonetheless, overall, below algorithm should be a speed gain on longer sequences. import numpy as np def longest_sequence(bool_array): where_not_true = np.where(~bool_array)[0] lengths_plus_1 = np.diff(np.hstack((-1,where_not_true,len(bool_array)))) index = np.cumsum(np.hstack((0,lengths_plus_1))) start_in_lngth = np.argmax(lengths_plus_1) start = index[ start_in_lngth] length = lengths_plus_1[start_in_lngth] - 1 return start, length t = np.array((0,1,0,1,1,1,0,0,1,1,0,1)) print(longest_sequence(t==0)) print(longest_sequence(t==1)) p = np.array((0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1)) print(longest_sequence(p==0)) print(longest_sequence(p==1))