Find the year with the most number of people alive in Python - python

Given a list of people with their birth and end years (all between 1900 and 2000), find the year with the most number of people alive.
Here is my somewhat brute-force solution:
def most_populated(population, single=True):
    """Return the year in which the most people were alive.

    population -- list of (birth_year, death_year) tuples; the death year
                  itself counts as a year the person is alive (Update 2).
    single     -- if True, return the single (first) peak year; otherwise
                  return a list of every year tied for the maximum.

    Fixes vs. the original: the first person seen in a year now counts as
    1 rather than 0 (the old `years[year] = 0` undercounted every year),
    the death year is included via `death + 1`, and Python 2's
    xrange/iteritems are replaced with their Python 3 equivalents.
    """
    years = {}
    for birth, death in population:
        # +1 so the death year itself is counted as "alive"
        for year in range(birth, death + 1):
            years[year] = years.get(year, 0) + 1
    if single:
        return max(years, key=years.get)
    peak = max(years.values())  # hoisted: computed once, not per item
    return [year for year, count in years.items() if count == peak]
# Example usage.  Python 3 print syntax: the original used the Python 2
# print statement, which is a SyntaxError on Python 3.
print(most_populated([(1920, 1939), (1911, 1944),
                      (1920, 1955), (1938, 1939)]))
print(most_populated([(1920, 1939), (1911, 1944),
                      (1920, 1955), (1938, 1939), (1937, 1940)], False))
I'm trying to find a more efficient way to solve this problem in Python. Both readability and efficiency count. Moreover, for some reason my code won't print [1938, 1939] when it should.
Update
Input is a list of tuples, where first element of a tuple is a year when person was born, and second element of a tuple is the year of death.
Update 2
End year (2nd part of tuple) counts as well as a year of the person being alive (so If the person dies in Sept 1939 (we don't care about the month), he is actually alive in 1939, at least part of it). That should fix the 1939' missing in results.
Best solution?
While readability counts in favor of @joran-beasley, for bigger input the most efficient algorithm was provided by @njzk2. Thanks @hannes-ovrén for providing the analysis in an IPython notebook on Gist

Another solution I just though of:
Create 2 tables, birthdates and deathdates.
Accumulate birth dates and death dates in those tables.
Browse those tables to accumulate the number of alive people at the time.
Grand total complexity is O(n)
Implementation
from collections import Counter


def most_populated(population, single=True):
    """O(n + years) sweep: accumulate birth/death deltas year by year.

    population -- list of (birth_year, death_year) tuples; the death year
                  counts as alive, hence the `d + 1` below.
    single     -- True: first peak year; False: list of all tied years.
    """
    # Materialize as lists: under Python 3, map() returns a one-shot
    # iterator, and `birth`/`death` are each consumed twice below.
    birth = [b for b, _ in population]
    death = [d + 1 for _, d in population]
    b = Counter(birth)
    d = Counter(death)
    alive = 0
    years = {}
    for year in range(min(birth), max(death) + 1):
        alive += b[year] - d[year]  # Counter yields 0 for missing keys
        years[year] = alive
    if single:
        return max(years, key=years.get)
    peak = max(years.values())  # hoisted out of the comprehension
    return [year for year, count in years.items() if count == peak]
Better
from collections import Counter
from itertools import accumulate
import operator


def most_populated(population, single=True):
    """Peak-population year via a running sum of +1/-1 year deltas.

    population -- list of (birth_year, death_year) tuples; the death year
                  counts as alive (deltas are subtracted at death + 1).
    single     -- True: first peak year; False: list of all tied years.
    """
    delta = Counter(x[0] for x in population)
    delta.subtract(Counter(x[1] + 1 for x in population))
    start, end = min(delta.keys()), max(delta.keys())
    # years[i] is the head-count in year start + i
    years = list(accumulate(delta[year] for year in range(start, end)))
    if single:
        best, _ = max(enumerate(years), key=operator.itemgetter(1))
        return best + start
    # Hoisted: the original recomputed max(years) for every element,
    # turning the final scan into O(n^2).
    peak = max(years)
    return [i + start for i, val in enumerate(years) if val == peak]

>>> from collections import Counter
>>> from itertools import chain
>>> def most_pop(pop):
...     # one flat stream containing every year each person is alive
...     # (j + 1 keeps the death year, per the question's Update 2)
...     pop_flat = chain.from_iterable(range(i,j+1) for i,j in pop)
...     # most_common() -> (year, count) pairs sorted by count, descending
...     return Counter(pop_flat).most_common()
...
>>> most_pop([(1920, 1939), (1911, 1944), (1920, 1955), (1938, 1939)])[0]

I would go like this:
Sort persons by birth year (unborn list)
Starting from the first born
Put that person in the alive list
Using an insertion sort by date of death (the list stays sorted, so use a binary search)
Until you reach a person that was not born that year
Then, starting from the person in the alive list that dies first, remove it from the list.
Put the size of the alive list in a dict
Increment the year
Loop until the unborn and alive lists are empty
Complexity should be around O((m + n) * log(m)) (each year is considered only once, and each person only twice, multiplied by the insertion cost in the alive list)
Implementation
from bisect import insort


def most_populated(population, single=True):
    """Year-by-year sweep maintaining the set of currently-alive people.

    population -- list of (birth_year, death_year) tuples.
    single     -- True: first peak year; False: list of all tied years.

    `alive` holds *negated* death years kept sorted by insort, so
    alive[-1] (the least negative entry) is always the earliest death.
    Fixes vs. the original: Python 2 `iteritems` replaced with `items`,
    and the peak count is computed once instead of per element.
    """
    years = {}
    # sorted by birth descending so .pop() yields the earliest birth
    unborn = sorted(population, key=lambda x: -x[0])
    alive = []
    dead = []
    first_year = unborn[-1][0]
    last_year = max(population, key=lambda x: x[1])[1]
    for year in range(first_year, last_year + 1):
        # everyone born this year joins the alive list
        while unborn and unborn[-1][0] == year:
            insort(alive, -unborn.pop()[1])
        # everyone who died *last* year leaves now, so the death year
        # itself is still counted as alive
        while alive and alive[-1] == -(year - 1):
            dead.append(-alive.pop())
        years[year] = len(alive)
    if single:
        return max(years, key=years.get)
    peak = max(years.values())
    return [year for year, count in years.items() if count == peak]

We can also use numpy slicing, which is quite neat, and should also be quite efficient:
import numpy as np
from collections import namedtuple

Person = namedtuple('Person', ('birth', 'death'))

people = [Person(1900, 2000), Person(1950, 1960), Person(1955, 1959)]

START_YEAR = 1900
END_YEAR = 2000

# One counter per year in [START_YEAR, END_YEAR]; each person bumps
# every year of their lifetime in a single vectorized slice.
people_alive = np.zeros(END_YEAR - START_YEAR + 1)
for person in people:
    lo = person.birth - START_YEAR
    hi = person.death - START_YEAR + 1  # slice end is exclusive: +1 keeps the death year
    people_alive[lo:hi] += 1

# Every year (ties included) where the head-count reaches its maximum.
most_alive = np.flatnonzero(people_alive == people_alive.max()) + START_YEAR
EDIT It seems like the namedtuple adds a bit of overhead, so to speed up a bit more, remove the namedtuple and do
for birth, death in people: instead.

Just put the birth and death years into a dict. If it is birth, increase the value by 1. or vice versa.
Sort the dict by keys and iterate by reading the current number of the alive people.
Follow the 'maxAlive' an 'theYear' to get the first year with the highest number
# Year-delta sweep: +1 at each birth year, -1 the year *after* each death.
# The original decremented at p.death itself, which dropped every person
# from the count in their death year; per the question's Update 2 the
# death year counts as alive, so the -1 belongs at p.death + 1.
# (`people` is assumed to be a list of objects with .birth/.death -- it is
# defined elsewhere.)
years = {}
for p in people:
    years[p.birth] = years.get(p.birth, 0) + 1
    years[p.death + 1] = years.get(p.death + 1, 0) - 1

alive = 0
maxAlive = 0
theYear = people[0].birth
# Walk the years in order, accumulating the net change; keep the first
# year that attains the highest head-count.
for year in sorted(years):
    alive += years[year]
    if alive > maxAlive:
        maxAlive = alive
        theYear = year

Without importing anything, and using a class for readability, here's my solution. Let me know what you think! I also made a separate function for getMaxBirthYear in case you're at an interview and someone wants you to code that out rather than using built in functions (I used them :) )
class Person:
    """A person described only by a birth year and a death year."""

    def __init__(self, birth=None, death=None):
        self.birth = birth
        self.death = death
def getPopulationPeak(people):
    """Return (year, count) for the first year at which population peaks."""
    deltas = getDeltas(people, getMaxBirthYear(people))
    running = 0
    best = 0
    bestYear = 0
    # Accumulate the +1/-1 deltas in chronological order; remember the
    # first year the running head-count reaches a new maximum.
    for year in sorted(deltas.keys()):
        running += deltas[year]
        if running > best:
            best, bestYear = running, year
    return bestYear, best
def getMaxBirthYear(people):
    """Return the latest (largest) birth year among *people*."""
    return max(person.birth for person in people)
def getDeltas(people, maxBirthYear):
    """Build a {year: net population change} map from *people*.

    Each person contributes +1 at their birth year and -1 the year after
    their death (the death year itself counts as alive).  Deaths after
    the last birth year are skipped entirely: the peak must fall on some
    birth year, so later deaths cannot affect it.  (The original only
    skipped such a death when its year was not already a key -- harmless
    for the peak, but inconsistent -- and used the `in dict.keys()`
    anti-idiom for membership tests.)
    """
    deltas = {}
    for person in people:
        deltas[person.birth] = deltas.get(person.birth, 0) + 1
        if person.death <= maxBirthYear:
            after = person.death + 1
            deltas[after] = deltas.get(after, 0) - 1
    return deltas
# Example data: three people (born 1985 and twice 2000) overlap around
# the year 2000, so the expected peak is (2000, 3).
testPeople = [
    Person(1750,1802),
    Person(2000,2010),
    Person(1645,1760),
    Person(1985,2002),
    Person(2000,2050),
    Person(2005,2080),
]
print(getPopulationPeak(testPeople))  # -> (2000, 3)

How about this one:
def max_pop(pop):
    """Return (year, count) for the first year with the most people alive.

    Sweeps (+1 at birth, -1 at death + 1) events in year order; the death
    year counts as alive.  Fixes vs. the original: the accumulator no
    longer shadows the builtin `max`, and `chain` -- which the snippet
    used without importing -- is imported explicitly.
    """
    from itertools import chain  # local import: the original assumed it was in scope
    alive = 0
    best = (0, 0)  # (year, head-count)
    events = sorted(chain.from_iterable([((b, 1), (d + 1, -1)) for b, d in pop]))
    for year, step in events:
        alive += step
        if alive > best[1]:
            best = (year, alive)
    return best
It's not affected by the year span but is nlogn in the |pop| (unless you'd roll out a radix sort which would be ~ 10n for a thousand year span and should be faster for |pop|>1000 ). Can't have both. A very general solution would have to scan first and decide which algo to use based on measured year span and |pop|.

my answer
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
public class AlogrimVarsta {

    // Builds, for every year in [startYear, stopYear], the number of the
    // sample people alive that year, then prints one line per year.
    // Runs in O(years * people): each year is checked against each person.
    public static void main(String args[]) {
        int startYear = 1890;
        int stopYear = 2000;

        List<Person> listPerson = new LinkedList<>();
        listPerson.add(new Person(1910, 1940));
        listPerson.add(new Person(1920, 1935));
        listPerson.add(new Person(1900, 1950));
        listPerson.add(new Person(1890, 1920));
        listPerson.add(new Person(1890, 2000));
        listPerson.add(new Person(1945, 2000));

        // year -> people alive that year; LinkedHashMap preserves
        // insertion order so the printout is chronological.
        Map<Integer, Integer> mapPersoaneCareAuTrait = new LinkedHashMap<>();
        for (int x = startYear; x <= stopYear; x++) {
            mapPersoaneCareAuTrait.put(x, 0);
        }
        for (int x = startYear; x <= stopYear; x++) {
            for (Person per : listPerson) {
                int value = mapPersoaneCareAuTrait.get(x);
                // born exactly this year
                if (per.getBorn() == x) {
                    mapPersoaneCareAuTrait.put(x, value + 1);
                    continue;
                }
                // died exactly this year (death year counts as alive)
                if (per.getDie() == x) {
                    mapPersoaneCareAuTrait.put(x, value + 1);
                    continue;
                }
                // strictly between birth and death: this condition
                // algebraically reduces to (born < x && x < die)
                if ((per.getDie() - per.getBorn() > per.getDie() - x) && (per.getDie() - x > 0)) {
                    mapPersoaneCareAuTrait.put(x, value + 1);
                    continue;
                }
            }
        }
        for (Map.Entry<Integer, Integer> mapEntry : mapPersoaneCareAuTrait.entrySet()) {
            System.out.println("an " + mapEntry.getKey() + " numar " + mapEntry.getValue());
        }
    }

    // Immutable (birth year, death year) pair.
    static class Person {
        final private int born;
        final private int die;

        public Person(int pBorn, int pDie) {
            die = pDie;
            born = pBorn;
        }

        public int getBorn() {
            return born;
        }

        public int getDie() {
            return die;
        }
    }
}

l = [(1920, 1939), (1911, 1944), (1920, 1955), (1938, 1939)]

# Intersect the year-ranges of all people: the years shared by every
# range are exactly the years when everyone is alive at once.
union = set()
for interval in l:
    span = set(range(min(interval), max(interval) + 1))
    if not union:
        union = span  # seed with the first person's span
    union = span & union
print(union)

I came over the following code that is exactly what you need.
Let's say the range of years is 1900 - 2000
Steps of the algorithm
Construct an array X of 100 integers (all initialized to zero; 101 integers if the year 2000 is included).
For each of the N people, increment X[birth year - 1900] by one and decrement X[death year - 1900] by one.
Iterate through X, maintaining a sum of each element as you go. The year with the most people alive is 1900 plus the index where the sum is maximum.
Code (Python as requested)
def year_with_max_population(people):
    """Return the first year in [1900, 2000] with the most people alive.

    Each person adds +1 at birth_year and -1 at death_year (the death
    year is *not* counted as alive in this variant, mirroring the
    algorithm described above).

    Fixes vs. the original:
    * 101 slots instead of 100 so the year 2000 is indexable -- a person
      with death_year == 2000 raised IndexError before;
    * Python 2's xrange() replaced (NameError on Python 3).
    """
    population_changes = [0] * 101  # one slot per year 1900..2000 inclusive
    for person in people:
        population_changes[person.birth_year - 1900] += 1
        population_changes[person.death_year - 1900] -= 1
    max_population = 0
    max_population_index = 0
    population = 0
    # Running sum of deltas; remember the first index hitting the maximum.
    for index, population_change in enumerate(population_changes):
        population += population_change
        if population > max_population:
            max_population = population
            max_population_index = index
    return 1900 + max_population_index
credit 'Brian Schmitz' here

Related

How can I optimize a code that is using xarray for better performance?

I'm trying to extract climate data from various .nc files I have but the process is taking extremely long, I suspect it has something to do with the fact that I'm trying to extract the data for every day of June, July, August for the next 79 years. But I'm a novice programmer and I realize there might've been a few oversights by me (efficiency wise) that might've resulted in a slightly better performance.
This is the snippet
def calculateTemp(coords, year, model):
    """
    takes in all coordinates of a line between two grid stations and the year
    converts the year into date
    takes average of temperature of each day of the month of June for each
    coordinate and then takes average of all coordinates to find average temp
    for that line for the month of June
    """
    print(year)
    # coords represents a list of different sets of coordinates between two grids
    temp3 = 0  # sum of all temps of all coordinates
    for i in range(0, len(coords)):
        temp2 = 0
        counter = 0
        # this loop represents that the 15 years data is being extracted for
        # each coordinate set and average of those 15 years is being taken
        for p in range(0, 15):
            temp1 = 0  # sum of all temps for one coordinate in all days of June, July, August
            if year + p < 100:
                # this loop represents the months of Jun, Jul, Aug
                for j in range(6, 9):
                    # 30 days of each month
                    for k in range(1, 31):
                        if k < 10:
                            # this if-else makes a string of date
                            date = '20'+str(year+p)+'-0'+str(j)+'-0'+str(k)
                        else:
                            date = '20'+str(year+p)+'-0'+str(j)+'-'+str(k)
                        # there are 3 variants of the climate model
                        # for years upto 2040, between 2041-2070
                        # and between 2071 and 2099
                        # hence this if else block
                        if year+p < 41:
                            temp1 += model[0]['tasmax'].sel(
                                lon=coords[i][1], lat=coords[i][0], time=date, method='nearest').data[0]
                        elif year+p >= 41 and year+p <71:
                            temp1 += model[1]['tasmax'].sel(
                                lon=coords[i][1], lat=coords[i][0], time=date, method='nearest').data[0]
                        else:
                            temp1 += model[2]['tasmax'].sel(
                                lon=coords[i][1], lat=coords[i][0], time=date, method='nearest').data[0]
                        # counter ends up counting *days*, 90 per valid p
                        counter += 1
                # average over the 90 days (3 months x 30 days) just summed
                avg = temp1/(len(range(0,30))*len(range(6,9)))
                temp2 += avg
        # NOTE(review): if no p satisfied `year + p < 100`, counter is 0 and
        # this divides by zero.  Also temp2 is a sum of per-year averages but
        # counter counts days (90 * valid years) -- confirm the intended
        # averaging, as the answer below also suggests.
        temp3 += temp2/counter
    Tamb = temp3/len(coords)
    return Tamb
Is there anyway I can increase the performance of this code and optimize it?
I just replaced the innermost loops k in range(1,31)and j in range(6,9)into a dict comprehension to generate all the dates and corresponding value from your model. Then simply averaged these values for every value of p and then for every coord in coords.
Give this a shot. Dicts should make the processing faster. Also check if the averages are exactly how you are calculating them in your function.
def build_date(year, p, j, k):
    """Return the date string '20YY-MM-DD' for year-offset *p*, month *j*, day *k*.

    Uses zero-padded fields throughout.  The original concatenated
    '20' + str(year + p), which produced malformed dates such as
    '205-06-01' whenever year + p was a single digit; :02d padding fixes
    that while matching the original output for two-digit values.
    """
    return f"20{year + p:02d}-{j:02d}-{k:02d}"
def calculateTemp(coords, year, model):
    """Average 'tasmax' over Jun-Aug days, up to 15 years, and all coords.

    NOTE(review): func2 closes over the loop variable `i` defined below --
    late binding works here only because func2 is called strictly inside
    the `for i` loop.
    """
    func2 = lambda x,date:model[x]['tasmax'].sel(lon=coords[i][1],
                                                 lat=coords[i][0],
                                                 time=date,
                                                 method='nearest').data[0]
    print(year)
    out = {}
    for i in range(len(coords)):
        inner = {}
        for p in range(0,15):
            if year + p < 100:
                # date -> temperature; model variant 0/1/2 chosen by the
                # same year bands as the original if/elif/else chain
                dates = {build_date(year,p,j,k):func2(0,build_date(year,p,j,k)) if year+p<41 \
                         else func2(1,build_date(year,p,j,k)) if (year+p >= 41 and year+p <71) \
                         else func2(2,build_date(year,p,j,k))
                         for j in range(6,9) \
                         for k in range(1,31) }
                # mean over all 90 days for this year offset
                inner[p] = sum([v for k,v in dates.items()])/len(dates)
        out[i] = inner
    # NOTE(review): if every `year + p >= 100`, inner is empty and len(v) is 0
    # here -> ZeroDivisionError; the original function had the same hazard.
    coord_averages = {k : sum(v.values())/len(v) for k,v in out.items() }
    Tamb = sum([v for k,v in coord_averages.items()])/len(coord_averages)
    return Tamb

How to optimize an O(N*M) to be O(n**2)?

I am trying to solve USACO's Milking Cows problem. The problem statement is here: https://train.usaco.org/usacoprob2?S=milk2&a=n3lMlotUxJ1
Given a series of intervals in the form of a 2d array, I have to find the longest interval and the longest interval in which no milking was occurring.
Ex. Given the array [[500,1200],[200,900],[100,1200]], the longest interval would be 1100 as there is continuous milking and the longest interval without milking would be 0 as there are no rest periods.
I have tried looking at whether utilizing a dictionary would decrease run times but I haven't had much success.
# USACO "Milking Cows": read farmer intervals from milk2.in and write the
# longest continuously-milked stretch plus the longest idle stretch.
f = open('milk2.in', 'r')
w = open('milk2.out', 'w')
#getting the input
farmers = int(f.readline().strip())
schedule = []
for i in range(farmers):
    schedule.append(f.readline().strip().split())
#schedule = data
minvalue = 0
maxvalue = 0
#getting the minimums and maximums of the data
for time in range(farmers):
    schedule[time][0] = int(schedule[time][0])
    schedule[time][1] = int(schedule[time][1])
    # seed min/max from the first row (the == 0 guards are redundant with
    # the min()/max() calls below, but preserved as written)
    if (minvalue == 0):
        minvalue = schedule[time][0]
    if (maxvalue == 0):
        maxvalue = schedule[time][1]
    minvalue = min(schedule[time][0], minvalue)
    maxvalue = max(schedule[time][1], maxvalue)
filled_thistime = 0
filled_max = 0
empty_max = 0
empty_thistime = 0
#goes through all the possible items in between the minimum and the maximum
# NOTE(review): O((maxvalue - minvalue) * farmers) -- every time unit is
# tested against every interval.  This nested scan is the slow part the
# question asks about.
for point in range(minvalue, maxvalue):
    isfilled = False
    #goes through all the data for each point value in order to find the best values
    for check in range(farmers):
        # half-open containment test [start, end)
        if point >= schedule[check][0] and point < schedule[check][1]:
            filled_thistime += 1
            empty_thistime = 0
            isfilled = True
            break
    if isfilled == False:
        filled_thistime = 0
        empty_thistime += 1
    if (filled_max < filled_thistime) :
        filled_max = filled_thistime
    if (empty_max < empty_thistime) :
        empty_max = empty_thistime
print(filled_max)
print(empty_max)
# NOTE(review): redundant -- filled_max was already kept up to date inside
# the loop above.
if (filled_max < filled_thistime):
    filled_max = filled_thistime
w.write(str(filled_max) + " " + str(empty_max) + "\n")
f.close()
w.close()
The program works fine, but I need to decrease the time it takes to run.
A less pretty but more efficient approach would be to solve this like a free list, though it is a bit more tricky since the ranges can overlap. This method only requires looping through the input list a single time.
def insert(start, end):
    # Merges the interval [start, end] into the module-level `times` list,
    # coalescing overlaps.  On finding an overlapping entry it removes it
    # and recurses with the widened bounds; removing-then-returning keeps
    # the iteration safe despite mutating `times`.
    for existing in times:
        existing_start, existing_end = existing
        # New time is a subset of existing time
        if start >= existing_start and end <= existing_end:
            return
        # New time ends during existing time
        elif end >= existing_start and end <= existing_end:
            times.remove(existing)
            return insert(start, existing_end)
        # New time starts during existing time
        elif start >= existing_start and start <= existing_end:
            # existing[1] = max(existing_end, end)
            times.remove(existing)
            return insert(existing_start, end)
        # New time is superset of existing time
        elif start <= existing_start and end >= existing_end:
            times.remove(existing)
            return insert(start, end)
    # No overlap with anything: keep as a new disjoint interval.
    times.append([start, end])
data = [
    [500, 1200],
    [200, 900],
    [100, 1200],
]

# Seed with the first interval, then merge the rest into `times`.
times = [data[0]]
for start, end in data[1:]:
    insert(start, end)

longest_milk = 0
longest_gap = 0
for i, time in enumerate(times):
    duration = time[1] - time[0]
    if duration > longest_milk:
        longest_milk = duration
    # Gap between this interval and the next.  The original assigned the
    # result to a misspelled `longes_gap`, so the reported gap was always 0.
    if i != len(times) - 1 and times[i + 1][0] - times[i][1] > longest_gap:
        longest_gap = times[i + 1][0] - times[i][1]
print(longest_milk, longest_gap)
As stated in the comments, if the input is sorted, the complexity could be O(n), if that's not the case we need to sort it first and the complexity is O(nlog n):
lst = [ [300,1000],
        [700,1200],
        [1500,2100] ]
from itertools import groupby
longest_milking = 0
longest_idle = 0
l = sorted(lst, key=lambda k: k[0])  # sort intervals by start time
# Pair consecutive intervals and group the pairs by whether they overlap
# or touch (next start <= current end): True groups are merged "milking"
# stretches, False groups are the gaps between stretches.
for v, g in groupby(zip(l[::1], l[1::1]), lambda k: k[1][0] <= k[0][1]):
    # NOTE(review): only the FIRST pair of each group is used ([0]); that
    # matches the worked examples but looks wrong for longer overlapping
    # chains -- confirm.  Rebinding `l` here is safe only because zip()
    # above already captured copies of the slices.
    l = [*g][0]
    if v:
        mn, mx = min(i[0] for i in l), max(i[1] for i in l)
        if mx-mn > longest_milking:
            longest_milking = mx-mn
    else:
        # idle time between the two intervals of the pair
        mx = max((i2[0] - i1[1] for i1, i2 in zip(l[::1], l[1::1])))
        if mx > longest_idle:
            longest_idle = mx
# corner case, N=1 (only one interval)
if len(lst) == 1:
    longest_milking = lst[0][1] - lst[0][0]
print(longest_milking)
print(longest_idle)
Prints:
900
300
For input:
lst = [ [500,1200],
[200,900],
[100,1200] ]
Prints:
1100
0

Python: logic error in calculating a year (leap)

So I this code which is suppose to return a list with the closest leap year of a list of years.
For example: calling the function with [1995 1750 2018] should return
1996 1748 2016
Which it does for that set of numbers.
The problem I am having is that when a leap year is in the input for example 2008 it does not give me back the closest leap year to 2008. I get back 2008.
Any suggestions as to how I can modify the code to make it work?
code
def is_leap(year):
    """Gregorian leap-year test: divisible by 4, excluding centuries
    unless they are divisible by 400."""
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
# Module-level accumulators used by the helpers below.  Note they persist
# for the life of the module, so repeated calls to the helpers keep
# appending to the same lists.
major_b = []
major_f = []
newLst = []
def year_forward(yearBounds):
    """For each year in *yearBounds*, find the nearest leap year at or
    after it.

    Returns a list of [leap_year, steps_forward] pairs, one per input
    year (steps_forward is 0 when the year itself is a leap year).

    Uses a local result list: the original appended to the module-level
    `major_f`, so every call grew the same list and corrupted results on
    repeated use.  The global is left untouched.
    """
    result = []
    for item in yearBounds:
        counter = 0
        while not is_leap(item):
            item = item + 1
            counter += 1
        result.append([item, counter])
    return result
def year_backward(yearBounds):
    """For each year in *yearBounds*, find the nearest leap year at or
    before it.

    Returns a list of [leap_year, steps_backward] pairs where
    steps_backward is zero or negative (counting down).

    Uses a local result list: the original appended to the module-level
    `major_b`, so every call grew the same list and corrupted results on
    repeated use.  The global is left untouched.
    """
    result = []
    for item in yearBounds:
        counter = 0
        while not is_leap(item):
            item = item - 1
            counter -= 1
        result.append([item, counter])
    return result
def findLastLeapYears(yearBounds):
    """Return, as strings, the leap year closest to each input year.

    A tie (equal distance forward and backward) resolves to the smaller
    year.  Note: a year that *is* a leap year maps to itself (distance 0
    both ways).

    Improvements vs. the original: results go into a local list instead
    of the module-level `newLst` (which retained results from earlier
    calls), and the manual index counter is replaced with zip().
    """
    result = []
    for fwd, bwd in zip(year_forward(yearBounds), year_backward(yearBounds)):
        if abs(fwd[1]) < abs(bwd[1]):
            result.append(str(fwd[0]))
        elif abs(fwd[1]) == abs(bwd[1]):
            # equidistant: prefer the earlier year
            result.append(str(min(fwd[0], bwd[0])))
        else:
            result.append(str(bwd[0]))
    return result
I'd avoid trying to roll your own leap year detection code. Use calendar.isleap to determine whether a year is a leap year or not.
Then go in a loop, like this:
import calendar


def find_nearest_leap(year):
    """Return the leap year closest to *year*, never *year* itself.

    Equidistant candidates resolve to the earlier year, because the
    past-facing candidate is tested first.
    """
    offset = 1
    while not (calendar.isleap(year - offset) or calendar.isleap(year + offset)):
        offset += 1
    return year - offset if calendar.isleap(year - offset) else year + offset
To find the list of nearest leap years for a list of values, do this:
nearest_leap_years = [find_nearest_leap(year) for year in years]
Where years is the list of years you are interested in.
I'm also assuming the nearest leap year isn't the year itself, which seems to be a constraint of the problem...

Creating histogram bins from Django queries

I'm trying to create bins with the count of prices to be used for a histogram.
I want the bins to be 0-1000, 1000-2000, 2000-3000 and so forth. If I just do a group by, I get way too many different bins.
The code I've written seems to end in an infinite loop (or at least the script is still running after an hour). I'm not sure how to do it correctly. Here is the code I wrote:
from itertools import zip_longest


def price_histogram(area_id, agency_id):
    """Build 1000-wide price histograms for competitors and one agency.

    Returns a dict of x (bin label) and y (fraction) lists for plotting.
    NOTE(review): the binning loop below contains the likely cause of the
    reported hang -- see inline comments.
    """
    # Get prices and total count for competitors
    query = HousePrice.objects.filter(area_id=area_id, cur_price__range=(1000,30000)).exclude(agency_id=agency_id)
    count = query.values('cur_price').annotate(count=Count('cur_price')).order_by('cur_price')
    total = query.count()
    # Get prices and total count for selected agency
    query_agency = HousePrice.objects.filter(area_id=area_id, agency_id=agency_id, cur_price__range=(1000,30000))
    count_agency = query_agency.values('cur_price').annotate(count=Count('cur_price')).order_by('cur_price')
    total_agency = query_agency.count()
    # Make list for x and y values
    x_comp = []
    y_comp = []
    x_agency = []
    y_agency = []
    bin_start = 0
    bin_end = 1000
    _count_comp = 0
    _count_agency = 0
    # NOTE(review): fillvalue={} means row_comp['cur_price'] raises KeyError
    # if `count` is exhausted before `count_agency`.
    for row_comp, row_agency in zip_longest(count, count_agency, fillvalue={}):
        # NOTE(review): nothing in this while-body changes bin_start/bin_end
        # or the row, so once a price falls strictly inside the current bin
        # the condition never becomes false -- an infinite loop.  The bin
        # advance below probably belongs inside the loop, and prices equal
        # to a bin boundary are never counted (strict <).
        while bin_start < int(row_comp['cur_price']) < bin_end:
            _count_comp += row_comp['count']
            _count_agency += row_agency.get('count', 0)
        bin_start += 1000
        bin_end += 1000
        x_comp.append(str(bin_start) + "-" + str(bin_end) + " USD")
        x_agency.append(str(bin_start) + "-" + str(bin_end) + " USD")
        # NOTE(review): the running counts are never reset per bin, so these
        # are cumulative fractions rather than per-bin fractions -- confirm.
        y_comp.append(_count_comp/total)
        y_agency.append(_count_agency/total_agency)
    return {'x_comp': x_comp, 'y_comp': y_comp, 'x_agency': x_agency, 'y_agency': y_agency}
I'm using Python 3.5 and Django 1.10.
I'm a little late, but maybe the django-pivot library does what you want.
from django_pivot.histogram import histogram

# Bucket competitors' `cur_price` into 1000-wide bins from 1000 to 30000.
# Fixes vs. the original snippet: the filter call was missing its closing
# parenthesis, the field name must be passed as a string, and
# `bins=[1000:30000:1000]` is not valid Python -- a range of bin edges is
# used instead.
query = HousePrice.objects.filter(area_id=area_id, cur_price__range=(1000, 30000)).exclude(agency_id=agency_id)
hist = histogram(query, 'cur_price', bins=range(1000, 30001, 1000))

Find longest sequence of 0's in the integer list

# Sample data: the longest run of zeros spans indices 12..19 (length 8).
A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,0,0,0,0,0,-2,-3,-4,-5,0,0,0]
Return initial and ending index of longest sequence of 0's in the list.
As the longest sequence of 0's in the above list is 0,0,0,0,0,0,0,0, it should return 12,19 as the starting and ending indices. Please help with some one-line Python code.
I tried :
# Length of the longest run of zeros: group consecutive equal values and
# take the size of the largest all-zero group.  (Requires `import itertools`.)
k = max(len(list(y)) for (c,y) in itertools.groupby(A) if c==0)
print(k)
which return 8 as the max length.
Now, how to find start and end index of longest sequence?
you can first use enumerate to zip the item with index,
and then itertools.groupby(list,operator.itemgetter(1)) to group by item,
filter only 0s using list(y) for (x,y) in list if x == 0,
and at last max(list, key=len) to get the longest sequence.
import itertools
import operator

# Pair each value with its index, group consecutive runs of equal values,
# and keep the longest run of zeros; its first and last pairs carry the
# start and end indices.
r = max(
    (list(y) for (x, y) in itertools.groupby(enumerate(A), operator.itemgetter(1)) if x == 0),
    key=len,
)
print(r[0][0])   # prints 12 -- start index of the run
print(r[-1][0])  # prints 19 -- end index of the run
You can try this:
A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,0,0,0,0,0,2,-3,-4,-5,0,0,0]
count = 0      # length of the current run of zeros
prev = 0       # length of the longest run seen so far
indexend = 0   # index one past the end of that longest run
for i in range(0, len(A)):
    if A[i] == 0:
        count += 1
    else:
        if count > prev:
            prev = count
            indexend = i
        count = 0
# A run of zeros that reaches the end of the list never hits the
# else-branch above; the original missed it entirely.
if count > prev:
    prev = count
    indexend = len(A)
print("The longest sequence of 0's is "+str(prev))
print("index start at: "+ str(indexend-prev))
print("index ends at: "+ str(indexend-1))
Output:
The longest sequence of 0's is 8
index start at: 12
index ends at: 19
A nice concise native python approach
# Example inputs: look for runs of `target` (zero) inside A.
target = 0
A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,0,0,0,0,0,2,-3,-4,-5,0,0,0]
def longest_seq(A, target):
    """Return the length of the longest consecutive run of *target* in *A*."""
    best = 0
    run = 0
    for item in A:
        if item == target:
            run += 1
            if run > best:
                best = run
        else:
            run = 0  # run broken; start counting afresh
    return best
Now that you have the length, find that k-length sequence of 0's in the original list. Expanding the stuff you'll eventually work into one line:
# k is given in your post
k_zeros = [0]*k
# Scan every window of length k.  There are len(A) - k + 1 such windows;
# the original's range(len(A) - k) stopped one short and so missed a run
# of zeros sitting at the very end of the list.
for i in range(len(A) - k + 1):
    if A[i:i+k] == k_zeros:
        break
# i is the start index; i+k-1 is the end
Can you wrap this into a single statement now?
Ok, as one long disgusting line!
"-".join([sorted([list(y) for c,y in itertools.groupby([str(v)+"_"+str(i) for i,v in enumerate(A)], lambda x: x.split("_")[0]) if c[0] == '0'],key=len)[-1][a].split("_")[1] for a in [0,-1]])
It keeps track of indices by turning [1,2,0...] into ["1_0","2_1","0_2",..] and then doing some splitting and parsing.
Yes it's very ugly and you should go with one of the other answers but I wanted to share
This solution I submitted on Codility with 100 percent efficiency.
// Longest "binary gap" of N: the longest run of zeros enclosed between
// ones in N's binary representation.  (A stray back-tick and a pasted
// "enter code here" fragment were removed from the original -- both made
// the snippet uncompilable.)
class Solution {
    public int solution(int N) {
        int i = 0;
        int gap = 0;
        bool startZeroCount = false;
        List<int> binaryArray = new List<int>();
        // Collect N's bits, least significant first.
        while (N > 0)
        {
            binaryArray.Add(N % 2);
            N = N / 2;
            i++;
        }
        List<int> gapArr = new List<int>();
        // Walk from the most significant bit down; count zeros only after
        // the first 1 has been seen, and let each later 1 close a gap.
        for (int j = i - 1; j >= 0; j--)
        {
            if (binaryArray[j] == 1)
            {
                if (startZeroCount)
                {
                    gapArr.Add(gap);
                    gap = 0;
                }
                startZeroCount = true;
            }
            else if (binaryArray[j] == 0)
            {
                if (startZeroCount)
                    gap++;
            }
        }
        // Largest recorded gap, or 0 when no gap was ever closed.
        gapArr.Sort();
        if (gapArr.Count != 0)
            return gapArr[gapArr.Count - 1];
        else
            return 0;
    }
}
A = [1,2,0,0,3,4,5,-1,0,2,-1,-3,0,0,0,2,-3,-4,-5,0,0,0,0]
count = 0      # length of the current run of zeros
prev = 0       # length of the longest run seen so far
indexend = 0   # index one past the end of that longest run
for i in range(0, len(A)):
    if A[i] == 0:
        count += 1
    else:
        if count > prev:
            prev = count
            indexend = i  # i is one past the run that just ended
        count = 0
# A trailing run of zeros never reaches the else-branch above, so it is
# checked here.  indexend must be one *past* the last zero, i.e. len(A);
# the original stored the index of the last zero itself, shifting the
# reported start/end positions left by one for end-of-list runs.
if count > prev:
    prev = count
    indexend = len(A)
print("The longest sequence of 0's is "+str(prev))
print("index start at: "+ str(indexend-prev))
print("index ends at: "+ str(indexend-1))
This also considers the case where the longest sequence of 0's is at the end of the list.
Output
The longest sequence of 0's is 4
index start at: 18
index ends at: 21
If you would like to completely avoid Python iteration you can do it with Numpy. E.g., for very long sequences, using for loops may be relatively slow. This method will use pre-compiled C for-loops under the hood. The disadvantage is that you have multiple for-loops here. Nonetheless, overall, below algorithm should be a speed gain on longer sequences.
import numpy as np


def longest_sequence(bool_array):
    """Return (start, length) of the longest run of True in *bool_array*.

    Ties resolve to the earliest run (argmax picks the first maximum).
    Pure NumPy: the Python-level loop is replaced by vectorized diffs.
    """
    false_positions = np.where(~bool_array)[0]
    # Distances between consecutive False positions, with sentinels at -1
    # and len(): each distance equals (run-of-True length) + 1.
    run_lengths_plus_1 = np.diff(np.hstack((-1, false_positions, len(bool_array))))
    # Cumulative offsets give the start index of the run following each gap.
    run_starts = np.cumsum(np.hstack((0, run_lengths_plus_1)))
    winner = np.argmax(run_lengths_plus_1)
    return run_starts[winner], run_lengths_plus_1[winner] - 1
# Demonstration: (start_index, run_length) for runs of zeros and of ones.
t = np.array((0,1,0,1,1,1,0,0,1,1,0,1))
print(longest_sequence(t==0))  # -> (6, 2)
print(longest_sequence(t==1))  # -> (3, 3)
p = np.array((0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,1))
print(longest_sequence(p==0))  # -> (0, 3): ties resolve to the earliest run
print(longest_sequence(p==1))  # -> (14, 4)

Categories

Resources