Split intervals longer than a threshold - python

I have a list of tuples, each defining an interval (start, end).
I would like to split the intervals which are longer than a certain threshold.
Example:
Initial list: segs = [(0,100),(120,140),(160,200)]
Threshold: 30
Desired output:
split_segs = [(0,30),(30,60),(60,90),(90,100),(120,140),(160,190),(190,200)]
I came up with this code:
thr = 30.
split_segs = []
for a, b in segs:
    if b - a < thr:
        split_segs.extend([(a, b)])
    else:
        n = int((b - a) / thr)
        for i in range(n):
            if b - (a + (i + 1) * thr) < thr:
                # last full chunk plus the short remainder
                split_segs.extend([(a + i * thr, a + (i + 1) * thr), (a + (i + 1) * thr, b)])
            else:
                split_segs.extend([(a + i * thr, a + (i + 1) * thr)])
It works but looks very clumsy to me. Any better or more pythonic solution?

You can do this slightly more elegantly by extending with a range that has a step of threshold:
segs = [(0,100),(120,140),(160,200)]
threshold = 30
split_segs = []
for seg in segs:
    (a, b) = seg
    diff = b - a
    if diff <= threshold:
        split_segs.append(seg)
    else:
        split_segs.extend((n - threshold, n) for n in range(a + threshold, b + 1, threshold))
        if diff % threshold:
            # complete the gap
            split_segs.append((b - diff % threshold, b))
print(split_segs)
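The same range-with-a-step idea can be compressed further by building the cut points first and pairing consecutive ones. A minimal sketch of that variant (not part of the original answer, assuming integer endpoints as in the example):

def split_interval(a, b, thr):
    # cut points every `thr` units, plus the real end point
    cuts = list(range(a, b, thr)) + [b]   # e.g. (0, 100, 30) -> [0, 30, 60, 90, 100]
    return list(zip(cuts, cuts[1:]))      # -> [(0, 30), (30, 60), (60, 90), (90, 100)]

split_segs = [piece for seg in segs for piece in split_interval(*seg, 30)]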

This is a recursive solution for your problem:
segs = [(0,100),(120,140),(160,200)]
threshold = 30
def divide(to_divide):
    divided = []
    if to_divide[1] - to_divide[0] > threshold:
        divided.append((to_divide[0], to_divide[0] + threshold))
        divided.extend(divide((to_divide[0] + threshold, to_divide[1])))
        return divided
    else:
        return [to_divide]
divided = [el for x in segs for el in divide(x)]
print(divided)
The output will be:
[(0, 30), (30, 60), (60, 90), (90, 100), (120, 140), (160, 190), (190, 200)]
UPDATE: if you prefer a non-recursive solution, this is a possible one:
segs = [(0,100),(120,140),(160,200)]
threshold = 30
def divide(to_divide):
    divided = []
    divided.extend((to_divide[0] + i * threshold, to_divide[0] + (i + 1) * threshold)
                   for i in range((to_divide[1] - to_divide[0]) // threshold))
    if divided:
        if divided[-1][1] != to_divide[1]:
            divided.append((divided[-1][1], to_divide[1]))
    else:
        divided.append((to_divide[0], to_divide[1]))
    return divided
divided = [el for x in segs for el in divide(x)]
print(divided)

Related

calculate the range and quartiles

Marks   Freq
0-10    5
10-20   13
20-30   20
30-40   32
40-50   60
I want to calculate the quartiles and range of the above data using Python, and also produce a suitable plot with Matplotlib.
I used this formula to solve it
# Formula for finding "i"th quartile:
# Q_i = L + h/f (i*N/4 - c.f)
data = {
    (0, 10): 5,
    (10, 20): 13,
    (20, 30): 20,
    (30, 40): 32,
    (40, 50): 60
}
i = 1  # Quartile you want to find
x = (i * sum(data.values())) / 4  # Precalculate i*N/4
c_f = [sum(list(data.values())[:n]) for n in range(1, len(data) + 1)]  # Cumulative frequencies

# Calculate class which the quartile is in
# (L = lower, u = upper, f = frequency, c = cumulative frequency)
for ((L, u), f), c in zip(data.items(), c_f):
    if c >= x:
        break

h = u - L  # Class size
C_f = c - f
Q_i = L + ((h/f) * (x - C_f))
print('Quartile ', i, ': ', Q_i, sep='')
Output: Quartile 1: 27.25
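The question also asks for a plot, which the answer above does not cover. A minimal Matplotlib sketch (not part of the original answer) that shows the grouped frequencies as a bar chart, reusing the data dictionary from the snippet above. Note that the range of the grouped data is simply the highest upper bound minus the lowest lower bound, 50 - 0 = 50.

import matplotlib.pyplot as plt

labels = [f"{lo}-{hi}" for (lo, hi) in data]  # class labels, e.g. "0-10"
freqs = list(data.values())

plt.bar(labels, freqs)
plt.xlabel("Marks")
plt.ylabel("Frequency")
plt.title("Grouped frequency distribution")
plt.show()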

Compare value in a 2d array to nearby values

I'm looking for a way to compare each value in a 2D array to values surrounding it and returning which values are close to the value of interest (within a threshold).
The ways I've explored involve iterating through each element of a 2D array, but I feel this is not the fastest or optimal way to do it.
The input would be a 2D array (size: i x j), and the output would be two 3D arrays (k x i x j) where the "extra" dimension is there to store the i and j indices of the nearby elements that are within a threshold.
Some code to illustrate what I am doing at the moment:
import numpy as np
from tqdm import tqdm

np.random.seed(seed=10)

arr = np.random.random((100, 100))  # Some 2D input array
threshold = 0.5

# Arrays for the row and col indices
i_all, j_all = np.mgrid[0:arr.shape[0],
                        0:arr.shape[1]]

# Footprint around the current element (ie looking at the 8 elements around the central value). Must be odd.
footprint = (3, 3)
footprint_size = np.product(footprint)

# Prepare output for i and j indices
output_i = np.full((footprint_size, *arr.shape), np.nan)
output_j = np.full((footprint_size, *arr.shape), np.nan)

for p, element in enumerate(tqdm(arr.flatten())):  # Iterate through each element
    i, j = np.unravel_index(p, arr.shape)
    # Create mask of elements to compare to
    mask = ((i_all >= (i - (footprint[0] - 1) / 2)) &
            (i_all <= (i + (footprint[0] - 1) / 2)) &
            (j_all >= (j - (footprint[1] - 1) / 2)) &
            (j_all <= (j + (footprint[1] - 1) / 2)))
    # Create mask of those within the threshold
    close_mask = abs(arr[mask] - element) <= threshold
    if np.nansum(close_mask) < np.product(footprint):  # If at edges need to pad to be able to index into output arrays
        output_i[:, i, j] = np.pad(i_all[mask][close_mask].flatten().astype(float),
                                   (int(footprint_size - np.nansum(close_mask)), 0),
                                   mode='constant', constant_values=np.nan)
        output_j[:, i, j] = np.pad(j_all[mask][close_mask].flatten().astype(float),
                                   (int(footprint_size - np.nansum(close_mask)), 0),
                                   mode='constant', constant_values=np.nan)
    else:  # Don't need to pad here
        output_i[:, i, j] = i_all[mask][close_mask]
        output_j[:, i, j] = j_all[mask][close_mask]
# Output: two 3D arrays of indices corresponding to elements within the threshold of the element of interest for rows and cols
This works fine for small arrays but is very slow when arrays have ~10^6 elements. The other idea I had was sliding the array over itself to compare values. This might be faster, but I'm curious whether there are any other ideas or built-in functions that can do a similar thing.
I do not know where exactly, but I am pretty sure your method has a bug: when you look at the results, the last (100x100) subarrays have all indices present.
What I wrote gives results that look better and is ~1000x faster, but it still requires some testing from you. I might have made some error.
def faster_method(arr, threshold, footprint):
    temp_arr = np.full((arr.shape[0] + footprint[0] - 1, arr.shape[1] + footprint[1] - 1), np.nan)
    temp_arr[footprint[0] // 2: footprint[0] // 2 + arr.shape[0],
             footprint[1] // 2: footprint[1] // 2 + arr.shape[1]] = arr
    temp_i_all, temp_j_all = np.mgrid[-(footprint[0] // 2): arr.shape[0] + footprint[0] // 2,
                                      -(footprint[1] // 2): arr.shape[1] + footprint[1] // 2]
    footprint_size = np.product(footprint)
    output_i = np.full((footprint_size, *arr.shape), np.nan)
    output_j = np.full((footprint_size, *arr.shape), np.nan)
    output_idx = 0
    for neighbour_vertical_position in range(footprint[0]):
        for neighbour_horizontal_position in range(footprint[1]):
            if neighbour_vertical_position == footprint[0] // 2 and neighbour_horizontal position == footprint[1] // 2:
                # center point, not a neighbour, so we can keep np.nan for it everywhere
                output_idx += 1
                continue
            current_neighbour = temp_arr[neighbour_vertical_position: neighbour_vertical_position + arr.shape[0],
                                         neighbour_horizontal_position: neighbour_horizontal_position + arr.shape[1]]
            current_i_all = temp_i_all[neighbour_vertical_position: neighbour_vertical_position + arr.shape[0],
                                       neighbour_horizontal_position: neighbour_horizontal_position + arr.shape[1]]
            current_j_all = temp_j_all[neighbour_vertical_position: neighbour_vertical_position + arr.shape[0],
                                       neighbour_horizontal_position: neighbour_horizontal_position + arr.shape[1]]
            # keep the neighbour's indices where it is within the threshold, NaN otherwise
            is_close_array = np.abs(arr - current_neighbour) <= threshold
            output_i[output_idx] = current_i_all + np.where(is_close_array, 0, np.nan)
            output_j[output_idx] = current_j_all + np.where(is_close_array, 0, np.nan)
            output_idx += 1
    return output_i, output_j
Using dankal444's answer I managed to get this working:
def slidingCompare(arr, footprint=(3, 3), threshold=0.5):
    """
    arr: 2D array | input
    footprint: tuple | search window dimensions (must be odd)
    threshold: float | Threshold for neighbours to be close
    """
    import numpy as np
    assert footprint[0] % 2 == 1, "Footprint dimensions should be odd. "
    assert footprint[1] % 2 == 1, "Footprint dimensions should be odd. "
    temp_arr = np.full((arr.shape[0] + footprint[0] - 1,
                        arr.shape[1] + footprint[1] - 1), np.nan)
    temp_arr[footprint[0] // 2:footprint[0] // 2 + arr.shape[0],
             footprint[1] // 2:footprint[1] // 2 + arr.shape[1]] = arr
    # Arrays for the row and col indices
    i_all, j_all = np.mgrid[-(footprint[0] // 2):arr.shape[0] + (footprint[0] // 2),
                            -(footprint[1] // 2):arr.shape[1] + (footprint[1] // 2)]
    # Footprint around the current element (ie looking at the 8 elements around the central value). Must be odd.
    footprint_size = np.product(footprint)
    # Prepare output for i and j indices
    output_i = np.full((footprint_size, *arr.shape), np.nan)
    output_j = np.full((footprint_size, *arr.shape), np.nan)
    output_ix = np.arange(footprint_size).reshape(footprint)
    for vert_pos in np.arange(footprint[0]):
        for horiz_pos in np.arange(footprint[1]):
            neighbour = temp_arr[vert_pos: vert_pos + arr.shape[0],
                                 horiz_pos: horiz_pos + arr.shape[1]]
            close_mask = abs(arr - neighbour) <= threshold
            output_i[output_ix[vert_pos, horiz_pos], close_mask] = i_all[vert_pos: vert_pos + arr.shape[0],
                                                                         horiz_pos: horiz_pos + arr.shape[1]][close_mask]
            output_j[output_ix[vert_pos, horiz_pos], close_mask] = j_all[vert_pos: vert_pos + arr.shape[0],
                                                                         horiz_pos: horiz_pos + arr.shape[1]][close_mask]
    # Output: two 3D arrays of indices corresponding to elements within the threshold of the element of interest for rows and cols
    return output_i, output_j
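A small usage sketch (not part of the original answer) showing what the outputs of slidingCompare look like on a tiny array; the array size and seed here are arbitrary:

import numpy as np

np.random.seed(10)
small = np.random.random((5, 5))
out_i, out_j = slidingCompare(small, footprint=(3, 3), threshold=0.5)

print(out_i.shape)     # (9, 5, 5): one slot per footprint position
print(out_i[:, 2, 2])  # row indices of footprint cells around (2, 2) within the threshold, NaN elsewhere
print(out_j[:, 2, 2])  # matching column indices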

How many boxes can we put to the big one?

I'm trying to work on simple algorithms.
I have 600 snacks and two kinds of boxes: one holds 45 snacks and the other holds 60 snacks. I need to find all the possible combinations of these small boxes that add up to 600.
I have this code, but somehow it doesn't work the right way.
k = 0
for x in range(0, 601):
    for y in range(0, int(600 // 45) + 1):
        for z in range(0, int(600 // 60) + 1):
            if x + 45 * y + 45 * z == 600:
                print(x, '45=', y, '60=', z)
                k = k + 1
print(k)
If I understood you correctly, it is simple math. You have 600 items and want to pack these 600 items into boxes of size 45 and size 60. I don't know what you use x for.
k = 0
for y in range(0, 20):
    for z in range(0, 20):
        if 45 * y + 60 * z == 600:
            print('45=', y, '60=', z)
            k = k + 1
print(k)
Result will be:
45= 0 60= 10
45= 4 60= 7
45= 8 60= 4
45= 12 60= 1
4
At first glance, z seems to represent the box that can hold 60 snacks. So the line of code if x + 45 * y + 45 * z == 600: does not seem right. The multiplication factor for z should be 60, i.e., if x + 45 * y + 60 * z == 600:
The answer is (EDIT: the solution is rewritten as functions):
Both functions return a list of tuples with the combinations found.
Both functions iterate over only one box size and filter by the second one.
The length of the list returned by either function is the number of options.
def box_comb(snacks, boxA, boxB):
    res = []
    for a in range(snacks // boxA + 1):  # Iterate by boxA
        free_space = snacks - boxA * a
        if free_space % boxB == 0:  # Filter by boxB
            b = free_space // boxB  # Calculate the count of boxB
            res.append((a, b))
    return res

# Try this
comb = box_comb(snacks=600,
                boxA=60,
                boxB=45)
print(comb)
print(f"Number of combinations = {len(comb)}")
The output:
[(1, 12), (4, 8), (7, 4), (10, 0)]
Number of combinations = 4
Single-line solution:
The same algorithm written as a single-line solution:
def box_comb_2(snacks, boxA, boxB):
    return [(a, (snacks - a * boxA) // boxB) for a in range(snacks // boxA + 1)
            if (snacks - a * boxA) % boxB == 0]

# try this
comb = box_comb_2(snacks=600,
                  boxA=60,
                  boxB=45)
print(comb)
print(f"Number of combinations = {len(comb)}")
The output is
[(1, 12), (4, 8), (7, 4), (10, 0)]
Number of combinations = 4
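As a sanity check (not from the original answers), the count of four options can also be reasoned about directly: dividing 45*y + 60*z == 600 by gcd(45, 60) = 15 gives 3*y + 4*z == 40, which is quick to enumerate.

from math import gcd

g = gcd(45, 60)        # 15
assert 600 % g == 0    # otherwise there would be no solutions at all
solutions = [(y, z) for y in range(40 // 3 + 1) for z in range(40 // 4 + 1)
             if 3 * y + 4 * z == 40]
print(solutions)       # [(0, 10), (4, 7), (8, 4), (12, 1)]
print(len(solutions))  # 4, matching the answers above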

Check if a set of points describes a triangle

I tried to solve this question but couldn't find a simple solution without passing over all the rows and finding which numbers are on the same line.
Is there a simple way to find triangles?
This is my solution for finding a triangle.
How can I change it to be more "pythonic"? (Or is there an even better method for solving it?)
from sympy.solvers import solve
from sympy import Symbol
from collections import Counter

vals = [8, 17, 19]  # the triangle
dicl = []  # list of dicts
for v in vals:
    dic = {}
    dic['val'] = v
    v1 = v
    done = 0
    stepsb = 0
    while done == 0:  # going backward until reaching the big triangle's edge
        x = Symbol('x')
        k = solve((x**2 + x)/2 + 1 - v1, x)
        k = list(filter(lambda x: x > 0, k))
        if k[0] % 1 == 0:
            done = 1
        else:
            v1 -= 1
            stepsb += 1
    dic['line'] = k[0]
    dic['stepsb'] = stepsb  # dist from the left edge
    dic['stepsf'] = (k[0]**2 + 3*k[0] + 2)/2 - v  # dist from the right edge
    dicl.append(dic)
    print(dic)

lines = [l['line'] for l in dicl]
mc = Counter(lines).most_common(1)[0][0]  # finding the numbers on the same line
minv = min([l['val'] for l in dicl if l['line'] == mc])
maxv = max([l['val'] for l in dicl if l['line'] == mc])
stb = [l['stepsb'] for l in dicl if l['val'] == minv][0]
stf = [l['stepsf'] for l in dicl if l['val'] == maxv][0]
for k in dicl:
    if k['stepsb'] == stb and k['stepsf'] == stf:
        print("good")
        break
A first step could be to search for a formula that translates the one-dimensional point number t to an x,y coordinate.
So, search for an n such that n*(n+1)/2 < t:
from sympy import solve, Eq
from sympy.abc import n, t
f = Eq(n * (n + 1), 2 * t)
print(solve(f, n))
This shows as positive root: (sqrt(8*t + 1) - 1)/2.
To be strictly smaller, a formula that copes with small approximation errors could be:
floor((sqrt(8*t + 1) - 1)/2 - 0.0000001)
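(For reference, that root is just the positive solution of the quadratic: n*(n+1)/2 = t is equivalent to n**2 + n - 2*t = 0, whose positive root is (-1 + sqrt(1 + 8*t))/2 = (sqrt(8*t + 1) - 1)/2.)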
The following idea is, given a list of indices:
convert them to xy coordinates
find their center (sum and divide by the length of the list)
find the distances of each xy to the center
check that all distances are equal
To convert to an xy position, note that the height of an equilateral triangle with base 1 is sqrt(3)/2, so the distances between the y-positions should be multiplied by that factor. The x-positions need to be centered which can be achieved by subtracting n/2.
import math

def find_xy(t):
    # convert the numerical position into an xy coordinate in the plane
    # first find largest n such that n*(n+1)/2 < t
    n = math.floor((math.sqrt(8 * t + 1) - 1) / 2 - 0.0000001)
    return (n + 1) * math.sqrt(3) / 2, t - n * (n + 1) // 2 - n / 2

def sq_dist(p, q):
    return (p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2

def center(points):
    # find the center of a list of points
    l = len(points)
    x = sum(p[0] for p in points)
    y = sum(p[1] for p in points)
    return x / l, y / l

def is_regular(tri_points):
    points = [find_xy(t) for t in tri_points]
    cent = center(points)
    dists = [sq_dist(cent, p) for p in points]
    return max(dists) - min(dists) < 0.000001
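A quick usage sketch (not part of the original answer): positions 1, 2, 3 form the small triangle at the top of the grid, while 1, 2, 4 lie along one edge, so is_regular should accept the first set and reject the second.

print(is_regular([1, 2, 3]))   # True
print(is_regular([1, 2, 4]))   # False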
Note that this code finds geometric figures for which all the points lie on a circle. This doesn't work for the parallelogram. The actual question also has some extra criteria: all edges should follow the grid lines, and all edges need to be equal in length.
Therefore, it is useful to have 3 coordinates for each point: the row, the column and the diagonal (the 3 directions of the grid).
The length in each direction, is just the maximum minus the minimum for that direction. These lengths are called d_r, d_c and d_d in the code below.
Checking for a valid triangle, the 3 lengths need to be equal. One way to check this, is to check that the minimum of the lengths is equal to the maximum.
For a valid parallelogram, two lengths need to be equal, and the third should be the double. Checking that the maximum length is twice the minimum length should cover this. But, because this can already be reached using 3 points, we should also check that for a given direction, there are exactly 2 points at the minimum and 2 at the maximum. Summing all points and comparing twice the sum of maximum and minimum should accomplish this.
For a valid hexagon, the 3 lengths should be equal. So, the same test as for the triangle: the minimum of the lengths equal to the maximum. And also the test on the sums is needed, as 4 points can already fulfil the length conditions.
import math

def find_row_col_diag(t):
    # convert the numerical position into a row,col,diag coordinate in the plane
    # first find largest n such that n*(n+1)/2 < t
    n = math.floor((math.sqrt(8 * t + 1) - 1) / 2 - 0.0000001)
    row, col = n + 1, t - n * (n + 1) // 2
    return row, col, row - col

def check_valid_figure(tri_points):
    points = [find_row_col_diag(t) for t in tri_points]
    rs = [r for (r, c, d) in points]
    cs = [c for (r, c, d) in points]
    ds = [d for (r, c, d) in points]
    sum_r = sum(rs)
    min_r = min(rs)
    max_r = max(rs)
    d_r = max_r - min_r
    sum_c = sum(cs)
    min_c = min(cs)
    max_c = max(cs)
    d_c = max_c - min_c
    sum_d = sum(ds)
    min_d = min(ds)
    max_d = max(ds)
    d_d = max_d - min_d
    if len(points) == 3:
        is_ok = max(d_r, d_c, d_d) == min(d_r, d_c, d_d)
    elif len(points) == 4:
        is_ok = max(d_r, d_c, d_d) == 2 * min(d_r, d_c, d_d) \
                and sum_r == 2 * (min_r + max_r) and sum_c == 2 * (min_c + max_c) and sum_d == 2 * (min_d + max_d)
    elif len(points) == 6:
        is_ok = max(d_r, d_c, d_d) == min(d_r, d_c, d_d) \
                and len(set(rs)) == 3 and len(set(cs)) == 3 and len(set(ds)) == 3
    else:
        is_ok = False
    print(" ".join([str(t) for t in tri_points]), end=" ")
    if is_ok:
        print("are the vertices of a",
              "triangle" if len(points) == 3 else "parallelogram" if len(points) == 4 else "hexagon")
    else:
        print("are not the vertices of an acceptable figure")

tri_point_lists = [[1, 2, 3],
                   [11, 13, 22, 24],
                   [11, 13, 29, 31],
                   [11, 13, 23, 25],
                   [26, 11, 13, 24],
                   [22, 23, 30],
                   [4, 5, 9, 13, 12, 7]]
for lst in tri_point_lists:
    check_valid_figure(lst)
The last code can be further compressed using list comprehensions:
def check_valid_figure_bis(tri_points):
    points = [find_row_col_diag(t) for t in tri_points]
    rs, cs, ds = [[p[i] for p in points] for i in range(3)]
    sums = [sum(xs) for xs in (rs, cs, ds)]
    mins = [min(xs) for xs in (rs, cs, ds)]
    maxs = [max(xs) for xs in (rs, cs, ds)]
    lens = [ma - mi for mi, ma in zip(mins, maxs)]
    if len(points) == 3:
        is_ok = max(lens) == min(lens)
    elif len(points) == 4:
        is_ok = max(lens) == 2 * min(lens) and all([su == 2 * (mi + ma) for su, mi, ma in zip(sums, mins, maxs)])
    elif len(points) == 6:
        is_ok = max(lens) == min(lens) and all([len(set(xs)) == 3 for xs in (rs, cs, ds)])
    else:
        is_ok = False
    return is_ok
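A quick check (not part of the original answer) that the compressed version agrees with check_valid_figure, assuming tri_point_lists from the earlier block is still in scope:

for lst in tri_point_lists:
    print(lst, check_valid_figure_bis(lst))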

determine mean zero crossing

Using numpy I have extracted the zero crossings of a signal.
Unfortunately the source of the data is noisy and thus there are multiple zero crossings.
If I filter the data before checking for zero crossings, aspects of the filter (gain-phase margin) will need to be justified, while averaging the zero-crossing points is slightly easier to justify.
[123,125,127,1045,1049,1050,2147,2147,2151,2155]
Consider the above list. What would be an appropriate way to create:
[125, 1048, 2149]
The aim is to find the phase shift between two sine waves
This code takes a simplistic approach of looking for a gap THRESHOLD between the transitions - exceeding this marks the end of a signal transition.
xings = [123, 125, 127, 1045, 1049, 1050, 2147, 2147, 2151, 2155]
THRESHOLD = 100

xlast = -1000000
tot = 0
n = 0
results = []
i = 0
while i < len(xings):
    x = xings[i]
    if x - xlast > THRESHOLD:
        # emit a transition, averaged over the crossings collected so far
        if n > 0:
            results.append(tot // n)
        tot = 0
        n = 0
    tot += x
    n += 1
    xlast = x
    i += 1
if n > 0:
    results.append(tot // n)
print(results)
prints:
[125, 1048, 2150]
I was hoping for a more elegant solution than just iterating over the list of zero crossings, but it seems that is the only solution.
I settled on:
def zero_crossing_avg(data):
    output = []
    running_total = data[0]
    count = 1
    for i in range(1, data.size):
        val = data[i]
        if val - data[i-1] < TOL:
            running_total += val
            count += 1
        else:
            output.append(round(running_total/count))
            running_total = val
            count = 1
    return output
with example code of it in use:
#!/usr/bin/env python
import numpy as np
from matplotlib import pyplot as plt

dt = 5e-6
TOL = 50

class DCfilt():
    def __init__(self, dt, freq):
        self.alpha = dt/(dt + 1/(2*np.pi*freq))
        self.y = [0, 0]

    def step(self, x):
        y = self.y[-1] + self.alpha*(x - self.y[-1])
        self.y[-1] = y
        return y

def zero_crossing_avg(data):
    output = []
    running_total = data[0]
    count = 1
    for i in range(1, data.size):
        val = data[i]
        if val - data[i-1] < TOL:
            running_total += val
            count += 1
        else:
            output.append(round(running_total/count))
            running_total = val
            count = 1
    return output

t = np.arange(0, 2, dt)
print(t.size)

rng = (np.random.random_sample(t.size) - 0.5)*0.1
s = 10*np.sin(2*np.pi*t*10 + np.pi/12) + rng
c = 10*np.cos(2*np.pi*t*10) + rng

filt_s = DCfilt(dt, 16000)
filt_s.y[-1] = s[0]
filt_c = DCfilt(dt, 1600)
filt_c.y[-1] = c[0]

# filter the RAW data first
for i in range(s.size):
    s[i] = filt_s.step(s[i])
    c[i] = filt_c.step(c[i])

# determine the zero crossings
s_z = np.where(np.diff(np.sign(s)))[0]
c_z = np.where(np.diff(np.sign(c)))[0]

sin_zc = zero_crossing_avg(np.where(np.diff(np.sign(s)))[0])
cos_zc = zero_crossing_avg(np.where(np.diff(np.sign(c)))[0])

HALF_PERIOD = (sin_zc[1] - sin_zc[0])
for i in range([len(sin_zc), len(cos_zc)][len(sin_zc) > len(cos_zc)]):
    delta = abs(cos_zc[i] - sin_zc[i])
    print(90 - (delta/HALF_PERIOD)*180)

plt.hold(True)
plt.grid(True)
plt.plot(s)
plt.plot(c)
plt.show()
This works well enough.
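For comparison, the grouping-and-averaging step can also be written with numpy alone. A minimal sketch (not from the original answers), assuming the crossing indices are already sorted:

import numpy as np

xings = np.array([123, 125, 127, 1045, 1049, 1050, 2147, 2147, 2151, 2155])
THRESHOLD = 100

# split wherever the gap between consecutive crossings exceeds the threshold, then average each group
groups = np.split(xings, np.where(np.diff(xings) > THRESHOLD)[0] + 1)
print([int(g.mean()) for g in groups])   # [125, 1048, 2150]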
