Make box-counting code faster using Python

Make box-counting code faster using Python - python

The code below is one of the features on the audio file. Since I segmented one audio file to 4096 samples so I must call boxcounting function 4096 times to get the output for each segmented file. This code I wrote for one segmented file and called from the main python file. It takes around 10 seconds/segmented file if full audio is short and takes 30 seconds/segmented file if full audio is around 3-4 minutes. My problem is it takes a very long time to run one audio file.
get an array of from audio and separate them to 2 mono array (Left channel and Right channel)
normalize values and multiply array with 20 for scaling up
round numbers to one decimal place
pair them (L, R) by using zip()
remove the duplicate value
count coordinate pair in each small box
count boxes that have value (Final output)
This is my example
import numpy as np
from pydub import AudioSegment
from collections import OrderedDict
def difference(a, b):
if (a > 0) and (b > 0):
return (abs(a - b))
elif (a > 0) and (b < 0):
return abs(a + abs(b))
elif (a < 0) and (b < 0):
return (abs(a) - abs(b))
def boxcounting(left_channel, right_channel, scale):
ratioX = difference(max(left_channel), min(left_channel))/scale
ratioY = difference(max(right_channel), min(right_channel))/scale
startX = min(left_channel)
count_per_scale = []
countbox = 0
pair = list(OrderedDict.fromkeys(list(zip(left_channel, right_channel))))
for x in range(scale):
print('startX',startX)
startY = min(right_channel)
endX = startX + ratioX
if x == (scale-1):
endX = max(left_channel)
print('endX',endX)
for y in range(scale):
print('-----------------------')
print('startY',startY)
endY = startY + ratioY
if y == (scale-1):
endY = max(right_channel)
print('endY',endY)
count = 0 # reset
for l,r in pair:
if (startX < l <= endX):
if (startY < r <= endY):
count+=1
print('0',l,r)
print('count',count)
elif (min(right_channel) == r and r == startY):
count+=1
print('1',l,r)
print('count',count)
elif (min(left_channel) == l and l == startX):
if (startY < r <= endY):
count+=1
print('2',l,r)
print('count',count)
elif (min(right_channel) == r and r == startY):
count+=1
print('3',l,r)
print('count',count)
count_per_scale.append(count)
if count != 0:
countbox += 1
startY = endY
startX = endX
print('===============================')
print(count_per_scale)
countbox = 0
for i in count_per_scale:
if(i > 0):
countbox += 1
countbox = np.count_nonzero(count_per_scale)
print('No. of box that has value =', countbox)
return countbox
sound = AudioSegment.from_file('Alpharock - Pump This Party.mp3')
split_sound = sound.split_to_mono()
left_channel = np.array(split_sound[0].get_array_of_samples())
right_channel = np.array(split_sound[1].get_array_of_samples())
scale = 10 #norm and scale up
scaleupL = np.round((left_channel/np.abs(left_channel).max())* scale,1)
scaleupR = np.round((right_channel/np.abs(right_channel).max())* scale,1)
Can anyone help me to make it faster? Thank you very much.

Related

Check if a set of points described a triangle

I tried to solve this question but couldn't find a simple solution without passing all rows and find which numbers are on the same line.
Is there a simple way to find triangles?
this is my solution for finding a triangle:
How can I change it to be more "pythonic"? (or even better method for solving it)
from sympy.solvers import solve
from sympy import Symbol
from collections import Counter
vals = [8,17,19] # the triangle
dicl = [] #list of dics
for v in vals:
dic = {}
dic['val'] = v
v1 = v
done = 0
stepsb = 0
while done == 0: #going backword untill reaching the big triabgle edges
x = Symbol('x')
k = solve((x**2 + x)/2 +1 - v1, x)
k = list(filter(lambda x:x>0, k))
if k[0]%1 == 0:
done = 1
else:
v1 -= 1
stepsb += 1
dic['line'] = k[0]
dic['stepsb'] = stepsb #dist from the left edge
dic['stepsf'] = (k[0]**2 + 3*k[0] + 2)/2 - v #dist from the right edge
dicl.append(dic)
print(dic)
lines = [l['line'] for l in dicl]
mc = Counter(lines).most_common(1)[0][0] #finding the numbers on the same line
minv = min([l['val'] for l in dicl if l['line'] == mc])
maxv = max([l['val'] for l in dicl if l['line'] == mc])
stb = [l['stepsb'] for l in dicl if l['val'] == minv][0]
stf = [l['stepsf'] for l in dicl if l['val'] == maxv][0]
for k in dicl:
if k['stepsb'] == stb and k['stepsf'] == stf:
print("good")
break

A first step could be to search for a formula that translates the one-dimensional point number t to an x,y coordinate.
So, search for an n such that n*(n+1)/2 < t:
from sympy import solve, Eq
from sympy.abc import n, t
f = Eq(n * (n + 1), 2 * t)
print(solve(f, n))
This shows as positive root: (sqrt(8*t + 1) - 1)/2.
To be strict smaller, a formula that copes with small approximation errors, could be:
floor((sqrt(8*t + 1) - 1)/2 - 0.0000001
The following idea is, given a list of indices:
convert them to xy coordinates
find their center (sum and divide by the length of the list)
find the distances of each xy to the center
check that all distances are equal
To convert to an xy position, note that the height of an equilateral triangle with base 1 is sqrt(3)/2, so the distances between the y-positions should be multiplied by that factor. The x-positions need to be centered which can be achieved by subtracting n/2.
import math
def find_xy(t):
# convert the numerical position into an xy coordinate in the plane
# first find largest n such that n*(n+1)/2 < t
n = math.floor((math.sqrt(8 * t + 1) - 1) / 2 - 0.0000001)
return (n + 1) * math.sqrt(3) / 2, t - n * (n + 1) // 2 - n/2
def sq_dist(p, q):
return (p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2
def center(points):
# find the center of a list of points
l = len(points)
x = sum(p[0] for p in points)
y = sum(p[1] for p in points)
return x / l, y / l
def is_regular(tri_points):
points = [find_xy(t) for t in tri_points]
cent = center(points)
dists = [sq_dist(cent, p) for p in points]
return max(dists) - min(dists) < 0.000001
Note that this code finds geometric figures for which all the points lie on a circle. This doesn't work for the parallelogram. The actual question also has some extra criteria: all edges should follow the grid lines, and all edges need to be equal in length.
Therefore, it is useful to have 3 coordinates for each point: the row, the column and the diagonal (the 3 directions of the grid).
The length in each direction, is just the maximum minus the minimum for that direction. These lengths are called d_r, d_c and d_d in the code below.
Checking for a valid triangle, the 3 lengths need to be equal. One way to check this, is to check that the minimum of the lengths is equal to the maximum.
For a valid parallelogram, two lengths need to be equal, and the third should be the double. Checking that the maximum length is twice the minimum length should cover this. But, because this can already be reached using 3 points, we should also check that for a given direction, there are exactly 2 points at the minimum and 2 at the maximum. Summing all points and comparing twice the sum of maximum and minimum should accomplish this.
For a valid hexagon, the 3 lengths should be equal. So, the same test as for the triangle: the minimum of the lengths equal to the maximum. And also the test on the sums is needed, as 4 points can already fulfil the length conditions.
import math
def find_row_col_diag(t):
# convert the numerical position into an row,col,diag coordinate in the plane
# first find largest n such that n*(n+1)/2 < t
n = math.floor((math.sqrt(8 * t + 1) - 1) / 2 - 0.0000001)
row, col = n + 1, t - n * (n + 1) // 2
return row, col, row - col
def check_valid_figure(tri_points):
points = [find_row_col_diag(t) for t in tri_points]
rs = [r for (r, c, d) in points]
cs = [c for (r, c, d) in points]
ds = [d for (r, c, d) in points]
sum_r = sum(rs)
min_r = min(rs)
max_r = max(rs)
d_r = max_r - min_r
sum_c = sum(cs)
min_c = min(cs)
max_c = max(cs)
d_c = max_c - min_c
sum_d = sum(ds)
min_d = min(ds)
max_d = max(ds)
d_d = max_d - min_d
if len(points) == 3:
is_ok = max(d_r, d_c, d_d) == min(d_r, d_c, d_d)
elif len(points) == 4:
is_ok = max(d_r, d_c, d_d) == 2 * min(d_r, d_c, d_d) \
and sum_r == 2 * (min_r + max_r) and sum_c == 2 * (min_c + max_c) and sum_d == 2 * (min_d + max_d)
elif len(points) == 6:
is_ok = max(d_r, d_c, d_d) == min(d_r, d_c, d_d) \
and len(set(rs)) == 3 and len(set(cs)) == 3 and len(set(ds)) == 3
else:
is_ok = False
print(" ".join([str(t) for t in tri_points]), end=" ")
if is_ok:
print("are the vertices of a",
"triangle" if len(points) == 3 else "parallelogram" if len(points) == 4 else "hexagon")
else:
print("are not the vertices of an acceptable figure")
tri_point_lists = [[1, 2, 3],
[11, 13, 22, 24],
[11, 13, 29, 31],
[11, 13, 23, 25],
[26, 11, 13, 24],
[22, 23, 30],
[4, 5, 9, 13, 12, 7]]
for lst in tri_point_lists:
check_valid_figure(lst)
The last code can be further compressed using list comprehensions:
def check_valid_figure_bis(tri_points):
points = [find_row_col_diag(t) for t in tri_points]
rs, cs, ds = [[p[i] for p in points] for i in range(3)]
sums = [sum(xs) for xs in (rs, cs, ds)]
mins = [min(xs) for xs in (rs, cs, ds)]
maxs = [max(xs) for xs in (rs, cs, ds)]
lens = [ma - mi for mi, ma in zip(mins, maxs)]
if len(points) == 3:
is_ok = max(lens) == min(lens)
elif len(points) == 4:
is_ok = max(lens) == 2 * min(lens) and all([su == 2 * (mi + ma) for su, mi, ma in zip(sums, mins, maxs)])
elif len(points) == 6:
is_ok = max(lens) == min(lens) and all([len(set(xs)) == 3 for xs in (rs, cs, ds)])
else:
is_ok = False
return is_ok

Finding the union of multiple overlapping rectangles - OpenCV python

I have several overlapping bounding boxes that encompass a single object, however they overlap minimally in some places. Taken as a whole, they encompass the entire object, but openCV's groupRectangles function does not return a box encompassing the object. The bounding boxes I have are shown in blue, and bounding boxes I would like to return are shown in red here
I would like to get the union of only the overlapping rectangles but am unsure about how to iterate through the list without combining every rectangle.
I have union and intersect functions shown below, and a list of the rectangles represented by (x y w h), where x and y are the coordinates of the top left corner of the box.
def union(a,b):
x = min(a[0], b[0])
y = min(a[1], b[1])
w = max(a[0]+a[2], b[0]+b[2]) - x
h = max(a[1]+a[3], b[1]+b[3]) - y
return (x, y, w, h)
def intersection(a,b):
x = max(a[0], b[0])
y = max(a[1], b[1])
w = min(a[0]+a[2], b[0]+b[2]) - x
h = min(a[1]+a[3], b[1]+b[3]) - y
if w<0 or h<0: return () # or (0,0,0,0) ?
return (x, y, w, h)
My function for combining is currently as follows:
def combine_boxes(boxes):
noIntersect = False
while noIntersect == False and len(boxes) > 1:
a = boxes[0]
print a
listBoxes = boxes[1:]
print listBoxes
index = 0
for b in listBoxes:
if intersection(a, b):
newBox = union(a,b)
listBoxes[index] = newBox
boxes = listBoxes
noIntersect = False
index = index + 1
break
noIntersect = True
index = index + 1
print boxes
return boxes.astype("int")
This gets most of the way there, as shown here
there are still a few nested bounding boxes that I'm not sure how to continue iterating through.

I haven't worked with openCV, so the object may need more mangling, but maybe use itertools.combinations to make the combine_boxes function simpler:
import itertools
import numpy as np
def combine_boxes(boxes):
new_array = []
for boxa, boxb in itertools.combinations(boxes, 2):
if intersection(boxa, boxb):
new_array.append(union(boxa, boxb))
else:
new_array.append(boxa)
return np.array(new_array).astype('int')
EDIT (you may actually need zip instead)
for boxa, boxb in zip(boxes, boxes[1:])
everything is the same.

Thank you, salparadise (https://stackoverflow.com/users/62138/salparadise). Very helpful to find a way out.
But the solution looks rectangles could be repeated added into the new_array. e.g. A B C has no intersection to each other, A B C will be added twice respectively. So the new_array will contain A B A C B C.
Please refer to the revised code. Hope it helps.
Had tested it on multiple test cases. It looks working fine.
def merge_recs(rects):
while (1):
found = 0
for ra, rb in itertools.combinations(rects, 2):
if intersection(ra, rb):
if ra in rects:
rects.remove(ra)
if rb in rects:
rects.remove(rb)
rects.append((union(ra, rb)))
found = 1
break
if found == 0:
break
return rects

I go into a similar situation to combine all the intersected rectangle found in each frame of my OpenCV project, after some time I finally come up with a solution and want to share it here for someone having a headache combining those rectangles. (This might not be the best solution but it's simple though)
import itertools
# my Rectangle = (x1, y1, x2, y2), a bit different from OP's x, y, w, h
def intersection(rectA, rectB): # check if rect A & B intersect
a, b = rectA, rectB
startX = max( min(a[0], a[2]), min(b[0], b[2]) )
startY = max( min(a[1], a[3]), min(b[1], b[3]) )
endX = min( max(a[0], a[2]), max(b[0], b[2]) )
endY = min( max(a[1], a[3]), max(b[1], b[3]) )
if startX < endX and startY < endY:
return True
else:
return False
def combineRect(rectA, rectB): # create bounding box for rect A & B
a, b = rectA, rectB
startX = min( a[0], b[0] )
startY = min( a[1], b[1] )
endX = max( a[2], b[2] )
endY = max( a[3], b[3] )
return (startX, startY, endX, endY)
def checkIntersectAndCombine(rects):
if rects is None:
return None
mainRects = rects
noIntersect = False
while noIntersect == False and len(mainRects) > 1:
mainRects = list(set(mainRects))
# get the unique list of rect, or the noIntersect will be
# always true if there are same rect in mainRects
newRectsArray = []
for rectA, rectB in itertools.combinations(mainRects, 2):
newRect = []
if intersection(rectA, rectB):
newRect = combineRect(rectA, rectB)
newRectsArray.append(newRect)
noIntersect = False
# delete the used rect from mainRects
if rectA in mainRects:
mainRects.remove(rectA)
if rectB in mainRects:
mainRects.remove(rectB)
if len(newRectsArray) == 0:
# if no newRect is created = no rect in mainRect intersect
noIntersect = True
else:
# loop again the combined rect and those remaining rect in mainRects
mainRects = mainRects + newRectsArray
return mainRects

It's horribly janky, but after a bit of finagling I did manage to get the results I wanted
I have included my combine_boxes function below in case anyone is having a similar problem.
def combine_boxes(boxes):
noIntersectLoop = False
noIntersectMain = False
posIndex = 0
# keep looping until we have completed a full pass over each rectangle
# and checked it does not overlap with any other rectangle
while noIntersectMain == False:
noIntersectMain = True
posIndex = 0
# start with the first rectangle in the list, once the first
# rectangle has been unioned with every other rectangle,
# repeat for the second until done
while posIndex < len(boxes):
noIntersectLoop = False
while noIntersectLoop == False and len(boxes) > 1:
a = boxes[posIndex]
listBoxes = np.delete(boxes, posIndex, 0)
index = 0
for b in listBoxes:
#if there is an intersection, the boxes overlap
if intersection(a, b):
newBox = union(a,b)
listBoxes[index] = newBox
boxes = listBoxes
noIntersectLoop = False
noIntersectMain = False
index = index + 1
break
noIntersectLoop = True
index = index + 1
posIndex = posIndex + 1
return boxes.astype("int")

The most voted answer will not work if you need a single maximum box, however the above one will work, but it has a bug.
posting the correct code for someone
tImageZone = namedtuple('tImageZone', 'x y w h')
def merge_zone(z1, z2):
if (z1.x == z2.x and z1.y == z2.y and z1.w == z2.w and z1.h == z2.h):
return z1
x = min(z1.x, z2.x)
y = min(z1.y, z2.y)
w = max(z1.x + z1.w, z2.x + z2.w) - x
h = max(z1.y + z1.h, z2.y + z2.h) - y
return tImageZone(x, y, w, h)
def is_zone_overlap(z1, z2):
# If one rectangle is on left side of other
if (z1.x > z2.x + z2.w or z1.x + z1.w < z2.x):
return False
# If one rectangle is above other
if (z1.y > z2.y + z2.h or z1.y + z1.h < z2.y):
return False
return True
def combine_zones(zones):
index = 0
if zones is None: return zones
while index < len(zones):
no_Over_Lap = False
while no_Over_Lap == False and len(zones) > 1 and index < len(zones):
zone1 = zones[index]
tmpZones = np.delete(zones, index, 0)
tmpZones = [tImageZone(*a) for a in tmpZones]
for i in range(0, len(tmpZones)):
zone2 = tmpZones[i]
if (is_zone_overlap(zone1, zone2)):
tmpZones[i] = merge_zone(zone1, zone2)
zones = tmpZones
no_Over_Lap = False
break
no_Over_Lap = True
index += 1
return zones

Python .append seems to run forever and the 'uniform' value seems not too random (making Poisson sphere distribution in Python)

I am now trying to calculate the poisson sphere distribution(a 3D version of the poisson disk) using python and then plug in the result to POV-RAY so that I can generate some random distributed packing rocks.
I am following these two links:
[https://github.com/CodingTrain/Rainbow-Code/blob/master/CodingChallenges/CC_33_poisson_disc/sketch.js#L13]
[https://www.cs.ubc.ca/~rbridson/docs/bridson-siggraph07-poissondisk.pdf]tl;dr
0.Create an n-dimensional grid array and cell size = r/sqrt(n) where r is the minimum distance between each sphere. All arrays are set to be default -1 which stands for 'without point'
1.Create an initial sample. (it should be placed randomly but I choose to put it in the middle). Put it in the grid array. Also, intialize an active array. Put the initial sample in the active array.
2.While the active list is not empty, pick a random index. Generate points near it and make sure the points are not overlapping with nearby points(only test with the nearby arrays). If no sample can be created near the 'random index', kick the 'random index' out. Loop the process.
And here is my code:
import math
import numpy
from random import uniform
import random
from math import floor
r = 1
k = 30
grid = []
w = r / math.sqrt(2)
active = []
width = 100
height = 100
depth = 100
cols = floor(width / w)
rows = floor(height / w)
deps = floor(depth / w)
default = numpy.array((-1,-1,-1))
for i in range(cols * rows * deps):
grid.append(default)
x = width / 2
y = height / 2
z = depth / 2
i = floor(x / w)
j = floor(y / w)
k = floor(z / w)
pos = numpy.array((x,y,z))
grid[i + cols * (j + rows * k)] = pos
active.append(pos)
while (len(active) > 0) and (len(grid[grid == -1]) > 0):
randIndex = floor(uniform(0, len(active)))
pos = active[randIndex]
found = False
for n in range(k):
m1 = uniform(-2 * r, 2 * r)
m2 = uniform(-2 * r, 2 * r)
m3 = uniform(-2 * r, 2 * r)
m = numpy.array((m1,m2,m3))
sample = numpy.add(pos, m)
col = floor(sample[0] / w)
row = floor(sample[1] / w)
dep = floor(sample[2] / w)
if (col > -1 and row > -1 and dep > -1 and col < cols and row < rows and dep < deps and numpy.all([grid[col + cols * (row + rows * dep)],default])==True):
ok = True
for i in range(-1,2):
for j in range(-1, 2):
for k in range(-1, 2):
index = (col + i) + cols * ((row + j) + rows * (dep + k))
if col + i > -1 and row + j > -1 and dep + k > -1 and col + i < cols and row + j < rows and dep + k < deps:
neighbor = grid[index]
if numpy.all([neighbor, default]) == False:
d = numpy.linalg.norm(sample - neighbor)
if (d < r):
ok = False
if ok == True:
found = True
grid[col + cols * (row + rows * dep)] = sample
active.append(sample)
if found == False:
del active[randIndex]
print(len(active))
for printout in range(len(grid)):
print("<" + str(active[printout][0]) + "," + str(active[printout][1]) + "," + str(active[printout][2]) + ">")
print(len(grid))
My code seems to run forever and do not obey my condition(distance of two spheres must be larger than 2 * radius) as shown in the visualization by POV-RAY.(picture in comment)
Therefore I tried to add a print(len(active)) in the last of the while loop.
Surprisingly, I think I discovered the bug as the length of the active list just keep increasing! (It is supposed to be the same length as the grid) I think the problem is caused by the active.append(), but I can't figure out where is the problem as the code is literally the 90% the same as the one made by Mr.Shiffman.
I don't want to free ride this but I have already checked again and again while correcting again and again for this code :(. Still, I don't know where the bug is. (why do the active[] keep appending!?)
Thank you for the precious time.

Increasing performance with octant search algorithm

I am working on an octant search to find the n-number(e.g. 8) of points (+) closest to my circular point (o) in each octant. This would mean that my points (+) are reduced to only 64 (8 per octant).
The first thing I did is to divide my region into octants with my point (o) as reference.
data = array containing (x, y, z) for all points (+)
gdata = array containing (x, y) for point (o)
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
from collections import defaultdict
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
data = pd.read_excel(file_path)
data = np.array(data, dtype=np.float)
nrow, cols = data.shape
file_path1 = filedialog.askopenfilename()
gdata = pd.read_excel(file_path1)
gdata = np.array(gdata, dtype=np.float)
pwangle = np.zeros(nrow)
for j in range(nrow):
delta_x = gdata[:,0]-data[:,0][j]
delta_y = gdata[:,1]-data[:,1][j]
if delta_x != 0:
pwangle[j] = np.rad2deg(np.arctan(delta_y/delta_x))
else:
if delta_y > 0:
pwangle[j] = 90
elif delta_y < 0:
pwangle[j] = 270
if (delta_x < 0)&(delta_y > 0):
pwangle[j] = 180 + pwangle[j]
elif (delta_x < 0)&(delta_y < 0):
pwangle[j] = 270 - pwangle[j]
elif (delta_x > 0)&(delta_y < 0):
pwangle[j] = 360 + pwangle[j]
vecangle = pwangle.ravel()
sortdata = defaultdict(list)
count = -1
get_anglesector = 45
N = 8
d = cdist(data[:,:2], gdata)
P = np.hstack((data, d))
for j in range(0, 360, get_anglesector):
count += 1
get_data = []
for k, dummy_val in enumerate(vecangle):
if j <= vecangle[k] < j + get_anglesector:
get_data.append(P[k,::])
sortdata[count] = np.array(get_data)
After data have been grouped into various octant, I then sort data in each octant to obtain the closest 8 data to the point (o).
for i, j in enumerate(sortdata):
octantsort = defaultdict(list)
for i in range(8):
octantsort[i] = np.array(sortdata[i][sortdata[i][:,3].argsort()[:N]])
Is there an efficient and pythonic way of doing this do increase performance?
This works fine but when i have more than one 'o' point (e.g. 10000 points 'o') and I have run the above code for each point, it would be time consuming.

The job gets a lot easier if you use arctan2 instead of arctan. Then vectorizing for speed we may get something like this:
import numpy as np
from scipy.spatial.distance import cdist
delta = gdata - data[:,:2]
angles = np.arctan2(delta[:,1], delta[:,0])
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf # handle edge case
octantsort = []
for i in range(8):
data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
dist_order = np.argsort(cdist(data_i, gdata))
octantsort.append(data_i[dist_order[:N]])

Thank you #user7138814, apart from making some slight changes, your code is faster
N=8
delta = gdata - data[:,:2]
angles = np.arctan2(delta[:,1], delta[:,0])
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf # handle edge case
octantsort = []
for i in range(8):
data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
dist_order = np.argsort(cdist(data_i[:,:2], gdata), axis=0)
[octantsort.append(data_i[dist_order[:N][j]]) for j in range(8)]
final = np.vstack(octantsort)
Time of execution of the previous code (code in the question):
---- 0.021449804306030273 seconds ------
Time of execution of the code in this post:
---- 0.0015172958374023438 seconds ------

determine mean zero crossing

using numpy I have extracted the zero crossings of a signal.
Unfortunately the source of the data is noisy and thus there are multiple zero crossings.
If I filter the data before checking for zero crossings, aspects of the filter (gain-phase margin) will need to be justified while averaging the zero crossing points is slightly easier to justify
[123,125,127,1045,1049,1050,2147,2147,2151,2155]
consider the above list. what would be an appropriate way to create:
[125, 1048, 2149]
The aim is to find the phase shift between two sine waves

This code takes a simplistic approach of looking for a gap THRESHOLD between the transitions - exceeding this marks the end of a signal transition.
xings = [123,125,127,1045,1049,1050,2147,2147,2151,2155]
THRESHOLD = 100
xlast = -1000000
tot = 0
n = 0
results = []
i = 0
while i < len(xings):
x = xings[i]
if x-xlast > THRESHOLD:
# emit a transition, averaged for the
if n > 0:
results.append(tot/n)
tot = 0
n = 0
tot += x
n += 1
xlast = x
i += 1
if n > 0:
results.append(tot/n)
print results
prints:
[125, 1048, 2150]

I was hoping for a more elegant solution to just iterating over the list of zero crossings, but it seems that is the only solution.
I settled on:
def zero_crossing_avg(data):
output = []
running_total = data[0]
count = 1
for i in range(1,data.size):
val = data[i]
if val - data[i-1] < TOL:
running_total += val
count += 1
else:
output.append(round(running_total/count))
running_total = val
count = 1
return output
with example code of it in-use:
#!/usr/bin/env python
import numpy as np
from matplotlib import pyplot as plt
dt = 5e-6
TOL = 50
class DCfilt():
def __init__(self,dt,freq):
self.alpha = dt/(dt + 1/(2*np.pi*freq))
self.y = [0,0]
def step(self,x):
y = self.y[-1] + self.alpha*(x - self.y[-1])
self.y[-1] = y
return y
def zero_crossing_avg(data):
output = []
running_total = data[0]
count = 1
for i in range(1,data.size):
val = data[i]
if val - data[i-1] < TOL:
running_total += val
count += 1
else:
output.append(round(running_total/count))
running_total = val
count = 1
return output
t = np.arange(0,2,dt)
print(t.size)
rng = (np.random.random_sample(t.size) - 0.5)*0.1
s = 10*np.sin(2*np.pi*t*10 + np.pi/12)+rng
c = 10*np.cos(2*np.pi*t*10)+rng
filt_s = DCfilt(dt,16000)
filt_s.y[-1] =s[0]
filt_c = DCfilt(dt,1600)
filt_c.y[-1] =c[0]
# filter the RAW data first
for i in range(s.size):
s[i] = filt_s.step(s[i])
c[i] = filt_c.step(c[i])
# determine the zero crossings
s_z = np.where(np.diff(np.sign(s)))[0]
c_z = np.where(np.diff(np.sign(c)))[0]
sin_zc = zero_crossing_avg( np.where(np.diff(np.sign(s)))[0] )
cos_zc = zero_crossing_avg( np.where(np.diff(np.sign(c)))[0] )
HALF_PERIOD = (sin_zc[1] - sin_zc[0])
for i in range([len(sin_zc),len(cos_zc)][len(sin_zc) > len(cos_zc)]):
delta = abs(cos_zc[i]-sin_zc[i])
print(90 - (delta/HALF_PERIOD)*180)
plt.hold(True)
plt.grid(True)
plt.plot(s)
plt.plot(c)
plt.show()
This works well enough.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Make box-counting code faster using Python - python

Related

Check if a set of points described a triangle

Finding the union of multiple overlapping rectangles - OpenCV python

Python .append seems to run forever and the 'uniform' value seems not too random (making Poisson sphere distribution in Python)

Increasing performance with octant search algorithm

determine mean zero crossing

Categories

Resources