For example this array of GPS Coordinates:
# GPS points to visit; a point's position in this list is its id.
GPSS = [
    {"Lat": 40.641099, "Lon": -73.917094},
    {"Lat": 40.60442, "Lon": -74.054873},
    {"Lat": 40.779582, "Lon": -73.920213},
    {"Lat": 40.651616, "Lon": -73.89097},
    {"Lat": 40.755183, "Lon": -73.846248},
]
I have already calculated the Distances below for each possible combination:
# Precomputed distance for every unordered pair of point ids.
Distances = [
    {"GPSS": [0, 1], "Distance": 12.34895151892164},
    {"GPSS": [0, 2], "Distance": 15.380561959360797},
    {"GPSS": [0, 3], "Distance": 2.499303143635897},
    {"GPSS": [0, 4], "Distance": 14.012560598709298},
    {"GPSS": [1, 2], "Distance": 22.53687775052488},
    {"GPSS": [1, 3], "Distance": 14.824576927209662},
    {"GPSS": [1, 4], "Distance": 24.318038568441654},
    {"GPSS": [2, 3], "Distance": 14.423642658224264},
    {"GPSS": [2, 4], "Distance": 6.807346029310139},
    {"GPSS": [3, 4], "Distance": 12.106031672624894},
]
0,1 = referring to 40.641099,-73.917094 and 40.60442,-74.054873
0,4 = 40.641099,-73.917094 and 40.755183,-73.846248
I would now like to find out the shortest Distance (route) to visit each set of coordinates, so it's most likely not going to be point 0 to 1 to 2 to 3 to 4.
But something like 1 to 3 to 4 to 2 to 0.
How would I accomplish something like this?
This is as far as I got:
# Attempt at chaining precomputed legs into a route, one candidate per entry
# of Distances.
# NOTE(review): the nesting below was reconstructed from a flattened paste;
# confirm it matches the author's intent before relying on it.
for index, d in enumerate(Distances):
    print(d['GPSS'])
    # The candidate route starts with this leg; both of its endpoints count
    # as visited.
    Total = d['Distance']
    Start = d['GPSS'][1] #[0]
    CheckPoints = []
    CheckPoints.append(d['GPSS'][0])
    CheckPoints.append(d['GPSS'][1])
    for index2, d2 in enumerate(Distances):
        if index != index2:
            # Only legs whose first endpoint equals Start are chained on.
            if Start == d2['GPSS'][0]: #0-1, 1-2, 2-3
                Total += d2['Distance']
                # The next leg must begin at Start + 1, regardless of where
                # this leg actually ended.
                Start += 1
                if d2['GPSS'][0] not in CheckPoints:
                    CheckPoints.append(d2['GPSS'][0])
                if d2['GPSS'][1] not in CheckPoints:
                    CheckPoints.append(d2['GPSS'][1])
                #print(CheckPoints)
                print("+"+str(d2['Distance'])+" = "+str(Total)+" | "+str(Start)+" - "+str(d2['GPSS']))
    # If some point was never reached, add the leg from each missing point to
    # the most recently recorded checkpoint, when such a pair exists.
    # range(len(GPSS)-1) does not include the last point id.
    if len(CheckPoints) <= len(GPSS)-1: #GPPS - is from above
        for x in range(len(GPSS)-1):
            if x not in CheckPoints:
                for d3 in Distances:
                    if d3['GPSS'][0] == x and d3['GPSS'][1] == CheckPoints[-1]:
                        print("HERE")
                        print(d3)
                        Total += d3['Distance']
                        break
    print(Total)
Any help would be much appreciated.
Thanks
The best way to do what you are looking for is to create a Graph. If you do not know what that is, you should look it up as it's a very important data structure. You will probably also need to know what it is to fully understand the following code. Python does not have a built in graph so you need to create your own.
The type of graph you are going to need is an undirected weighted graph with all of the nodes, or in your case GPS coordinates, connected to each other. Then you can walk the graph with a greedy nearest-neighbour search (similar in spirit to "Dijkstra's Algorithm") to find a short path that visits all of the points.
Below is an implementation of what you are looking for. However I coded this to work with a list containing lists of paired coordinates. It also includes a driver, driver(), you can call to test it out.
I wrote this up quick and didn't code it as a class, but in the real world you most definitely should.
As a note, when you run the driver function it will execute the code and print out all of the possible paths and their weights for the provided coordinate list. "Weight" in your case refers to the distance between the points. The list printed shows the path it took with "1" referring to the pair of points at index "0" of the coordinate list. The next number in the list is the pair of points it went to next.
If you have any further questions feel free to ask
from collections import defaultdict
from math import sqrt
# Shortest path to all coordinates from any node
# Coordinates must be provided as a list containing lists of
# x/y pairs. ie [[23.2321, 58.3123], [x.xxx, y.yyy]]
def distance_between_coords(x1, y1, x2, y2):
    """Return the straight-line (Euclidean) distance from (x1, y1) to (x2, y2)."""
    dx = x2 - x1
    dy = y2 - y1
    return sqrt(dx ** 2 + dy ** 2)
def name_coords(coords):
    """Append a 1-based integer "name" to each coordinate, in place.

    The names serve as keys when looking up edges. The same list object that
    was passed in is returned, with every inner list one element longer.
    """
    for name, coord in enumerate(coords, start=1):
        coord.append(name)
    return coords
def graph(coords):
    """Build a complete, weighted, undirected graph over the coordinates.

    Each coordinate is first given a name via ``name_coords`` (which mutates
    the inner lists). An edge weight is then stored for every ordered pair of
    distinct coordinates, so both ``(a, b)`` and ``(b, a)`` are present.

    Returns:
        A tuple ``(coords, edges)``: the named coordinates, and a dict that
        maps ``(name_a, name_b)`` to the distance between those two points.
    """
    coords = name_coords(coords)
    edges = {}
    for current in coords:
        for comparer in coords:
            # A node has no edge to itself.
            if comparer == current:
                continue
            edges[current[2], comparer[2]] = distance_between_coords(
                current[0], current[1], comparer[0], comparer[1])
    return coords, edges
def shortest_path(node_list, edges, start):
    """Build a route through every node with a greedy nearest-neighbour walk.

    Starting at ``start``, repeatedly move to the closest node that has not
    been visited yet, measuring from the node currently occupied. This is a
    heuristic: the route is usually short but is not guaranteed to be the
    optimum.

    Args:
        node_list: Named coordinates; the name is read from index 2 of each
            entry.
        edges: Dict mapping ``(name_a, name_b)`` to the distance between them.
        start: Name of the node the route begins at.

    Returns:
        A tuple ``(visited, total_weight)``: the node names in visiting order
        and the summed length of the legs actually travelled.

    Raises:
        KeyError: If ``edges`` lacks an entry for a leg that is examined.
    """
    unvisited = []
    visited = []
    for node in node_list:
        if node[2] == start:
            visited.append(start)
        else:
            unvisited.append(node[2])

    total_weight = 0
    position = start
    while unvisited:
        # Measure from where we are now, not from the original start node;
        # otherwise the total is a sum of spokes rather than a route length.
        # min() keeps the first candidate on ties.
        nearest = min(unvisited, key=lambda name: edges[position, name])
        total_weight += edges[position, nearest]
        unvisited.remove(nearest)
        visited.append(nearest)
        position = nearest
    return visited, total_weight
def driver():
    """Demonstrate the route search on a fixed set of sample coordinates.

    Builds the graph, computes one route per possible start node, prints each
    route with its weight, and finally prints the lightest route found.
    """
    coords = [[1.7592675, 92.4836507], [17.549836, 32.457398],
              [23.465896, 45], [25.195462, 37.462742],
              [42.925274, 63.234028], [2.484631, 5.364871],
              [50.748376, 36.194797]]
    coords, edges = graph(coords)

    shortest_path_taken = []
    shortest_path_weight = 0
    # Node names are 1-based, so try every name from 1 to len(coords).
    for start in range(1, len(coords) + 1):
        path, weight = shortest_path(coords, edges, start)
        print('--------------------------------------')
        print("Path", start, "=", path)
        print("Weight =", weight)
        # The first route seeds the running best; later ones must beat it.
        if start == 1 or weight < shortest_path_weight:
            shortest_path_weight = weight
            shortest_path_taken = path
    print('--------------------------------------')
    print("The shortest path to all nodes is:", shortest_path_taken)
    print("The weight of the path is:", shortest_path_weight)
Edit:
Here is what the output will look like when you call the driver function:
--------------------------------------
Path 1 = [1, 5, 3, 4, 2, 7, 6]
Weight = 386.3252849770695
--------------------------------------
Path 2 = [2, 4, 3, 6, 7, 5, 1]
Weight = 189.3710721663407
--------------------------------------
Path 3 = [3, 4, 2, 5, 7, 6, 1]
Weight = 173.99235180101968
--------------------------------------
Path 4 = [4, 3, 2, 7, 5, 6, 1]
Weight = 172.86112533927678
--------------------------------------
Path 5 = [5, 3, 7, 4, 2, 1, 6]
Weight = 247.08415835699554
--------------------------------------
Path 6 = [6, 2, 4, 3, 7, 5, 1]
Weight = 330.1567215845902
--------------------------------------
Path 7 = [7, 4, 5, 3, 2, 6, 1]
Weight = 247.70066871941674
--------------------------------------
The shortest path to all nodes is: [4, 3, 2, 7, 5, 6, 1]
The weight of the path is: 172.86112533927678
[Finished in 0.1s]*
Related
Trying to solve a problem where I need to find all the possible path in a 4x4 grid.
df = [[1,1,1,1], [1,1,1,1], [1,1,1,1], [1,1,1,1]]
Possible moves are left, right, top, down, top right, top left, down right, down left and no repeats in the coordinates.
I tried to start small with 2x2 but I'm not sure even with how to convert my thought into code.
All help appreciated. Thank you
I tried to convert all data into cords and the code below, however this would only get me from top to corner in the right bottom.
# 4x4 grid of ones; only its dimensions are used below.
dp = [[1] * 4 for _ in range(4)]
n = len(dp)
m = len(dp[0])
paths = []    # every completed path, listed from (0, 0) to the start cell
curpath = []  # cells on the walk currently being explored


def getPath(r, c):
    """Walk from (r, c) towards (0, 0), moving only up or left.

    Each time (0, 0) is reached, the reversed current walk is recorded in
    ``paths``. Cells outside the grid (negative row or column) end the walk.
    """
    if r >= 0 and c >= 0:
        curpath.append((r, c))
        if (r, c) == (0, 0):
            paths.append(curpath[::-1])
        getPath(r - 1, c)
        getPath(r, c - 1)
        curpath.pop()


getPath(n - 1, m - 1)
print(paths)
I want to convert [0, 0, 1, 0, 1, 0, 1, 0] to [2, 4, 6] using ortools.
Where "2", "4", "6" in the second list are the index of "1" in the first list.
Using the below code I could get a list [0, 0, 2, 0, 4, 0, 6, 0]. How can I get [2, 4, 6]?
from ortools.sat.python import cp_model
model = cp_model.CpModel()
solver = cp_model.CpSolver()
days = 8
horizon = 7

# One Boolean per day, each pinned to a fixed value.
work = {i: model.NewBoolVar("work(%i)" % (i)) for i in range(days)}
for i, fixed in enumerate((0, 0, 1, 0, 1, 0, 1, 0)):
    model.Add(work[i] == fixed)

# v1[d] equals d when work[d] is set and 0 otherwise.
v1 = [model.NewIntVar(0, horizon, "") for _ in range(days)]
for d, index_var in enumerate(v1):
    model.Add(index_var == d * work[d])

status = solver.Solve(model)
print("status:", status)

vec = [solver.Value(work[i]) for i in range(days)]
print("work",vec)

vec = [solver.Value(v) for v in v1]
print("vec1",vec)
You should see this output on the console,
status: 4
work [0, 0, 1, 0, 1, 0, 1, 0]
vec1 [0, 0, 2, 0, 4, 0, 6, 0]
Thank you.
Edit:
I also wish to get a result as [4, 6, 2].
For just 3 variables, this is easy. In pseudo code:
The max index is max(work[i] * i)
The min index is min(horizon - (horizon - i) * work[i])
The medium is sum(i * work[i]) - max_index - min_index
But that is cheating.
If you want more than 3 variables, you will need parallel arrays of Boolean variables that indicate the rank of each variable.
Let me sketch the full solution.
You need to build a graph. The X axis holds the variables. The Y axis holds the ranks. You have horizontal arcs going right, and diagonal arcs going right and up. If the variable is selected, you need to use a diagonal arc, otherwise a horizontal arc.
If using a diagonal arc, you will assign the current variable to the rank of the tail of the arc.
Then you need to add constraints to make it a contiguous path:
mass conservation at each node
variable is selected -> one of the diagonal arc must be selected
variable is not selected -> one of the horizontal arc must be selected
bottom left node has one outgoing arc
top right node has one incoming arc
I'm trying to implement the Warshall algorithm in python 3 to create a matrix with the shortest distance between each point.
This is supposed to be a simple implementation, I make a matrix and fill it with the distance between each point.
However, I'm getting the wrong result, and I dont know what is the problem with my implementation.
# Number of vertices (N) and number of connections (M).
N, M = 4, 4
# Edge list: each entry [A, B, C] is a connection between A and B with
# distance C.
A = [[0, 1, 2], [0, 2, 4], [1, 3, 1], [2, 3, 5]]

# The distance matrix has one row and one column per vertex, so it is N x N
# (it must not be sized by the number of connections).
inf = float("inf")
dist = [[inf] * N for _ in range(N)]

# The distance from a vertex to itself is 0.
for vertex in range(N):
    dist[vertex][vertex] = 0

# Connections are bidirectional, so fill both directions.
for vertex in A:
    dist[vertex[0]][vertex[1]] = vertex[2]
    dist[vertex[1]][vertex[0]] = vertex[2]

# Floyd-Warshall: allow each vertex k in turn as an intermediate stop and
# relax the i -> j distance through it.
for k in range(N):
    for i in range(N):
        for j in range(N):
            if dist[i][j] > dist[i][k] + dist[k][j]:
                # Update the row of the source vertex i (not a fixed row).
                dist[i][j] = dist[i][k] + dist[k][j]

print(dist)
Expected Matrix on the first index (dist[0]):
[0, 2, 4, 3]
Actual result:
[0, 2, 4, inf]
for some reason I keep getting inf instead of 3 on dist[0][3].
What am I missing?
It's a little tricky to spot, but a simple change-by-change trace of your program spots the problem:
if dist[i][j] > dist[i][k] + dist[k][j]:
dist[1][j] = dist[i][k] + dist[k][j];
^ This should be i, not 1
You're changing the distance from node 1 to the target node; rather than from the source node. Your resulting distance matrix is
[0, 2, 4, 3]
[2, 0, 6, 1]
[4, 6, 0, 5]
[3, 1, 5, 0]
See this lovely debug blog for help.
I have thousands of time series (24 dimensional data -- 1 dimension for each hour of the day). Out of these time series, I'm interested in a particular sub-sequence or pattern that looks like this:
I'm interested in sub-sequences that resemble the overall shape of the highlighted section -- that is, a sub-sequence with a sharp negative slope, followed by a period of several hours where the slope is relatively flat before finally ending with a sharp positive slope. I know the sub-sequences I'm interested in won't match each other exactly and most likely will be shifted in time, scaled differently, have longer/shorter periods where the slope is relatively flat, etc. but I would like to find a way to detect them all.
To do this, I have developed a simple Heuristic (based on my definition of the highlighted section) to quickly find some of the sub-sequences of interest. However, I was wondering if there was a more elegant way (in Python) to search thousands of time series for the sub-sequence I'm interested in (while taking into account things mentioned above -- differences in time, scale, etc.)?
Edit: a year later I cannot believe how much I overcomplicated flatline and slope detection; stumbling on the same question, I realized it's as simple as
# Positions where an element equals its successor (difference of zero);
# single-argument np.where returns a tuple of index arrays.
idxs = np.where(x[1:] - x[:-1] == 0)
# For every index array also emit the same array shifted by one, so that the
# right endpoint of each equal pair is included.
idxs = [i for idx in idxs for i in (idx, idx + 1)]
The first line is implemented efficiently via np.diff(x); further, to e.g. detect slope > 5, use np.diff(x) > 5. The second line is needed since differencing tosses out right endpoints (e.g. diff([5,6,6,6,7]) = [1,0,0,1] -> idxs=[1,2], which excludes 3).
Functions below should do; code written with intuitive variable & method names, and should be self-explanatory with some readovers. The code is efficient and scalable.
Functionalities:
Specify min & max flatline length
Specify min & max slopes for left & right tails
Specify min & max average slopes for left & right tails, over multiple intervals
Example:
import numpy as np
import matplotlib.pyplot as plt
def _plot_marked(series, groups):
    """Plot the series and overlay a red marker for every entry of groups."""
    plt.plot(series)
    for idx in groups:
        plt.plot(idx, series[idx], marker='o', color='r')
    plt.show()


# Toy data (a single column vector)
t = np.array([[ 5,  3,  3,  5,  3,  3,  3,  3,  3,  5,  5,  3,  3,  0,  4,
                1,  1, -1, -1,  1,  1,  1,  1, -1,  1,  1, -1,  0,  3,  3,
                5,  5,  3,  3,  3,  3,  3,  5,  7,  3,  3,  5]]).T
plt.plot(t)
plt.show()

# Get flatline indices
indices = get_flatline_indices(t, min_len=4, max_len=5)
_plot_marked(t, indices)

# Filter by edge slopes
lims_left = (-10, -2)
lims_right = (2, 10)
averaging_intervals = [1, 2, 3]
indices_filtered = filter_by_tail_slopes(
    indices, t, lims_left, lims_right, averaging_intervals)
_plot_marked(t, indices_filtered)
def get_flatline_indices(sequence, min_len=2, max_len=6):
    """Return the indices of elements that belong to flat runs.

    A flat run is a stretch of consecutive equal elements. Runs shorter than
    ``min_len`` are ignored. A run longer than ``max_len`` is consumed in
    chunks of ``max_len``: the first chunk is reported and the remainder is
    examined again as a new run.

    Runs that reach the end of ``sequence`` are handled like any other run;
    the scan never reads past the last element.

    Args:
        sequence: Indexable sequence whose elements can be compared with
            ``==``.
        min_len: Minimum number of equal elements for a run to be reported.
        max_len: Maximum number of indices reported for one chunk of a run.

    Returns:
        A flat list of indices in ascending order; empty if nothing matches.
    """
    indices = []
    n = len(sequence)
    elem_idx = 0
    # A run needs at least two elements, so stop once no successor exists.
    while elem_idx + 1 < n:
        current_elem = sequence[elem_idx]
        if current_elem != sequence[elem_idx + 1]:
            elem_idx += 1
            continue

        # Measure the run, bounds-checking before every look-ahead.
        flatline_len = 1
        while (elem_idx + flatline_len < n
               and sequence[elem_idx + flatline_len] == current_elem):
            flatline_len += 1

        if flatline_len >= min_len:
            flatline_len = min(flatline_len, max_len)
            indices.extend(range(elem_idx, elem_idx + flatline_len))

        # Always advance, even if max_len trimmed the run down to nothing.
        elem_idx += max(flatline_len, 1)
    return indices
def filter_by_tail_slopes(indices, data, lims_left, lims_right, averaging_intervals=1):
    """Keep index ranges whose left and right tails have slopes within limits.

    Walks ``indices`` in order, looking for an index with a valid left-tail
    slope and then, once one has been seen, an index with a valid right-tail
    slope. Each such left/right pair contributes one filled index range to
    the result.

    NOTE(review): the nesting below was reconstructed from a flattened paste;
    confirm it against the author's original before relying on it.

    Args:
        indices: Candidate indices into ``data``.
        data: Indexable series passed through to ``_get_slopes``.
        lims_left: ``(min, max)`` slope limits for the left tail.
        lims_right: ``(min, max)`` slope limits for the right tail.
        averaging_intervals: An int or an iterable of ints, passed through to
            ``_get_slopes``.

    Returns:
        A list of index lists, one per matched left/right pair.
    """
    indices_filtered = []
    indices_temp, tails_temp = [], []
    got_left, got_right = False, False
    for idx in indices:
        slopes_left, slopes_right = _get_slopes(data, idx, averaging_intervals)
        # The enumerate position + 1 is stored as the tail length that goes
        # with a matching slope.
        for tail_left, slope_left in enumerate(slopes_left):
            if _valid_slope(slope_left, lims_left):
                if got_left:
                    indices_temp = [] # discard prev if twice in a row
                    tails_temp = []
                indices_temp.append(idx)
                tails_temp.append(tail_left + 1)
                got_left = True
        # Right tails are only considered once a left tail has been found.
        if got_left:
            for edge_right, slope_right in enumerate(slopes_right):
                if _valid_slope(slope_right, lims_right):
                    # A later valid right tail replaces the earlier one.
                    if got_right:
                        indices_temp.pop(-1)
                        tails_temp.pop(-1)
                    indices_temp.append(idx)
                    tails_temp.append(edge_right + 1)
                    got_right = True
        if got_left and got_right:
            # Extend the range outwards by the stored tail lengths.
            left_append = indices_temp[0] - tails_temp[0]
            right_append = indices_temp[1] + tails_temp[1]
            indices_filtered.append(_fill_range(left_append, right_append))
            # Reset state so the next pair starts from scratch.
            indices_temp = []
            tails_temp = []
            got_left, got_right = False, False
    return indices_filtered
def _get_slopes(data, idx, averaging_intervals):
if type(averaging_intervals) == int:
averaging_intervals = [averaging_intervals]
slopes_left, slopes_right = [], []
for interval in averaging_intervals:
slopes_left += [(data[idx] - data[idx-interval]) / interval]
slopes_right += [(data[idx+interval] - data[idx]) / interval]
return slopes_left, slopes_right
def _valid_slope(slope, lims):
min_slope, max_slope = lims
return (slope >= min_slope) and (slope <= max_slope)
def _fill_range(_min, _max):
return [i for i in range(_min, _max + 1)]
I have a set of points with x and y coordinates that can be seen in the figure below. The coordinates of the 9 points were stored in a list as follows:
L = [[5,2], [4,1], [3.5,1], [1,2], [2,1], [3,1], [3,3], [4,3] , [2,3]]
The idea is to sort the points clockwise from an origin. In this case, the origin is the point that is colored and that has an arrow that indicates the direction of the ordering. Do not worry about creating methodology to determine the origin because it is already solved.
Thus, after being ordered, the list L should be as follows:
L = [[2,3], [3,3], [4,3], [5,2], [4,1], [3.5,1], [3,1], [2,1], [1,2]]
Note that the x and y coordinates are not changed. What changes is the storage order.
Do you have any idea of an algorithm, script or methodology for this problem in the python language?
With a bit of trigonometry it's not that hard. Maybe you know but the angle between two (normalized) vectors is acos(vec1 * vec2). However this calculates only the projected angle but one could use atan2 to calculate the direction-aware angle.
To this means a function calculating it and then using it as key for sorting would be a good way:
import math
pts = [[2, 3], [5, 2], [4, 1], [3.5, 1], [1, 2], [2, 1], [3, 1], [3, 3], [4, 3]]
origin = [2, 3]   # point the ordering starts from
refvec = [0, 1]   # direction that counts as angle zero


def clockwiseangle_and_distance(point):
    """Sort key: (clockwise angle from refvec, distance from origin).

    The angle is measured clockwise from ``refvec`` and lies in [0, 2*pi).
    A point that coincides with ``origin`` has no angle; it gets
    ``(-pi, 0)`` so that it sorts before everything else. Points sharing an
    angle are ordered by increasing distance.
    """
    # Vector from the origin to the point, and its length.
    dx = point[0] - origin[0]
    dy = point[1] - origin[1]
    lenvector = math.hypot(dx, dy)
    if lenvector == 0:
        return -math.pi, 0

    # Unit vector, then dot and cross-like products against refvec.
    ux = dx / lenvector
    uy = dy / lenvector
    dotprod = ux*refvec[0] + uy*refvec[1]
    diffprod = refvec[1]*ux - refvec[0]*uy
    angle = math.atan2(diffprod, dotprod)

    # atan2 reports counter-clockwise angles as negative; shift them into
    # the upper half of [0, 2*pi).
    if angle < 0:
        angle = 2*math.pi+angle
    return angle, lenvector
A sorted run:
>>> sorted(pts, key=clockwiseangle_and_distance)
[[2, 3], [3, 3], [4, 3], [5, 2], [4, 1], [3.5, 1], [3, 1], [2, 1], [1, 2]]
and with a rectangular grid around the origin this works as expected as well:
>>> origin = [2,3]
>>> refvec = [0, 1]
>>> pts = [[1,4],[2,4],[3,4],[1,3],[2,3],[3,3],[1,2],[2,2],[3,2]]
>>> sorted(pts, key=clockwiseangle_and_distance)
[[2, 3], [2, 4], [3, 4], [3, 3], [3, 2], [2, 2], [1, 2], [1, 3], [1, 4]]
even if you change the reference vector:
>>> origin = [2,3]
>>> refvec = [1,0] # to the right instead of pointing up
>>> pts = [[1,4],[2,4],[3,4],[1,3],[2,3],[3,3],[1,2],[2,2],[3,2]]
>>> sorted(pts, key=clockwiseangle_and_distance)
[[2, 3], [3, 3], [3, 2], [2, 2], [1, 2], [1, 3], [1, 4], [2, 4], [3, 4]]
Thanks @Scott Mermelstein for the better function name and @f5r5e5d for the atan2 suggestion.
this should illustrate the issues, gives a visualization tool
but it doesn't work every time for getting the correct entry point for a group of points at the same distance
import random
import pylab
import cmath
from itertools import groupby
# Ten random integer points with both coordinates in [-5, 5).
pts = [(random.randrange(-5,5), random.randrange(-5,5)) for _ in range(10)]

# for this problem complex numbers are just too good to pass up
z_pts = [x + 1j*y for x, y in pts if (x, y) != (0, 0)]
z_pts.sort(key=abs)

# Bucket the points by their distance from the origin.
gpts = [list(members) for _, members in groupby(z_pts, key=abs)]
print(*gpts, sep='\n')

# Seed the ordering with a point just above the origin, then append each
# distance bucket; a bucket with several points is ordered by phase relative
# to the last point placed so far.
spts = [1j/2]
for group in gpts:
    if len(group) > 1:
        group = sorted(group, key=lambda z: cmath.phase(-z / spts[-1]))
    spts.extend(group)
print(spts)
def XsYs(zs):
    """Split complex numbers into parallel lists of real and imaginary parts."""
    return [z.real for z in zs], [z.imag for z in zs]
def SpiralSeg(a, b):
    '''
    Build a clockwise spiral segment joining two ordered points.

    Inputs
        a, b    complex numbers (start and end of the segment)
    Output
        list of complex numbers starting at a; only [a] is returned when
        a is 0 or a equals b
    '''
    if a == 0 or a == b:
        return [a]

    ratio = b / a
    # impose cw rotation direction constraint: a positive phase would turn
    # counter-clockwise, so add a halfway point to force the long way around
    if cmath.phase(ratio) > 0:
        modulus, angle = cmath.polar(ratio)
        half_rotor = cmath.rect(modulus**(1/2), angle / 2 - 1 * cmath.pi)
        a_b = half_rotor * a  # the start point rotated halfway round
        return SpiralSeg(a, a_b) + (SpiralSeg(a_b, b))

    # rotation interpolation with complex numbers: 30 equal multiplicative
    # steps carry a onto b
    rot = ratio ** (1 / 30)
    seg = [a]
    for _ in range(30):
        a *= rot
        seg.append(a)
    return seg
# One spiral segment per pair of consecutive ordered points.
segs = list(map(SpiralSeg, spts, spts[1:]))

# Equal axis scaling, then the raw points with every segment drawn on top.
pylab.axes().set_aspect('equal', 'datalim')
pylab.scatter(*XsYs(z_pts))
for seg in segs:
    pylab.plot(*XsYs(seg))
[(1-2j), (-2-1j)]
[(2-3j)]
[(1+4j)]
[(3+3j)]
[(-3-4j), (3-4j), (4-3j)]
[(1-5j)]
[(-4-4j)]
[0.5j, (-2-1j), (1-2j), (2-3j), (1+4j), (3+3j), (-3-4j), (3-4j), (4-3j), (1-5j), (-4-4j)]

[-1j]
[(-1-1j)]
[(-1-2j), (-1+2j), (2+1j)]
[(-4+0j)]
[(1-4j)]
[-5j, (-4-3j)]
[(1-5j)]
[0.5j, -1j, (-1-1j), (-1-2j), (2+1j), (-1+2j), (-4+0j), (1-4j), (-4-3j), -5j, (1-5j)]
Sorting by angle is not enough
We should sort points lexicographically by polar angle and distance from origin
We sort by polar angle and in case of a tie we sort by a distance from origin