I would like to use pint to convert degrees (distance in a geographic CRS) into nautical miles.
https://geopandas.org/docs/reference/api/geopandas.GeoDataFrame.sjoin_nearest.html outputs distance in degree for epsg:4326.
Given distance (in nm) varies from equator to pole i'm not sure if this is possible.
I could use a rule of thumb of 1 deg ~= 111 km ~= 60 nm.
Perhaps it can be calculated using the starting point and distance using something like: https://github.com/anitagraser/movingpandas/blob/master/movingpandas/geometry_utils.py#L38
This code is also useful: https://geopy.readthedocs.io/en/stable/#module-geopy.distance
Here's some code to test:
import pandas as pd
import geopandas as gpd
df = pd.DataFrame({"lon": [0], "lat": [0]})
gdf_pt = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df["lon"], df["lat"]), crs="epsg:4326")
df2 = pd.DataFrame({"lon": [1, 2], "lat": [0, 0]})
gdf_pts = gpd.GeoDataFrame(df2, geometry=gpd.points_from_xy(df2["lon"], df2["lat"]), crs="epsg:4326")
value = gdf_pt.sjoin_nearest(gdf_pts, distance_col="distances")["distances"].values[0]
import pint
l = value * ureg.arcdegree
Probably best to throw it to Mercator and use that if you can
import pint_pandas
gdf = gdf_pt.to_crs("EPSG:3395").sjoin_nearest(gdf_pts.to_crs("EPSG:3395"), distance_col="distances")
gdf["distance"] = gdf["distance"].astype("pint[meter]").pint.to("nautical_mile")
This function, which I've pulled from existing code, computes the distance in meters between two lat/long sets. "rlat" and "rlong" are expressed in radians; you'll have to do the conversion from degrees. To get nm instead of meters, just set R to 3440.
from math import *
# Radius of the earth, in meters.
R = 6371000
# Return distance between two lat/longs.
def distance( pt1, pt2 ):
rlat1 = pt1.rlat
rlat2 = pt2.rlat
dlat = pt2.rlat - pt1.rlat
dlong = pt2.rlong - pt1.rlong
a = sin(dlat/2) * sin(dlat/2) + cos(rlat1) * cos(rlat2) * sin(dlong/2) * sin(dlong/2)
c = 2 * atan2(sqrt(a), sqrt(1-a))
return R * c
Related
I am currently working on a project whereby I am plotting longitude & latitude on a map. Upon plotting I am then entering a location and I would like my python model to identify the 3 closest locations to the user.
I have looked at geopandas but have had no luck. How can I make the model output the 3 closest longitudes/latitudes?
Try BallTree from sklearn
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree, DistanceMetric
# Dataset
N = 10
df = pd.DataFrame({'place': 'place' + pd.RangeIndex(1, N+1).astype(str),
'lat': np.random.uniform(30, 35, N),
'lon': np.random.uniform(-150, -145, N)})
coords = np.radians(df[['lat', 'lon']])
dist = DistanceMetric.get_metric('haversine')
tree = BallTree(coords, metric=dist)
# User coordinate
user = [32.109643, -148.167012]
coords = np.radians(user)
# Query the database
distances, indices = tree.query([coords], k=3)
out = df.iloc[indices[0]]
Output:
# Nearest places around [32.109643, -148.167012]
>>> out
place lat lon
9 place10 32.267336 -148.225587
5 place6 31.269928 -148.293427
7 place8 31.667689 -149.197724
Step 1: Build a distance formula
from math import radians, cos, sin, sqrt, atan2
def dms(degrees, minutes, seconds):
""" Convert degrees minutes and seconds to radians """
return radians(degrees + minutes / 60 + seconds / 3600)
def dist(p, q, R=6_371):
""" Compute distance between two points on a sphere.
Haversine formula:
a = sin²(Δφ/2) + cos φ1 ⋅ cos φ2 ⋅ sin²(Δλ/2)
c = 2 ⋅ atan2( √a, √(1−a) )
d = R ⋅ c
where:
φ is latitude
λ is longitude
R is earth’s radius (mean radius = 6,371km)
Example:
>>> p = dms(50, 3, 59), dms(5, 42, 53) # 50° 03′ 59″
>>> q = dms(58, 38, 38), dms(3, 4, 12) # 58° 38′ 38″
>>> round(dist(p, q), 1)
968.9
"""
# https://www.movable-type.co.uk/scripts/philamg.html
phi_p, lam_p = p
phi_q, lam_q = q
delta_phi = phi_q - phi_p
delta_lam = lam_q - lam_p
a = sin(delta_phi / 2)**2 + cos(phi_p) * cos(phi_q) * sin(delta_lam / 2)**2
c = 2 * atan2(sqrt(a), sqrt(1 - a))
return R * c
Step 2: Compute the three closest points
from heapq import nsmallest
from functools import partial
answer = nsmallest(3, locations, key=partial(dist, user_location))
I'm using code from the source given below to get the nearest "site".
Source: https://automating-gis-processes.github.io/site/notebooks/L3/nearest-neighbor-faster.html
My Code:
# Read data from a DB
test_df = pd.read_sql_query(sql, conn)
# Calculates distance between 2 points on a map using lat and long
# (Source: https://towardsdatascience.com/heres-how-to-calculate-distance-between-2-geolocations-in-python-93ecab5bbba4)
def haversine_distance(lat1, lon1, lat2, lon2):
r = 6371
phi1 = np.radians(float(lat1))
phi2 = np.radians(float(lat2))
delta_phi = np.radians(lat2 - lat1)
delta_lambda = np.radians(lon2- lon1)
a = np.sin(delta_phi / 2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2)**2
res = r * (2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)))
return np.round(res, 2)
test_df["actualDistance (km)"] = test_df.apply(lambda row: haversine_distance(row['ClientLat'],row['ClientLong'],row['actual_SLa'],row['actual_SLo']), axis=1)
test_gdf = geopandas.GeoDataFrame(test_df, geometry=geopandas.points_from_xy(test_df.ClientLong, test_df.ClientLat))
site_gdf = geopandas.GeoDataFrame(site_df, geometry=geopandas.points_from_xy(site_df.SiteLong, site_df.SiteLat))
#-------Set up the functions as shown in the tutorial-------
def get_nearest(src_points, candidates, k_neighbors=1):
"""Find nearest neighbors for all source points from a set of candidate points"""
# Create tree from the candidate points
tree = BallTree(candidates, leaf_size=15, metric='haversine')
# Find closest points and distances
distances, indices = tree.query(src_points, k=k_neighbors)
# Transpose to get distances and indices into arrays
distances = distances.transpose()
indices = indices.transpose()
# Get closest indices and distances (i.e. array at index 0)
# note: for the second closest points, you would take index 1, etc.
closest = indices[0]
closest_dist = distances[0]
# Return indices and distances
return (closest, closest_dist)
def nearest_neighbor(left_gdf, right_gdf, return_dist=False):
"""
For each point in left_gdf, find closest point in right GeoDataFrame and return them.
NOTICE: Assumes that the input Points are in WGS84 projection (lat/lon).
"""
left_geom_col = left_gdf.geometry.name
right_geom_col = right_gdf.geometry.name
# Ensure that index in right gdf is formed of sequential numbers
right = right_gdf.copy().reset_index(drop=True)
# Parse coordinates from points and insert them into a numpy array as RADIANS
left_radians = np.array(left_gdf[left_geom_col].apply(lambda geom: (geom.x * np.pi / 180, geom.y * np.pi / 180)).to_list())
right_radians = np.array(right[right_geom_col].apply(lambda geom: (geom.x * np.pi / 180, geom.y * np.pi / 180)).to_list())
# Find the nearest points
# -----------------------
# closest ==> index in right_gdf that corresponds to the closest point
# dist ==> distance between the nearest neighbors (in meters)
closest, dist = get_nearest(src_points=left_radians, candidates=right_radians)
# Return points from right GeoDataFrame that are closest to points in left GeoDataFrame
closest_points = right.loc[closest]
# Ensure that the index corresponds the one in left_gdf
closest_points = closest_points.reset_index(drop=True)
# Add distance if requested
if return_dist:
# Convert to meters from radians
earth_radius = 6371000 # meters
closest_points['distance'] = dist * earth_radius
return closest_points
closest_sites = nearest_neighbor(test_gdf, site_gdf, return_dist=True)
# Rename the geometry of closest sites gdf so that we can easily identify it
closest_sites = closest_sites.rename(columns={'geometry': 'closest_site_geom'})
# Merge the datasets by index (for this, it is good to use '.join()' -function)
test_gdf = test_gdf.join(closest_sites)
#Extracted closest site latitude and longitude for data analysis
test_gdf['CS_lo'] = test_gdf.closest_site_geom.apply(lambda p: p.x)
test_gdf['CS_la'] = test_gdf.closest_site_geom.apply(lambda p: p.y)
The code is a replica of the tutorial link I provided. And based on their explanation it should've worked.
To verify this data I got some statistical data using .describe(), and it showed me that the tutorials method did indeed give me a mean distance that was much closer than the distance in the actual data (792 m vs the actual distance which was 1.80 km).
Closest Distance generated using the BallTree method
Actual Distance in the data
However when I plotted them out on a map using plotly I noticed that the BallTree method's outputs weren't closer than the "actual" distance.
This is generally what the plotted data looks like (Blue: predetermined site, Red: site predicted using the BallTree method
Could someone help me track down the discrepancy
I'm not sure why this works but it did. I decided to just write the code based on the docs instead of following the tutorial and this worked:
# Build BallTree with haversine distance metric, which expects (lat, lon) in radians and returns distances in radians
dist = DistanceMetric.get_metric('haversine')
tree = BallTree(np.radians(site_df[['SiteLat', 'SiteLong']]), metric=dist)
test_coords = np.radians(test_df[['ClientLat', 'ClientLong']])
dists, ilocs = tree.query(test_coords)
The problem is that the tutorial code provides coordinates in Longitude, Latitude format instead of the Latitude, Longitude format BallTree anticipates. So you're measuring distances between inverted points.
If you swap the order of geom.x and geom.y in the coordinate parsing code you will get correct measurements.
# Parse coordinates from points and insert them into a numpy array as RADIANS
left_radians = np.array(left_gdf[left_geom_col].apply(lambda geom: (geom.y * np.pi / 180, geom.x * np.pi / 180)).to_list())
right_radians = np.array(right[right_geom_col].apply(lambda geom: (geom.y * np.pi / 180, geom.x * np.pi / 180)).to_list())
Having the following information:
Origin point: Point(lat_origin, long_origin)
End point: Point(lat_end, long_end)
Center point: Point(lat_center, long_center)
Distance: 100
Bearing: 90º
from shapely.geometry import Point
origin_point = Point(...,...)
end_point = Point(...,...)
center_point = Point(...,...)
distance = 100
bearing = 90
I would like to be able to generate an arc as close as possible with as few points as possible, obtaining the coordinates of this approximation.
A good functionality would be to be able to control the error tolerance and to be able to dynamically graduate the number of points to approximate the arc.
We must have in mind that we are working with coordinates and we cannot ignore surface curvature.
The expected output would be a function that obtains as inputs, the origin point, the end point, center point, distance, bearing and optionally the error tolerance and returns as output a series of point coordinates from the original point to the end point that approximately form the required arc.
Related links:
https://gis.stackexchange.com/questions/326871/generate-arc-from-projection-coordinates
Any help would be greatly appreciated.
https://www.igismap.com/formula-to-find-bearing-or-heading-angle-between-two-points-latitude-longitude/
import math
import numpy as np
from shapely.geometry import Point, LineString
def get_bearing(center_point, end_point):
lat3 = math.radians(end_point[0])
long3 = math.radians(end_point[1])
lat1 = math.radians(center_point[0])
long1 = math.radians(center_point[1])
dLon = long3 - long1
X = math.cos(lat3) * math.sin(dLon)
Y = math.cos(lat1) * math.sin(lat3) - math.sin(lat1) * math.cos(lat3) * math.cos(dLon)
end_brng = math.atan2(X, Y)
return end_brng
def get_arc_coordinates(center_point, origin_point, end_point, brng_init, distance):
'''
center_point: (center_latitude, center_long)
origin_point: (origin_latitude, origin_long)
end_point: (end_latitude, end_long)
brng_init: degrees
distance: aeronautical miles
'''
brng_init = math.radians(brng_init) #Bearing in degrees converted to radians.
d = distance * 1.852 #Distance in km
R = 6378.1 #Radius of the Earth
brng = get_bearing(center_point,end_point) #1.57 #Bearing is 90 degrees converted to radians.
list_bearings = np.arange(brng, brng_init, 0.1) # 0.1 value to be modify with tolerance
coordinates = []
for i in list_bearings:
lat1 = math.radians(center_point[0]) #Center lat point converted to radians
lon1 = math.radians(center_point[1]) #Center long point converted to radians
brng = i
lat2 = math.asin( math.sin(lat1)*math.cos(d/R) +
math.cos(lat1)*math.sin(d/R)*math.cos(brng))
lon2 = lon1 + math.atan2(math.sin(brng)*math.sin(d/R)*math.cos(lat1),
math.cos(d/R)-math.sin(lat1)*math.sin(lat2))
lat2 = math.degrees(lat2)
lon2 = math.degrees(lon2)
coordinates.append(Point(lat2, lon2))
return LineString(coordinates)
I'm facing some troubles with doing a pairwise calculation in Python.
I have two sets of nodes (e.g. suppliers and customers).
Set 1: SupplierCO = (Xco, Yco) for multiple suppliers
Set 2: Customer CO = (Xco, Yco) for multiple customers
I want to calculate the distances between a customer and all the suppliers, and save the shortest distance. This should be looped for all customers.
I realize I will have to work with two for loops, and an if function. But I don't understand how to select the coordinates from the correct points while looping.
Thanks for the responses!
Some more information:
- Haversine distance
- Each point in set 1 has to be compared to all the points of set 2
- This is what I've so far
import urllib.parse
from openpyxl import load_workbook, Workbook
import requests
from math import radians, cos, sin, asin, sqrt
"""load datafile"""
workbook = load_workbook('Macro.xlsm')
Companysheet = workbook.get_sheet_by_name("Customersheet")
Networksheet = workbook.get_sheet_by_name("Suppliersheet")
"""search for column with latitude/longitude - customers"""
numberlatC = -1
i = 0
for col in Customersheet.iter_cols():
if col[2].value == "Latitude" :
numberlatC = i
i+=1
numberlongC = -1
j = 0
for col in Customersheet.iter_cols():
if col[2].value == "Longitude" :
numberlongC = j
j+=1
latC = [row[numberlatC].value for row in Companysheet.iter_rows() ]
longC = [row[numberlongC].value for row in Companysheet.iter_rows()]
# haversine formula
dlon = lonC - lonS
dlat = latC - latS
a = sin(dlat/2)**2 + cos(latC) * cos(latS) * sin(dlon/2)**2
c = 2 * asin(sqrt(a))
r = 6371 # Radius of earth in kilometers. Use 3956 for miles
distance = c*r
distances.append([distance])
return distances
customers = [latC, longC]
Thanks!
This should give you the general idea. In the following example I've just used regular coordinates, however, you should be able to convert this to your need.
supplier = [(1,3),(2,4),(8,7),(15,14)]
customer = [(0,2),(8,8)]
def CoordinatesDistance(A, B):
import math
x1, x2 = A
y1, y2 = B
return math.sqrt(math.exp((x2-x1)+(y2-y1)))
def shortest_distance_pair(Cust, Sup):
pairs = []
for X in Cust:
shortest_distance = 999999
for Y in Sup:
distance = CoordinatesDistance(X,Y)
#customer_distance.append(distance)
if distance < shortest_distance:
shortest_distance = distance
sdp = (X,Y)
pairs.append(sdp)
return pairs
print(shortest_distance_pair(customer,supplier))
print(shortest_distance_pair(customer,supplier))
[((0, 2), (8, 7)), ((8, 8), (8, 7))]
Now if you create two lists, 1. Customer coordinates, and 2. Supplier coordinates; you should be able to utilize the above.
I have a series of points, of right ascension and declination values.
These points correspond to the vertices of a polygon on the surface of a sphere.
What would be the best way to calculate the area enclosed by these points? I would assume that converting the points with an equal-area projection, and then carrying out typical polygonal area calculating on a flat surface would be an appropriate solution.
note: I cannot use custom python libraries. eg pyproj or shapely
Example code (works for latitude longitude, what modifications would be required to enure this works with sky coordinates?)
def reproject(latitude, longitude):
"""Returns the x & y coordinates in metres using a sinusoidal projection"""
from math import pi, cos, radians
earth_radius = 6371009
lat_dist = pi * earth_radius / 180.0
y = [lat * lat_dist for lat in latitude]
x = [long * lat_dist * cos(radians(lat))
for lat, long in zip(latitude, longitude)]
return x, y
def area_of_polygon(x, y):
"""Calculates the area of an arbitrary polygon given its vertices"""
area = 0.0
for i in xrange(-1, len(x)-1):
area += x[i] * (y[i+1] - y[i-1])
return abs(area) / 2.0
dec = [-15.,89.,89.,-15.,-15.]
ra = [105.,105.,285.,285.,105.]
x,y = reproject(dec, ra)
print area_of_polygon(x,y)
One of the ways is to perform a line integral based on Green's Theorem. See below an implementation, and this question for more details.
def polygon_area(lats, lons, algorithm = 0, radius = 6378137):
"""
Computes area of spherical polygon, assuming spherical Earth.
Returns result in ratio of the sphere's area if the radius is specified.
Otherwise, in the units of provided radius.
lats and lons are in degrees.
"""
from numpy import arctan2, cos, sin, sqrt, pi, power, append, diff, deg2rad
lats = np.deg2rad(lats)
lons = np.deg2rad(lons)
# Line integral based on Green's Theorem, assumes spherical Earth
#close polygon
if lats[0]!=lats[-1]:
lats = append(lats, lats[0])
lons = append(lons, lons[0])
#colatitudes relative to (0,0)
a = sin(lats/2)**2 + cos(lats)* sin(lons/2)**2
colat = 2*arctan2( sqrt(a), sqrt(1-a) )
#azimuths relative to (0,0)
az = arctan2(cos(lats) * sin(lons), sin(lats)) % (2*pi)
# Calculate diffs
# daz = diff(az) % (2*pi)
daz = diff(az)
daz = (daz + pi) % (2 * pi) - pi
deltas=diff(colat)/2
colat=colat[0:-1]+deltas
# Perform integral
integrands = (1-cos(colat)) * daz
# Integrate
area = abs(sum(integrands))/(4*pi)
area = min(area,1-area)
if radius is not None: #return in units of radius
return area * 4*pi*radius**2
else: #return in ratio of sphere total area
return area
Please find a somewhat more explicit version (and with many more references and TODOs...) here.
Looks like I can treat ra and dec like lat and long, work out the area on the Earth's surface in m^2, and use this value to convert into an area in sq degrees.
Please let me know if the solution I propose below is flawed:
def reproject(latitude, longitude):
"""Returns the x & y coordinates in metres using a sinusoidal projection"""
from math import pi, cos, radians
earth_radius = 6371009
lat_dist = pi * earth_radius / 180.0
y = [lat * lat_dist for lat in latitude]
x = [long * lat_dist * cos(radians(lat))
for lat, long in zip(latitude, longitude)]
return x, y
def area_of_polygon(x, y):
"""Calculates the area of an arbitrary polygon given its vertices"""
area = 0.0
for i in xrange(-1, len(x)-1):
area += x[i] * (y[i+1] - y[i-1])
return ((abs(area) / 2.0)/5.10100E14) * 41253
dec = [-15.,89.,89.,-15.,-15.]
ra = [105.,105.,285.,285.,105.]
x,y = reproject(dec, ra)
print area_of_polygon(x,y)