Plot latitude longitude from CSV in Python 3.6 - python

I'm trying to plot a large number of latitude longitude values from a CSV file on a map, having this format (first column and second column):
I'm using Python 3.6 (apparently some libraries, like Basemap, don't work on this version).
How can I do that?

If you just want to plot the point data as a scatterplot, it is as simple as
import matplotlib.pyplot as plt

# Longitude goes on the horizontal axis, latitude on the vertical one.
longitudes = df['Longitude']
latitudes = df['Latitude']
plt.scatter(x=longitudes, y=latitudes)
plt.show()
If you want to plot the points on the map, it's getting interesting because it depends more on how you plot your map.
A simple way is to use shapely and geopandas. The code below is not tested given my limited access on the laptop I am currently using, but it should give you a conceptual roadmap.
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame

# low_memory=False lets pandas infer each column's dtype from the whole file.
df = pd.read_csv("Long_Lats.csv", low_memory=False)

# Build all point geometries in one vectorised call (x = longitude,
# y = latitude) instead of creating one shapely Point per row in Python.
# The coordinates are assumed to be plain latitude/longitude degrees, so the
# frame is tagged as WGS 84 (EPSG:4326) to match the basemap below.
gdf = GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']),
    crs="EPSG:4326",
)

# This is a simple world map that ships with geopandas.
# NOTE(review): gpd.datasets was removed in geopandas 1.0; on newer versions
# the Natural Earth countries file has to be loaded from another source.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Draw the basemap first, then put the points on the same axes.
ax = world.plot(figsize=(10, 6))
gdf.plot(ax=ax, marker='o', color='red', markersize=15)
Find below an example of the rendered image:

You can also use plotly express to plot the interactive worldmap for latitude and longitude
import pandas as pd
import plotly.express as px

# The CSV is expected to provide the 'lat', 'long' and 'id' columns used below.
df = pd.read_csv("location_coordinate.csv")

# One marker per row; hovering over a marker shows the row's id.
fig = px.scatter_geo(df, lat='lat', lon='long', hover_name="id")

# Centre the title horizontally above the map.
fig.update_layout(title_text='World map', title_x=0.5)
fig.show()

Here's an example of adding Lat & Long to a real OpenStreet map:
import plotly.express as px
import pandas as pd

# skipinitialspace=True strips the blank that follows each comma, so a header
# written as "Address, Lat, Long, Listed" yields the column names "Lat",
# "Long" and "Listed" rather than " Lat", " Long" and " Listed".
df = pd.read_csv("dataset/dataset.csv", skipinitialspace=True)

# Drop every row that has a missing value. Only the needed arguments are
# passed: recent pandas raises TypeError when both `how` and `thresh` are given.
df.dropna(axis=0, how='any', inplace=True)

# Two-stop continuous colour scale: lowest value orange, highest value red.
color_scale = [(0, 'orange'), (1, 'red')]

# Marker colour and marker size are both driven by the "Listed" column.
fig = px.scatter_mapbox(
    df,
    lat="Lat",
    lon="Long",
    hover_name="Address",
    hover_data=["Address", "Listed"],
    color="Listed",
    color_continuous_scale=color_scale,
    size="Listed",
    zoom=8,
    height=800,
    width=800,
)

# Use the OpenStreetMap tiles as the base layer.
fig.update_layout(mapbox_style="open-street-map")
# Remove the outer margins so the map fills the whole figure.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()
Example CSV:
Address, Lat, Long, Listed
Address #1, -33.941, 18.467, 1250000
Address #2, -33.942, 18.468, 1900000
Address #3, -33.941, 18.467, 1200000
Address #4, -33.936, 18.467, 1195000
Address #5, -33.944, 18.470, 2400000
Example output (interactive map):

Related

python plotly express multiple layer graph (boxchart + scatter)

I want to create a multi layer graph with the same data frame from pandas.
One should be a boxplot and the other a scatter to see where the company is located.
Is there a way to combine both plots?
boxplot
scatterplot
import pandas as pd
import plotly.express as px

# Semicolon-separated file that uses a decimal comma.
df = pd.read_csv("company_index.csv", sep=";", decimal=",")
print(df)

# Rows belonging to the one company that should be highlighted.
df_u9 = df.loc[df["company"].isin(["U9"])]

# Box plot of the index per period over all companies.
fig_1 = px.box(df, x="period", y="index")

# Scatter plot of the index per period for company U9 only.
fig_2 = px.scatter(df_u9, x="period", y="index")

# Each figure is displayed on its own.
for figure in (fig_1, fig_2):
    figure.show()
company_index.csv
period;index;company
1;202,4;U1
1;226,69;U10
1;235,18;U9
1;236,49;U4
1;238,13;U2
1;244,05;U6
1;252,08;U3
1;256,68;U8
1;294,99;U5
1;299,391;U7
2;243,78;U1
2;264,26;U10
2;270,6;U2
2;272,89;U9
2;285,26;U5
2;289,29;U4
2;291,15;U6
2;291,19;U3
2;305,92;U7
2;314,65;U8
3;271,82;U1
3;278,65;U2
3;296,16;U10
3;297,21;U4
3;305,93;U6
3;308,96;U5
3;323,74;U9
3;335,93;U3
3;354,13;U8
3;381,2;U7
4;281,26;U5
4;308,5;U2
4;311,61;U1
4;334,03;U4
4;335,72;U9
4;344,32;U8
4;345,27;U6
4;355,44;U3
4;373,54;U7
4;381,68;U10
5;288,6;U1
5;305,66;U5
5;323,2;U2
5;358,46;U8
5;365,57;U3
5;366,96;U10
5;368,38;U7
5;371,23;U6
5;373,63;U4
5;422,93;U9
6;285,32;U5
6;291,65;U1
6;308,68;U2
6;372,04;U8
6;376,64;U3
6;403,55;U6
6;407,38;U4
6;420,65;U10
6;423,68;U9
6;453,09;U7
Found this solution. Works rather well.
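I'm still getting used to the ".data[0]" part: it does not refer to the first figure, but to the first (and here only) trace of the figure returned by px.scatter. fig.add_trace takes a single trace rather than a whole figure, so the trace has to be pulled out of the scatter figure this way; a figure with several traces would have .data[1], .data[2] and so on.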
import pandas as pd
import plotly.express as px

# Semicolon-separated file that uses a decimal comma.
df = pd.read_csv("company_index.csv", sep=";", decimal=",")
print(df)

# Copy the rows of the highlighted company so a helper column can be added
# without touching the full frame.
df_u9 = df.loc[df["company"].isin(["U9"])].copy()
# Constant marker size for every highlighted point.
df_u9["size"] = 1

# Base layer: box plot of the index per period over all companies.
fig = px.box(df, x="period", y="index")

# Overlay layer: the highlighted company's points.
# color_discrete_sequence must be a list of CSS colour strings; a bare tuple
# of three integers is read as three separate values, not as one RGB colour.
u9_scatter = px.scatter(
    df_u9,
    x="period",
    y="index",
    size="size",
    size_max=15,
    color_discrete_sequence=["rgb(203,153,201)"],
)

# add_trace takes a single trace, so pull the first trace out of the scatter
# figure and add it to the box-plot figure.
fig.add_trace(u9_scatter.data[0])
fig.show()

plot data on Geopandas matplotlib

I want to plot x and y from a CSV file on a geopandas graph, but only the graph axes show up.
import fiona
import geopandas as gpd  # was missing: every gpd.* call below raised NameError
import matplotlib.pyplot as plt
from mpl_toolkits.axisartist.axislines import Subplot
import pandas as pd

# Enable the KML driver for reading and writing. It is registered once,
# directly on the imported fiona module.
fiona.drvsupport.supported_drivers["KML"] = "rw"

# Read the whitespace-separated node table and keep a CSV copy of it.
dfN = pd.read_csv("nodes.txt", delimiter="\\s+")
dfN.to_csv("nodes.csv", index=None)

# Load the KML shapes and reproject them to EPSG:32733.
df = gpd.read_file("data.kml", driver="KML")
df = df.to_crs(epsg=32733)

# Point geometries built from the node table's X and Y columns.
# NOTE(review): no CRS is set on these points, so nothing guarantees that they
# share the coordinate system of the reprojected shapes - confirm.
gdf = gpd.GeoDataFrame(dfN, geometry=gpd.points_from_xy(dfN.X, dfN.Y))

# Shift the shapes by a fixed x/y offset.
dg = df.translate(433050, 299)

fig, ax = plt.subplots()
ax.set_aspect('equal')
# Nodes in red above (zorder=1) the outlined shapes (zorder=0).
ax.scatter(gdf.X, gdf.Y, zorder=1, alpha=1, c='r', s=10)
dg.plot(ax=ax, zorder=0, color='white', edgecolor='black', aspect='equal')
plt.show()
This is not an MWE, so I have sourced data from a publicly available source and applied the same transformations...
The plotting code can be simplified, and then it works: calling plot() on a GeoDataFrame that contains POINT objects produces a scatter plot.
import io

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests

# Data sourcing generated two geopandas data frames; replace them to make an MWE.
df = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
df = df.to_crs(epsg=32733)

# Keep the valid GBR geometries and apply the same fixed x/y shift.
valid_gbr = df["geometry"].is_valid & df["iso_a3"].eq("GBR")
dg = df.loc[valid_gbr].translate(433050, 299)

# Download the hospital list and keep only the name and coordinate columns,
# renaming the coordinates to X / Y.
# NOTE(review): the separator has to match the character as it appears in the
# decoded response text - confirm against the downloaded file.
hospital_csv = requests.get("https://assets.nhs.uk/data/foi/Hospital.csv").text
dfN = pd.read_csv(io.StringIO(hospital_csv), sep="Č", engine="python")
dfN = dfN.loc[:, ["OrganisationName", "Latitude", "Longitude"]]
dfN = dfN.rename(columns={"Latitude": "Y", "Longitude": "X"})

# Points start out as EPSG:4326 and are reprojected to the CRS of the shapes.
gdf = gpd.GeoDataFrame(dfN, geometry=gpd.points_from_xy(dfN.X, dfN.Y))
gdf = gdf.set_crs("EPSG:4326").to_crs(epsg=32733)

# Plotting code is simplified: outlines first, then the points on the same axes.
ax = dg.plot(zorder=0, color='white', edgecolor='black', aspect='equal')
gdf.plot(ax=ax, zorder=1, alpha=1, c='r', markersize=10)
output
clearly within the defined CRS, plus one set of geometry has been transformed

Problem plotting geodataframe with Altair

I'm trying to create a map of the following GeoJSON: https://github.com/nychealth/coronavirus-data/blob/master/Geography-resources/UHF_resources/UHF42.geo.json
I load it with GeoPandas and can plot it fine with matplotlib:
But when I try to plot it with Altair I get a blue square:
I don't know why it's not working. I've tried plotting other GeoJSONs with Altair and they work fine. I have also checked the geodataframe's crs and it's WGS 84, which is the recommended one for Altair.
Here's my code:
import pandas as pd
import geopandas as gpd
import altair as alt  # was missing: alt.Chart below raised NameError

# Load the UHF42 neighbourhood shapes straight from GitHub.
gdf = gpd.read_file('https://raw.githubusercontent.com/nychealth/coronavirus-data/master/Geography-resources/UHF_resources/UHF42.geo.json')
print(gdf.crs)

# Matplotlib plot
gdf.plot()

# Altair plot
alt.Chart(gdf).mark_geoshape()
I'm new to working with maps in Altair, but here's a great answer: from a URL, you need to use alt.Data(url,format) to convert it to data.
Edit:
Since you want to use geopandas, I took the 7-day transmission data from the same GitHub repository, because the GeoDataFrame on its own has no data values to plot, and joined it to the shapes on 'id'.
import pandas as pd
import geopandas as gpd
import altair as alt

url_geojson = 'https://raw.githubusercontent.com/nychealth/coronavirus-data/master/Geography-resources/UHF_resources/UHF42.geo.json'
data_url = 'https://raw.githubusercontent.com/nychealth/coronavirus-data/master/latest/now-transmission-by-uhf42.csv'

# The shapes are still loaded with geopandas, although the chart below reads
# the GeoJSON from its URL.
gdf = gpd.read_file(url_geojson)

# Transmission table, with its columns renamed so that 'id' can serve as the
# lookup key.
df = pd.read_csv(data_url)
df.columns = ['id', 'neighborhood_name', 'case_rate_7day']

# Point Altair at the remote GeoJSON and tell it to use the 'features' property.
geojson_format = alt.DataFormat(property='features', type='json')
data_geojson_remote = alt.Data(url=url_geojson, format=geojson_format)

# For each shape, look up its 'case_rate_7day' in the table by 'id'.
case_rate_lookup = alt.LookupData(df, 'id', ['case_rate_7day'])

(
    alt.Chart(data_geojson_remote)
    .mark_geoshape()
    .encode(color="case_rate_7day:Q")
    .transform_lookup(lookup='id', from_=case_rate_lookup)
    .project(type='identity', reflectY=True)
)

Geopandas Coloring of concatenated shapefiles

I am trying to set a different color for map objects of a concatenated set of geodataframes (instead of a single color) using GEOPANDAS PYTHON.
I've tried conventional ways to set facecolor and cmap however it did not work for concatenated geodataframes.
I want to get different color shapes for gdf and boundaries (red and blue for example) instead of a single color which is what I'm currently getting.
here is the code:
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import matplotlib.pyplot as plt
import pandas
from shapely import wkt

# Convert an Excel file into a GeoDataFrame: the 'geometry' column holds WKT
# text, which is parsed into shapely geometries.
Shape = pd.read_excel('C:/Users/user/OneDrive/documents/Excel .xlsx')
print(Shape)
Shape['geometry'] = Shape['geometry'].apply(wkt.loads)
gdf = gpd.GeoDataFrame(Shape, geometry='geometry')
gdf.plot()

# Read another GeoDataFrame.
Boundaries = gpd.read_file('C:/Users/user/Desktop/Boundaries/eez_v10.shp')

# Concatenate the Boundaries and gdf GeoDataFrames. The result is named
# `combined` rather than `map` so that the builtin map() is not shadowed.
combined = pd.concat([gdf, Boundaries], sort=False)
ax = combined.plot(figsize=(20, 20))

# Restrict the view to the x range 47-60 and the y range 22-32.
plt.xlim([47, 60])
plt.ylim([22, 32])
plt.show()
You don't need to do concat, just plot both df to the same axis.
# Build the GeoDataFrame from the table, using its 'geometry' column.
gdf = gpd.GeoDataFrame(Shape, geometry='geometry')
# Read the boundaries shapefile.
Boundaries=gpd.read_file('C:/Users/user/Desktop/Boundaries/eez_v10.shp')
# First layer in blue; keep the axes that plot() returns.
ax = gdf.plot(color='blue')
# Second layer in red, drawn on those same axes so both appear in one figure.
Boundaries.plot(ax=ax, color='red')

Plotting data in geopandas

I am working on Kaggle Global Terrorism Database (https://www.kaggle.com/START-UMD/gtd/download) and I am trying to use geopandas for visualization.
I am also using countries dataset (http://www.naturalearthdata.com/downloads/110m-cultural-vectors/110m-admin-0-countries/)
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import Point

sns.set(style="ticks", context="poster")

# Load the countries dataset and leave out Antarctica before plotting.
countries = gpd.read_file("C:/Users/petr7/Desktop/ne_110m_admin_0_countries/")
not_antarctica = countries['NAME'] != "Antarctica"
countries = countries[not_antarctica]
countries.plot(figsize=(15, 15))
using code above I can easily plot entire Europe,
after that I import kaggle terrorist dataset and define it as geopandas dataframe
import pandas as pd  # was missing: pd.read_csv below raised NameError

DF = pd.read_csv("C:/Users/petr7/Desktop/gtd/globalterrorismdb_0718dist.csv", encoding='latin1')

# Longitude/latitude pairs are WGS 84 coordinates. The CRS is given as an
# authority string (the {"init": ...} dict form is deprecated) and is now
# actually passed to the GeoDataFrame instead of being left unused.
crs = "EPSG:4326"

# One point per row: x = longitude, y = latitude.
geometry = [Point(xy) for xy in zip(DF["longitude"], DF["latitude"])]
geo_DF = gpd.GeoDataFrame(DF, geometry=geometry, crs=crs)
geo_DF.head()
Up to this point everything works and the dataset can be inspected.
Now, when I try to plot it, it returns a nonsense plot:
geo_DF.plot()
I am pretty new to geopandas, so I wanted to ask what I am missing, and also how you would plot the entire Europe map (countries.plot) with the terrorist attacks on top of it.
PICTURE HERE
There is an error in the data. DF["longitude"].min() gives -86185896.0.
DF.loc[DF["longitude"] == DF["longitude"].min()]
As you can see if you run the snippet above, the row with the error is 17658.
It seems to be missing a decimal point. If you do
DF.at[17658, 'longitude'] = -86.185896
before generating geometry, it will work. Or you can drop the row if you are not sure what is exactly wrong with the data.

Categories

Resources