Error when trying to make a GeoDataFrame of network nodes - python

I need to make a GeoDataFrame of some nodes on a road network (which was extracted from OpenStreetMap using OSMnx). In the code below, graph_proj is the graph whose nodes I'm working with, the points are start_point and end_point:
import osmnx as ox
import geopandas as gpd
nodes_proj, edges_proj = ox.graph_to_gdfs(graph_proj, nodes=True, edges=True)
# Finding the nodes on the graph nearest to the points
start_node = ox.nearest_nodes(graph_proj, start_point.geometry.x, start_point.geometry.y, return_dist=False)
end_node = ox.nearest_nodes(graph_proj, end_point.geometry.x, end_point.geometry.y, return_dist=False)
start_closest = nodes_proj.loc[start_node]
end_closest = nodes_proj.loc[end_node]
# Create a GeoDataBase from the start and end nodes
od_nodes = gpd.GeoDataFrame([start_closest, end_closest], geometry='geometry', crs=nodes_proj.crs)
During the last step ("# Create a GeoDataBase...", etc.), an error is thrown. Apparently, it has something to do with a 3-dimensional array being passed to the GeoDataFrame function. Am I right that the way I pass in the locations ([start_closest, end_closest]) results in a 3D array? (The error message reads, 'Must pass 2-d input. shape=(2, 1, 7)'.) I tried transposing this array, but then GeoPandas could not locate the 'geometry' column. How do I go about passing in this argument in a way that it will be accepted?

OK, so I was able to get around this by writing each node to its own GeoDataFrame and then merging the two GeoDataFrames, like this:
od_nodes1 = gpd.GeoDataFrame(start_closest, geometry='geometry', crs=nodes_proj.crs)
od_nodes2 = gpd.GeoDataFrame(end_closest, geometry='geometry', crs=nodes_proj.crs)
od_nodes = od_nodes1.append(od_nodes2)
Surely, though, there must be a more elegant way of writing more than one feature into a GeoDataFrame?

Related

Adding icon for node shape using networkx and pyvis (python)

I am new to networkx and pyvis and am making a small network to display the different shapes possible for each node. I managed to use all the shapes except for icons. I searched a lot but couldn't find anything useful, and the examples available did not work with my code. I would appreciate it if anyone could help me figure this out.
Here is my code:
import networkx as nx
import xlrd #used to access the external excel file
import pyvis
from pyvis.network import Network
import pandas as pd
import textwrap
df = pd.read_csv("Visualizer\Data\EECS2311\shapes.csv",encoding='cp1252')
G=nx.Graph()
nodes = []
p1 = df['person1']
p2 = df['person2']
p3 = df['person3']
p4 = df['person4']
p5 = df['person5']
p6 = df['person6']
p7 = df['person7']
p8 = df['person8']
p9 = df['person9']
p10 = df['person10']
p11 = df['person11']
p12 = df['person12']
p13 = df['person13']
p14 = df['person14']
data = zip(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14)
for e in data:
person1 = e[0]
G.add_node(person1, shape="ellipse")
person2 = e[1]
G.add_node(person2, shape="circle")
person3 = e[2]
G.add_node(person3, shape="database")
person4 = e[3]
G.add_node(person4, shape="box")
person5 = e[4]
G.add_node(person5, shape="text")
person6 = e[5]
G.add_node(person6, shape="image", image="https://image.shutterstock.com/image-vector/hello-funny-person-simple-cartoon-260nw-1311467669.jpg")
person7 = e[6]
G.add_node(person7, shape="circularImage", image="https://image.shutterstock.com/image-vector/hello-funny-person-simple-cartoon-260nw-1311467669.jpg")
person8 = e[7]
G.add_node(person8, shape="diamond")
person9 = e[8]
G.add_node(person9, shape="dot")
person10 = e[9]
G.add_node(person10, shape="star")
person11 = e[10]
G.add_node(person11, shape="triangle")
person12 = e[11]
G.add_node(person12, shape="triangleDown")
person13 = e[12]
G.add_node(person13, shape="square")
person14 = e[13]
G.add_node(person14, shape="icon", icon="https://image.shutterstock.com/image-vector/hello-funny-person-simple-cartoon-260nw-1311467669.jpg")
nodes.append((person1, person2))
nodes.append((person2, person3))
nodes.append((person3, person4))
nodes.append((person4, person5))
nodes.append((person5, person6))
nodes.append((person6, person7))
nodes.append((person7, person8))
nodes.append((person8, person9))
nodes.append((person9, person10))
nodes.append((person10, person11))
nodes.append((person11, person12))
nodes.append((person12, person13))
nodes.append((person13, person14))
options = {
"layout": {
"hierarchical": {
"enabled": True,
"levelSeparation": 300,
"nodeSpacing": 165,
"treeSpacing": 305,
"direction": "LR"
}
},
"physics": {
"hierarchicalRepulsion": {
"centralGravity": 0,
"nodeDistance": 110,
},
"minVelocity": 0.75,
"solver": "hierarchicalRepulsion"
}
}
G.add_edges_from(nodes)
G2 = Network(height="800px", width="100%", bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True, directed=True)
G2.from_nx(G)
G2.options = options
neighbor_map = G2.get_adj_list()
for node in G2.nodes:
node["value"] = len(neighbor_map[node["id"]])
#to wrap long labels:
id_string = node["label"]
width = 20
wrapped_strings = textwrap.wrap(id_string, width)
wrapped_id ="";
for line in wrapped_strings:
wrapped_id = textwrap.fill(id_string, width)
node["label"] = wrapped_id
#G2.show_buttons()
G2.show("shapes.html")
and here is my .csv file:
person1,person2,person3,person4,person5,person6,person7,person8,person9,person10,person11,person12,person13,person14
ellipse, circle, database,box,text,image, circularImage,diamond,dot,star,triangle,triangleDown,square,icon
"ellipse shape displays label inside the shape. To use this simply set shape =""ellipse""","circle shape displays label inside the shape. To use this simply set shape =""circle""","database shape displays label inside the shape. To use this simply set shape =""database""","box shape displays label inside the shape. To use this simply set shape =""box""","only displays text. To use this simply set shape =""text""","image displays a image with label outside. To use set shape=""image"", image=""url"". Note: requires link to image","circularImage displays a circular image with label outside. To use set shape="" circularImage"", image=""url"". Note: requires link to image","diamond shape displays label outside the shape. To use this simply set shape =""diamond""","dot shape displays label outside the shape. To use this simply set shape =""dot""","star shape displays label outside the shape. To use this simply set shape =""star""","triangle shape displays label outside the shape. To use this simply set shape =""triangle""","triangleDown shape displays label outside the shape. To use this simply set shape =""triangleDown""","square shape displays label outside the shape. To use this simply set shape =""square""","icon displays a circular image with label outside. To use set shape="" icon"", image=""url"". Note: requires link to image"
ps. forgive the heading for the csv file :)
This doesn't answer your question, I just want to help you shrink your code so you can debug it more easily.
Use the DataFrame directly
You're doing a ton of extra work to get at your data, assigning to temporary variables, then zipping them together. They are already together! To loop over the things in row 0 of the DataFrame try this:
for item in df.loc[0]:
print(item)
There's also a function in NetworkX, nx.from_pandas_dataframe(), that will create a network directly from a DataFrame... but you can only add edge attributes with that, not node attributes.
Then again...
Maybe don't even use a DataFrame
Pandas is a convenient way to load CSVs, but your data isn't all that well-suited to this data structure. A dict would be better. It's a kind of mapping, in your case from node names to a node attribute.
Fortunately, there's a fairly easy way to get a dict from your DataFrame:
df.T.to_dict()[0]
This 'transposes' the DataFrame (turns the rows into columns) then turns the result into a dict. Then the [0] gives you the only column in the data.
This way you can avoid needing to repeat all your data (the mapping from person number to symbol) in your code.
Then again...
Maybe don't even use a dictionary
Any time you are mapping from a continuous set of numbers to some other objects (like person1, person2, etc) you might as well just use a list. Everything is indexed by position, which is basically what you have already. So you could just store your data like ['ellipse', 'circle', 'dot'] etc.
Then again...
Maybe don't even store the data
It turns out all these symbols are already defined in matplotlib. Have a look at:
from matplotlib.lines import Line2D
Line2D.markers
It's a dictionary of all the markers! If you want to try all of them, then you can just use these, no need to define anything.
Use zip to add your edges
zip is great for combining two or more lists, or combining a list with itself but with some offset. You can step over the nodes and make edges like so:
nodes = list(G.nodes)
for u, v in zip(nodes, nodes[1:]):
G.add_edge(u, v)
General advice
Try to avoid using tools like pandas just to load data. In my experience, it often introduces a bunch of complexity you don't need.
Get something small and simple working before making it more complex, e.g. with URLs of images.
You can store dictionaries easily as JSON text files. Check out the json module.
Again, sorry for not directly answering your question. But I feel like all this should help get your code down to something that is much easier to debug.

How to cut vertices and faces connected to points lower than some value in pyvista?

So when one exports r.out.vtk from Grass GIS we get a bad surface with -99999 points instead of nulls:
I want to remove them, yet a simple clip is not enough:
pd = pv.read('./pid1.vtk')
pd = pd.clip((0,1,1), invert=False).extract_surface()
p.add_mesh(pd ) #add atoms to scene
p.show()
resulting in:
So I wonder how to keep only the top (> -999) points and their connected vertices, in order to get only the top surface (it is actually curved, not flat), using pyvista?
link to example .vtk
There is an easy way to do this and there isn't...
You could use pyvista's threshold filter with all_scalars=True as long as you have only one set of scalars:
import pyvista as pv
pd = pv.read('./pid1.vtk')
pd = pd.threshold(-999, all_scalars=True)
plotter = pv.Plotter()
plotter.add_mesh(pd) #add atoms to scene
plotter.show()
Since all_scalars starts filtering based on every scalar array, this will only do what you'd expect if there are no other scalars. Furthermore, unfortunately there seems to be a bug in pyvista (expected to be fixed in version 0.32.0) which makes the use of this keyword impossible.
What you can do in the meantime (if you don't want to use pyvista's main branch before the fix is released) is to threshold the data yourself using numpy:
import pyvista as pv
pd = pv.read('./pid1.vtk')
scalars = pd.active_scalars
keep_inds = (scalars > -999).nonzero()[0]
pd = pd.extract_points(keep_inds, adjacent_cells=False)
plotter = pv.Plotter()
plotter.add_mesh(pd) #add atoms to scene
plotter.show()
The main point of both all_scalars (in threshold) and adjacent_cells (in extract_points) is to only keep cells where every point satisfies the condition.
With both of the above I get the following figure using your data:

Matching Geopandas Dissolve with ArcGIS Dissolve on set of Polylines

I am trying to replicate the output from ArcGIS Dissolve on a set of stream flow lines using geopandas. Essentially the df/stream_0 layer is a stream network extracted from a DEM using pysheds. That output has some randomly overlapping reaches which I am trying to remove. Running Dissolve through ArcGIS Pro does this well, but I would prefer not to have to deal with ArcGIS/ArcPy to resolve this.
Stream Network
ArcGIS Dissolve Setting
#streams_0.geojson = df.shp = streams_0.shp from Dissolve Setting image
#~~~~~~~~~~~~~~~~~~~~
import geopandas as gpd
df = gpd.read_file('streams_0.geojson')
df.head()
Out[3]:
geometry
0 LINESTRING (400017.781 3000019.250, 400017.781...
1 LINESTRING (400027.781 3000039.250, 400027.781...
2 LINESTRING (400027.781 3000039.250, 400037.781...
3 LINESTRING (400027.781 3000029.250, 400037.781...
4 LINESTRING (400047.781 3000079.250, 400047.781...
I have tried using gpd.dissolve() using a filler column with no luck.
df['dissolvefield'] = 1;
df2 = df.dissolve(by='dissolvefield')
df3 = gpd.geoseries.GeoSeries([geom for geom in df2.geometry.iloc[0].geoms])
Similarly tried to use unary_union in shapely with no luck.
import fiona
shape1 = fiona.open("df.shp")
first = shape1.next()
from shapely.geometry import shape
shp_geom = shape(first['geometry'])
from shapely.ops import unary_union
shape2 = unary_union(shp_geom)
Seems like an easy solution, wondering why I am running into so many issues. My GeoDataFrame only consists of the line geometry, so there is not necessarily another attribute I can aggregate based on. I am essentially just trying to keep the geometry of the lines unchanged, but remove any overlapping features that may be there. I don't want to split the lines, and I don't want to aggregate them into multipart features.
I use unary_union, but there is no need to read the data as a shapely feature.
After reading the file and putting it in a GeoDataFrame (you can do it straight from the *.shp file):
df = gpd.read_file('streams_0.geojson')
Try to plot it to see if the output is correct:
df.plot()
Then use unary_union like this, and plot again:
shape2 = df.unary_union
shape2
And the last step (if necessary) is to convert the result back to a GeoDataFrame:
# transform Geometry Collection to shapely multilinestring
segments = [feature for feature in shape2]
# set back as geopandas
gdf = gpd.GeoDataFrame(list(range(len(segments))), geometry=segments,
crs=crs)
gdf .columns = ['index', 'geometry']

Add Points to Geopandas Object

My objective is to create some kind of GeoJSON object and add several Point objects to it with a for loop.
What am I missing here?
from geojson import Feature
import pandas as pd
import geopandas as gpd
# Point((-115.81, 37.24))
# Create a Dataframe with **Schools Centroids**
myManipulationObj = pd.DataFrame
for schoolNumber in listOfResults:
myManipulationObj.append(centroids[schoolNumber])
# GDF should be a Beautiful collection (geoDataFrame) of Points
gdf = gpd.GeoDataFrame(myManipulationObj, geometry='Coordinates')
After that, I want to use geopandas write() to create a .geojson file.
Any Help?
(solved)
I solved that problem by:
creating a python list (listOfPoints),
Using the POINT object as geometry parameter to the FEATURE object,
Using the List of Features (with Points) to create a FeatureCollection
Leaving this here for future reference in case someone needs it :D
# Used to get the Index of Schools from the M Model Optimized
listOfResults = []
for e in range(numSchools):
tempObj = m.getVarByName(str(e))
# If This School is on the Results Optimized
if(tempObj.x != 0):
listOfResults.append(int(tempObj.varName))
# Select, from the List Of Results, A set of Centroid Points
listOfPoints = []
for schoolNumber in listOfResults:
# Attention to the Feature(geometry) from geopandas
listOfPoints.append(Feature(geometry=centroids[schoolNumber]))
# Creating a FeatureCollection with the Features (Points) manipulated above
resultCentroids = FeatureCollection(listOfPoints)

raise ValueError when producing a shape file with geopandas

I have just recently started to work with shapefiles. I have a shapefile in which each object is a polygon. I want to produce a new shapefile in which the geometry of each polygon is replaced by its centroid. Here is my code.
import geopandas as gp
from shapely.wkt import loads as load_wkt
fname = '../data_raw/bg501c_starazagora.shp'
outfile = 'try.shp'
shp = gp.GeoDataFrame.from_file(fname)
centroids = list()
index = list()
df = gp.GeoDataFrame()
for i,r in shp.iterrows():
index.append(i)
centroid = load_wkt(str(r['geometry'])).centroid.wkt
centroids.append(centroid)
df['geometry'] = centroids
df['INDEX'] = index
gp.GeoDataFrame.to_file(df,outfile)
When I run the script I end up with raise ValueError("Geometry column cannot contain mutiple " ValueError: Geometry column cannot contain mutiple geometry types when writing to file.
I cannot understand what is wrong. Any help?
The issue is that you're populating the geometry field with a string representation of the geometry rather than a shapely geometry object.
No need to convert to wkt. Your loop could instead be:
for i,r in shp.iterrows():
index.append(i)
centroid = r['geometry'].centroid
centroids.append(centroid)
However, there's no need to loop through the geodataframe at all. You could create a new one of shapefile centroids as follows:
df=gp.GeoDataFrame(data=shp, geometry=shp['geometry'].centroid)
df.to_file(outfile)

Categories

Resources