I want to plot X and Y from a CSV file on a geopandas plot, but only the graph axes show up.
import fiona
import geopandas as gpd
import matplotlib.pyplot as plt
from mpl_toolkits.axisartist.axislines import Subplot
import pandas as pd

# Enable KML read/write support in Fiona; registering the driver once is enough.
fiona.drvsupport.supported_drivers["KML"] = "rw"

# Read the whitespace-delimited node table and keep a CSV copy of it.
dfN = pd.read_csv("nodes.txt", delimiter="\\s+")
dfN.to_csv("nodes.csv", index=None)

# Load the KML geometries and project them to EPSG:32733.
df = gpd.read_file("data.kml", driver="KML")
df = df.to_crs(epsg=32733)

# Build point geometries from the X/Y columns of the node table.
# NOTE(review): no CRS is assigned to these points, so nothing guarantees that
# they share the coordinate system of the reprojected KML layer -- confirm.
gdf = gpd.GeoDataFrame(dfN, geometry=gpd.points_from_xy(dfN.X, dfN.Y))

# Shift the KML geometries by a fixed offset.
dg = df.translate(433050, 299)

fig, ax = plt.subplots()
ax.set_aspect('equal')
# Nodes (zorder=1) are drawn on top of the outlines (zorder=0).
ax.scatter(gdf.X, gdf.Y, zorder=1, alpha=1, c='r', s=10)
dg.plot(ax=ax, zorder=0, color='white', edgecolor='black', aspect='equal')
plt.show()
This is not an MWE, so I have sourced data from publicly available sources and applied the same transformations...
The plotting code can be simplified, and then it works. Calling plot() on a GeoDataFrame that contains POINT objects produces a scatter plot.
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests, io
# Data sourcing: build the two GeoDataFrames from public data to make an MWE.
df = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
df = df.to_crs(epsg=32733)

# Keep only the valid GBR geometries, then apply the same shift as the question.
is_valid_gbr = df["geometry"].is_valid & df["iso_a3"].eq("GBR")
dg = df.loc[is_valid_gbr].translate(433050, 299)

# The NHS file uses the "¬" character as its field delimiter, not a comma.
dfN = (
    pd.read_csv(
        io.StringIO(requests.get("https://assets.nhs.uk/data/foi/Hospital.csv").text),
        sep="¬",
        engine="python",
    )
    .loc[:, ["OrganisationName", "Latitude", "Longitude"]]
    .rename(columns={"Latitude": "Y", "Longitude": "X"})
)

gdf = gpd.GeoDataFrame(dfN, geometry=gpd.points_from_xy(dfN.X, dfN.Y))
# The points are declared as EPSG:4326 (lon/lat) and reprojected to the same
# EPSG:32733 CRS as the polygons so both layers share one coordinate system.
gdf = gdf.set_crs("EPSG:4326").to_crs(epsg=32733)

# plotting code is simplified as:
ax = dg.plot(zorder=0, color='white', edgecolor='black', aspect='equal')
gdf.plot(ax=ax, zorder=1, alpha=1, c='r', markersize=10)
output
clearly within the defined CRS, plus one set of geometry has been transformed
I have 4 arrays of clusters that I need to plot in a scatter plot. The documentation shows a simple example of X and Y plotting. I've tried some tutorials but most of them work with datasets or dataframes, so I was unable to properly figure out how to plot my data the right way. In short, I'm trying to plot these 4 arrays as clusters:
[ 4.33976958 19.73690959 9.05452373 1.29938447 1.25155903
18.07181231
1.28825463 14.31906422 1.58 4.04618339 4.27626005 1.28062485
1.00079968 12.40582121 5.31973684 3.59755473 6.18436739 4.96310387
4.21620683]
[1.31590273 3.75281228 2.5215868 1.99959996 1.06376689 2.35703203
1.02449988 1.64012195 2.755431 1.35661343 6.20786598 1.26
1.18389189 2.10864886 1.81118746 1.4 1.6857046 1.23693169
1.18810774]
[2.45348731 8.16029411 3.09767655 1.9078784 1.23951603 8.81716508
1.08885261 3.22546121 3.85585269 1.34164079 5.62138773 1.74688294
1.20016666 1.96203975 2.9662097 1.63963411 1.69339895 1.27687118
1.34699666]
[2.48386795 4.32485838 2.03381415 2.3 3.48137904 4.8340873
3.52278299 1.41421356 1.41265707 1.26743836 3.90384426 2.44532206
1.36367151 3.3346664 2.16 0.97897906 1.68534863 1.6503333
1.47837749]
My current code:
import matplotlib.pyplot as plt
std_colomns1 = [4.33976958, 19.73690959, 9.05452373, 1.29938447, 1.25155903, 18.07181231, 1.28825463, 14.31906422, 1.58, 4.04618339, 4.27626005, 1.28062485, 1.00079968, 12.40582121, 5.31973684, 3.59755473, 6.18436739, 4.96310387, 4.21620683]
std_colomns2 = [1.31590273, 3.75281228, 2.5215868, 1.99959996, 1.06376689, 2.35703203, 1.02449988, 1.64012195, 2.755431, 1.35661343, 6.20786598, 1.26, 1.18389189, 2.10864886, 1.81118746, 1.4, 1.6857046, 1.23693169, 1.18810774]
std_colomns3 = [2.45348731, 8.16029411, 3.09767655, 1.9078784, 1.23951603, 8.81716508, 1.08885261, 3.22546121, 3.85585269, 1.34164079, 5.62138773, 1.74688294, 1.20016666, 1.96203975, 2.9662097, 1.63963411, 1.69339895, 1.27687118, 1.34699666]
std_colomns4 = [2.48386795, 4.32485838, 2.03381415, 2.3, 3.48137904, 4.8340873, 3.52278299, 1.41421356, 1.41265707, 1.26743836, 3.90384426, 2.44532206, 1.36367151, 3.3346664, 2.16, 0.97897906, 1.68534863, 1.6503333, 1.47837749]
# Scatter the first array against the fourth as a single black point cloud.
x, y = std_colomns1, std_colomns4
_, axes = plt.subplots()
axes.scatter(x, y, label="Face clusters", color='k', s=10)
axes.set_xlabel('X')
axes.set_ylabel('y')
axes.set_title("Faces Features")
axes.legend()
plt.show()
I wish to plot those 4 arrays in a 2D space and distinguish them either by class (color) or centroids plotted in the center of each cluster.
import matplotlib.pyplot as plt
import numpy as np
# plot style: large figure with the ggplot theme
plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.style.use('ggplot')

# gather the four data lists so they can be handled uniformly
data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]

# one marker-only series per list (x is the element index), plus its median
for idx, values in enumerate(data, start=1):
    series_label = f'col_{idx}'
    plt.plot(values, marker='.', linestyle='none', markersize=7, label=series_label)
    print(f'Median col_{idx}: {np.median(values)}')

# format plot
plt.xticks(range(19))
plt.yticks(range(1, 21))
plt.xlabel('Index')
plt.ylabel('Values')
plt.legend()
plt.show()
Alternative:
I think a bar plot displays the data more clearly
I didn't add column names to the dataframe, but that can be done with the columns parameter of pd.DataFrame.
For example: columns=['a', 'b', 'c', 'd'].
import pandas as pd
import matplotlib.pyplot as plt
# plot style: large figure with the ggplot theme
plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.style.use('ggplot')
# create list of data lists
data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]
# create dataframe: zip(*data) transposes the lists so each list becomes one column
df = pd.DataFrame(list(zip(*data)))
# compute and print the median and mean of every column
stats = df.agg(['median', 'mean'])
print(stats)
0 1 2 3
median 4.276260 1.640122 1.907878 2.160000
mean 6.222733 1.993142 2.875864 2.425034
# draw grouped bars: one group per row index, one bar per column
df.plot(kind='bar')
# axis cosmetics: upright x tick labels, integer y ticks from 1 to 20
plt.xticks(rotation=0)
plt.yticks(range(1, 21))
plt.xlabel('Index')
plt.ylabel('Values')
plt.legend()
plt.show()
Check this code:
import matplotlib.pyplot as plt
import numpy as np
std_colomns1 = [4.33976958,19.73690959,9.05452373,1.29938447,1.25155903,18.07181231,1.28825463,14.31906422,1.58,4.04618339,4.27626005,1.28062485,1.00079968,12.40582121,5.31973684,3.59755473,6.18436739,4.96310387,4.21620683]
std_colomns2 = [1.31590273,3.75281228,2.5215868,1.99959996,1.06376689,2.35703203,1.02449988,1.64012195,2.755431,1.35661343,6.20786598,1.26,1.18389189,2.10864886,1.81118746,1.4,1.6857046,1.23693169,1.18810774]
std_colomns3 = [2.45348731,8.16029411,3.09767655,1.9078784,1.23951603,8.81716508,1.08885261,3.22546121,3.85585269,1.34164079,5.62138773,1.74688294,1.20016666,1.96203975,2.9662097,1.63963411,1.69339895,1.27687118,1.34699666]
std_colomns4 = [2.48386795,4.32485838,2.03381415,2.3,3.48137904,4.8340873,3.52278299,1.41421356,1.41265707,1.26743836,3.90384426,2.44532206,1.36367151,3.3346664,2.16,0.97897906,1.68534863,1.6503333,1.47837749]
# Median of each array, used as the "center" of that cluster.
center_colomn1 = np.median(np.array(std_colomns1))
center_colomn2 = np.median(np.array(std_colomns2))
center_colomn3 = np.median(np.array(std_colomns3))
center_colomn4 = np.median(np.array(std_colomns4))

# Print the centers so they are reported together with the plot.
for center in (center_colomn1, center_colomn2, center_colomn3, center_colomn4):
    print(center)

# Each array is plotted against its element index, one color per array.
plt.plot(std_colomns1, 'ko', label="Face 1")
plt.plot(std_colomns2, 'ro', label="Face 2")
plt.plot(std_colomns3, 'go', label="Face 3")
plt.plot(std_colomns4, 'bo', label="Face 4")
plt.xlabel('X')
plt.ylabel('Y')
plt.title("Faces Features")
plt.legend()
plt.show()
it will provide these centers:
4.27626005
1.64012195
1.9078784
2.16
and this scatter plot:
Here is another possibility, showing 4 boxplots:
import matplotlib.pyplot as plt
import numpy as np
std_colomns1 = [4.33976958,19.73690959,9.05452373,1.29938447,1.25155903,18.07181231,1.28825463,14.31906422,1.58,4.04618339,4.27626005,1.28062485,1.00079968,12.40582121,5.31973684,3.59755473,6.18436739,4.96310387,4.21620683]
std_colomns2 = [1.31590273,3.75281228,2.5215868,1.99959996,1.06376689,2.35703203,1.02449988,1.64012195,2.755431,1.35661343,6.20786598,1.26,1.18389189,2.10864886,1.81118746,1.4,1.6857046,1.23693169,1.18810774]
std_colomns3 = [2.45348731,8.16029411,3.09767655,1.9078784,1.23951603,8.81716508,1.08885261,3.22546121,3.85585269,1.34164079,5.62138773,1.74688294,1.20016666,1.96203975,2.9662097,1.63963411,1.69339895,1.27687118,1.34699666]
std_colomns4 = [2.48386795,4.32485838,2.03381415,2.3,3.48137904,4.8340873,3.52278299,1.41421356,1.41265707,1.26743836,3.90384426,2.44532206,1.36367151,3.3346664,2.16,0.97897906,1.68534863,1.6503333,1.47837749]
# One box per array, placed at x positions 0..3 and labelled with the array name.
box_labels = ['std_colomns1', 'std_colomns2', 'std_colomns3', 'std_colomns4']
box_data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]
box_positions = range(len(box_data))
plt.boxplot(box_data, positions=box_positions)
plt.xticks(ticks=box_positions, labels=box_labels)
plt.show()
Or, using seaborn (and pandas) you could draw a violin plot or a swarm plot:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Put each array in its own named column so seaborn draws one violin per array.
named_columns = {
    'std_colomns1': std_colomns1,
    'std_colomns2': std_colomns2,
    'std_colomns3': std_colomns3,
    'std_colomns4': std_colomns4,
}
df = pd.DataFrame(named_columns)
sns.violinplot(data=df)
plt.show()
On the left: sns.violinplot(data=df); on the right: sns.swarmplot(data=df):
I have a function that creates a figure, and for some reason the figure is shown in the Jupyter notebook twice, even though I never call show. The function returns fig and ax, and I plan to show the figure only later.
I get confused between the plt, fig and ax interfaces and guess that the answer is hidden somewhere there.
Here is an anonymised version of my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
def plot_curve(dummydata):
    """Build a chart with two curves, guide lines, an arrow and a shaded patch.

    Args:
        dummydata: object exposing ``x1``, ``y1``, ``x2`` and ``y2`` attributes
            (e.g. DataFrame columns) with the coordinates of the two curves.

    Returns:
        The ``(fig, ax)`` pair: the created figure and its single axes.
    """
    # Everything is drawn through the Axes object instead of mixing it with
    # the pyplot state machine, so it is explicit which axes is modified.
    fig, ax = plt.subplots(1, figsize=(12, 7))  # width 12, height 7

    ax.plot(dummydata.x1, dummydata.y1, label='l1')  # curve 1
    ax.plot(dummydata.x2, dummydata.y2, label='l2')  # curve 2

    ax.set_xlabel('xlabel')  # labels
    ax.set_ylabel('ylabel')
    ax.set_yscale('linear')  # scale and bounds
    ax.set_ylim(0, 100)

    ax.axhline(1, color='k', linestyle=':', label='lab1')  # guideline - horizontal
    ax.axvline(2, color='r', linestyle='--', label='lab2')  # guideline - vertical
    ax.axvline(3, color='g', linestyle='--', label='lab3')  # guideline - vertical
    ax.arrow(1, 2, 3, 0, head_width=0.1, head_length=0.01, fc='k', ec='k')  # arrow

    # yellow area patch
    rect = mpl.patches.Rectangle((1, 2), 2, 3, alpha=0.1, facecolor='yellow',
                                 linewidth=0, label='lab4')
    ax.add_patch(rect)

    # The legend is created last so it picks up every labelled artist above.
    ax.legend()
    ax.set_title('title')
    return fig, ax
and then call it with:
# Build dummy datasets whose ranges start i units later on iteration i,
# then pass the data to plot_curve to obtain the figure and axes.
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
What should I change to not show the figure by default, and show it only by my command?
Thanks
Try disabling matplotlib interactive mode using plt.ioff(). With interactive mode disabled the plots will only be shown with an explicit plt.show().
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Deactivate interactive mode so figures are only shown on an explicit plt.show()
plt.ioff()
def plot_curve(dummydata):
# the same code as before
Then in another cell
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
# I am assuming this should not be in the for loop
# The plot will NOT be shown because we are not in interactive mode
fig, ax = plot_curve(dummydata) #get the chart
No plot will be shown yet.
Now in another cell
# Now ANY plot (figure) which was created and not shown yet will be finally shown
plt.show()
The plot is finally shown. Note that if you have created several plots all of them will be shown now.
Try this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib
With these imports you should not see the figure after plotting.
But you can display the figure by writing fig in an IPython cell:
# Two curves over the same x grid; y2 starts 10 units above y1.
curve_columns = {
    'x1': np.arange(1, 100, 0.1),
    'y1': np.arange(11, 110, 0.1),
    'x2': np.arange(1, 100, 0.1),
    'y2': np.arange(21, 120, 0.1),
}
dummydata = pd.DataFrame(curve_columns)
fig, ax = plot_curve(dummydata)  # get the chart
fig  # Will now plot the figure.
Is this the desired output?