How to plot a graph from csv in python - python

I have the following CSV data and was wondering how to plot it as a graph in Python:
year,month,sales,expenditure
2018,jan,6226,3808
2018,feb,1521,3373
2018,mar,1842,3965
2018,apr,2051,1098
2018,may,1728,3046
2018,jun,2138,2258
2018,jul,7479,2084
2018,aug,4434,2799
2018,sep,3615,1649
2018,oct,5472,1116
2018,nov,7224,1431
2018,dec,1812,3532
this is my code so far
import matplotlib.pyplot as plt
import csv
# Month names for the x axis and expenditure figures for the y axis.
x = []
y = []

# newline='' is the mode the csv module expects for files it parses itself.
with open('sales.csv', 'r', newline='') as sales_csv:
    plots = csv.reader(sales_csv, delimiter=',')
    # Skip the "year,month,sales,expenditure" header so it is not plotted.
    next(plots, None)
    for row in plots:
        if not row:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        x.append(row[1])
        # csv yields strings; convert so the y axis is numeric rather than
        # a list of category labels in file order.
        y.append(float(row[3]))

plt.plot(x, y, label='Loaded from file!')
plt.xlabel('x')
plt.ylabel('y')
# The label passed to plot() only becomes visible once a legend is drawn.
plt.legend()
plt.show()

import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline # jupyter notebook
# Load data
data = pd.read_csv('your_csv_file.csv')
# Plot
plt.figure(figsize=(6.8, 4.2))
x = range(len(data['month']))
plt.plot(x, data['sales'])
plt.xticks(x, data['month'])
plt.xlabel('Month')
plt.ylabel('Sales')
plt.show()
I hope this will help you.

# Read the monthly figures; the 'month', 'sales' and 'expenditure' columns
# are used below.
df = pd.read_csv('filename.csv', sep=',')

# Month abbreviation -> month number, listed in calendar order.
months = {
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12,
}

# Translate the abbreviations once and reuse the result for both curves.
# Series.map is a plain dictionary lookup; Series.replace with a dict on a
# string column can emit a downcasting FutureWarning in recent pandas.
month_numbers = df['month'].map(months)

plt.plot(month_numbers, df['sales'], label='sales')
plt.plot(month_numbers, df['expenditure'], label='expenditure')

# Show the month abbreviations on the ticks instead of the numbers.
axes = plt.gca()
axes.set_xticks(list(months.values()))
axes.set_xticklabels(list(months.keys()))
plt.legend()
plt.show()

Related

plot data on Geopandas matplotlib

I want to plot x and y from a CSV file on a geopandas graph, but only the graph axes show up.
import fiona
import geopandas as gpd  # was missing although `gpd` is used throughout
import matplotlib.pyplot as plt
from mpl_toolkits.axisartist.axislines import Subplot
import pandas as pd

# Register KML as a readable/writable driver (the original set this twice).
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Read the whitespace-separated node table and keep a CSV copy of it.
dfN = pd.read_csv ("nodes.txt",delimiter ="\\s+")
dfN.to_csv ("nodes.csv", index=None)

# Load the KML layer and reproject it to EPSG:32733.
df = gpd.read_file("data.kml", driver="KML")
df=df.to_crs(epsg=32733)

# Points are built straight from the X/Y columns; no CRS is assigned to or
# converted for this frame anywhere in the script.
gdf = gpd.GeoDataFrame(dfN ,geometry=gpd.points_from_xy(dfN.X, dfN.Y))

# Shifted copy of the KML geometries.
dg=df.translate(433050,299)

fig,ax = plt.subplots()
ax.set_aspect('equal')
# Node coordinates on top (zorder=1), KML outlines underneath (zorder=0).
ax.scatter(gdf.X, gdf.Y , zorder=1, alpha= 1, c='r', s=10)
dg.plot(ax=ax,zorder=0,color='white', edgecolor='black',aspect= 'equal')
plt.show()
This is not a MWE, so I have sourced data that is publicly available and applied the same transformations...
The plotting code can be simplified, and then it works. Calling plot() on a GeoDataFrame that contains POINT objects produces a scatter plot.
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests, io
# data sourcing generated two geopandas data frames, let's replace to make MWE
df = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
df = df.to_crs(epsg=32733)

# Rows with a valid geometry whose ISO code is GBR. `&` is the element-wise
# boolean AND for masks (the original multiplied the two boolean Series).
is_valid_gbr = df["geometry"].is_valid & df["iso_a3"].eq("GBR")
dg = df.loc[is_valid_gbr].translate(433050, 299)

# Download the hospital list and keep only the name and the coordinates,
# renamed to the X/Y columns used for the point geometry.
# NOTE(review): the separator is reproduced exactly as given; confirm it
# matches the delimiter of the downloaded file after decoding.
hospital_csv = requests.get("https://assets.nhs.uk/data/foi/Hospital.csv").text
dfN = (
    pd.read_csv(io.StringIO(hospital_csv), sep="Č", engine="python")
    .loc[:, ["OrganisationName", "Latitude", "Longitude"]]
    .rename(columns={"Latitude": "Y", "Longitude": "X"})
)

# Declare the points as EPSG:4326 and reproject them to the CRS of the map.
gdf = gpd.GeoDataFrame(dfN, geometry=gpd.points_from_xy(dfN.X, dfN.Y))
gdf = gdf.set_crs("EPSG:4326").to_crs(epsg=32733)

# plotting code is simplified as:
ax = dg.plot(zorder=0, color='white', edgecolor='black', aspect='equal')
gdf.plot(ax=ax, zorder=1, alpha=1, c='r', markersize=10)
output
clearly within the defined CRS, plus one set of geometry has been transformed

how to create the outliers for dates in python

My goal is to create an anomaly graph for a stock, using its dates and closing prices. I tried to create outliers, but the lines do not end up where I want them. For example, I want them in 2019 and after 2020, where there are drastic changes. The x axis holds dates, and I don't know how to write the outliers for it.
I thought of writing y["2019"]=40, for example, but it doesn't do anything.
from pandas import read_csv
from matplotlib import pyplot
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
#from IPython.core.debugger import set_trace
#import data
# squeeze=True is no longer passed: pandas 2.0 removed that keyword from
# read_csv, and it had no effect here because two columns are read below.
AAPL = pd.read_csv('AAPL.csv', header=0)
x = pd.to_datetime(AAPL['Date'])
# Work on a copy so the injected outliers do not write through to AAPL.
y = AAPL['Close/Last'].copy()

# First figure: the series as loaded.
plt.figure(figsize=(15,7))
plt.plot(x, y, label="Close")
plt.title("AAPL")
plt.xlabel("Time")
plt.ylabel("Close")
plt.xticks(rotation=0)
plt.grid()
plt.show()

# Row label -> artificial value. These are row labels, not dates; to target
# dates instead, select with a mask built from x, e.g. y[x.dt.year == 2019].
outliers = {5: 5, 60: 55, 85: 1.4}
for row_label, value in outliers.items():
    y.loc[row_label] = value
n_outliers = len(outliers)

# Second figure: the series with the outliers, drawn as line plus markers.
plt.figure(figsize=(15,7))
plt.plot(x,y)
plt.scatter(x,y)
plt.grid()
plt.ylabel('Y')
plt.xlabel('x')
plt.show()
Thank you in advance

Scatter plot for multiple classes

I have 4 arrays of clusters that I need to plot in a scatter plot. The documentation shows a simple example of X and Y plotting. I've tried some tutorials but most of them work with datasets or dataframes, so I was unable to properly figure out how to plot my data the right way. In short, I'm trying to plot these 4 arrays as clusters:
[ 4.33976958 19.73690959 9.05452373 1.29938447 1.25155903
18.07181231
1.28825463 14.31906422 1.58 4.04618339 4.27626005 1.28062485
1.00079968 12.40582121 5.31973684 3.59755473 6.18436739 4.96310387
4.21620683]
[1.31590273 3.75281228 2.5215868 1.99959996 1.06376689 2.35703203
1.02449988 1.64012195 2.755431 1.35661343 6.20786598 1.26
1.18389189 2.10864886 1.81118746 1.4 1.6857046 1.23693169
1.18810774]
[2.45348731 8.16029411 3.09767655 1.9078784 1.23951603 8.81716508
1.08885261 3.22546121 3.85585269 1.34164079 5.62138773 1.74688294
1.20016666 1.96203975 2.9662097 1.63963411 1.69339895 1.27687118
1.34699666]
[2.48386795 4.32485838 2.03381415 2.3 3.48137904 4.8340873
3.52278299 1.41421356 1.41265707 1.26743836 3.90384426 2.44532206
1.36367151 3.3346664 2.16 0.97897906 1.68534863 1.6503333
1.47837749]
My current code:
import matplotlib.pyplot as plt
std_colomns1 = [4.33976958, 19.73690959, 9.05452373, 1.29938447, 1.25155903, 18.07181231, 1.28825463, 14.31906422, 1.58, 4.04618339, 4.27626005, 1.28062485, 1.00079968, 12.40582121, 5.31973684, 3.59755473, 6.18436739, 4.96310387, 4.21620683]
std_colomns2 = [1.31590273, 3.75281228, 2.5215868, 1.99959996, 1.06376689, 2.35703203, 1.02449988, 1.64012195, 2.755431, 1.35661343, 6.20786598, 1.26, 1.18389189, 2.10864886, 1.81118746, 1.4, 1.6857046, 1.23693169, 1.18810774]
std_colomns3 = [2.45348731, 8.16029411, 3.09767655, 1.9078784, 1.23951603, 8.81716508, 1.08885261, 3.22546121, 3.85585269, 1.34164079, 5.62138773, 1.74688294, 1.20016666, 1.96203975, 2.9662097, 1.63963411, 1.69339895, 1.27687118, 1.34699666]
std_colomns4 = [2.48386795, 4.32485838, 2.03381415, 2.3, 3.48137904, 4.8340873, 3.52278299, 1.41421356, 1.41265707, 1.26743836, 3.90384426, 2.44532206, 1.36367151, 3.3346664, 2.16, 0.97897906, 1.68534863, 1.6503333, 1.47837749]
# Scatter the first array against the fourth as small black markers.
x, y = std_colomns1, std_colomns4
marker_style = {'label': "Face clusters", 'color': 'k', 's': 10}
plt.scatter(x, y, **marker_style)

# Title, axis captions and legend, then display.
plt.title("Faces Features")
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
I wish to plot those 4 arrays in a 2D space and distinguish them either by class (color) or centroids plotted in the center of each cluster.
import matplotlib.pyplot as plt
import numpy as np
# Global look: large canvas with the ggplot theme.
plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.style.use('ggplot')

# The four arrays, in column order.
data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]

# Draw every array as unconnected dots over its index and report its median.
marker_options = {'marker': '.', 'linestyle': 'none', 'markersize': 7}
for number, values in enumerate(data, start=1):
    plt.plot(values, label=f'col_{number}', **marker_options)
    print(f'Median col_{number}: {np.median(values)}')

# One tick per index on x and per unit on y, then captions and legend.
plt.xticks(range(19))
plt.yticks(range(1, 21))
plt.xlabel('Index')
plt.ylabel('Values')
plt.legend()
plt.show()
Alternative:
I think a bar plot displays the data more clearly
I didn't add column names to the dataframe, but that can be done with the columns parameter.
column=['a', 'b', 'c', 'd'] as an example.
import pandas as pd
import matplotlib.pyplot as plt
# Global look: large canvas with the ggplot theme.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (16.0, 10.0)

# The four arrays, in column order.
data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]

# Transpose the lists into rows so each array becomes one DataFrame column.
rows = list(zip(*data))
df = pd.DataFrame(rows)

# Per-column median and mean.
stats = df.agg(['median', 'mean'])
print(stats)
0 1 2 3
median 4.276260 1.640122 1.907878 2.160000
mean 6.222733 1.993142 2.875864 2.425034
# Grouped bars: one group per row index, one bar per column.
df.plot(kind='bar')

# Horizontal tick labels, one y tick per unit, captions and legend.
plt.xticks(rotation=0)
plt.yticks(range(1, 21))
plt.xlabel('Index')
plt.ylabel('Values')
plt.legend()
plt.show()
Check this code:
import matplotlib.pyplot as plt
import numpy as np
std_colomns1 = [4.33976958,19.73690959,9.05452373,1.29938447,1.25155903,18.07181231,1.28825463,14.31906422,1.58,4.04618339,4.27626005,1.28062485,1.00079968,12.40582121,5.31973684,3.59755473,6.18436739,4.96310387,4.21620683]
std_colomns2 = [1.31590273,3.75281228,2.5215868,1.99959996,1.06376689,2.35703203,1.02449988,1.64012195,2.755431,1.35661343,6.20786598,1.26,1.18389189,2.10864886,1.81118746,1.4,1.6857046,1.23693169,1.18810774]
std_colomns3 = [2.45348731,8.16029411,3.09767655,1.9078784,1.23951603,8.81716508,1.08885261,3.22546121,3.85585269,1.34164079,5.62138773,1.74688294,1.20016666,1.96203975,2.9662097,1.63963411,1.69339895,1.27687118,1.34699666]
std_colomns4 = [2.48386795,4.32485838,2.03381415,2.3,3.48137904,4.8340873,3.52278299,1.41421356,1.41265707,1.26743836,3.90384426,2.44532206,1.36367151,3.3346664,2.16,0.97897906,1.68534863,1.6503333,1.47837749]
# Median of each array, used as the centre of the corresponding cluster.
center_colomn1 = np.median(np.array(std_colomns1))
center_colomn2 = np.median(np.array(std_colomns2))
center_colomn3 = np.median(np.array(std_colomns3))
center_colomn4 = np.median(np.array(std_colomns4))

# Print the centres, one per line; previously they were computed but never
# output.
for center in (center_colomn1, center_colomn2, center_colomn3, center_colomn4):
    print(center)

# One colour per array; each array is drawn against its element index.
clusters = (
    (std_colomns1, 'ko', "Face 1"),
    (std_colomns2, 'ro', "Face 2"),
    (std_colomns3, 'go', "Face 3"),
    (std_colomns4, 'bo', "Face 4"),
)
for values, fmt, label in clusters:
    plt.plot(values, fmt, label=label)

plt.xlabel('X')
plt.ylabel('Y')
plt.title("Faces Features")
plt.legend()
plt.show()
it will provide these centers:
4.27626005
1.64012195
1.9078784
2.16
and this scatter plot:
Here is another possibility, showing 4 boxplots:
import matplotlib.pyplot as plt
import numpy as np
std_colomns1 = [4.33976958,19.73690959,9.05452373,1.29938447,1.25155903,18.07181231,1.28825463,14.31906422,1.58,4.04618339,4.27626005,1.28062485,1.00079968,12.40582121,5.31973684,3.59755473,6.18436739,4.96310387,4.21620683]
std_colomns2 = [1.31590273,3.75281228,2.5215868,1.99959996,1.06376689,2.35703203,1.02449988,1.64012195,2.755431,1.35661343,6.20786598,1.26,1.18389189,2.10864886,1.81118746,1.4,1.6857046,1.23693169,1.18810774]
std_colomns3 = [2.45348731,8.16029411,3.09767655,1.9078784,1.23951603,8.81716508,1.08885261,3.22546121,3.85585269,1.34164079,5.62138773,1.74688294,1.20016666,1.96203975,2.9662097,1.63963411,1.69339895,1.27687118,1.34699666]
std_colomns4 = [2.48386795,4.32485838,2.03381415,2.3,3.48137904,4.8340873,3.52278299,1.41421356,1.41265707,1.26743836,3.90384426,2.44532206,1.36367151,3.3346664,2.16,0.97897906,1.68534863,1.6503333,1.47837749]
# One box per array at positions 0..3, each labelled with the array's name.
columns = {
    'std_colomns1': std_colomns1,
    'std_colomns2': std_colomns2,
    'std_colomns3': std_colomns3,
    'std_colomns4': std_colomns4,
}
positions = range(4)
plt.boxplot(list(columns.values()), positions=positions)
plt.xticks(ticks=positions, labels=list(columns.keys()))
plt.show()
Or, using seaborn (and pandas) you could draw a violin plot or a swarm plot:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# One DataFrame column per array; seaborn draws one violin per column.
columns = {
    'std_colomns1': std_colomns1,
    'std_colomns2': std_colomns2,
    'std_colomns3': std_colomns3,
    'std_colomns4': std_colomns4,
}
df = pd.DataFrame(columns)
sns.violinplot(data=df)
plt.show()
At the left sns.violinplot(data=df), at the right sns.swarmplot(data=df):

Matplotlib inline in Jupyter - how to control when the plot is shown?

I have a function that creates a figure and for some reason it is shown in Jupyter notebook twice, even though I didn't run show at all. I pass the fig and ax as an output of this function, and plan to show it only later.
I get confused between the plt, fig and ax functionalities and guess that the answer is hidden somewhere there.
Here is an anonymised version of my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
def plot_curve(dummydata):
    """Build a chart with two curves, guide lines, an arrow and a shaded patch.

    Everything is drawn through the returned Axes rather than through the
    pyplot "current axes", so the result does not depend on which figure
    happens to be active.

    Parameters
    ----------
    dummydata
        Object exposing ``x1``, ``y1``, ``x2`` and ``y2`` as attributes
        (for example a DataFrame with those columns).

    Returns
    -------
    tuple
        ``(fig, ax)``. The function never calls ``show()`` and leaves the
        figure open.
    """
    # 12 wide by 7 high, set at creation instead of resizing afterwards.
    fig, ax = plt.subplots(1, figsize=(12, 7))

    ax.plot(dummydata.x1, dummydata.y1, label='l1')  # curve 1
    ax.plot(dummydata.x2, dummydata.y2, label='l2')  # curve 2

    # NOTE(review): both axis captions read 'xlabel' in the original; kept.
    ax.set_xlabel('xlabel')
    ax.set_ylabel('xlabel')
    ax.set_yscale('linear')
    ax.set_ylim(0, 100)

    # Guide lines: one horizontal, two vertical.
    ax.axhline(1, color='k', linestyle=':', label='lab1')
    ax.axvline(2, color='r', linestyle='--', label='lab2')
    ax.axvline(3, color='g', linestyle='--', label='lab3')

    # Arrow starting at (1, 2) and pointing 3 units along x.
    ax.arrow(1, 2, 3, 0, head_width=0.1, head_length=0.01, fc='k', ec='k')

    # Translucent yellow area anchored at (1, 2), 2 wide and 3 high.
    rect = mpl.patches.Rectangle((1, 2), 2, 3, alpha=0.1, facecolor='yellow',
                                 linewidth=0, label='lab4')
    ax.add_patch(rect)

    ax.legend()
    ax.set_title('title')
    return fig, ax
and then call it with:
# Build three data sets whose ranges all start i later on each pass.
# NOTE(review): indentation was lost, so it is unclear whether the final
# plot_curve call belongs inside the loop -- confirm against the original.
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
What should I change to not show the figure by default, and show it only by my command?
Thanks
Try disabling matplotlib interactive mode using plt.ioff(). With interactive mode disabled the plots will only be shown with an explicit plt.show().
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Deactivate interactive mode
plt.ioff()
def plot_curve(dummydata):
# the same code as before
Then in another cell
for i in range(3):
    # x1 and x2 cover the same range; each y range starts 10 and 20 higher.
    shared_x = np.arange(1 + i, 100, 0.1)
    dummydata = pd.DataFrame({
        'x1': shared_x,
        'y1': np.arange(11 + i, 110, 0.1),
        'x2': shared_x,
        'y2': np.arange(21 + i, 120, 0.1),
    })

# I am assuming this should not be in the for loop
# The plot will NOT be shown because we are not in interactive mode
fig, ax = plot_curve(dummydata)  #get the chart
No plot will be shown yet.
Now in another cell
# Now ANY plot (figure) which was created and not shown yet will be finally shown
plt.show()
The plot is finally shown. Note that if you have created several plots all of them will be shown now.
Try this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib
With these imports you should not see the figure after plotting.
But you can display the figure by writing fig in an IPython cell:
# x1 and x2 cover the same range; the y ranges start at 11 and 21.
shared_x = np.arange(1, 100, 0.1)
dummydata = pd.DataFrame({
    'x1': shared_x,
    'y1': np.arange(11, 110, 0.1),
    'x2': shared_x,
    'y2': np.arange(21, 120, 0.1),
})
fig, ax = plot_curve(dummydata)  #get the chart
fig  # Will now plot the figure.
Is this the desired output?

how to plot a pie chart?

I have data like:
Machine_id Cycling Idle
81091001 41000000000 19000000000
81091001 40000000000 19000000000
81091001 41000000000 19000000000
81091001 41000000000 20000000000
81091001 41000000000 19000000000
Code for plotting Pie chart :
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Use the 'Paired' colour palette for every plot.
sns.set(palette='Paired')
df = pd.read_csv('sample1.csv')
# Machine_id becomes the row index, so df.index now holds machine ids.
df = df.set_index('Machine_id')
# One 5x5 inch pie per index entry.
for ind in df.index:
fig, ax = plt.subplots(1,1)
fig.set_size_inches(5,5)
# NOTE: `ind` is a Machine_id label here, while iloc expects a 0-based row
# position; this is the line that raises the IndexError quoted below.
df.iloc[ind].plot(kind='pie', ax=ax, autopct='%1.1f%%')
# Blank out the axis captions.
ax.set_ylabel('')
ax.set_xlabel('')
I am getting an error here like:
IndexError: single positional indexer is out-of-bounds
Then how can a pie chart of Cycling vs. Idle be drawn in pandas for each Machine_id?
Here is your problem solved:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(palette='Paired')
df = pd.read_csv('sample1.csv')

# One pie per CSV row. Machine_id stays a regular column so that rows are
# visited by position and the id can be used as the chart title.
for _, row in df.iterrows():
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    # Plot only the two durations; Machine_id is an identifier and must not
    # appear as a third slice.
    shares = row[['Cycling', 'Idle']].astype(float)
    shares.plot(kind='pie', ax=ax, autopct='%1.1f%%')
    ax.set_title(f"Machine ID: {row['Machine_id']}")
    ax.set_ylabel('')
    ax.set_xlabel('')

# Show every figure created above.
plt.show()  #plot/show final results
Another way is to draw an individual chart with the Cycling and Idle time per row: a pie chart for each line. (Maybe pie charts are not the best way to illustrate this, but anyway.)
Ref. https://matplotlib.org/api/pyplot_api.html
import csv as csv
import matplotlib.pyplot as plt
colors = ['r', 'g']

# newline='' is the mode the csv module expects for files it parses itself.
with open('sample1.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')

    # The header row supplies the two slice labels (columns 1 and 2).
    header = next(readCSV, None)
    activities = header[1:3] if header else []

    # Every remaining row becomes its own pie chart.
    for row in readCSV:
        if not row:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # csv yields strings; convert explicitly so a malformed value fails
        # here with a clear error instead of inside matplotlib.
        slices = [float(row[1]), float(row[2])]
        plt.title("Machine ID: " + row[0])  #title is here UPDATED
        plt.pie(slices, labels=activities, colors=colors, startangle=90, autopct='%.1f%%')
        plt.show()

Categories

Resources