how to plot a pie chart? - python

I have data like:
Machine_id Cycling Idle
81091001 41000000000 19000000000
81091001 40000000000 19000000000
81091001 41000000000 19000000000
81091001 41000000000 20000000000
81091001 41000000000 19000000000
Code for plotting Pie chart :
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Question snippet: tries to draw one pie chart per row of sample1.csv.
sns.set(palette='Paired')
df = pd.read_csv('sample1.csv')
# After this call the index holds the Machine_id values instead of 0..n-1.
df = df.set_index('Machine_id')
for ind in df.index:
fig, ax = plt.subplots(1,1)
fig.set_size_inches(5,5)
# .iloc is positional, but `ind` is an index label (a Machine_id) here; this
# is the positional lookup behind the reported
# "single positional indexer is out-of-bounds" IndexError.
df.iloc[ind].plot(kind='pie', ax=ax, autopct='%1.1f%%')
# Blank out the axis labels that the Series plot would otherwise show.
ax.set_ylabel('')
ax.set_xlabel('')
I am getting an error here:
IndexError: single positional indexer is out-of-bounds
How can I plot a pie chart of Cycling vs. Idle for each Machine_id with pandas?

Here is your problem solved:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Draw one Cycling-vs-Idle pie chart for every row of sample1.csv.
sns.set(palette='Paired')
df = pd.read_csv('sample1.csv')
# Keep the default 0..n-1 index (no set_index('Machine_id')): iterating the
# rows directly avoids mixing index labels with positional .iloc lookups.
for _, row in df.iterrows():
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    # Plot only the two time columns; leaving Machine_id in the Series would
    # draw it as a third slice of the pie.
    row[['Cycling', 'Idle']].plot(kind='pie', ax=ax, autopct='%1.1f%%')
    ax.set_title("Machine ID: " + str(row['Machine_id']))
    ax.set_ylabel('')
    ax.set_xlabel('')
# plt.show() blocks until the windows are closed, so the figures stay visible
# when this runs as a plain script.
plt.show()
Another way is to draw an individual chart of Cycling and Idle time per row, i.e. one pie chart for each line. (Pie charts may not be the best way to illustrate this, but anyway.)
Ref. https://matplotlib.org/api/pyplot_api.html
import csv as csv
import matplotlib.pyplot as plt
# Draw one pie chart per data row of sample1.csv using only the csv module.
colors = ['r', 'g']
with open('sample1.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    # The first row is the header; its 2nd and 3rd fields label the slices.
    header = next(reader, None)
    activities = header[1:3] if header else []
    for row in reader:
        if not row:
            continue  # csv.reader yields [] for blank lines
        # csv.reader yields strings; convert explicitly so the slice sizes
        # are numeric.
        slices = [float(row[1]), float(row[2])]
        plt.title("Machine ID: " + row[0])
        plt.pie(slices, labels=activities, colors=colors, startangle=90, autopct='%.1f%%')
        plt.show()

Related

Bar plot for multidimensional columns using pandas

I want to plot my dataframe (df) as a bar plot based on the time columns, where each bar represents the value_counts() of each letter that appears in the column.
Expected output
.
date,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00
2002-02-01,Y,Y,U,N,N
2002-02-02,U,N,N,N,N
2002-02-03,N,N,N,N,N
2002-02-04,N,N,N,N,N
2002-02-05,N,N,N,N,N
When I select individual time columns, I can do as below
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
# Question snippet: bar chart of the letter counts in a single hour column.
df = pd.read_csv('df.csv')
# NOTE: `df` is rebound here to the value_counts() Series of the 04:00:00 column.
df = df['04:00:00'].value_counts()
# Draw the counts as bars.
df.plot(kind='bar')
plt.show()
How can I plot all the columns on the same bar plot as shown on the expected output.
One possible solution is:
# One value_counts() Series per column other than "date".
hour_counts = {col: df[col].value_counts() for col in df.columns if col != "date"}
# Transpose so the original column names form the index, then draw grouped bars.
pd.DataFrame(hour_counts).T.plot.bar()
Here is an approach via seaborn's catplot:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from io import StringIO
# Inline copy of the sample data so the example is self-contained.
df_str = '''date,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00
2002-02-01,Y,Y,U,N,N
2002-02-02,U,N,N,N,N
2002-02-03,N,N,N,N,N
2002-02-04,N,N,N,N,N
2002-02-05,N,N,N,N,N'''
df = pd.read_csv(StringIO(df_str))
# Reshape to long form: one row per (hour, kind) observation.
df_long = df.set_index('date').melt(var_name='hour', value_name='kind')
# One count plot per hour column, up to five per row.
grid = sns.catplot(
    kind='count', data=df_long, x='kind', palette='mako',
    col='hour', col_wrap=5, height=3, aspect=0.5,
)
for axis in grid.axes.flat:
    hour_label = axis.get_title()
    axis.set_xlabel(hour_label)  # use the title as xlabel
    axis.grid(True, axis='y')
    axis.set_title('')
    if not axis.get_ylabel():
        # Subplots without a y label are the interior ones: remove their
        # left axis and tick marks.
        sns.despine(ax=axis, left=True)
        axis.tick_params(axis='y', size=0)
plt.tight_layout()
plt.show()

How to plot a graph from csv in python

I have the following data and was wondering how to plot it as a graph in Python:
year,month,sales,expenditure
2018,jan,6226,3808
2018,feb,1521,3373
2018,mar,1842,3965
2018,apr,2051,1098
2018,may,1728,3046
2018,jun,2138,2258
2018,jul,7479,2084
2018,aug,4434,2799
2018,sep,3615,1649
2018,oct,5472,1116
2018,nov,7224,1431
2018,dec,1812,3532
this is my code so far
import matplotlib.pyplot as plt
import csv
# Plot the expenditure column of sales.csv against the month names.
x = []
y = []
with open('sales.csv', 'r', newline='') as sales_csv:
    plots = csv.reader(sales_csv, delimiter=',')
    # Skip the "year,month,sales,expenditure" header so it is not plotted
    # as a data point.
    next(plots, None)
    for row in plots:
        if not row:
            continue  # ignore blank lines
        x.append(row[1])
        # csv.reader yields strings; convert so the y-axis is numeric rather
        # than a list of categories in file order.
        y.append(int(row[3]))
plt.plot(x, y, label='Loaded from file!')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()  # without a legend the label passed to plot() is never shown
plt.show()
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline # jupyter notebook
# Load data
data = pd.read_csv('your_csv_file.csv')
# Plot
plt.figure(figsize=(6.8, 4.2))
x = range(len(data['month']))
plt.plot(x, data['sales'])
plt.xticks(x, data['month'])
plt.xlabel('Month')
plt.ylabel('Sales')
plt.show()
I hope this will help you.
# Plot sales and expenditure per month on a shared, numerically ordered x-axis.
df = pd.read_csv('filename.csv', sep=',')
# Month abbreviation -> calendar position, used as the numeric x coordinate.
months = {
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12,
}
# Translate the month names once and reuse the result for both lines.
# NOTE: map() yields NaN for any month string missing from `months`.
month_numbers = df['month'].map(months)
plt.plot(month_numbers, df['sales'], label='sales')
plt.plot(month_numbers, df['expenditure'], label='expenditure')
# Put a tick at every month position and show the abbreviation as its label.
axis = plt.gca()
axis.set_xticks(list(months.values()))
axis.set_xticklabels(list(months.keys()))
plt.legend()
plt.show()

Matplotlib - Pie Chart from dataframe

I saw a sample from the internet to build a simple pie chart from Matplotlib but not sure how to embed it with my dataset (https://gist.github.com/datomnurdin/33961755b306bc67e4121052ae87cfbc).
from pandas import DataFrame
import matplotlib.pyplot as plt
import pandas as pd
# Question snippet: attempts a three-slice sentiment pie chart from data.csv.
df = pd.read_csv('data.csv')
# A bare comma-separated sequence like this builds a tuple of three labels.
my_labels = 'Positive','Neutral','Negative'
my_colors = ['lightblue','lightsteelblue','silver']
# NOTE(review): the whole DataFrame is passed as the pie values here while
# only three labels are supplied -- presumably the values need to be
# aggregated into three counts first; confirm against the data.
plt.pie(df, labels=my_labels, autopct='%1.1f%%', startangle=15, shadow = True, colors=my_colors)
plt.title('Sentiment Overview')
plt.axis('equal')
plt.show()
P.S: The dataset didn't contain any labels, only values.
I would do something like this:
# Map the numeric sentiment codes to readable labels.
my_labels = {1: 'Positive', 0: 'Neutral', -1: 'Negative'}
my_colors = ['lightblue', 'lightsteelblue', 'silver']
# Pair each label with its colour (same order as my_labels). value_counts()
# orders the slices by frequency, so passing my_colors positionally would
# colour by rank rather than by sentiment.
color_by_label = dict(zip(my_labels.values(), my_colors))
# count the values to plot pie chart
s = df.sentiment.map(my_labels).value_counts()
slice_colors = [color_by_label[label] for label in s.index]
plt.pie(s, labels=s.index, autopct='%1.1f%%', colors=slice_colors)
# Equivalent pandas spelling:
# s.plot.pie(autopct='%1.1f%%', colors=slice_colors)
plt.show()
Output:

Seaborn - Display Last Value / Label

I would like to create a plot that displays the last value on each line, but I cannot get the last value to show on the chart. Do you have an idea how to solve my problem? Thank you!
Input :
DataFrame
Plot
Output :
Cross = Last Value In columns
Output Final
# import eikon as ek
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import seaborn as sns; sns.set()
import pylab
from scipy import *
from pylab import *
# Path of the input CSV. The literal must stay on one line: a plain quoted
# string cannot span a line break.
fichier = "P:/GESTION_RPSE/GES - Gestion Epargne Salariale/Dvp Python/Florian/Absolute Performance/PLOT.csv"
df = pd.read_csv(fichier)
# Drop the 'Unnamed: 0' column from the loaded table.
df = df.drop(columns=['Unnamed: 0'])
plt.figure(figsize=(16, 10))
# Long form: one row per (Date, Company) pair with its Value.
df = df.melt('Date', var_name='Company', value_name='Value')
# Keep the Axes returned by lineplot() in `ax`; chaining .set_title() onto
# the call would bind `ax` to the title Text object instead.
ax = sns.lineplot(x="Date", y="Value", hue='Company', data=df)
ax.set_title("LaLaLa")
plt.show()
Do you just want to put an 'X' at the end of your lines?
If so, you could pass markevery=[-1] to the call to lineplot(). However, there are a few caveats:
You have to use style= instead of hue= otherwise, there are no markers drawn
Filled markers work better than unfilled markers (like "x"). You can just use markers=True to use the default markers, or pass a list markers=['s','d','o',etc...]
code:
# Example with seaborn's "fmri" dataset: style= varies the line per event and
# markevery=[-1] draws a marker only on the last point of each line.
fmri = sns.load_dataset("fmri")
fig, ax = plt.subplots()
line_options = dict(ci=None, markers=True, markevery=[-1], markersize=10)
ax = sns.lineplot(x="timepoint", y="signal", style="event", data=fmri, **line_options)

one of lines in matplotlib is not visualized

from datetime import datetime
import datetime as dt
import csv
import matplotlib.dates as mdates
import numpy as np
import matplotlib.pyplot as plt
# Question snippet: reads a date column and three value columns from a CSV
# and plots each value series against the dates.
ldays = list()
aesvalues = list()
tezvalues = list()
vdevalues = list()
with open('data/dg_year_50.csv', newline='') as csvfile:
# Sniff the first 1024 characters to decide whether a header row is present.
has_header = csv.Sniffer().has_header(csvfile.read(1024))
csvfile.seek(0) # Rewind.
datareader = csv.reader(csvfile, delimiter=',', quotechar='|')
if has_header:
next(datareader) # Skip header row.
for row in datareader:
date_cell = row[0]
aes_cell = row[1]
tez_cell = row[2]
vde_cell = row[3]
# Dates are written day-first, e.g. "31.12.2020 23:00:00".
datetime_object = datetime.strptime(date_cell, '%d.%m.%Y %H:%M:%S')
aesvalues.append(aes_cell)
tezvalues.append(tez_cell)
# NOTE(review): this appends tez_cell, not vde_cell, so vdevalues duplicates
# tezvalues and vde_cell is never used.
vdevalues.append(tez_cell)
ldays.append(datetime_object)
# Convert the datetime objects to matplotlib date numbers.
days = mdates.date2num(np.array(ldays))
# NOTE(review): sorted() orders each series by value, independently of
# `days`, which keeps file order -- confirm this is intended.
s_aesvalues = sorted(list(map(int, aesvalues)))
s_tezvalues = sorted(list(map(int, tezvalues)))
s_vdevalues = sorted(list(map(int, vdevalues)))
# These are the colors that will be used in the plot
color_sequence = ['#ffbb78', '#e377c2', '#1f77b4', '#ff7f0e', '#2ca02c',
'#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',
'#8c564b', '#c49c94', '#aec7e8', '#e377c2', '#f7b6d2',
'#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5']
# make a plot
fig, ax = plt.subplots(figsize=(14, 12))
# plot the data
majors = [s_tezvalues, s_vdevalues, s_aesvalues]
# One plot_date call per series; `rank` selects both the series and its color.
for rank, column in enumerate(majors):
line = plt.plot_date(x=days, y=majors[rank], lw=2.5, color=color_sequence[rank])
fig.suptitle('Дані по енергетиці', fontsize=18, ha='center')
plt.xlabel('Дата', fontsize=14, ha='center')
plt.ylabel('Кількість енергії (Вт)', fontsize=14, ha='center')
plt.show()
I have 'data/dg_year_50.csv' with 4 columns (Дата,АЕС,ТЕЦ,ВДЕ).
The first one stands for "Date" and the other 3 hold the values that need to be visualized as lines in the plot.
I do not understand why only 2 lines are drawn instead of 3.
The result visualization looks like this:
I found the mistake; it was in the following line:
vdevalues.append(tez_cell)
It should be changed like this:
vdevalues.append(vde_cell)
And yes, definitely, with pandas the solution is much easier:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Use the first CSV column as the index and parse it as dates. The file
# writes dates day-first (dd.mm.yyyy HH:MM:SS), so dayfirst=True prevents
# ambiguous values such as 01.02.2020 from being read as month-first.
# astype() then casts every remaining column to 64-bit integers.
data = pd.read_csv(
    'data/dg_year_50.csv', index_col=0, parse_dates=True, dayfirst=True
).astype(np.int64)
# DataFrame.plot() draws one line per column against the index.
data.plot()
plt.show()
Attaching the result figure:

Categories

Resources