Plot Pandas DataFrame and plot side by side - python

There are options to have plots side by side, likewise for pandas dataframes. Is there a way to plot a pandas dataframe and a plot side by side?
This is the code I have so far, but the dataframe is distorted.
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import table
# Sample data: one row per person, one column per month.
d = {
    'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'jan': [4, 24, 31, 2, 3],
    'feb': [25, 94, 57, 62, 70],
    'march': [5, 43, 23, 23, 51],
}
df = pd.DataFrame(d)
# Row-wise sum of every column after the first ('name').
df['total'] = df.iloc[:, 1:].sum(axis=1)

plt.figure(figsize=(16, 8))

# Left half: render the dataframe as a table on an axes with the axis
# decorations switched off.
ax1 = plt.subplot(1, 2, 1)
ax1.axis('off')
tbl = table(ax1, df, loc='center')
# Use a fixed 14pt font instead of letting the table pick a size.
tbl.auto_set_font_size(False)
tbl.set_fontsize(14)

# Right half: pie chart of the per-person totals.
ax2 = plt.subplot(1, 2, 2, aspect='equal')
df.plot.pie(
    y='total',
    ax=ax2,
    labels=df['name'],
    autopct='%1.1f%%',
    startangle=90,
    shadow=False,
    legend=False,
    fontsize=14,
)
plt.show()

It's pretty simple to do with plotly and make_subplots()
define a figure with appropriate specs argument
add_trace() which is tabular data from your data frame
add_trace() which is pie chart from your data frame
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Sample data: one row per person, one column per month.
d = {
    'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'jan': [4, 24, 31, 2, 3],
    'feb': [25, 94, 57, 62, 70],
    'march': [5, 43, 23, 23, 51],
}
df = pd.DataFrame(d)
# Row-wise sum of every column after the first ('name').
df['total'] = df.iloc[:, 1:].sum(axis=1)

# One row, two columns: a table cell on the left and a pie cell on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "table"}, {"type": "pie"}]])

# The frame is transposed so the cell values are passed column by column.
table_trace = go.Table(header={"values": df.columns}, cells={"values": df.T.values})
fig = fig.add_trace(table_trace, row=1, col=1)

# Build the pie with plotly express and reuse its single trace in the subplot.
pie_trace = px.pie(df, names="name", values="total").data[0]
fig.add_trace(pie_trace, row=1, col=2)

Related

How to change scatter XY plot color dependent from string Z value?

I would like to make a scatter plot with different dot colors, depending on the column 'Value'. For example, all "rand" dots would be blue and all "qmax" dots red. Here is my code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Create a DataFrame
records = {
    'State': [1000, 1002, 1001, 1003, 1000, 1003, 1001],
    'Score': [62, 47, 55, 74, 31, 50, 60],
    'Value': ['rand', 'qmax', 'rand', 'qmax', 'rand', 'qmax', 'qmax'],
}
df = pd.DataFrame(records, columns=['State', 'Score', 'Value'])

# Create figure with plot: every point is drawn by a single scatter call,
# so 'Value' has no influence on the colors here.
fig, ax1 = plt.subplots()
ax1.scatter(x=df['State'], y=df['Score'])
plt.show()
My dataframe:
State Score Value
0 1000 62 rand
1 1002 47 qmax
2 1001 55 rand
3 1003 74 qmax
4 1000 31 rand
5 1003 50 qmax
6 1001 60 qmax
Scatter plot:
You could iterate the values in the 'Value' column and, in each iteration, filter the dataframe and plot the filtered data:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Create a DataFrame
df = pd.DataFrame({
    'State': [1000, 1002, 1001, 1003, 1000, 1003, 1001],
    'Score': [62, 47, 55, 74, 31, 50, 60],
    'Value': ['rand', 'qmax', 'rand', 'qmax', 'rand', 'qmax', 'qmax'],
}, columns=['State', 'Score', 'Value'])
print(df)

# Create figure with plot
fig, ax1 = plt.subplots()
# One scatter call per distinct 'Value': each call is a separate point
# collection, so each group can be drawn in its own color.
for value in df['Value'].unique():
    # Filter once and reuse the subset for both axes.
    subset = df[df['Value'] == value]
    ax1.scatter(subset['State'], subset['Score'])
plt.show()
If you want to choose the color for each individual value in 'Value', you can define a dictionary as this one:
# Color to use for each label found in the 'Value' column.
colors = {
    'rand': 'blue',
    'qmax': 'red',
}
And specify the color parameter in the scatter call:
# Draw each 'Value' group with the color assigned to it in `colors`.
for value in df['Value'].unique():
    # Filter once and reuse the subset for both axes.
    subset = df[df['Value'] == value]
    ax1.scatter(subset['State'], subset['Score'], color=colors[value])
You could also show a legend:
# Draw each 'Value' group in its assigned color and label it for the legend.
for value in df['Value'].unique():
    # Filter once and reuse the subset for both axes.
    subset = df[df['Value'] == value]
    ax1.scatter(subset['State'], subset['Score'], color=colors[value], label=value)
# Labels alone are not displayed; the legend has to be requested explicitly.
ax1.legend()

Bar graph df.plot() vs ax.bar() structure matplotlib

I am trying to graph a table as a bar graph.
I get my desired outcome using df.plot(kind='bar') structure. But for certain reasons, I now need to graph it using the ax.bar() structure.
Please refer to the example screenshot. I would like to graph the x axis as categorical labels like the df.plot(kind='bar') structure rather than continuous scale, but need to learn to use ax.bar() structure to do the same.
Make the index categorical by setting the type to 'str'
import pandas as pd
import matplotlib.pyplot as plt
data = {
    'SA': [11, 12, 13, 16, 17, 159, 209, 216],
    'ET': [36, 45, 11, 15, 16, 4, 11, 10],
    'UT': [11, 26, 10, 11, 16, 7, 2, 2],
    'CT': [5, 0.3, 9, 5, 0.2, 0.2, 3, 4],
}
df = pd.DataFrame(data)
# Convert the x values to strings so they are plotted as categorical labels
# rather than on a continuous numeric scale.
df['SA'] = df['SA'].astype('str')
df = df.set_index('SA')

fig, ax = plt.subplots(figsize=(12, 8))
# Stack the three series: each layer starts on top of the ones drawn before it.
p1 = ax.bar(df.index, df.ET, color='b', label='ET')
p2 = ax.bar(df.index, df.UT, bottom=df.ET, color='g', label='UT')
p3 = ax.bar(df.index, df.CT, bottom=df.ET + df.UT, color='r', label='CT')
ax.legend()
plt.show()

Why does setting hue in seaborn plot change the size of a point?

The plot I am trying to make needs to achieve 3 things.
If a quiz is taken on the same day with the same score, that point needs to be bigger.
If two quiz scores overlap there needs to be some jitter so we can see all points.
Each quiz needs to have its own color
Here is how I am going about it.
import matplotlib.pyplot as plt  # needed for plt.show() below
import pandas as pd
import seaborn as sns

data = {
    'Quiz': [1, 1, 2, 1, 2, 1],
    'Score': [7.5, 5.0, 10, 10, 10, 10],
    'Day': [2, 5, 5, 5, 11, 11],
    'Size': [115, 115, 115, 115, 115, 355],
}
df = pd.DataFrame.from_dict(data)
# No regression line; jitter the x values and take each marker's size from
# the 'Size' column.
sns.lmplot(x='Day', y='Score', data=df, fit_reg=False, x_jitter=True,
           scatter_kws={'s': df.Size})
plt.show()
Setting the hue, which almost does everything I need, results in this.
import matplotlib.pyplot as plt  # needed for plt.show() below
import pandas as pd
import seaborn as sns

data = {
    'Quiz': [1, 1, 2, 1, 2, 1],
    'Score': [7.5, 5.0, 10, 10, 10, 10],
    'Day': [2, 5, 5, 5, 11, 11],
    'Size': [115, 115, 115, 115, 115, 355],
}
df = pd.DataFrame.from_dict(data)
# NOTE: this reproduces the problem being asked about. With hue set, the
# full-length 'Size' column passed through scatter_kws no longer lines up
# with the rows drawn for each hue group, so marker sizes come out wrong.
sns.lmplot(x='Day', y='Score', data=df, fit_reg=False, hue='Quiz',
           x_jitter=True, scatter_kws={'s': df.Size})
plt.show()
Is there a way I can have hue while keeping the size of my points?
It doesn't work because when you are using hue, seaborn does two separate scatterplots and therefore the size argument you are passing using scatter_kws= no longer aligns with the content of the dataframe.
You can recreate the same effect by hand however:
import matplotlib.pyplot as plt
import numpy as np

x_col = 'Day'
y_col = 'Score'
hue_col = 'Quiz'
size_col = 'Size'
jitter = 0.2  # standard deviation of the horizontal noise added to each point

fig, ax = plt.subplots()
# One scatter call per hue group, so each group's marker sizes are taken
# from that group's own rows and stay aligned with its points.
for q, temp in df.groupby(hue_col):
    n = len(temp[x_col])
    x = temp[x_col] + np.random.normal(scale=jitter, size=(n,))
    ax.scatter(x, temp[y_col], s=temp[size_col], label=q)
ax.set_xlabel(x_col)
ax.set_ylabel(y_col)
ax.legend(title=hue_col)

How to construct a side-by-side boxplot for a pandas dataframe

Men and women are in a column labeled 'sex'. I want to plot them according to their happiness levels.
So, one figure two columns and one row.
I have tried to extract each gender:
# Rows for each gender, selected with a boolean mask on the 'sex' column.
is_men = df['sex'] == 'Men'
is_women = df['sex'] == 'Women'
men = df.loc[is_men]
women = df.loc[is_women]
# Independent copy holding only the two columns of interest.
df_happy_sex = df.loc[:, ['happy', 'sex']].copy()
![image](https://ibb.co/yqmWKkf)
Boxplots in python
Boxplots require a numeric component, as they are a visualization of statistical data, specifically spread.
Use seaborn to make your plots look nicer
Code:
import pandas as pd
import matplotlib.pyplot as plt # doesn't have color by hue
import seaborn as sns
import numpy as np # for generating random data
import random # for random gender selection
# Seed both generators so the sample frame is reproducible.
np.random.seed(10)
random.seed(10)

# Draw order matters for reproducibility: ages first, then feelings, then genders.
ages = np.random.randint(20, 70, 100)
feelings = [random.choice(['happy', 'sad']) for _ in range(100)]
genders = [random.choice(['male', 'female']) for _ in range(100)]
df = pd.DataFrame({'age': ages, 'feeling': feelings, 'gender': genders})
# display(df)
age feeling gender
0 29 sad female
1 56 sad male
2 35 sad female
3 20 sad female
4 69 happy female
# Age distribution per feeling, with one box per gender inside each feeling.
sns.boxplot(data=df, x='feeling', y='age', hue='gender')
plt.show()
Using groupby with only categorical data:
# Purely categorical sample: 100 random (feeling, gender) pairs.
df = pd.DataFrame({
    'feeling': [random.choice(['happy', 'sad']) for _ in range(100)],
    'gender': [random.choice(['male', 'female']) for _ in range(100)],
})
# Count the rows in each (feeling, gender) combination and draw one bar per combination.
df.groupby(['feeling', 'gender'])['gender'].count().plot(kind='bar')
Alternate data - feeling as a numeric value:
# Alternate sample: 'feeling' is an integer score drawn from 0..100 inclusive.
feeling_scores = np.random.randint(0, 101, 100)
genders = [random.choice(['male', 'female']) for _ in range(100)]
df = pd.DataFrame({'feeling': feeling_scores, 'gender': genders})

plt.figure(figsize=(8, 7))
# One box per gender showing the spread of the feeling scores.
sns.boxplot(data=df, x='gender', y='feeling')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
I've created an imaginary sample of your data-frame.
# Made-up sample: one [sex, happiness score] pair per respondent.
data = [
    ['men', 55],
    ['men', 77],
    ['women', 85],
    ['men', 70],
    ['women', 68],
    ['women', 64],
    ['men', 86],
    ['men', 64],
    ['women', 54],
    ['men', 43],
    ['women', 86],
    ['women', 91],
]
df = pd.DataFrame.from_records(data, columns=['sex', 'happy'])
You can just:
# One box per value of 'sex'; the empty `sym` string suppresses the outlier markers.
df.boxplot(by=['sex'], sym='', figsize=(6, 6))
It yields:
I guess that is what you want.

Plot graph for every dataset entry

So this is my dataset:
# One entry per person; every list has the same length and order.
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
    'female': [0, 1, 1, 0, 1],
    'age': [42, 52, 36, 24, 73],
    'preTestScore': [4, 24, 31, 2, 3],
    'postTestScore': [25, 94, 57, 62, 70],
}
# Explicit column order for the frame ('age' is placed before 'female').
column_order = [
    'first_name',
    'last_name',
    'age',
    'female',
    'preTestScore',
    'postTestScore',
]
df = pd.DataFrame(raw_data, columns=column_order)
I'm new to plotting data and a bit lost here. I want to plot a line for each person, where the x-ticks are preTestScore and postTestScore and the y-ticks go from 0 to 100 (the possible range of test scores).
I was thinking that I could just make a scatter plot but then I wouldn't know how to connect the dots.
Graph
A slopegraph was what I was looking for. Thanks #mostlyoxygen
# Take the two score columns (a label slice, so it relies on them being
# adjacent in df). Copy so that adding a column below writes to an
# independent frame rather than to a slice of df.
x = df.loc[:, "preTestScore":"postTestScore"].copy()
x["full_name"] = df["first_name"] + " " + df["last_name"]

# x positions 0 and 1 stand for the two tests; the y axis spans the score range.
plt.xlim(0, 1)
plt.ylim(0, 100)
plt.xticks([0, 1], ["preTestScore", "postTestScore"])
plt.title("Score changes after Test taking")
plt.ylabel("Testscore")
# One line per person, from the pre-test score to the post-test score.
for pre, post, name in x.itertuples(index=False, name=None):
    plt.plot([0, 1], [pre, post], label=name)
plt.legend(loc="best")
plt.show()

Categories

Resources