python remove edges in stacked area chart - python

I am trying to remove the "edge color" from a stacked area chart in python, as the plots tend to get "overlaid" by the "last" column:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 10,000 rows x 4 columns of random integers drawn from [0, 10000).
df = pd.DataFrame(
    np.random.randint(0, 10000, size=(10000, 4)),
    columns=list('ABCD'),
)


def myplot(df):
    """Draw every column of ``df`` as a stacked area chart on a new 4x4 figure."""
    _, axes = plt.subplots(figsize=(4, 4))
    df.plot(kind='area', ax=axes)


myplot(df=df)
The problem is that it looks as if the red variable is the "largest one". But that is a plotting artifact, because its borders from one x value to the next overlap those of the other variables. To see what I mean, run this (same data with only 100 observations):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Smaller sample: 100 rows x 4 columns of random integers drawn from [0, 100).
df = pd.DataFrame(
    np.random.randint(0, 100, size=(100, 4)),
    columns=list('ABCD'),
)


def myplot(df):
    """Draw every column of ``df`` as a stacked area chart on a new 4x4 figure."""
    _, axes = plt.subplots(figsize=(4, 4))
    df.plot(kind='area', ax=axes)


myplot(df=df)

Related

Circular contour map in python

I have a 120 mm diameter circular disk on which I measure temperature at 20 different locations. These measurement locations are at random places. I am looking for a way to plot it as in the attached desired-plot link. When I used tricontour, it just plotted the random points; I am unable to find a way to fill the whole circle as in the picture attached below. Is there any other way to plot this? I have spent a lot of time searching for it with no success.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Circle is used below but was never imported, which raised NameError.
from matplotlib.patches import Circle

# Measurement positions (x, y) and the value z recorded at each position.
data = {"x": [110,50,-85,20,45,0,-80,-30,-105,80], "y":
[0,100,75,-90,20,115,-85,-20,-45,-90],"z":[10,2,6,4,9,12,2,6,4,12]}
x = data['x']
y = data['y']
z = data['z']
f, ax = plt.subplots(1)
# Filled contours over the triangulation of the scattered points (20 levels).
plot = ax.tricontourf(x,y,z, 20)
# Mark the measurement locations as black dots (no connecting line).
ax.plot(x,y, 'ko ')
# Red outline of radius 120 centred on the origin.
circ1 = Circle((0, 0), 120, facecolor='None', edgecolor='r', lw=5)
ax.add_patch(circ1)
f.colorbar(plot)
Example data :
Desired plot:
What I got from tricontour:
There isn't much data to make a really nice contour plot, but here is a solution with your data and an example with a substantially larger dataset:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.tri as tri

# Same ten measurements as in the question.
data = {
    "x": [110, 50, -85, 20, 45, 0, -80, -30, -105, 80],
    "y": [0, 100, 75, -90, 20, 115, -85, -20, -45, -90],
    "z": [10, 2, 6, 4, 9, 12, 2, 6, 4, 12],
}
df = pd.DataFrame(data)

# Single polar subplot on a fresh figure.
fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
ax.set_title("tricontour")
# NOTE(review): x/y are handed straight to a polar axes, which reads them as
# angle/radius rather than Cartesian coordinates - confirm this is intended.
ax.tricontourf(df["x"], df["y"], df["z"], 20)
plt.show()
which gives
and for a larger dataframe:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# 1,000 random rows with integer X, Y, Z values drawn from [0, 1000).
samples = np.random.randint(0, 1000, size=(1000, 3))
df = pd.DataFrame(samples, columns=['X', 'Y', 'Z'])

fig = plt.figure()
ax = fig.add_subplot(projection='polar')
ax.set_title("tricontour")
# Filled contours (20 levels) over the triangulated X/Y points, coloured by Z.
ax.tricontourf(df["X"], df["Y"], df["Z"], 20)
plt.show()
which returns

Add density curve on the histogram

I am able to make a histogram in Python, but I am unable to add a density curve. I have seen many code examples that add a density curve to a histogram in different ways, but I am not sure how to apply them to my code.
I have added density=True, but I still do not get a density curve on the histogram:
# 100 standard-normal samples in each of four columns; only column A is plotted.
df = pd.DataFrame(np.random.randn(100, 4), columns=['A', 'B', 'C', 'D'])
X = df['A']

# Density-normalised histogram: ten bins, returned as heights plus bin edges.
heights, edges = np.histogram(X, bins=10, density=True)
bar_width = 0.7 * (edges[1] - edges[0])  # bars cover 70% of a bin, leaving gaps
midpoints = (edges[:-1] + edges[1:]) / 2  # centre of every bin
plt.bar(midpoints, heights, align='center', width=bar_width)
plt.show()
Here is an approach using seaborn's distplot function, which was also mentioned in the comments:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# 100 standard-normal samples in each of four columns; only column A is plotted.
df = pd.DataFrame(np.random.randn(100, 4), columns=['A', 'B', 'C', 'D'])
X = df['A']

# Histogram (20 bins) with a kernel density estimate drawn over it.
sns.distplot(X, bins=20, hist=True, kde=True)
plt.show()
However, distplot will be removed in a future version of seaborn. Therefore, alternatives are to use histplot and displot.
sns.histplot
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# 100 standard-normal samples in each of four columns; only column A is plotted.
df = pd.DataFrame(np.random.randn(100, 4), columns=['A', 'B', 'C', 'D'])
X = df['A']

# Axes-level histogram (20 bins) with a kernel density estimate overlaid.
sns.histplot(data=X, bins=20, kde=True)
plt.show()
sns.displot
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# 100 standard-normal samples in each of four columns; only column A is plotted.
df = pd.DataFrame(np.random.randn(100, 4), columns=['A', 'B', 'C', 'D'])
X = df['A']

# Figure-level histogram (20 bins) with a kernel density estimate overlaid.
sns.displot(data=X, bins=20, kde=True)
plt.show()
Pandas also has kde plot:
# Density-normalised histogram of X: ten bins, returned as heights plus edges.
heights, edges = np.histogram(X, bins=10, density=True)
bar_width = 0.7 * (edges[1] - edges[0])  # bars cover 70% of a bin
midpoints = (edges[:-1] + edges[1:]) / 2  # centre of every bin
plt.bar(midpoints, heights, align='center', width=bar_width, zorder=1)

# Kernel density estimate of column A, drawn above the bars in a second colour.
df['A'].plot(kind='kde', zorder=2, color='C1')
plt.show()
Output:

Showing multiple Line Legends in Matplotlib

I am trying to display all 4 legends of my line graph, with the Column headers serving as the respective Legend names.
Is there an elegant way of executing this without having to write individual lines of code to plot and label each column?
Examples of my current data set are as follows:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# x runs from 1 to 10; y holds four columns of uniform random values.
x = pd.Series(np.arange(1, 11))
y = pd.DataFrame(np.random.rand(10, 4), columns=["A", "B", "C", "D"])

fig, ax = plt.subplots(figsize=(10, 7))
# All four columns are drawn in one call; label=True is passed as in the question.
ax.plot(x, y, label=True)
Indeed you can use the plot function defined in pandas:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

x = pd.Series(np.array([1,2,3,4,5,6,7,8,9,10]))
y = pd.DataFrame(np.random.rand(10,4))
y.columns = ["A","B","C","D"]
# Attach x to the frame so pandas can use it as the horizontal axis.
y['x'] = x
fig, ax = plt.subplots(figsize=(10, 7))
# x='x' makes the remaining columns A-D the plotted lines, each labelled with
# its column name in the legend. Without it, 'x' would be drawn as a fifth
# line against the row index.
y.plot(x='x', ax=ax)

Python pandas plot linechart with data points

I would like to plot a linechart based on column A. Based on Column sig I would like to add some markers to the chart A:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 120 standard-normal samples in a single column A.
data = pd.DataFrame(np.random.randn(120), columns=list('A'))
# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
data['sig'] = np.nan
# sig copies A wherever A exceeds 1 and stays NaN everywhere else.
data['sig'] = np.where((data['A'] > 1), data['A'], data['sig'] )
data.plot(grid=True)
plt.show()
I tried to add markevery=data['sig'] to the plot() statement, but it gave me several errors. Any hints?
Why not plot directly in matplotlib?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 120 standard-normal samples in a single column A.
data = pd.DataFrame(np.random.randn(120), columns=list('A'))
# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
data['sig'] = np.nan
# sig copies A wherever A exceeds 1 and stays NaN everywhere else.
data['sig'] = np.where((data['A'] > 1), data['A'], data['sig'] )
fig = plt.figure()
ax = fig.add_subplot(111)
# Line for the full series, then markers only at the rows where sig is set.
ax.plot(data["A"])
ax.scatter(data.index, data["sig"])

pandas histogram: plot histogram for each column as subplot of a big figure

I am using the following code, trying to plot the histogram of every column of a my pandas data frame df_in as subplot of a big figure.
%matplotlib notebook
from itertools import combinations
import matplotlib.pyplot as plt
fig, axes = plt.subplots(len(df_in.columns) // 3, 3, figsize=(12, 48))
for x in df_in.columns:
df_in.hist(column = x, bins = 100)
fig.tight_layout()
However, the histogram didn't show in the subplot. Any one knows what I missed? Thanks!
I can't comment burhan's answer because I don't have enough reputation points. The problem with his answer is that axes isn't one-dimensional, it contains axes triads, so it needs to be unrolled:
%matplotlib notebook
from itertools import combinations
import matplotlib.pyplot as plt
fig, axes = plt.subplots(len(df_in.columns)//3, 3, figsize=(12, 48))
i = 0
for triaxis in axes:
for axis in triaxis:
df_in.hist(column = df_in.columns[i], bins = 100, ax=axis)
i = i+1
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
fig, axis = plt.subplots(2,3,figsize=(8, 8))
df_in.hist(ax=axis)
The above will plot a 2×3 grid (6 histograms in total for your dataframe). Adjust the number of rows and columns to match the number of columns in your dataframe.
My TA #Benjamin once told me that with a dataframe you do not have to use a for loop.
You need to specify which axis you are plotting to. This should work:
fig, axes = plt.subplots(len(df_in.columns)//3, 3, figsize=(12, 48))
# plt.subplots returns a 2-D array of axes for a multi-row grid. Flatten it so
# each column is paired with a single subplot rather than a whole row of them.
for col, axis in zip(df_in.columns, axes.ravel()):
    df_in.hist(column = col, bins = 100, ax=axis)

Categories

Resources