Waterfall chart python matplotlib - python

I am having a problem with waterfall. I took this chart from matplotlib site and added my own data frame with 2 simple columns with some integer numbers. My waterfall was produced but without numbers, just empty bars. I am a bit lost and I would appreciate any suggestions.
What I am trying to build is the custom waterfall that takes one dataframe with column names, values, and some values for filters like countries. I haven't found anything like that anywhere so I am trying to build my own.
import numpy as np;
import pandas as pd;
import matplotlib.pyplot as plt;
from matplotlib.ticker import FuncFormatter;
dataset = pd.read_csv('waterfall_test_data.csv')
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = dataset['columns']
data = dataset['amount']
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(15, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')

Related

How to specifically assign X and Y Axis on matplotlib in Python?

I'm trying to create a Monte Carlo simulation to simulate the price of a stock.
Every day, the price of the stock changes. The change is determined by a random variable. The stock prices over the number of days (numDays) is captured in a list, stock_price_list.
I've created an array, monte_list, to store a bunch of different stock_price_lists. I want to graph all those stock_price_lists on the same graph. So I've created the variable numSimulations, which is supposed to create numSimulations number of rows in monte_list.
As far as I can tell, monte_list works. It's an array with one column and numSimulations numbers of rows. These rows are populated with stock_price_lists, which are themselves lists of stock price data.
stock_price_list works; I've graphed it multiple times.
I think that monte_list works too; at least, when I print the array, it returns information that looks correct.
My problem is that the axes are graphing the wrong variables.
The X axis is graphing numSimulations.
The Y axis is graphing stock price.
I WANT the X axis to graph numDays, NOT numSimulations, but I can't figure out how to change that.
I'd really love any advice. (Note that I hope to make numDays and numSimulations much bigger, but wanted to use smaller numbers to get the hang of things.)
daily_mean = .06/250
daily_stdev = .2/(250**.5)
start_stock_price = 100
numDays = 7
numSimulations = 5
monte_arr = pd.DataFrame({'FirstCol': numSimulations}, index=[0])
monte_list = [None] * numSimulations #this is a test: I'm trying to createa list of numPrices Nones,\
#then fill them all with stock_price_lists in the for loop
for j in range(0, numSimulations):
stock_price_list = [start_stock_price]
daily_stock_price = start_stock_price
#add a col of stock price data
for i in range (0,numDays):
daily_ret = np.random.normal(daily_mean, daily_stdev, 1) # generates a random return
daily_stock_price = daily_stock_price * (1+daily_ret)
stock_price_list.append(float(daily_stock_price))
np.array(stock_price_list)
#arr = np.array(stock_price_list)
#arr[j] = stock_price_list
monte_list[j] = stock_price_list # somehow stock_price_list is over-writing cols
#I think monte_list generates numSimulations of stock_price_list entries.
#Problem: the axes are wrong. X axis should have numDays on it. Y should have prices
# y axis is currently graphing highest stock price, but I want X to be graphing highest stock price
# I want X axis to be numDays
plt.figure(figsize = (14,5))
plt.plot(monte_list)
plt.title("monte list")
plt.show()
Blockquote
So, it actually turns out that I figured out how to code this with some help from a friend.
I created a for loop to plot various elements of monte_list.
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
from scipy.stats import norm
import matplotlib.pyplot as plt
import statsmodels as sm
import math
daily_mean = .06/250
daily_stdev = .2/(250**.5)
start_stock_price = 100
#stock_price_list = [start_stock_price]
#daily_stock_price = start_stock_price
numDays = 250
numSimulations = 100
monte_arr = pd.DataFrame({'FirstCol': numSimulations}, index=[0])
monte_list = [None] * numSimulations #this is a test: I'm trying to createa list of numPrices Nones,\
#then fill them all with stock_price_lists in the for loop
for j in range(0, numSimulations):
stock_price_list = [start_stock_price]
daily_stock_price = start_stock_price
#add a col of stock price data
for i in range (0,numDays):
daily_ret = np.random.normal(daily_mean, daily_stdev, 1) # generates a random return
daily_stock_price = daily_stock_price * (1+daily_ret)
stock_price_list.append(float(daily_stock_price))
np.array(stock_price_list)
monte_list[j] = stock_price_list
plt.figure(figsize = (14,5))
plt.title("Monte List")
plt.xlabel("Number of Days")
plt.ylabel("Stock price")
plt.legend()
for i in range(0, numDays):
plt.plot(monte_list[i])
plt.show()

How to split data into two graphs with mat plot lib

I would be so thankful if someone would be able to help me with this. I am creating a graph in matplotib however I would to love to split up the 14 lines created from the while loop into the x and y values of P, so instead of plt.plot(t,P) it would be plt.plot(t,((P[1])[0]))) and
plt.plot(t,((P[1])[1]))). I would love if someone could help me very quick, it should be easy but i am just getting errors with the arrays
`
#Altering Alpha in Tumor Cells vs PACCs
#What is alpha? α = Rate of conversion of cancer cells to PACCs
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from google.colab import files
value = -6
counter = -1
array = []
pac = []
while value <= 0:
def modelP(x,t):
P, C = x
λc = 0.0601
K = 2000
α = 1 * (10**value)
ν = 1 * (10**-6)
λp = 0.1
γ = 2
#returning odes
dPdt = ((λp))*P*(1-(C+(γ*P))/K)+ (α*C)
dCdt = ((λc)*C)*(1-(C+(γ*P))/K)-(α*C) + (ν***P)
return dPdt, dCdt
#initial
C0= 256
P0 = 0
Pinit = [P0,C0]
#time points
t = np.linspace(0,730)
#solve odes
P = odeint(modelP,Pinit,t)
plt.plot(t,P)
value += 1
#plot results
plt.xlabel('Time [days]')
plt.ylabel('Number of PACCs')
plt.show()
`
You can use subplots() to create two subplots and then plot the individual line into the plot you need. To do this, firstly add the subplots at the start (before the while loop) by adding this line...
fig, ax = plt.subplots(2,1) ## Plot will 2 rows, 1 column... change if required
Then... within the while loop, replace the plotting line...
plt.plot(t,P)
with (do take care of the space so that the lines are within while loop)
if value < -3: ## I am using value = -3 as the point of split, change as needed
ax[0].plot(t,P)#, ax=ax[0]) ## Add to first plot
else:
ax[1].plot(t,P)#,ax=ax[1]) ## Add to second plot
This will give a plot like this.

How can I simplify and create conditional colours on this Waterfall Chart?

This is a code for a waterfall chart. I'd kindly like to ask:
if there is a way to simplify this code. The code is far too long and I'm sure there is a lot of extra lines of code that could be reduced.
How I can make the first and last bars black?. Since I am creating a waterfall chart I am looking for the first and last value to be black at all times and the values in between to be green or red depending on whether or not it is a negative or positive number.
Bars greater than zero green.
Bars less than zero red.
Any help would be greatly appreciated.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(10, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')
Answer to questions 2, 3 and 4: set the colors of the bar patches after plotting them:
for p, c in zip(my_plot.containers[0].patches, np.r_[0, np.sign(trans.amount[1:-1]), 0]):
p.set_color({0: 'k', 1: 'g', -1: 'r'}[c])

How to change the range of theta for each plotly scatterpolar category

I am currently using Plotly's scatterpolar module to create a radar chart. I am trying to visualize the statistical categories for all 5 basketball statistics (points, assists, rebounds, steals, and blocks). The problem is that the range for every single category is set to the highest value which is points. I'd like to make the range for each category separate (for example: the range for points is 0 to 50, the range for assists to be 0 to 15, the range of steals to be 0 to 5,etc.).
As you can see the entire chart is skewed towards the point category.
categories = ['Points', 'Assists', 'Rebounds', 'Steals', 'Blocks']
all_averages = [avg_points, avg_assists, avg_rebounds, avg_steals, avg_blocks]
trace = go.Scatterpolar(r = all_averages, theta = categories, fill = 'toself', name = f'{first} {last}')
data = [trace]
figure = go.Figure(data = data, layout = layout)
figure.show()
This is the code I have right now.
One option is to scale each category then set the range 0-1.
import plotly.graph_objects as go
first = 'John'
last = 'Doe'
range_pts = 50
range_ast = 15
range_rbs = 20
range_stl = 5
range_blk = 5
ranges = [range_pts, range_ast, range_rbs, range_stl, range_blk]
categories = [f'Points ({range_pts})', f'Assists ({range_ast})', f'Rebounds ({range_rbs})', f'Steals ({range_stl})', f'Blocks ({range_blk})']
all_averages = [26, 7, 11, 2, 1]
for idx, value in enumerate(ranges):
all_averages[idx] = all_averages[idx]/ranges[idx]
trace = go.Scatterpolar(r = all_averages, theta = categories, fill = 'toself', name = f'{first} {last}')
data = [trace]
figure = go.Figure(data = data, layout = None)
figure.update_polars(radialaxis=dict(visible=False,range=[0, 1]))
figure.show()

Plot an infinite line between two pandas series points

I want to plot an infinite non ending line between two points that are in the form of a pandas series. I am able to successfully plot a standard line between the points, however I don't want the line to "end" and instead it should continue. Expanding on this I would also like to extract the values of this new infinite line to a new dataframe so that I can see what corresponding line value a given x value in has.
data = yf.download("AAPL", start="2021-01-01", interval = "1d").drop(columns=['Adj Close'])
data = data[30:].rename(columns={"Open": "open", "High": "high", "Low": "low", "Close": "close", "Volume": "volume"})
local_max = argrelextrema(data['high'].values, np.greater)[0]
local_min = argrelextrema(data['low'].values, np.less)[0]
highs = data.iloc[local_max,:]
lows = data.iloc[local_min,:]
highesttwo = highs["high"].nlargest(2)
lowesttwo = lows["low"].nsmallest(2)
fig = plt.figure(figsize=[10,7])
data['high'].plot(marker='o', markevery=local_max)
data['low'].plot(marker='o', markevery=local_min)
highesttwo.plot()
lowesttwo.plot()
plt.show()
Currently my plot looks like this:
How ever I want it to look like this as well as be able to get the values of the line for the corresponding x value.
This can be done in a few steps as shown in the following example where the lines are computed with element-wise operations (i.e. vectorized) using the slope-intercept form of the line equation.
The stock data has a frequency based on the opening dates of the stock exchange. This frequency is not automatically recognized by pandas, therefore the .plot method produces a plot with a continuous date for the x-axis and includes the days with no data. This can be avoided by setting the argument use_index=False so that the x-axis uses integers starting from zero instead.
The challenge is to then create nicely formatted tick labels. The following example attempts to imitate the pandas tick format by using list comprehensions to select the tick locations and format the labels. These will need to be adjusted if the date range is significantly lengthened or shortened.
import numpy as np # v 1.19.2
import pandas as pd # v 1.2.3
import matplotlib.pyplot as plt # v 3.3.4
from scipy.signal import argrelextrema # v 1.6.1
import yfinance as yf # v 0.1.54
# Import data
data = (yf.download('AAPL', start='2021-01-04', end='2021-03-15', interval='1d')
.drop(columns=['Adj Close']))
data = data.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low',
'Close': 'close', 'Volume': 'volume'})
# Extract points and get appropriate x values for the points by using
# reset_index for highs/lows
local_max = argrelextrema(data['high'].values, np.greater)[0]
local_min = argrelextrema(data['low'].values, np.less)[0]
highs = data.reset_index().iloc[local_max, :]
lows = data.reset_index().iloc[local_min, :]
htwo = highs['high'].nlargest(2).sort_index()
ltwo = lows['low'].nsmallest(2).sort_index()
# Compute slope and y-intercept for each line
slope_high, intercept_high = np.polyfit(htwo.index, htwo, 1)
slope_low, intercept_low = np.polyfit(ltwo.index, ltwo, 1)
# Create dataframe for each line by using reindexed htwo and ltwo so that the
# index extends to the end of the dataset and serves as the x variable then
# compute y values
# High
line_high = htwo.reindex(range(htwo.index[0], len(data))).reset_index()
line_high.columns = ['x', 'y']
line_high['y'] = slope_high*line_high['x'] + intercept_high
# Low
line_low = ltwo.reindex(range(ltwo.index[0], len(data))).reset_index()
line_low.columns = ['x', 'y']
line_low['y'] = slope_low*line_low['x'] + intercept_low
# Plot data using pandas plotting function and add lines with matplotlib function
fig = plt.figure(figsize=[10,6])
ax = data['high'].plot(marker='o', markevery=local_max, use_index=False)
data['low'].plot(marker='o', markevery=local_min, use_index=False)
ax.plot(line_high['x'], line_high['y'])
ax.plot(line_low['x'], line_low['y'])
ax.set_xlim(0, len(data)-1)
# Set major and minor tick locations
tks_maj = [idx for idx, timestamp in enumerate(data.index)
if (timestamp.month != data.index[idx-1].month) | (idx == 0)]
tks_min = range(len(data))
ax.set_xticks(tks_maj)
ax.set_xticks(tks_min, minor=True)
# Format major and minor tick labels
labels_maj = [ts.strftime('\n%b\n%Y') if (data.index[tks_maj[idx]].year
!= data.index[tks_maj[idx-1]].year) | (idx == 0)
else ts.strftime('\n%b') for idx, ts in enumerate(data.index[tks_maj])]
labels_min = [ts.strftime('%d') if (idx+3)%5 == 0 else ''
for idx, ts in enumerate(data.index[tks_min])]
ax.set_xticklabels(labels_maj)
ax.set_xticklabels(labels_min, minor=True)
plt.show()
You can find more examples of tick formatting here and here in Solution 1.
Date string format codes

Categories

Resources