How can I simplify and create conditional colours on this Waterfall Chart? - python

This is a code for a waterfall chart. I'd kindly like to ask:
if there is a way to simplify this code. The code is far too long and I'm sure there is a lot of extra lines of code that could be reduced.
How I can make the first and last bars black?. Since I am creating a waterfall chart I am looking for the first and last value to be black at all times and the values in between to be green or red depending on whether or not it is a negative or positive number.
Bars greater than zero green.
Bars less than zero red.
Any help would be greatly appreciated.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(10, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')

Answer to questions 2, 3 and 4: set the colors of the bar patches after plotting them:
for p, c in zip(my_plot.containers[0].patches, np.r_[0, np.sign(trans.amount[1:-1]), 0]):
p.set_color({0: 'k', 1: 'g', -1: 'r'}[c])

Related

How to specifically assign X and Y Axis on matplotlib in Python?

I'm trying to create a Monte Carlo simulation to simulate the price of a stock.
Every day, the price of the stock changes. The change is determined by a random variable. The stock prices over the number of days (numDays) is captured in a list, stock_price_list.
I've created an array, monte_list, to store a bunch of different stock_price_lists. I want to graph all those stock_price_lists on the same graph. So I've created the variable numSimulations, which is supposed to create numSimulations number of rows in monte_list.
As far as I can tell, monte_list works. It's an array with one column and numSimulations numbers of rows. These rows are populated with stock_price_lists, which are themselves lists of stock price data.
stock_price_list works; I've graphed it multiple times.
I think that monte_list works too; at least, when I print the array, it returns information that looks correct.
My problem is that the axes are graphing the wrong variables.
The X axis is graphing numSimulations.
The Y axis is graphing stock price.
I WANT the X axis to graph numDays, NOT numSimulations, but I can't figure out how to change that.
I'd really love any advice. (Note that I hope to make numDays and numSimulations much bigger, but wanted to use smaller numbers to get the hang of things.)
daily_mean = .06/250
daily_stdev = .2/(250**.5)
start_stock_price = 100
numDays = 7
numSimulations = 5
monte_arr = pd.DataFrame({'FirstCol': numSimulations}, index=[0])
monte_list = [None] * numSimulations #this is a test: I'm trying to createa list of numPrices Nones,\
#then fill them all with stock_price_lists in the for loop
for j in range(0, numSimulations):
stock_price_list = [start_stock_price]
daily_stock_price = start_stock_price
#add a col of stock price data
for i in range (0,numDays):
daily_ret = np.random.normal(daily_mean, daily_stdev, 1) # generates a random return
daily_stock_price = daily_stock_price * (1+daily_ret)
stock_price_list.append(float(daily_stock_price))
np.array(stock_price_list)
#arr = np.array(stock_price_list)
#arr[j] = stock_price_list
monte_list[j] = stock_price_list # somehow stock_price_list is over-writing cols
#I think monte_list generates numSimulations of stock_price_list entries.
#Problem: the axes are wrong. X axis should have numDays on it. Y should have prices
# y axis is currently graphing highest stock price, but I want X to be graphing highest stock price
# I want X axis to be numDays
plt.figure(figsize = (14,5))
plt.plot(monte_list)
plt.title("monte list")
plt.show()
Blockquote
So, it actually turns out that I figured out how to code this with some help from a friend.
I created a for loop to plot various elements of monte_list.
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
from scipy.stats import norm
import matplotlib.pyplot as plt
import statsmodels as sm
import math
daily_mean = .06/250
daily_stdev = .2/(250**.5)
start_stock_price = 100
#stock_price_list = [start_stock_price]
#daily_stock_price = start_stock_price
numDays = 250
numSimulations = 100
monte_arr = pd.DataFrame({'FirstCol': numSimulations}, index=[0])
monte_list = [None] * numSimulations #this is a test: I'm trying to createa list of numPrices Nones,\
#then fill them all with stock_price_lists in the for loop
for j in range(0, numSimulations):
stock_price_list = [start_stock_price]
daily_stock_price = start_stock_price
#add a col of stock price data
for i in range (0,numDays):
daily_ret = np.random.normal(daily_mean, daily_stdev, 1) # generates a random return
daily_stock_price = daily_stock_price * (1+daily_ret)
stock_price_list.append(float(daily_stock_price))
np.array(stock_price_list)
monte_list[j] = stock_price_list
plt.figure(figsize = (14,5))
plt.title("Monte List")
plt.xlabel("Number of Days")
plt.ylabel("Stock price")
plt.legend()
for i in range(0, numDays):
plt.plot(monte_list[i])
plt.show()

How to split data into two graphs with mat plot lib

I would be so thankful if someone would be able to help me with this. I am creating a graph in matplotib however I would to love to split up the 14 lines created from the while loop into the x and y values of P, so instead of plt.plot(t,P) it would be plt.plot(t,((P[1])[0]))) and
plt.plot(t,((P[1])[1]))). I would love if someone could help me very quick, it should be easy but i am just getting errors with the arrays
`
#Altering Alpha in Tumor Cells vs PACCs
#What is alpha? α = Rate of conversion of cancer cells to PACCs
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from google.colab import files
value = -6
counter = -1
array = []
pac = []
while value <= 0:
def modelP(x,t):
P, C = x
λc = 0.0601
K = 2000
α = 1 * (10**value)
ν = 1 * (10**-6)
λp = 0.1
γ = 2
#returning odes
dPdt = ((λp))*P*(1-(C+(γ*P))/K)+ (α*C)
dCdt = ((λc)*C)*(1-(C+(γ*P))/K)-(α*C) + (ν***P)
return dPdt, dCdt
#initial
C0= 256
P0 = 0
Pinit = [P0,C0]
#time points
t = np.linspace(0,730)
#solve odes
P = odeint(modelP,Pinit,t)
plt.plot(t,P)
value += 1
#plot results
plt.xlabel('Time [days]')
plt.ylabel('Number of PACCs')
plt.show()
`
You can use subplots() to create two subplots and then plot the individual line into the plot you need. To do this, firstly add the subplots at the start (before the while loop) by adding this line...
fig, ax = plt.subplots(2,1) ## Plot will 2 rows, 1 column... change if required
Then... within the while loop, replace the plotting line...
plt.plot(t,P)
with (do take care of the space so that the lines are within while loop)
if value < -3: ## I am using value = -3 as the point of split, change as needed
ax[0].plot(t,P)#, ax=ax[0]) ## Add to first plot
else:
ax[1].plot(t,P)#,ax=ax[1]) ## Add to second plot
This will give a plot like this.

Waterfall chart python matplotlib

I am having a problem with waterfall. I took this chart from matplotlib site and added my own data frame with 2 simple columns with some integer numbers. My waterfall was produced but without numbers, just empty bars. I am a bit lost and I would appreciate any suggestions.
What I am trying to build is the custom waterfall that takes one dataframe with column names, values, and some values for filters like countries. I haven't found anything like that anywhere so I am trying to build my own.
import numpy as np;
import pandas as pd;
import matplotlib.pyplot as plt;
from matplotlib.ticker import FuncFormatter;
dataset = pd.read_csv('waterfall_test_data.csv')
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = dataset['columns']
data = dataset['amount']
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(15, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')

How to I set different colors to subsets of line plot iterations in matplotlib?

I am iteratively plotting the np.exp results of 12 rows of data from a 2D array (12,5000), out_array. All data share the same x values, (x_d). I want the first 4 iterations to all plot as the same color, the next 4 to be a different color, and next 4 a different color...such that I have 3 different colors each corresponding to the 1st-4th, 5th-8th, and 9th-12th iterations respectively. In the end, it would also be nice to define these sets with their corresponding colors in a legend.
I have researched cycler (https://matplotlib.org/examples/color/color_cycle_demo.html), but I can't figure out how to assign colors into sets of iterations > 1. (i.e. 4 in my case). As you can see in my code example, I can have all 12 lines plotted with different (default) colors -or- I know how to make them all the same color (i.e. ...,color = 'r',...)
plt.figure()
for i in range(out_array.shape[0]):
plt.plot(x_d, np.exp(out_array[i]),linewidth = 1, alpha = 0.6)
plt.xlim(-2,3)
I expect a plot like this, only with a total of 3 different colors, each corresponding to the chunks of iterations described above.
An other solution
import matplotlib.pyplot as plt
import numpy as np
x = np.arange(10)
color = ['r', 'g', 'b', 'p']
for i in range(12):
plt.plot(x, i*x, color[i//4])
plt.show()
plt.figure()
n = 0
color = ['r','g','b']
for i in range(out_array.shape[0]):
n = n+1
if n/4 <= 1:
c = 1
elif n/4 >1 and n/4 <= 2:
c = 2
elif n/4 >2:
c = 3
else:
print(n)
plt.plot(x_d, np.exp(out_array[i]),color = color[c-1])
plt.show()

Changing line color

I'm trying to change the colour of a line on matplotlib subject to a condition.
Basically I take a rolling average and a rolling standard deviation. I plot the rolling average, but I would like to change the line colour if the standard deviation corresponding to that average is over the threshold I set. This is not the color of the whole line, just the bits that are over the threshol. Mostly my data is set using pandas
Alternatively I could shade it instead.
This link is useful, although I cannot figure out how to apply it to my situation.
http://nbviewer.ipython.org/urls/raw.github.com/dpsanders/matplotlib-examples/master/colorline.ipynb
EDIT COde: although, overly complicated for the question,
(I know the functions are too long at the moment)
def av_rel_track(rel_values):
#blade==0
avg_rel_track=[]
for i in range(0, int(nb)):
av_values=Series([])
rel_blade=rel_values[i]
rel_blade=rel_blade.fillna(0)
av_values=[]
for num in range(0, int (navg)):
av_values.append( np.nan)
#loops over each revolution(row)
for rev in range(int(navg),len(rel_blade)):
#select section to be number of averages long
N=rev-int(navg)+1
section=rel_blade.loc[N:rev]
#check section for five consecutive zeros
checker=check5(section)
#if there is five con zeros, av_value is zero
if checker==True:
av_value=0
else:
#finds the number of zeros in the section
nz=len (section)-len(section.nonzero()[0])
while nz>0:
#whilst there is a zero, extend average by one
N=N-1
if N<0:
break
new_val=rel_blade.ix[N]
section=rel_blade[N:rev+1]
#checks if new value is zero
if new_val!=0:
nz=nz-1
#checks extended section does not contain 5 consec zeros
checker=check5(section)
if checker==True:
av_value=0
else:
#sets av_value to 0if the range extends beyond the first value of rel_values
if N<0:
av_value=0
else:
#calculates the mean of the sctinon(not including nans)
section=zero_to_nan(section)
av_value=stats.nanmean(section)
av_values.append(av_value)
av_values=zero_to_nan(av_values)
rel_values["a%s" % i]=av_values
av_track=DataFrame({1:rel_values['a0'], 2:rel_values['a1'],3:rel_values['a2'],4:rel_values['a3'],5:rel_values['a4']})
return av_track
def sd_rel_track(rel_values):
for i in range(0, int(nb)):
sd_values=Series([])
rel_blade=rel_values[i]
rel_blade=rel_blade.fillna(0)
sd_values=[]
for num in range(0, int (navg)):
sd_values.append( np.nan)
#loops over each revolution(row)
for rev in range(int(navg),len(rel_blade)):
#select section to be number of averages long
N=rev-int(navg)+1
section=rel_blade.loc[N:rev]
#check section for five consecutive zeros
checker=check5(section)
#if there is five con zeros, av_value is zero
if checker==True:
sd_value=0
else:
#finds the number of zeros in the section
nz=len (section)-len(section.nonzero()[0])
while nz>0:
#whilst there is a zero, extend average by one
N=N-1
if N<0:
break
new_val=rel_blade.ix[N]
section=rel_blade[N:rev+1]
#checks if new value is zero
if new_val!=0:
nz=nz-1
#checks extended section does not contain 5 consec zeros
checker=check5(section)
if checker==True:
sd_value=0
else:
#sets av_value to 0if the range extends beyond the first value of rel_values
if N<0:
sd_value=0
else:
#calculates the mean of the sctinon(not including nans)
section=zero_to_nan(section)
sd_value=stats.nanstd(section)
sd_values.append(sd_value)
sd_values=zero_to_nan(sd_values)
rel_values["sd%s" % i]=sd_values
sd_track=DataFrame({1:rel_values['sd0'], 2:rel_values['sd1'],3:rel_values['sd2'],4:rel_values['sd3'],5:rel_values['sd4']})
sumsd= sd_track.sum(axis=1)
return sumsd
def plot():
plt.figure()
plt.plot(av_values)
plt.show()
plt.figure()
plt.plot(sd_values)
plt.show()
Using
http://nbviewer.ipython.org/urls/raw.github.com/dpsanders/matplotlib-examples/master/colorline.ipynb ,
In[4], you can add something like:
x = np.linspace(0, 4.*np.pi, 1000)
y = np.sin(x)
z = np.zeros(1000)
for i in range(1000):
if math.cos(x[i])>0.7:
z[i]=1
fig, axes = plt.subplots()
colorline(x, y, z)
plt.xlim(x.min(), x.max())
plt.ylim(-1.0, 1.0)
plt.show()

Categories

Resources