I'm trying to change the colour of a line on matplotlib subject to a condition.
Basically I take a rolling average and a rolling standard deviation. I plot the rolling average, but I would like to change the line colour if the standard deviation corresponding to that average is over the threshold I set. This is not the color of the whole line, just the bits that are over the threshol. Mostly my data is set using pandas
Alternatively I could shade it instead.
This link is useful, although I cannot figure out how to apply it to my situation.
http://nbviewer.ipython.org/urls/raw.github.com/dpsanders/matplotlib-examples/master/colorline.ipynb
EDIT COde: although, overly complicated for the question,
(I know the functions are too long at the moment)
def av_rel_track(rel_values):
#blade==0
avg_rel_track=[]
for i in range(0, int(nb)):
av_values=Series([])
rel_blade=rel_values[i]
rel_blade=rel_blade.fillna(0)
av_values=[]
for num in range(0, int (navg)):
av_values.append( np.nan)
#loops over each revolution(row)
for rev in range(int(navg),len(rel_blade)):
#select section to be number of averages long
N=rev-int(navg)+1
section=rel_blade.loc[N:rev]
#check section for five consecutive zeros
checker=check5(section)
#if there is five con zeros, av_value is zero
if checker==True:
av_value=0
else:
#finds the number of zeros in the section
nz=len (section)-len(section.nonzero()[0])
while nz>0:
#whilst there is a zero, extend average by one
N=N-1
if N<0:
break
new_val=rel_blade.ix[N]
section=rel_blade[N:rev+1]
#checks if new value is zero
if new_val!=0:
nz=nz-1
#checks extended section does not contain 5 consec zeros
checker=check5(section)
if checker==True:
av_value=0
else:
#sets av_value to 0if the range extends beyond the first value of rel_values
if N<0:
av_value=0
else:
#calculates the mean of the sctinon(not including nans)
section=zero_to_nan(section)
av_value=stats.nanmean(section)
av_values.append(av_value)
av_values=zero_to_nan(av_values)
rel_values["a%s" % i]=av_values
av_track=DataFrame({1:rel_values['a0'], 2:rel_values['a1'],3:rel_values['a2'],4:rel_values['a3'],5:rel_values['a4']})
return av_track
def sd_rel_track(rel_values):
for i in range(0, int(nb)):
sd_values=Series([])
rel_blade=rel_values[i]
rel_blade=rel_blade.fillna(0)
sd_values=[]
for num in range(0, int (navg)):
sd_values.append( np.nan)
#loops over each revolution(row)
for rev in range(int(navg),len(rel_blade)):
#select section to be number of averages long
N=rev-int(navg)+1
section=rel_blade.loc[N:rev]
#check section for five consecutive zeros
checker=check5(section)
#if there is five con zeros, av_value is zero
if checker==True:
sd_value=0
else:
#finds the number of zeros in the section
nz=len (section)-len(section.nonzero()[0])
while nz>0:
#whilst there is a zero, extend average by one
N=N-1
if N<0:
break
new_val=rel_blade.ix[N]
section=rel_blade[N:rev+1]
#checks if new value is zero
if new_val!=0:
nz=nz-1
#checks extended section does not contain 5 consec zeros
checker=check5(section)
if checker==True:
sd_value=0
else:
#sets av_value to 0if the range extends beyond the first value of rel_values
if N<0:
sd_value=0
else:
#calculates the mean of the sctinon(not including nans)
section=zero_to_nan(section)
sd_value=stats.nanstd(section)
sd_values.append(sd_value)
sd_values=zero_to_nan(sd_values)
rel_values["sd%s" % i]=sd_values
sd_track=DataFrame({1:rel_values['sd0'], 2:rel_values['sd1'],3:rel_values['sd2'],4:rel_values['sd3'],5:rel_values['sd4']})
sumsd= sd_track.sum(axis=1)
return sumsd
def plot():
plt.figure()
plt.plot(av_values)
plt.show()
plt.figure()
plt.plot(sd_values)
plt.show()
Using
http://nbviewer.ipython.org/urls/raw.github.com/dpsanders/matplotlib-examples/master/colorline.ipynb ,
In[4], you can add something like:
x = np.linspace(0, 4.*np.pi, 1000)
y = np.sin(x)
z = np.zeros(1000)
for i in range(1000):
if math.cos(x[i])>0.7:
z[i]=1
fig, axes = plt.subplots()
colorline(x, y, z)
plt.xlim(x.min(), x.max())
plt.ylim(-1.0, 1.0)
plt.show()
Related
This is a code for a waterfall chart. I'd kindly like to ask:
if there is a way to simplify this code. The code is far too long and I'm sure there is a lot of extra lines of code that could be reduced.
How I can make the first and last bars black?. Since I am creating a waterfall chart I am looking for the first and last value to be black at all times and the values in between to be green or red depending on whether or not it is a negative or positive number.
Bars greater than zero green.
Bars less than zero red.
Any help would be greatly appreciated.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(10, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')
Answer to questions 2, 3 and 4: set the colors of the bar patches after plotting them:
for p, c in zip(my_plot.containers[0].patches, np.r_[0, np.sign(trans.amount[1:-1]), 0]):
p.set_color({0: 'k', 1: 'g', -1: 'r'}[c])
Since the code is a lot of lines, I shall first show what the issue is:
I defined a simple loop and am getting the appropriate results.
Here when I attempt to plot it using matplotlib, the range shown on the x-axis is different from the range I inputted. I want 0 to 100 with a step size of 5 but I am getting 0 to 17.5 with a step size of 2.5.
Is there any issue with just the way I have coded this? If not, here is the rest of the code, thank you!:
import random
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.colors import ListedColormap
import sys
import decimal
sys.setrecursionlimit(4000)
n = 10 # number of rows and columns in the grid
p = 0.9 # probability that each square is open
def gridMakerN(n):
grid = (np.random.rand(n,n) < p).astype(int)
mycolormap = ListedColormap(["grey","blue"])
#plt.imshow(grid, cmap=mycolormap)
return grid
# define an exception that we will raise if percolation is detected
class percolationException(Exception): pass
# query() looks for a path from (row, col) to the bottom of the grid
# recursive function: it calls itself to determine if a path exists
def query(row, col, grid, visited):
#print("Visiting square ", row, ",", col) <- This was previously part of the code
# mark row, col as visited
visited[row,col] = 1
# is row equal to the bottom row? If so, output "path found"
(numRows,numCols) = np.shape(grid)
if row == numRows - 1:
#print("PERCOLATION FOUND!!!") <- This was previously part of the code
raise percolationException
else:
# if square below is open and unvisited, then is there a path from that square?
if grid[row+1,col] == 1 and visited[row+1,col] == 0:
query(row+1, col, grid, visited)
# if square at left is open and unvisited, then is there a path from that square?
if col > 0 and grid[row, col-1] == 1 and visited[row, col-1] == 0:
query(row, col-1, grid, visited)
# if square at right is open and unvisited, then is there a path from that square?
if col+1 < numCols and grid[row, col+1] == 1 and visited[row, col+1] == 0:
query(row, col+1, grid, visited)
# if square above is open and unvisited, then is there a path from that square?
if row > 0 and grid[row-1, col] == 1 and visited[row-1, col] == 0:
query(row-1, col, grid, visited)
# driver function to manage the whole percolation detection process
def findPercolation(grid):
# create an empty visited matrix
(numRows, numCols) = np.shape(grid)
visited = np.zeros( (numRows, numCols) )
# look for a percolation path, starting at each open square in the top row
try:
for c in range(numCols): # consider all squares in the top row
if grid[0,c] == 1:
query(0, c, grid, visited)
except percolationException:
#print("percolationException occurred") <- This was previously part of the code
return 1 # <- Here I put 1 instead of "True"
else:
#print("percolation not found") <- This was previously part of the code
return 0 # <- Here I put 0 instead of "False"
def findPercolationFixedP(n):
return findPercolation(gridMakerN(n))
def percAvgFixedP(n):
iterations = 100
results = [] #Making an Empty List
for _ in range(iterations): #Repeat the Same Step x times
results.append(findPercolationFixedP(n))
#print(results)
#print(sum(results))
return sum(results)/iterations
def avgFixedPGraph():
results = []
for x in range(10,100,5):
results.append(percAvgFixedP(x))
plt.plot(results,"c")
plt.grid()
plt.show()
avgFixedPGraph()
When plot() is only given one array:
plt.plot(results, "c")
that array is treated as the y values, and the x values default to a numeric range. In this case results has 18 values, so it plots x from 0 to 17.
To assign custom x values, pass them in explicitly, e.g.:
x = range(10, 100, 5)
results = [percAvgFixedP(value) for value in x]
plt.plot(x, results, "c")
I have a python program that reads tsv data and plots it using the matplotlib library.
I feel like my code works pretty well:
def main(compsPath: str, gibbsPath: str):
"""
Given the file paths for comps.tsv and
gibbs.tsv, this main function will
produce two separate plots - one for each file.
"""
# Read tsv data into np record arrays
# Slice off header text
with open(compsPath, 'r') as fcomps:
reader = csv.reader(fcomps, delimiter='\t')
compsHeader = next(reader)
compsData = np.array(list(reader)).astype(np.double)
with open(gibbsPath, 'r') as fgibbs:
reader = csv.reader(fgibbs, delimiter='\t')
gibbsHeader = next(reader)
gibbsData = np.array(list(reader)).astype(np.double)
# Get data dimensions:
# - - - M := Number of metabolites
# - - - N := Number of reactions
M = compsData.shape[1] - 1
N = gibbsData.shape[1] - 1
plotComps(M, compsData, compsHeader)
plotGibbs(N, gibbsData, gibbsHeader)
plt.show()
The plotGibbs function produces the following graphic for the tsv file I'm working with. For this graphic, N=3 (3 reactions).
I would like to indicate at what point in time each reaction becomes unfavorable (in the context of my project, this just means that the reaction stops). This occurs when the gibbs free energy value (∆G) of the reaction is greater than or equal to 0.
I feel like I could best emphasize this by color-coding the line plots my program generates. For negative ∆G values, I would like the line to be green, and for positive or zero ∆G values, I would like the line to be red.
Here is my current code for generating the gibbs free energy plots (does not color-code):
def plotGibbs(N: int, gibbsData: np.ndarray, gibbsHeader):
gibbsFig = plt.figure()
gibbsFig.suptitle("∆G˚ Yield Plotted over Time (days)")
numCols = ceil(N / 2)
numRows = (N // numCols) + 1
for n in range (1, N+1):
ax = gibbsFig.add_subplot(numRows, numCols, n)
ax.set_ylabel(gibbsHeader[n])
ax.set_xlabel(gibbsHeader[0])
ax.plot(gibbsData[:, 0], gibbsData[:, n])
gibbsFig.tight_layout()
How could I make it so that negative values are plotted green, and non-negative values are plotted red?
You could try to find where a change of sign occurs in your data using np.where with a simple condition like gibbsData[:, n]>0 then plot negative/positive data accordingly:
def plotGibbs(N: int, gibbsData: np.ndarray, gibbsHeader):
gibbsFig = plt.figure()
gibbsFig.suptitle("∆G˚ Yield Plotted over Time (days)")
numCols = ceil(N / 2)
numRows = (N // numCols) + 1
for n in range (1, N+1):
ax = gibbsFig.add_subplot(numRows, numCols, n)
ax.set_ylabel(gibbsHeader[n])
ax.set_xlabel(gibbsHeader[0])
# idx where sign change occurs for data n
idx_zero = np.where(gibbsData[:, n]>0)[0][0]
# negatives y values
ax.plot(gibbsData[:idx_zero, 0], gibbsData[:idx_zero,n],'g')
# positive y values
ax.plot(gibbsData[idx_zero:, 0], gibbsData[idx_zero:,n],'r')
gibbsFig.tight_layout()
I am having a problem with waterfall. I took this chart from matplotlib site and added my own data frame with 2 simple columns with some integer numbers. My waterfall was produced but without numbers, just empty bars. I am a bit lost and I would appreciate any suggestions.
What I am trying to build is the custom waterfall that takes one dataframe with column names, values, and some values for filters like countries. I haven't found anything like that anywhere so I am trying to build my own.
import numpy as np;
import pandas as pd;
import matplotlib.pyplot as plt;
from matplotlib.ticker import FuncFormatter;
dataset = pd.read_csv('waterfall_test_data.csv')
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = dataset['columns']
data = dataset['amount']
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(15, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')
I want to efficiently calculate the average of a variable (say temperature) over multiple areas of the plane.
I essentially want to do the following.
import numpy as np
num = 10000
XYT = np.random.uniform(0, 1, (num, 3))
X = np.transpose(XYT)[0]
Y = np.transpose(XYT)[1]
T = np.transpose(XYT)[2]
size = 10
bins = np.empty((size, size))
for i in range(size):
for j in range(size):
if rescaled X,Y in bin[i][j]:
bins[i][j] = mean T
I would use pandas (although im sure you can achieve basically the same with vanilla numpy)
df = pandas.DataFrame({'x':npX,'y':npY,'z':npZ})
# solve quadrants
df['quadrant'] = (df['x']>=0)*2 + (df['y']>=0)*1
# group by and aggregate
mean_per_quadrant = df.groupby(['quadrant'])['temp'].aggregate(['mean'])
you may need to create multiple quadrant cutoffs to get unique groupings
for example (df['x']>=50)*4 + (df['x']>=0)*2 + (df['y']>=0)*1 would add an extra 2 quadrants to our group (one y>=0, and one y<0) (just make sure you use powers of 2)