I have a Python program that reads TSV data and plots it using the matplotlib library.
I feel like my code works pretty well:
import csv
from math import ceil

import matplotlib.pyplot as plt
import numpy as np

def main(compsPath: str, gibbsPath: str):
    """
    Given the file paths for comps.tsv and
    gibbs.tsv, this main function will
    produce two separate plots - one for each file.
    """
    # Read TSV data into numpy arrays,
    # slicing off the header text
    with open(compsPath, 'r') as fcomps:
        reader = csv.reader(fcomps, delimiter='\t')
        compsHeader = next(reader)
        compsData = np.array(list(reader)).astype(np.double)
    with open(gibbsPath, 'r') as fgibbs:
        reader = csv.reader(fgibbs, delimiter='\t')
        gibbsHeader = next(reader)
        gibbsData = np.array(list(reader)).astype(np.double)
    # Get data dimensions:
    #   M := number of metabolites
    #   N := number of reactions
    M = compsData.shape[1] - 1
    N = gibbsData.shape[1] - 1
    plotComps(M, compsData, compsHeader)
    plotGibbs(N, gibbsData, gibbsHeader)
    plt.show()
The plotGibbs function produces the following graphic for the TSV file I'm working with. For this graphic, N=3 (3 reactions).
I would like to indicate at what point in time each reaction becomes unfavorable (in the context of my project, this just means that the reaction stops). This occurs when the Gibbs free energy value (∆G) of the reaction is greater than or equal to 0.
I feel I could best emphasize this by color-coding the line plots my program generates: for negative ∆G values, I would like the line to be green, and for positive or zero ∆G values, I would like the line to be red.
Here is my current code for generating the Gibbs free energy plots (it does not color-code):
def plotGibbs(N: int, gibbsData: np.ndarray, gibbsHeader):
    gibbsFig = plt.figure()
    gibbsFig.suptitle("∆G˚ Yield Plotted over Time (days)")
    numCols = ceil(N / 2)
    numRows = (N // numCols) + 1
    for n in range(1, N + 1):
        ax = gibbsFig.add_subplot(numRows, numCols, n)
        ax.set_ylabel(gibbsHeader[n])
        ax.set_xlabel(gibbsHeader[0])
        ax.plot(gibbsData[:, 0], gibbsData[:, n])
    gibbsFig.tight_layout()
How could I make it so that negative values are plotted green, and non-negative values are plotted red?
You could try to find where a change of sign occurs in your data using np.where with a simple condition like gibbsData[:, n] >= 0, then plot the negative and non-negative data accordingly:
def plotGibbs(N: int, gibbsData: np.ndarray, gibbsHeader):
    gibbsFig = plt.figure()
    gibbsFig.suptitle("∆G˚ Yield Plotted over Time (days)")
    numCols = ceil(N / 2)
    numRows = (N // numCols) + 1
    for n in range(1, N + 1):
        ax = gibbsFig.add_subplot(numRows, numCols, n)
        ax.set_ylabel(gibbsHeader[n])
        ax.set_xlabel(gibbsHeader[0])
        # index where the sign change occurs for reaction n
        # (guard against reactions that never reach ∆G >= 0)
        nonneg = np.where(gibbsData[:, n] >= 0)[0]
        idx_zero = nonneg[0] if nonneg.size else len(gibbsData)
        # negative y values
        ax.plot(gibbsData[:idx_zero, 0], gibbsData[:idx_zero, n], 'g')
        # non-negative y values
        ax.plot(gibbsData[idx_zero:, 0], gibbsData[idx_zero:, n], 'r')
    gibbsFig.tight_layout()
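Note that this slices at the first sign change only. If ∆G dips back below zero later, a single split index won't capture it. A minimal sketch of an alternative using masked arrays (assuming the same gibbsData layout), so every negative run is drawn green and every non-negative run red, however many crossings there are:

import numpy as np

def plot_signed(ax, t, y):
    # matplotlib simply skips masked points, so each call draws only its own runs
    neg = np.ma.masked_where(y >= 0, y)  # visible only where y < 0
    pos = np.ma.masked_where(y < 0, y)   # visible only where y >= 0
    ax.plot(t, neg, 'g')
    ax.plot(t, pos, 'r')

Inside the subplot loop, the two ax.plot calls above would then collapse to plot_signed(ax, gibbsData[:, 0], gibbsData[:, n]).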
I'm new to Python and want to perform a rather simple task. I've got a two-dimensional point set, which is stored as binary data (i.e. (x, y)-coordinates) in a file, which I want to visualize. The output should look as in the picture below.
However, I'm somehow overwhelmed by the number of Google results on this topic, and many of them seem to be for three-dimensional point cloud visualization and/or a massive number of data points. So, if anyone could point me to a suitable solution for my problem, I would be really thankful.
EDIT: The point set is contained in a file which is formatted as follows:
0.000000000000000 0.000000000000000
1.000000000000000 1.000000000000000
1
0.020375738732779 0.026169010160356
0.050815740313746 0.023209931647163
0.072530406907906 0.023975230642589
The first data vector is the one in the line below the single "1", i.e. (0.020375738732779, 0.026169010160356). How do I read this into a vector in Python? I can open the file using f = open("pointset file")
Install matplotlib and import pyplot:
import matplotlib.pyplot as plt
Assuming this is your data:
x = [1, 2, 5, 1, 5, 7, 8, 3, 2, 6]
y = [6, 7, 1, 2, 6, 2, 1, 6, 3, 1]
If you need to, you can use a comprehension to split the coordinates into separate lists:
x = [p[0] for p in points]
y = [p[1] for p in points]
Plotting is as simple as:
plt.scatter(x=x, y=y)
Result:
Many customizations are possible.
EDIT: following question edit
In order to read the file:
x = []
y = []
with open('pointset_file.txt', 'r') as f:
    for _ in range(3):  # skip the two bounds lines and the single "1"
        next(f)
    for line in f:
        coords = line.split()
        x.append(float(coords[0]))
        y.append(float(coords[1]))
You could read your data as follows and plot it with a scatter plot. This approach is intended for a small amount of data in exactly the format you have presented, not for general CSV files.
import matplotlib.pyplot as plt

with open("pointset file") as fid:
    lines = fid.read().split("\n")

# lines[:2] look like the bounds for each axis; if so, use them in the plot
data = [[float(d) for d in line.split(" ") if d] for line in lines[3:] if line]
xs, ys = zip(*data)  # transpose the list of (x, y) pairs into two sequences
plt.scatter(xs, ys)
plt.show()
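A minimal alternative sketch, assuming the same file layout, that lets numpy do the parsing in one call:

import numpy as np
import matplotlib.pyplot as plt

data = np.loadtxt("pointset file", skiprows=3)  # skip the bounds lines and the "1"
plt.scatter(data[:, 0], data[:, 1])
plt.show()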
Assuming you want a plot looking pretty much exactly like the sample image you gave, and you want the plot to display the data with both axes in equal proportion, one could use a general-purpose multimedia library like pygame to achieve this:
#!/usr/bin/env python3
import sys

import pygame

# windows will never be larger than this in their largest dimension
MAX_WINDOW_SIZE = 400
BG_COLOUR = (255, 255, 255,)
FG_COLOUR = (0, 0, 0,)
DATA_POINT_SIZE = 2

pygame.init()

if len(sys.argv) < 2:
    print('Error: need filename to read data from')
    pygame.quit()
    sys.exit(1)
else:
    data_points = []
    # read in data points from file first
    with open(sys.argv[1], 'r') as file:
        [next(file) for _ in range(3)]  # discard first 3 lines of file
        # now the rest of the file contains actual data to process
        data_points.extend(tuple(float(x) for x in line.split()) for line in file)
    # file read complete. now let's find the min and max bounds of the data
    top_left = [float('+Inf'), float('+Inf')]
    bottom_right = [float('-Inf'), float('-Inf')]
    for datum in data_points:
        if datum[0] < top_left[0]:
            top_left[0] = datum[0]
        if datum[1] < top_left[1]:
            top_left[1] = datum[1]
        if datum[0] > bottom_right[0]:
            bottom_right[0] = datum[0]
        if datum[1] > bottom_right[1]:
            bottom_right[1] = datum[1]
    # calculate space dimensions
    space_dimensions = (bottom_right[0] - top_left[0], bottom_right[1] - top_left[1])
    # take the biggest of the X or Y dimensions of the point space and scale it
    # up to our maximum window size
    biggest = max(space_dimensions)
    scale_factor = MAX_WINDOW_SIZE / biggest  # all points will be scaled up by this factor
    # screen dimensions (set_mode wants integers)
    screen_dimensions = tuple(int(sd * scale_factor) for sd in space_dimensions)
    # basic init and draw all points to screen
    display = pygame.display.set_mode(screen_dimensions)
    display.fill(BG_COLOUR)
    for point in data_points:
        # translate and scale each point
        x = point[0] * scale_factor - top_left[0] * scale_factor
        y = point[1] * scale_factor - top_left[1] * scale_factor
        pygame.draw.circle(display, FG_COLOUR, (int(x), int(y)), DATA_POINT_SIZE)
    pygame.display.update()
    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit(0)
        pygame.time.wait(50)
Execute this script and pass the name of the file which holds your data in as the first argument. It will spawn a window with the data points displayed.
I generated a bunch of uniformly distributed random x,y points to test it, with:
from random import random
for _ in range(1000):
print(random(), random())
This produces a window looking like the following:
If the space your data points are within is not of square size, the window shape will change to reflect this. The largest dimension of the window, either width or height, will always stay at a specified size (I used 400px as a default in my demo).
Admittedly, this is not the most elegant or concise solution, and it reinvents the wheel a little; however, it gives you the most control over how your data points are displayed, and it handles both reading the file data and displaying it.
To read your file:
import pandas as pd
import numpy as np
df = pd.read_csv('your_file',
                 sep=r'\s+',
                 header=None,
                 skiprows=3,
                 names=['x', 'y'])
For now I've created a random dataset
import random
df = pd.DataFrame({'x': [random.uniform(0, 1) for n in range(100)],
                   'y': [random.uniform(0, 1) for n in range(100)]})
I prefer Plotly for any kind of figure:
import plotly.express as px
fig = px.scatter(df, x='x', y='y')
fig.show()
From here you can easily update labels, colors, etc.
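For example, a minimal sketch of that kind of customization (the marker style and axis titles here are just illustrative):

fig.update_traces(marker=dict(size=4, color='black'))
fig.update_layout(title='Point set',
                  xaxis_title='x coordinate',
                  yaxis_title='y coordinate')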
I want to efficiently calculate the average of a variable (say temperature) over multiple areas of the plane.
I essentially want to do the following:
import numpy as np

num = 10000
XYT = np.random.uniform(0, 1, (num, 3))
X = np.transpose(XYT)[0]
Y = np.transpose(XYT)[1]
T = np.transpose(XYT)[2]

size = 10
bins = np.empty((size, size))
for i in range(size):
    for j in range(size):
        if rescaled X, Y in bin[i][j]:  # pseudocode
            bins[i][j] = mean T         # pseudocode
I would use pandas (although I'm sure you can achieve basically the same with vanilla numpy):
import pandas

df = pandas.DataFrame({'x': npX, 'y': npY, 'temp': npZ})  # column named so the groupby below finds it
# solve quadrants
df['quadrant'] = (df['x'] >= 0) * 2 + (df['y'] >= 0) * 1
# group by and aggregate
mean_per_quadrant = df.groupby(['quadrant'])['temp'].aggregate(['mean'])
You may need to create multiple quadrant cutoffs to get unique groupings.
For example, (df['x']>=50)*4 + (df['x']>=0)*2 + (df['y']>=0)*1 would add an extra two quadrants to our grouping (one for y>=0 and one for y<0); just make sure you use powers of 2.
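For the full size x size grid from the question (rather than quadrants), a minimal numpy sketch, assuming X, Y, and T are defined as in the question and the coordinates lie in [0, 1):

import numpy as np

num = 10000
XYT = np.random.uniform(0, 1, (num, 3))
X, Y, T = XYT.T

size = 10
# integer bin index of each point along each axis (clipped so 1.0 can't overflow)
ix = np.minimum((X * size).astype(int), size - 1)
iy = np.minimum((Y * size).astype(int), size - 1)

sums = np.zeros((size, size))
counts = np.zeros((size, size))
np.add.at(sums, (ix, iy), T)    # accumulate T per bin
np.add.at(counts, (ix, iy), 1)  # count points per bin
bins = sums / np.where(counts > 0, counts, 1)  # mean T per bin (0 where empty)

scipy.stats.binned_statistic_2d(X, Y, T, statistic='mean', bins=size) computes the same thing in one call.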
I have a file with velocity magnitude data and vorticity magnitude data from a fluid simulation.
I want to find the frequency of these two data sets.
My code:
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import re
import math
import matplotlib.pyplot as plt
import numpy as np

probeU1 = []
probeV1 = []

# this creates an array containing all the timesteps, cutting off the first
# 180 because the system has to stabilize
number = [round(x * 0.1, 1) for x in range(180, 301)]

# this loop goes over the different time directories and reads the velocity file
for i in range(len(number)):
    filenamepath = "/Refinement/Vorticity4/probes/" + str(number[i]) + "/U"
    data = open(filenamepath, "r")
    temparray = []
    # removes all the formatting around the data
    for line in data:
        if line.startswith('#'):
            continue
        else:
            line = re.sub('[()]', "", line)
            values = line.split()
            #print values[1], values[2]
            xco = values[1::3]
            yco = values[2::3]
            # here it extracts all the velocity data from all the different probes
            for i in range(len(xco)):
                floatx = float(xco[i])
                floaty = float(yco[i])
                temp1 = math.pow(floatx, 2)
                temp2 = math.pow(floaty, 2)
                #print temp2, temp1
                temp3 = temp1 + temp2
                # takes the magnitude of the velocity
                temp4 = math.sqrt(temp3)
                #print temp4
                temparray.append(temp4)
    probeU1.append(temparray)

#print probeU1[0]
#print len(probeU1[0])

# this loop goes over the different time directories and reads the vorticity file
for i in range(len(number)):
    filenamepath = "/Refinement/Vorticity4/probes/" + str(number[i]) + "/vorticity"
    data = open(filenamepath, "r")
    # print data.read()
    temparray1 = []
    for line in data:
        if line.startswith('#'):
            continue
        else:
            line = re.sub('[()]', "", line)
            values = line.split()
            zco = values[3::3]
            # because of the two-dimensionality, the z-component of the
            # vorticity is already the magnitude
            for i in range(len(zco)):
                abso = float(zco[i])
                add = np.abs(abso)
                temparray1.append(add)
    probeV1.append(temparray1)

# Old code block to display the data and check that it made a wave pattern (which it did):
#
##Printing all probe data from 180-300 in one graph (unintelligible)
#for i in range(len(probeU1[1])):
#    B = []
#    for l in probeU1:
#        B.append(l[i])
#    ## print 'B=', B
#    ## print i
#    plt.plot(number, B)
#
#plt.ylabel('magnitude of velocity')
#plt.show()
#
##Printing all probe data from 180-300 in one graph (unintelligible)
#for i in range(len(probeV1[1])):
#    R = []
#    for l in probeV1:
#        R.append(l[i])
#    ## print 'R=', R
#    ## print i
#    plt.plot(number, R)
#
#plt.ylabel('magnitude of vorticity')
#plt.show()

# Here is where the magic happens (I hope)
ans = []
for i in range(len(probeU1[1])):
    b = []
    # probeU1 is a nested list, because there are 117 different probes, which
    # all have the data from timesteps 180-301
    for l in probeU1:
        b.append(l[i])
    # the signal was not oscillating around 0, so move it there by subtracting the mean
    B = b - np.mean(b)
    # here the fft happens
    u = np.fft.fft(B)
    # this should calculate the frequencies?
    freq = np.fft.fftfreq(len(B), d=(number[1] - number[0]))
    # if I'm not mistaken, this finds the peak frequency for 1 probe and passes it to another list
    val = np.argmax(np.abs(u))
    ans.append(np.abs(freq[val]))
    plt.plot(freq, np.abs(u))

#print np.mean(ans)
plt.xlabel('frequency?')
plt.savefig('velocity frequency')
plt.show()

# just a duplicate of the block above, but for the vorticity data
ans1 = []
for i in range(len(probeV1[1])):
    c = []
    for l in probeV1:  # the original had probeU1 here, which looks like a copy-paste slip
        c.append(l[i])
    C = c - np.mean(c)
    y = np.fft.fft(C)
    freq1 = np.fft.fftfreq(len(C), d=(number[1] - number[0]))
    val = np.argmax(np.abs(y))
    ans1.append(np.abs(freq1[val]))
    plt.plot(freq1, np.abs(y))

#print np.mean(ans1)
plt.ylabel('frequency?')
plt.savefig('vorticity frequency')
plt.show()

data.close()
My data contains 117 probes, each having its own 121 points of velocity magnitude data.
My aim is to find the dominant frequency for each probe, then collect all of those and plot them in a histogram.
My question is about the part where it says "this is where the magic happens". I believe the FFT itself is already working correctly:
y = np.fft.fft(C)
freq1 = np.fft.fftfreq(len(C), d=(number[1] - number[0]))
If I'm not mistaken, the freq1 list should contain all the frequencies for a given probe. I've checked this list visually, and the number of different frequencies is very high (20+), so the signal is probably very noisy.

# if I'm not mistaken, this finds the peak frequency for 1 probe and passes it to another list
val = np.argmax(np.abs(y))
ans1.append(np.abs(freq1[val]))

This part should, in theory, take the strongest signal from one probe and put it in the "ans" list. But I'm a bit confused as to how I can correctly identify the right frequency, as there should in theory be one main frequency. How can I correctly estimate the "main" frequency from all this data, given all the noise?
For reference, I'm modeling a von Kármán vortex street and I'm looking for the frequency of vortex shedding: https://en.wikipedia.org/wiki/K%C3%A1rm%C3%A1n_vortex_street
Can anyone help me on how to solve this?
The line

freq1 = np.fft.fftfreq(len(C), d=(number[1] - number[0]))

only generates the index-like array

freq1 = [0, 1, ..., len(C)/2 - 1, -len(C)/2, ..., -1] / (d*len(C))

which is useful for computing your frequencies array as

freq[i] = freq1[i] * alpha

where alpha is your basic wavenumber, computed as

alpha = 1/Ts

with Ts being your sampling period. I think your array of frequencies is so high because freq1 is not scaled.

Note that if you sampled your data using different time steps, you will need to interpolate it onto an evenly spaced domain, using numpy.interp for example.

To estimate the main frequency, just find the index where the FFT-transformed variable is highest and relate that index to freq[i].
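To make that estimate more robust against noise, one option is to average the amplitude spectra over all probes before picking the peak, and to skip the DC bin so the zero-frequency component cannot win. A minimal sketch, assuming probeU1 and number are defined as in the question and every timestep holds the same number of probe readings:

import numpy as np

dt = number[1] - number[0]
# probeU1[timestep][probe] -> transpose to shape (n_probes, n_timesteps)
signals = np.array(probeU1, dtype=float).T
signals = signals - signals.mean(axis=1, keepdims=True)  # remove each probe's mean

spectra = np.abs(np.fft.rfft(signals, axis=1))  # one-sided amplitude spectra
freqs = np.fft.rfftfreq(signals.shape[1], d=dt)

mean_spectrum = spectra.mean(axis=0)     # average spectrum over all probes
peak = np.argmax(mean_spectrum[1:]) + 1  # skip the DC bin at index 0
print("dominant frequency:", freqs[peak])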
I'm trying to plot data, and in order to check my code I'm comparing the resulting plots with plots already generated in Matlab. I'm encountering several issues with this:

Generally, the parsing of RINEX files works, and the overall pattern of the data looks similar to what the Matlab scripts plot. However, there are small deviations in the data that should become apparent when zooming in, i.e. when using a smaller time series, for example plotting over a particular 2-hour period rather than 24 hours. In Matlab this small discrepancy can be seen and a polynomial fit applied. In the Python plots (the first plot shown below), however, the curve over this two-hour period appears "smooth" and does not deviate at all, unlike in the Matlab script (the second plot shows the data as the blue line against the red polyfit line; the blue line shows a slight discrepancy at x = 9.4). The Matlab script is assumed correct, as this deviation is caused by seismic activity that temporarily disrupts the ionosphere. Please refer to the plots below:

The third plot is from Matlab, and is simply the polyfit minus the live data.

It is therefore unclear how this data is being plotted on the axes by the Python script, because the data appears too smooth, and whether my code (see below) is wrong and somehow "smooths" out the data:
# Calculating by looping through
for sv in range(32):
    sat = self.obs_data_chunks_dataframe[sv, :]
    #print "sat.index_{0}: {1}".format(sv+1, sat.index)
    phi1 = sat['L1'] * LAMBDA_1  # change units of L1 to meters
    phi2 = sat['L2'] * LAMBDA_2  # change units of L2 to meters
    pr1 = sat['P1']
    pr2 = sat['P2']

    # CALCULATION: teqc calculation
    iono_teqc = COEFF * (pr2 - pr1) / 1000000  # divide to make values smaller (tbc)
    print "iono_teqc_{0}: {1}".format(sv+1, iono_teqc)

    # PLOTTING
    # Plotting of the data
    plt.plot(sat.index, iono_teqc, label='teqc')
    plt.xlabel('Time (UTC)')
    plt.ylabel('Ionosphere Delay (meters)')
    plt.title("Ionosphere Delay on {0} for Satellite {1}.".format(self.date, sv+1))
    plt.legend()
    ax = plt.gca()
    ax.ticklabel_format(useOffset=False)
    plt.grid()
    if sys.platform.startswith('win'):
        plt.savefig(winpath + '\Figure_SV{0}'.format(sv+1))
    elif sys.platform.startswith('darwin'):
        plt.savefig(macpath + 'Figure_SV{0}'.format(sv+1))
    plt.close()
Following on from point 1, the polynomial fitting code below does not run the way I'd like, so I'm overlooking something here. I assume this has to do with the data used on the x- and y-axes, but I can't pinpoint exactly what. Would anyone know where I'm going wrong here?
# Zoomed-in plots
if sv == 19:
    # Plotting of the data
    plt.plot(sat.index, iono_teqc, label='teqc')  # sat.index to plot time in UTC
    plt.xlim(8, 10)
    plt.xlabel('Time (UTC)')
    plt.ylabel('Ionosphere Delay (meters)')
    plt.title("Ionosphere Delay on {0} for Satellite {1}.".format(self.date, sv+1))
    plt.legend()
    ax = plt.gca()
    ax.ticklabel_format(useOffset=False)
    plt.grid()

    # Polynomial fitting
    coefficients = np.polyfit(sat.index, iono_teqc, 2)
    plt.plot(coefficients)

    if sys.platform.startswith('win'):
        #os.path.join(winpath, 'Figure_SV{0}'.format(sv+1))
        plt.savefig(winpath + '\Zoom_SV{0}'.format(sv+1))
    elif sys.platform.startswith('darwin'):
        plt.savefig(macpath + 'Zoom_SV{0}'.format(sv+1))
    plt.close()
My RINEX file comprises 32 satellites. However, when trying to generate the plots for all 32, I receive:

IndexError: index 31 is out of bounds for axis 0 with size 31

Changing the code below to 31 solves this partly, but excludes the 32nd satellite; I'd like to plot satellite 32 as well. The functions for parsing and formatting the data are given below:
def read_obs(self, RINEXfile, n_sat, sat_map):
    obs = np.empty((TOTAL_SATS, len(self.obs_types)), dtype=np.float64) * np.NaN
    lli = np.zeros((TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
    signal_strength = np.zeros((TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
    for i in range(n_sat):
        # Join together observations for a single satellite if split across lines.
        obs_line = ''.join(padline(RINEXfile.readline()[:-1], 16) for _ in range((len(self.obs_types) + 4) / 5))
        #obs_line = ''.join(padline(RINEXfile.readline()[:-1], 16) for _ in range(2))
        #while obs_line
        for j in range(len(self.obs_types)):
            obs_record = obs_line[16*j:16*(j+1)]
            obs[sat_map[i], j] = floatornan(obs_record[0:14])
            lli[sat_map[i], j] = digitorzero(obs_record[14:15])
            signal_strength[sat_map[i], j] = digitorzero(obs_record[15:16])
    return obs, lli, signal_strength
def read_data_chunk(self, RINEXfile, CHUNK_SIZE=10000):
    obss = np.empty((CHUNK_SIZE, TOTAL_SATS, len(self.obs_types)), dtype=np.float64) * np.NaN
    llis = np.zeros((CHUNK_SIZE, TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
    signal_strengths = np.zeros((CHUNK_SIZE, TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
    epochs = np.zeros(CHUNK_SIZE, dtype='datetime64[us]')
    flags = np.zeros(CHUNK_SIZE, dtype=np.uint8)
    i = 0
    while True:
        hdr = self.read_epoch_header(RINEXfile)
        if hdr is None:
            break
        epoch_time, flags[i], sats = hdr
        #epochs[i] = np.datetime64(epoch_time)
        epochs[i] = epoch_time
        sat_map = np.ones(len(sats)) * -1
        for n, sat in enumerate(sats):
            if sat[0] == 'G':
                sat_map[n] = int(sat[1:]) - 1
        obss[i], llis[i], signal_strengths[i] = self.read_obs(RINEXfile, len(sats), sat_map)
        i += 1
        if i >= CHUNK_SIZE:
            break
    return obss[:i], llis[:i], signal_strengths[:i], epochs[:i], flags[:i]
def read_data(self, RINEXfile):
    obs_data_chunks = []
    while True:
        obss, _, _, epochs, _ = self.read_data_chunk(RINEXfile)
        epochs = epochs.astype(np.int64)
        epochs = np.divide(epochs, float(3600.000))
        if obss.shape[0] == 0:
            break
        obs_data_chunks.append(pd.Panel(
            np.rollaxis(obss, 1, 0),
            items=['G%02d' % d for d in range(1, 33)],
            major_axis=epochs,
            minor_axis=self.obs_types
        ).dropna(axis=0, how='all').dropna(axis=2, how='all'))
    self.obs_data_chunks_dataframe = obs_data_chunks[0]
Any suggestions?
Cheers, pymat.
I managed to solve question 1; it was a conversion issue in my calculation that I had overlooked. The other two points, however, remain open...
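For what it's worth on the polynomial fitting point: np.polyfit only returns the coefficients, so plt.plot(coefficients) draws the three coefficients themselves rather than the fitted curve. A minimal sketch of evaluating and overlaying the fit, assuming sat.index and iono_teqc as in the snippets above:

import numpy as np
import matplotlib.pyplot as plt

coefficients = np.polyfit(sat.index, iono_teqc, 2)
fit = np.polyval(coefficients, sat.index)  # evaluate the polynomial at the x values

plt.plot(sat.index, iono_teqc, label='teqc')
plt.plot(sat.index, fit, 'r', label='polyfit')
plt.plot(sat.index, iono_teqc - fit, label='residual (data - fit)')
plt.legend()
plt.show()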
I'm trying to change the colour of a line in matplotlib subject to a condition.
Basically, I take a rolling average and a rolling standard deviation. I plot the rolling average, but I would like to change the line colour wherever the standard deviation corresponding to that average is over the threshold I set; not the colour of the whole line, just the bits that are over the threshold. Mostly my data is set up using pandas.
Alternatively I could shade it instead.
This link is useful, although I cannot figure out how to apply it to my situation.
http://nbviewer.ipython.org/urls/raw.github.com/dpsanders/matplotlib-examples/master/colorline.ipynb
EDIT: code below (admittedly overly complicated for the question; I know the functions are too long at the moment):
def av_rel_track(rel_values):
    #blade==0
    avg_rel_track = []
    for i in range(0, int(nb)):
        av_values = Series([])
        rel_blade = rel_values[i]
        rel_blade = rel_blade.fillna(0)
        av_values = []
        for num in range(0, int(navg)):
            av_values.append(np.nan)
        # loops over each revolution (row)
        for rev in range(int(navg), len(rel_blade)):
            # select section to be number of averages long
            N = rev - int(navg) + 1
            section = rel_blade.loc[N:rev]
            # check section for five consecutive zeros
            checker = check5(section)
            # if there are five consecutive zeros, av_value is zero
            if checker == True:
                av_value = 0
            else:
                # finds the number of zeros in the section
                nz = len(section) - len(section.nonzero()[0])
                while nz > 0:
                    # whilst there is a zero, extend the averaging window by one
                    N = N - 1
                    if N < 0:
                        break
                    new_val = rel_blade.ix[N]
                    section = rel_blade[N:rev+1]
                    # checks if the new value is zero
                    if new_val != 0:
                        nz = nz - 1
                    # checks the extended section does not contain 5 consecutive zeros
                    checker = check5(section)
                if checker == True:
                    av_value = 0
                else:
                    # sets av_value to 0 if the range extends beyond the first value of rel_values
                    if N < 0:
                        av_value = 0
                    else:
                        # calculates the mean of the section (not including NaNs)
                        section = zero_to_nan(section)
                        av_value = stats.nanmean(section)
            av_values.append(av_value)
        av_values = zero_to_nan(av_values)
        rel_values["a%s" % i] = av_values
    av_track = DataFrame({1: rel_values['a0'], 2: rel_values['a1'], 3: rel_values['a2'], 4: rel_values['a3'], 5: rel_values['a4']})
    return av_track
def sd_rel_track(rel_values):
    for i in range(0, int(nb)):
        sd_values = Series([])
        rel_blade = rel_values[i]
        rel_blade = rel_blade.fillna(0)
        sd_values = []
        for num in range(0, int(navg)):
            sd_values.append(np.nan)
        # loops over each revolution (row)
        for rev in range(int(navg), len(rel_blade)):
            # select section to be number of averages long
            N = rev - int(navg) + 1
            section = rel_blade.loc[N:rev]
            # check section for five consecutive zeros
            checker = check5(section)
            # if there are five consecutive zeros, sd_value is zero
            if checker == True:
                sd_value = 0
            else:
                # finds the number of zeros in the section
                nz = len(section) - len(section.nonzero()[0])
                while nz > 0:
                    # whilst there is a zero, extend the averaging window by one
                    N = N - 1
                    if N < 0:
                        break
                    new_val = rel_blade.ix[N]
                    section = rel_blade[N:rev+1]
                    # checks if the new value is zero
                    if new_val != 0:
                        nz = nz - 1
                    # checks the extended section does not contain 5 consecutive zeros
                    checker = check5(section)
                if checker == True:
                    sd_value = 0
                else:
                    # sets sd_value to 0 if the range extends beyond the first value of rel_values
                    if N < 0:
                        sd_value = 0
                    else:
                        # calculates the standard deviation of the section (not including NaNs)
                        section = zero_to_nan(section)
                        sd_value = stats.nanstd(section)
            sd_values.append(sd_value)
        sd_values = zero_to_nan(sd_values)
        rel_values["sd%s" % i] = sd_values
    sd_track = DataFrame({1: rel_values['sd0'], 2: rel_values['sd1'], 3: rel_values['sd2'], 4: rel_values['sd3'], 5: rel_values['sd4']})
    sumsd = sd_track.sum(axis=1)
    return sumsd
def plot():
    plt.figure()
    plt.plot(av_values)
    plt.show()
    plt.figure()
    plt.plot(sd_values)
    plt.show()
Using In[4] of http://nbviewer.ipython.org/urls/raw.github.com/dpsanders/matplotlib-examples/master/colorline.ipynb, you can add something like:
import math

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 4. * np.pi, 1000)
y = np.sin(x)
z = np.zeros(1000)
for i in range(1000):
    if math.cos(x[i]) > 0.7:
        z[i] = 1

fig, axes = plt.subplots()
colorline(x, y, z)  # colorline is defined in the linked notebook
plt.xlim(x.min(), x.max())
plt.ylim(-1.0, 1.0)
plt.show()
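If you'd rather not depend on the notebook's colorline helper, a minimal sketch of the same idea using matplotlib's own LineCollection, coloring segments red wherever a condition holds (the cosine test here is just a stand-in for your rolling standard deviation exceeding its threshold):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection

x = np.linspace(0, 4 * np.pi, 1000)
y = np.sin(x)
over = np.cos(x) > 0.7  # stand-in for "std dev over threshold"

# build one line segment per consecutive pair of points
points = np.column_stack([x, y]).reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)

# colour each segment by the condition at its starting point
colors = ['red' if o else 'blue' for o in over[:-1]]
lc = LineCollection(segments, colors=colors)

fig, ax = plt.subplots()
ax.add_collection(lc)
ax.set_xlim(x.min(), x.max())
ax.set_ylim(-1.1, 1.1)
plt.show()

For the shading alternative mentioned in the question, ax.fill_between(x, -1.1, 1.1, where=over, alpha=0.3) marks the same regions as translucent bands instead of recolouring the line.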