manipulating a .dat file and plotting cumulative data - python

I want to plot a quantity from a tedious-to-look-at .dat file. The #time column in the file extends from 0s to 70s, but I need to take a closer look at the data (Nuclear Energy, in this case) from 25s to 35s.
I was wondering if there is a way I can manipulate the time column and corresponding other columns to record and plot data only for the required time span.
I already have some code which does the job for me for 0-70s:
import matplotlib
matplotlib.use('Agg')
import os
import numpy as np
import matplotlib.pyplot as plt
import string
import math
# reads from flash.dat
def getQuantity(folder, basename, varlist):
    """Read selected columns from ``<folder>/<basename>.dat``.

    Lines containing ``#`` (header / restart lines) or ``Inf`` are skipped,
    as are blank lines. Every remaining line is split on whitespace and the
    fields at the indices listed in ``varlist`` are converted to ``float``.

    The column whose index equals the module-level constant
    ``NUCLEAR_ENERGY`` is accumulated: each stored value is the running sum
    of all values read so far. ``NUCLEAR_ENERGY`` must be defined at module
    level before this function is called.

    Parameters:
        folder: directory containing the data file.
        basename: file name without the ``.dat`` extension.
        varlist: sequence of zero-based column indices to extract.

    Returns:
        A list of lists; ``result[i]`` holds the values for ``varlist[i]``.
    """
    quantities = [[] for _ in varlist]
    with open(folder + "/" + basename + ".dat", 'r') as f:
        for line in f:
            # The original test `('#' or 'Inf') in line` only ever checked
            # for '#', because `'#' or 'Inf'` evaluates to '#'.
            if '#' in line or 'Inf' in line:
                continue
            fields = line.split()  # split once per line, not once per column
            if not fields:
                continue  # blank line: nothing to read
            for i, column in enumerate(varlist):
                value = float(fields[column])
                if column == NUCLEAR_ENERGY and quantities[i]:
                    # running (cumulative) sum for the nuclear-energy column
                    value += quantities[i][-1]
                quantities[i].append(value)
    return quantities
# end def getQuantity
# Create the plot: one subplot per entry of `variables`, plotted against time.
plt.figure(1)

TIME = 0             # column index passed to getQuantity for the time values
NUCLEAR_ENERGY = 18  # column index that getQuantity accumulates into a running sum
labels = ["time", "Nuclear Energy"]
flashFolder1 = '/home/trina/Pictures'  # should be the flash NOT the flash/object folder.
lab1 = '176'
filename = 'flash'  # 'flash' for flash.dat
nHorizontal = 1  # number of Plots in Horizontal Direction. Vertical Direction is set by program.
outputFilename = 'QuantityPlots_Nuclear.png'
variables = [NUCLEAR_ENERGY]

# Adjustments to set the size.
# int(...) because math.ceil returns a float on Python 2 and plt.subplot
# needs integer grid dimensions.  (= 6 for 16 = len(variables) & nHorizontal = 3.)
nVertical = int(math.ceil(float(len(variables)) / nHorizontal))
F = plt.gcf()  # get figure
DPI = F.get_dpi()
DefaultSize = F.get_size_inches()
# Scale the figure so every subplot keeps the default single-plot size.
F.set_size_inches(DefaultSize[0] * nHorizontal, DefaultSize[1] * nVertical)

variables.insert(0, TIME)  # time is needed as well
data1 = getQuantity(flashFolder1, filename, variables)
time1 = np.array(data1[0])  # time is the first column

for n in range(1, len(variables)):  # index 0 is time, so start at 1
    ax = plt.subplot(nVertical, nHorizontal, n)
    lowest = min(data1[n])
    highest = max(data1[n])
    # Linear axis when the data goes negative or spans less than two decades;
    # otherwise logarithmic. `highest == 0` guards the division for an
    # all-zero column, which previously raised ZeroDivisionError.
    use_linear = (lowest < 0.0
                  or highest == 0
                  or abs(lowest / highest) >= 1.e-2)
    if use_linear:
        plt.plot(time1, data1[n], label=lab1)  # , label = labels[variables[n]])
    else:
        plt.semilogy(time1, data1[n], label=lab1)  # , label = labels[variables[n]])
    legend = ax.legend(loc='upper right', frameon=False)

plt.savefig(outputFilename)
Here is the figure I can produce from this code:
and for your convenience I am also sharing the .dat file:
https://www.dropbox.com/s/w4jbxmln9e83355/flash.dat?dl=0
Your suggestions are most appreciated.

UPDATE: plot cumulative nuclear energy:
# Keep only the rows with 25 <= time <= 35 and use `time` as the index.
# `df` must provide the columns `time` and `Nuclear_Energy`.
x = df.query('25 <= time <= 35').set_index('time')
# Running total of Nuclear_Energy within the selected window.
x['cum_nucl_energy'] = x.Nuclear_Energy.cumsum()
x.cum_nucl_energy.plot(figsize=(12,10))
Old answer:
Using Pandas module
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
fn = r'D:\temp\.data\flash.dat'
# Read column 0 and column 18 only; skip the first line and treat the text
# 'Infinity' as NaN. The separator is a regular expression, so it is written
# as a raw string to avoid an invalid-escape warning for '\s'.
df = pd.read_csv(fn, sep=r'\s+', usecols=[0, 18], header=None, skiprows=[0], na_values=['Infinity'])
df.columns = ['time', 'Nuclear_Energy']
# Restrict to the 25-35 window and plot Nuclear_Energy against time.
df.query('25 <= time <= 35').set_index('time').plot(figsize=(12,10))
# Save before show(): with interactive backends the figure may be destroyed
# once the window shown by show() is closed, so a later savefig() can write
# an empty image.
plt.savefig('d:/temp/out.png')
plt.show()
Result:
Explanation:
In [43]: pd.options.display.max_rows
Out[43]: 50
In [44]: pd.options.display.max_rows = 12
In [45]: df
Out[45]:
time Nuclear_Energy
0 0.000000e+00 0.000000e+00
1 1.000000e-07 -4.750169e+29
2 2.200000e-07 -5.699325e+29
3 3.640000e-07 -6.838392e+29
4 5.368000e-07 -8.206028e+29
5 7.441600e-07 -9.837617e+29
... ... ...
10210 6.046702e+01 7.160630e+44
10211 6.047419e+01 7.038907e+44
10212 6.048137e+01 6.934600e+44
10213 6.048856e+01 6.847015e+44
10214 6.049577e+01 6.765220e+44
10215 6.050298e+01 6.661930e+44
[10216 rows x 2 columns]
In [46]: df.query('25 <= time <= 35')
Out[46]:
time Nuclear_Energy
4534 25.001663 1.559398e+43
4535 25.006781 1.567793e+43
4536 25.011900 1.575844e+43
4537 25.017021 1.583984e+43
4538 25.022141 1.592015e+43
4539 25.027259 1.600200e+43
... ... ...
6521 34.966427 8.181516e+41
6522 34.972926 8.538806e+41
6523 34.979425 8.913695e+41
6524 34.985925 9.304403e+41
6525 34.992429 9.731310e+41
6526 34.998941 1.019862e+42
[1993 rows x 2 columns]
In [47]: df.query('25 <= time <= 35').set_index('time')
Out[47]:
Nuclear_Energy
time
25.001663 1.559398e+43
25.006781 1.567793e+43
25.011900 1.575844e+43
25.017021 1.583984e+43
25.022141 1.592015e+43
25.027259 1.600200e+43
... ...
34.966427 8.181516e+41
34.972926 8.538806e+41
34.979425 8.913695e+41
34.985925 9.304403e+41
34.992429 9.731310e+41
34.998941 1.019862e+42
[1993 rows x 1 columns]

Related

Adding a 45 degree line to a time series stock data plot

I guess this is supposed to be simple, but I can't seem to make it work.
I have some stock data
import pandas as pd
import numpy as np
# Daily index from 2018-06-01 to 2018-08-01 inclusive (62 dates) with one
# random value in [0, 100) per date.
df = pd.DataFrame(index=pd.date_range(start = "06/01/2018", end = "08/01/2018"),
data = np.random.rand(62)*100)
I am doing some analysis on it, this results of my drawing some lines on the graph.
And I want to plot a 45 line somewhere on the graph as a reference for lines I drew on the graph.
What I have tried is
# NOTE(review): under Python 3 `len(df)/20` is a float; tail() presumably
# needs an integer row count (e.g. `len(df)//20`) - confirm.
x = df.tail(len(df)/20).index
# NOTE(review): `x` is an index object at this point, not a DataFrame;
# verify that reset_index() is available on it.
x = x.reset_index()
# NOTE(review): the sample frame built above defines no `adj_close` column;
# this line presumably targets the real stock data - confirm.
x_first_val = df.loc[x.loc[0].date].adj_close
In order to get some point and then use slope = 1 and calculate y values.. but this sounds all wrong.
Any ideas?
Here is a possibility:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  # required by plt.legend()/plt.ylabel() below

df = pd.DataFrame(index=pd.date_range(start="06/01/2018", end="08/01/2018"),
                  data=np.random.rand(62)*100,
                  columns=['data'])

# Get values for the time: dates strictly between 2018-06-18 and 2018-07-21.
index_range = df.index[('2018-06-18' < df.index) & (df.index < '2018-07-21')]

# get the timestamps in nanoseconds (since epoch)
timestamps_ns = index_range.astype(np.int64)

# convert it to a relative number of days (for example, could be seconds)
time_day = (timestamps_ns - timestamps_ns[0]) / 1e9 / 60 / 60 / 24

# Define y-data for a line:
slope = 3  # unit: "something" per day
something = time_day * slope
trendline = pd.Series(something, index=index_range)

# Graph: the data and the straight line share one set of axes.
df.plot(label='data', alpha=0.8)
trendline.plot(label='some trend')
plt.legend()
plt.ylabel('something')
plt.show()
which gives:
edit - first answer, using dayofyear instead of the timestamps:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  # required by plt.legend()/plt.ylabel() below

df = pd.DataFrame(index=pd.date_range(start="06/01/2018", end="08/01/2018"),
                  data=np.random.rand(62)*100,
                  columns=['data'])

# Define data for a line:
slope = 3  # unit: "something" per day
index_range = df.index[('2018-06-18' < df.index) & (df.index < '2018-07-21')]
dayofyear = index_range.dayofyear  # it will not work around the new year...
dayofyear = dayofyear - dayofyear[0]  # days elapsed since the first selected date
something = dayofyear * slope
trendline = pd.Series(something, index=index_range)

# Graph: the data and the straight line share one set of axes.
df.plot(label='data', alpha=0.8)
trendline.plot(label='some trend')
plt.legend()
plt.ylabel('something')
plt.show()

Converting relative time from CSV file into absolute time

import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import Rbf, InterpolatedUnivariateSpline
# Load three columns of the instrument export as a structured array with the
# field names given in `names`.
data = np.genfromtxt('FTIR Data.csv', skip_header=1, delimiter=',', usecols=(1,2,3), names=['Time','Peakat2188cm1', 'water'] )
x = data['Time']
y1 = data['Peakat2188cm1']
y2 = data['water']

fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twinx()  # second y-axis sharing the same x-axis

# Interpolate both series onto 100 evenly spaced x values.
ius = InterpolatedUnivariateSpline
xs = np.linspace(x.min(), x.max(), 100)
s1 = ius(x, y1)
s2 = ius(x, y2)
ys1 = s1(xs)
ys2 = s2(xs)

# Each curve goes on the axis that carries its label. Previously both were
# drawn on ax2, leaving the 'Peak at 2188 cm-1' axis empty.
ax1.plot(xs, ys1)
# Separate Axes restart the colour cycle, so pick a second colour explicitly.
ax2.plot(xs, ys2, color='C1')

ax1.set_ylabel('Peak at 2188 cm-1')
ax2.set_ylabel('water')
ax1.set_xlabel('RT (mins)')
plt.title('RT Vs Conc')
This is my code for reading data from a CSV file exported from my instrument. In Excel, I manually converted the relative time into time in minutes and got the right plot. But I want to convert the relative time directly in Python when reading the relative-time column from the CSV file. I have tried different examples but couldn't get it to work. I am very new to Python, so can anyone please help by editing my code? My actual data is in the following format. (The code above plots absolute time, i.e. the Time column that I had already converted in Excel before plotting in matplotlib; the resulting plot is shown below the data.)
Relative Time,Peak at 2188 cm-1,water
00:00:51,0.572157,0.179023
00:02:51,0.520037,0.171217
00:04:51,0.551843,0.221285
00:06:50,0.566279,0.209182
00:09:26,0.022696,0.0161351
00:10:51,-0.00344509,0.0141303
00:12:51,0.555898,0.21082
00:14:51,0.519753,0.179563
00:16:51,0.503512,0.150133
00:18:51,0.498554,0.154512
00:20:51,0.00128343,-0.0129148
00:22:51,0.349077,0.0414234
00:24:50,0.360565,0.0522027
00:26:51,0.403705,0.0667703
Plot
At this moment, the Time column is still a string. You will have to convert this to minutes in some way
pandas.to_timedelta
import pandas as pd
column_names = ['Time','Peakat2188cm1', 'water']
# NOTE(review): `filename` is not defined in this snippet; it presumably
# holds the path of the CSV export - confirm.
df_orig = pd.read_csv(filename, sep=',')
# Replace the header names read from the file with the shorter identifiers above.
df_orig.columns = column_names
# Parse the time strings as timedeltas, then express them in minutes.
time_in_minutes = pd.to_timedelta(df_orig['Time']).dt.total_seconds() / 60
semi-manually
# enumerate(..., -1) numbers the reversed fields -1, 0, 1, so seconds are
# weighted by 60**-1, minutes by 60**0 and hours by 60**1.
time_in_minutes = [sum(int(x) * 60**i for i, x in enumerate(reversed(t.split(':')), -1)) for t in data['Time']]
explanation
This is the same as:
# Expanded form of the one-liner: convert each 'HH:MM:SS' string to minutes.
time_in_minutes = []
for t in data['Time']:
    minutes = 0
    # t = '00:00:51'
    h_m_s = t.split(':')
    # h_m_s = ['00', '00', '51']
    # Reverse to (seconds, minutes, hours) and number them -1, 0, 1 so that
    # each field is weighted by 60 ** position.
    s_m_h = list(enumerate(reversed(h_m_s), -1))
    # s_m_h = [(-1, '51'), (0, '00'), (1, '00')]
    for i, x in s_m_h:
        # i = -1
        # x = '51'
        minutes += int(x) * 60 ** i
        # minutes = 0.85
    time_in_minutes.append(minutes)

How to index List/ numpy array in order to plot the data with matplotlib

I have a function f(x,t) = cos(t)*t + x and i want to display the change of the result over the width x and time t at discretised time steps t_i and discretised width steps x_j.
I have been on SX for a while now and feel really embarrassed that I can only post so little code, or in other words nothing (since nothing I have tried has worked):
Nevertheless if someone has the time to help, I`d appreciate it.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as pyplot
from astropy.io.ascii.latex import AASTex
def func(xi, ti):
    """Return ``cos(ti) * ti + xi``.

    Uses ``np.cos``, so ``xi`` and ``ti`` may be scalars or NumPy arrays
    (combined with the usual broadcasting).
    """
    res = np.cos(ti)*ti + xi
    return res
# Discretise time and width, then evaluate func at every (time, width) pair.
timeSpacing = 100
timeStart = 0
timeEnd = 1
time = np.linspace(timeStart, timeEnd, timeSpacing)

widthSpacing = 300
widthStart = 0
widthEnd = 3
width = np.linspace(widthStart, widthEnd, widthSpacing)

resultList = [None]*timeSpacing
for i, ithTime in enumerate(time):
    # Array shaped like `width`, filled with the current time value. It does
    # not depend on j, so it is built once per time step.
    aas = np.zeros_like(width)
    aas.fill(ithTime)
    # A fresh row per time step: reusing a single list made every
    # resultList[i] refer to the same object, so all rows ended up holding
    # the values of the last time step.
    resultListInner = [None]*widthSpacing
    for j, jthWidth in enumerate(width):
        resultListInner[j] = ithTime, jthWidth, func(jthWidth, aas)
    resultList[i] = resultListInner
So how do I correctly index the list and array and plot my data using matplotlib?
My plot should look like this:
where in my case the aperture should be the width x, the sky annulus is my time t, and the RMS is my func(x,t).
A couple of points:
Numpy provides a very nice function for doing differences of array elements: diff
Matplotlib uses plot_wireframe for creating a plot that you would want (also using Numpy's meshgrid)
Now, combining these into what you may want would look something like this.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt
def func(xi, ti):
    """Return ``cos(ti) * sin(xi)``.

    Uses NumPy ufuncs, so ``xi`` and ``ti`` may be scalars or arrays
    (combined with the usual broadcasting).
    """
    res = np.cos(ti)*np.sin(xi)
    return res
# Number of samples and range of the time axis.
timeSpacing = 20
timeStart = 0
timeEnd = 1
time = np.linspace(timeStart, timeEnd, timeSpacing)
# Number of samples and range of the width axis.
widthSpacing = 50
widthStart = 0
widthEnd = 3
width = np.linspace(widthStart, widthEnd, widthSpacing)
# 2D coordinate grids: X varies along axis 1 (width), T along axis 0 (time).
X,T = np.meshgrid(width,time)
# Evaluate the function on the whole grid at once.
F = func(X,T)
# Difference along time (axis 0), then along width (axis 1); each diff
# shortens its axis by one element.
DF = np.diff(np.diff(F,axis=0),axis=1)
fig = plt.figure()
ax = fig.add_subplot(111,projection='3d')
# Drop the last row and column of X and T so their shapes match DF.
ax.plot_wireframe(X[:-1,:-1],T[:-1,:-1],DF)
plt.show()
Note that diff is applied twice: once in each dimension axis= . I have also changed the toy function you provided to something that actually looks decent in this case.
For your more general use, it seems that you would want to just collect all of your F data into a 2D array, then proceed from the DF = line.

Graph Customization in python

Currently I have a program that takes data and makes a histogram out of it. I know how to change the labels and so on, but is there a way to make the x-axis display tick values more frequently? For example, right now the x-axis shows values in increments of 5; how can I make it show them in increments of 1, 2 or 3 instead?
Current code:
#!/usr/bin/python
import operator
import matplotlib.pyplot as plt
import numpy as np
# Read the coordinates: first line -> count, second line -> header, remaining
# non-blank lines -> one row each, with the first token dropped.
l = []
with open("testdata") as f:
    line = next(f)
    next(f)  # skip headers
    # NOTE(review): this line was truncated in the original
    # (`int(line.split()[`); taking the first token of the first line is an
    # assumption - confirm against the data file.
    nat = int(line.split()[0])
    print(nat)
    for line in f:
        if line.strip():
            l.append([float(v) for v in line.split()[1:]])

# Euclidean distance between every unordered pair of rows, using the first
# three values of each row. The pair loops are driven by the number of rows
# actually read instead of the hard-coded 53/54.
distances = []
n_points = len(l)
for b in range(n_points - 1):
    for a in range(b + 1, n_points):
        delta = np.array(l[b][:3]) - np.array(l[a][:3])
        dp = np.dot(delta, delta)**.5
        distances.append(dp)

num_bins = 200  # <- number of bins for the histogram
(n, bins, patches) = plt.hist(distances, num_bins)
plt.title('Histogram')
plt.xlabel('Distance')
plt.ylabel('Frequency')
plt.show()
# `start`, `end` and `step` are placeholders for the desired tick range.
# np.arange has no `endpoint` keyword (that belongs to np.linspace) and
# excludes its stop value, so the stop is pushed out by one step to include
# `end`. Set the ticks before calling plt.show().
label_positions = np.arange(start, end + step, step)
plt.xticks(label_positions)

Plotting Repeating Data Set from File using matplotlib and lists

this is my first post here, so I hope it goes well.
I have a file of data(about 2mb) in the format
angle (space) energy (space) counts
angle (space) energy (space) counts
angle (space) energy (space) counts, etc.
(this is data recorded from a particle accelerator running for ~170 hours, so the file is large)
Angle starts out at 0, and is 0 while energy goes up to about 4500, and then
angle increases by one and energy starts again at 0 and goes up to 4500. This repeats
until theta = 255.
I am trying to create a program that plots the number of counts versus the energy level, energy level being my x axis, and counts being my y axis. I have tried many solutions, but to no avail.
Any help given to me on this would be much appreciated.
My code is posted below.
import matplotlib.pyplot as plt
import numpy as np
import pylab
from numpy import *
from matplotlib.pyplot import *
import math
import sys
import scipy.optimize
"""
Usage
---------------
Takes a file in the format of
Theta |Rel_MeV |Counts
97 4024 0
97 4025 0
97 4026 6
97 4027 2
and graphs it
fileURL is the input for the file to put into the program
txt_Title is the graph label
"""
DEBUG = 1  # not referenced by the code visible in this file
fileURL = './ne19_peaks_all.dat'  # input data file opened by main()
txt_Title = 'Oxygen and Alpha Particle Relative Energy'  # plot title
MeV_divide_factor = 100  # second-column values are divided by this before binning
ptSize = 5  # not referenced by the code visible in this file
MarkerType = '+'  # format string passed to plt.plot
MeV_max = 5000  # number of bins allocated for the counts
def main():
    """Sum the counts per energy bin over the whole file and plot them.

    Reads ``fileURL`` line by line. For each data row the second field,
    divided by ``MeV_divide_factor`` and truncated to an int, selects a bin,
    and the third field is added to that bin. Rows that do not carry two
    numeric fields in those positions (such as the
    ``Theta |Rel_MeV |Counts`` header shown in the module docstring, or
    blank lines) are skipped. The bin totals are then plotted against the
    bin index with the marker given by ``MarkerType``.

    Returns:
        0, used as the process exit status.
    """
    # One bin per integer in [0, MeV_max).
    list_MeV = list(range(MeV_max))
    list_counts = [0] * MeV_max

    # Stream the file; the context manager closes it even if parsing fails.
    with open(fileURL, 'r') as f2:
        for line in f2:
            p = line.split()
            try:
                MeV = float(p[1])/MeV_divide_factor
                count = float(p[2])
            except (IndexError, ValueError):
                # header, blank or otherwise non-numeric row
                continue
            list_counts[int(MeV)] += count

    x_arr = np.array(list_MeV)
    y_arr = np.array(list_counts)
    plt.plot(x_arr, y_arr, MarkerType)
    plt.title(txt_Title)
    plt.show()
    return 0
def func(x, a, b):
    """Return the straight line ``a*x + b`` evaluated at ``x``."""
    return a*x + b
# Run main() only when executed as a script and use its return value as the
# process exit status.
if __name__ == '__main__':
    status = main()
    sys.exit(status)
I used a dictionary in which each energy level was a key and the counts were the values.

Categories

Resources