Efficient writing and saving of real-time data in Python? - python

I have a script that I am using to pull data from an accelerometer. I am able to specify the bandwidth of the accelerometer (250 Hz), which sets the data rate to 500 Hz for sampling.
The problem that I am facing is that the .txt file is not saving the number of samples that I am expecting, and I am asking for support here to understand if my lines of code to write the data to the text file are the reason for my bottleneck.
Once this file is saved by the data collection script, I open the .txt file in a Spyder script that performs a real-time calculation and visualization of the data as it is being saved by the first script.
The following lines are used in the 'data_pull' script, which is reading the data from the accelerometer. The lines below are what I'm using to write to the text file.
# Append converted samples to the log file, one CSV row per reading.
# newline="" lets the csv module control line endings itself, as its
# documentation asks for files handed to csv.writer; the with-block makes sure
# the file is closed when the loop is interrupted.
with open("C:\\Users\\User\\Documents\\test.txt", "a", newline="") as file:
    writer = csv.writer(file)
    while True:
        writer.writerow(convert2g(readField()))
        # Flush after every row so the row reaches the file right away.
        file.flush()
For Reference, two of the functions in the data pulling script from the accelerometer are shown below. I didn't include the entire script, because it is unrelated to the saving and writing to the text file.
def convert2g(raw_accs):
    """Return a list with every value of ``raw_accs`` divided by CONVERT_CONST."""
    scaled = []
    for acc in raw_accs:
        scaled.append(acc / CONVERT_CONST)
    return scaled
def readField():
    """Read one block from the board and decode the x, y and z values.

    For each axis the low byte is masked down to its upper four bits, OR-ed
    with the following byte shifted left by 8, passed through ``twos_comp``
    and shifted right by 4.

    Returns:
        The tuple ``(twos_x, twos_y, twos_z)``.
    """
    values = myBoard.Read(0x02, 18) #check register map
    bin_mask = 0b11110000
    decoded = []
    # The byte pairs (0, 1), (2, 3) and (4, 5) hold x, y and z in that order.
    for low_index in (0, 2, 4):
        low_bits = values[low_index] & bin_mask
        high_bits = values[low_index + 1] << 8
        decoded.append(twos_comp(high_bits | low_bits) >> 4)
    twos_x, twos_y, twos_z = decoded
    return twos_x, twos_y, twos_z
The code below is what I'm using to build the animation.
import pandas as pd
import sys
import numpy as np
import easygui
import matplotlib.pyplot as plt
import matplotlib.animation as animation
def animate(i):
    """Re-read the log file and redraw both subplots.

    Written as the frame callback for ``FuncAnimation``; the frame argument
    ``i`` is not used. Draws on the module-level axes ``ax1`` and ``ax3``.
    """
    # header=None keeps the first line of the file as a sample instead of
    # letting pandas consume it as column names.
    # NOTE(review): assumes the file never contains a header line - confirm.
    data = pd.read_csv("C:\\Users\\User\\Documents\\test.txt",
                       header=None, names=['X', 'Y', 'Z'])
    # Everything before sample 1050 is left out of both plots.
    xar = range(len(data))[1050:]
    z_only = pd.DataFrame(data['Z'])[1050:]
    # DataFrame.as_matrix() no longer exists in current pandas; to_numpy()
    # is its replacement.
    std = z_only.rolling(window=10000).std().to_numpy()
    yar = data.to_numpy()[1050:len(data)]
    ax1.clear()
    ax1.set_xlabel("Sample Number")
    ax1.set_ylabel("Standard Deviation (g)")
    ax3.clear()
    ax3.set_xlabel("Sample Number")
    ax3.set_ylabel("Acceleration (g)")
    ax1.plot(xar, std)
    ax3.plot(xar, yar)
    ax1.set_title('Rolling Standard Deviation')
    ax3.set_title('Original Data')
# Two stacked subplots sharing the x axis; animate() draws into ax1 and ax3.
fig, (ax1, ax3) = plt.subplots(2, sharex = True)
fig.subplots_adjust(hspace=1.5)
# NOTE(review): matplotlib documents FuncAnimation's interval in milliseconds,
# so .01 asks for practically no delay between frames - confirm this is intended.
ani = animation.FuncAnimation(fig, animate, interval=.01)
plt.show()

Related

Plotting multiple realtime diagrams via multithreading in python

I used threading library to plot 2 real-time diagrams using matplotlib.
from fbm import MBM
import matplotlib.pyplot as plt
import threading
# Select the 'ggplot' style and create one figure with two stacked axes
# (ax[0] and ax[1]) that the two plotting functions draw on.
plt.style.use('ggplot')
fig, ax = plt.subplots(nrows=2, ncols=1)
def h_90(t):
    """Constant Hurst function: ignore ``t`` and return 0.9."""
    hurst_value = 0.9
    return hurst_value
def h_75(t):
    """Constant Hurst function: ignore ``t`` and return 0.75."""
    hurst_value = 0.75
    return hurst_value
# Endless plotting loop for the hurst=h_90 series: every pass builds an MBM
# object, takes element 0 of f.mgn(), appends it to y_vec (with the running
# counter i in x_vec) and re-plots the accumulated points on ax[0] in green.
# NOTE(review): indentation was lost in this listing, so whether the trailing
# plt.show() sits inside or after the `while True:` loop cannot be told here.
def thread_fgn_9():
x_vec = []
y_vec = []
i = 1
while True:
f = MBM(n=1, hurst=h_90, length=1, method='riemannliouville')
fgn_sample = f.mgn()
x_vec.append(i)
y_vec.append(fgn_sample[0])
i += 1
ax[0].plot(x_vec, y_vec, "g-o")
plt.pause(0.1)
plt.show()
# Endless plotting loop for the hurst=h_75 series: every pass builds an MBM
# object, takes element 0 of f.mgn(), appends it to y_vec_ (with the running
# counter i in x_vec_) and re-plots the accumulated points on ax[1] in red.
# NOTE(review): indentation was lost in this listing, so whether the trailing
# plt.show() sits inside or after the `while True:` loop cannot be told here.
def thread_fgn_75():
x_vec_ = []
y_vec_ = []
i = 1
while True:
f = MBM(n=1, hurst=h_75, length=1, method='riemannliouville')
fgn_sample = f.mgn()
x_vec_.append(i)
y_vec_.append(fgn_sample[0])
i += 1
ax[1].plot(x_vec_, y_vec_, "r-o")
plt.pause(0.2)
plt.show()
if __name__ == "__main__":
plt.ion()
# NOTE(review): target=thread_fgn_75() calls the function right here and would
# pass its return value as the target; because thread_fgn_75 contains a
# `while True:` loop, the lines below are presumably never reached.
x2 = threading.Thread(target=thread_fgn_75(), name="H_75")
x2.daemon = True
x2.start()
# Same pattern as above: thread_fgn_9() is called rather than passed.
x1 = threading.Thread(target=thread_fgn_9(), name="H_90")
x1.daemon = True
x1.start()
I expect to see two plots being drawn in real time, but instead I see something like the image below:
can anybody understand what is wrong in my code ?? The code is completed and you can simply just copy/paste in your IDE to run it.
Thanks
================= New change ================
I just changed the main section as below:
if __name__ == "__main__":
# NOTE(review): the target is still written with parentheses, so the function
# is called in the main thread before any Thread object exists.
# Thread.start() returns None, so x1 and x2 would not hold Thread objects.
x1 = threading.Thread(target=thread_fgn_9(), name="H_90").start()
x2 = threading.Thread(target=thread_fgn_75(), name="H_75").start()
plt.show()
but still the result is the same as before.
======New New change ===============
if __name__ == "__main__":
# The functions themselves are passed as targets here (no parentheses), so
# each one runs in its own thread once start() is called.
# Thread.start() returns None, so x1 and x2 end up bound to None.
x1 = threading.Thread(target=thread_fgn_9, name="H_90").start()
x2 = threading.Thread(target=thread_fgn_75, name="H_75").start()
#plt.pause(0.2)
plt.show()
I just erased the parentheses in target=function_name
It seems to be correct now, but the plot does not update smoothly. I also see an error like this in the console:
File "/usr/local/lib/python3.9/site-packages/matplotlib/transforms.py", line 312, in xmin
return np.min(self.get_points()[:, 0])
File "<__array_function__ internals>", line 5, in amin
RecursionError: maximum recursion depth exceeded
-------Final Change-----------------------
The best way to do this in matplotlib is below code:
# Select the 'ggplot' style and create one figure with two stacked axes.
plt.style.use('ggplot')
fig, ax = plt.subplots(nrows=2, ncols=1)
# `mutex` is only referenced from commented-out lines in the functions below.
# NOTE(review): this listing shows no import for Lock - presumably imported elsewhere.
mutex = Lock()
def thread_fgn_9():
    """Plot an ever-growing series of hurst=h_90 samples on ax[0].

    Prints the current thread's name once, then loops forever: each pass
    draws one sample, appends it and re-plots all points in green.
    The function never returns.
    """
    # Thread.getName() is deprecated; the `name` attribute gives the same value.
    print(threading.current_thread().name)
    x_vec = []
    y_vec = []
    i = 1
    while True:
        #mutex.acquire()
        f = MBM(n=1, hurst=h_90, length=1, method='riemannliouville')
        fgn_sample = f.mgn()
        x_vec.append(i)
        y_vec.append(fgn_sample[0])
        i += 1
        ax[0].plot(x_vec, y_vec, "g-o")
        plt.pause(0.01)
        #mutex.release()
def thread_fgn_75():
    """Plot an ever-growing series of hurst=h_75 samples on ax[1].

    Prints the current thread's name once, then loops forever: each pass
    draws one sample, appends it and re-plots all points in red.
    The function never returns.
    """
    # Thread.getName() is deprecated; the `name` attribute gives the same value.
    print(threading.current_thread().name)
    x_vec_ = []
    y_vec_ = []
    i = 1
    while True:
        #mutex.acquire()
        f = MBM(n=1, hurst=h_75, length=1, method='riemannliouville')
        fgn_sample = f.mgn()
        x_vec_.append(i)
        y_vec_.append(fgn_sample[0])
        i += 1
        ax[1].plot(x_vec_, y_vec_, "r-o")
        plt.pause(0.01)
        #mutex.release()
if __name__ == "__main__":
# Each plotting function is started in its own process.
# Process.start() returns None, so x1 and x2 end up bound to None.
# NOTE(review): this listing shows no import for multiprocessing - presumably imported elsewhere.
x1 = multiprocessing.Process(target=thread_fgn_9, name="H_90").start()
x2 = multiprocessing.Process(target=thread_fgn_75, name="H_75").start()
plt.show()
I believe the reason is that both processes try to draw into the same main figure. To get multiple plots that update smoothly over time, we need a different technique.
thread_fgn_9 is being called and blocking even before it is sent to the thread. Be sure to send the function itself.
plt.pause or plt.show need to be called from the main thread. Additionally, Matplotlib makes no thread safety guarantees in general, so you should avoid this concept entirely unless you know exactly what you are doing. Consider the techniques in this question instead: Fast Live Plotting in Matplotlib / PyPlot

Jupyter notebook and live plotting data gathered by a keithley 2450

So I'm running some measurements using a keithley 2450 source meter with this code:
def res_trace(n = None, max_v = None, min_v = None, data_points = None, r_crit = None,
              ilim = None):
    """Sweep the source voltage, record the measured current and plot I against V.

    Args:
        n: trace number shown in the plot title (default 0).
        max_v: upper end of the sweep, also sent as the source range (default 0.5).
        min_v: lower end of the sweep (default -0.5).
        data_points: number of sweep points (default 100).
        r_crit: defaults to 1e+7; not otherwise used by this function.
        ilim: value sent with ':SOURce:VOLTage:ILIMit ' (default 'MAXimum').
    """
    beep(164,0.5)
    # check values and provide defaults
    # `is None` instead of `or` for the values where 0 is meaningful, so an
    # explicit bound of 0 is honoured rather than replaced by the default.
    n = 0 if n is None else n
    max_v = 0.5 if max_v is None else max_v
    min_v = -0.5 if min_v is None else min_v
    data_points = data_points or 100
    r_crit = r_crit or 1e+7
    ilim = ilim or 'MAXimum'
    v = np.linspace(min_v, max_v, num=data_points)
    i = []
    # reset keithley
    # just so we can use them without any prior settings
    reset()
    # set up I measurement systems
    keith.write(':SENSe:FUNCtion "CURR"')
    keith.write(':SENSe:CURRent:RANGe:AUTO 1')
    keith.write(':SENSe:CURRent:UNIT AMP')
    keith.write(':SENSe:CURRent:NPLCycles DEFault')
    keith.write(':SENSe:COUNt DEFault')
    keith.write(':SENSe:CURRent:OCOM ON')
    # set up V source, (Hi Michale here!)
    keith.write(':SOURce:FUNCtion VOLT')
    keith.write(':SOURce:VOLTage:RANGe '+str(max_v))
    keith.write(':SOURce:VOLTage:ILIMit '+ ilim)
    # Turn keith on
    keith_output('on')
    try:
        for j in v:
            keith.write(':SOURce:VOLT '+str(j))
            itemp = float(keith.query(':MEASure:CURRent?'))
            i.append(itemp)
    finally:
        # turn them off, even when a write/query fails in the middle of the sweep
        keith_output('off')
    # plot
    plt.figure()
    plt.title('Res trace # '+str(n))
    plt.plot(v,i, c = 'm')
    plt.xlabel('V')
    plt.ylabel('I')
    plt.grid()
    plt.show()
I'm currently running the script, and it takes a few seconds for the Keithley to take measurements and return values. I'd like a way to live-plot the data within each loop as it's being collected, but I have no idea how to go about this and want the process to be as simple as possible. Any suggestions?
Thanks
You can use Jupyterplot and create a realtime plot like this:
from jupyterplot import ProgressPlot
import numpy as np
# Create the plot, feed it one value per loop pass, then finalize it.
pp = ProgressPlot()
for i in range(1000):
# Each update() call passes the next value, here sin(i / 100).
pp.update(np.sin(i / 100))
pp.finalize()

Converting relative time from CSV file into absolute time

import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import Rbf, InterpolatedUnivariateSpline
# Load columns 1-3 of the export under the names Time, Peakat2188cm1 and water.
data = np.genfromtxt('FTIR Data.csv', skip_header=1, delimiter=',', usecols=(1,2,3), names=['Time','Peakat2188cm1', 'water'] )
x = data['Time']
y1 = data['Peakat2188cm1']
y2 = data['water']
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twinx()
ius = InterpolatedUnivariateSpline
# Evaluate a spline through each series at 100 evenly spaced times.
xs = np.linspace(x.min(), x.max(), 100)
s1 = ius(x, y1)
s2 = ius(x, y2)
ys1 = s1(xs)
ys2 = s2(xs)
# Draw each series on the axis that carries its label (both used to go on ax2,
# leaving the labelled ax1 empty). Explicit colours keep the two curves
# distinguishable now that they no longer share one axes' colour cycle.
ax1.plot(xs, ys1, color='C0')
ax2.plot(xs, ys2, color='C1')
ax1.set_ylabel('Peak at 2188 cm-1')
ax2.set_ylabel('water')
ax1.set_xlabel('RT (mins)')
plt.title('RT Vs Conc')
This is my code for reading data from a CSV file exported from my instrument. In the Excel file, I manually converted the relative time into time in minutes and got the right plot. But I want to convert the relative time directly in Python when reading the relative time column from the CSV file. I have tried different examples but couldn't get it to work. I am very new to Python, so can anyone please help with editing my code? My actual data is in the following format. (This code is used to plot absolute time, i.e. Time, which I had already converted in Excel before plotting in matplotlib.)[enter image description here][1]
Relative Time,Peak at 2188 cm-1,water
00:00:51,0.572157,0.179023
00:02:51,0.520037,0.171217
00:04:51,0.551843,0.221285
00:06:50,0.566279,0.209182
00:09:26,0.022696,0.0161351
00:10:51,-0.00344509,0.0141303
00:12:51,0.555898,0.21082
00:14:51,0.519753,0.179563
00:16:51,0.503512,0.150133
00:18:51,0.498554,0.154512
00:20:51,0.00128343,-0.0129148
00:22:51,0.349077,0.0414234
00:24:50,0.360565,0.0522027
00:26:51,0.403705,0.0667703
Plot
At this moment, the Time column is still a string. You will have to convert this to minutes in some way
pandas.to_timedelta
import pandas as pd
# Read the export, rename its columns, then turn the Time strings into minutes.
column_names = ['Time','Peakat2188cm1', 'water']
df_orig = pd.read_csv(filename, sep=',')
df_orig.columns = column_names
elapsed = pd.to_timedelta(df_orig['Time'])
elapsed_seconds = elapsed.dt.total_seconds()
time_in_minutes = elapsed_seconds / 60
semi-manually
# For each 'HH:MM:SS' string, the reversed parts are numbered from -1, so seconds
# are weighted by 60**-1, minutes by 60**0 and hours by 60**1: the sum is in minutes.
time_in_minutes = [sum(int(x) * 60**i for i, x in enumerate(reversed(t.split(':')), -1)) for t in data['Time']]
explanation
This is the same as:
# Step-by-step form of the one-line conversion; the trailing comments show the
# intermediate values for the sample string '00:00:51'.
time_in_minutes = []
for t in data['Time']:
minutes = 0
# t = '00:00:51'
h_m_s = t.split(':')
# h_m_s = ['00', '00', '51']
s_m_h = list(enumerate(reversed(h_m_s), -1))
# s_m_h = [(-1, '51'), (0, '00'), (1, '00')]
for i, x in s_m_h:
# i = -1
# x = '51'
minutes += int(x) * 60 ** i
# minutes = 0.85
time_in_minutes.append(minutes)

Producing a sleep log visualisation in Python

I have a data-set of sleep-time information and would like to produce a visualisation of it using Python.
The .csv data-set I have looks like the this:
SleepStartDate,SleepStartTime,SleepStopTime
17/03/2017,23:45,07:25
19/03/2017,01:05,09:10
19/03/2017,23:50,08:25
The visualisation I want to produce should be similar to the following:
Image source: http://quantifiedself.com/wp-content/uploads/2015/08/qs2.png
I know this is a really simple visualisation, and imagine that it's built into some already existing library, but my best Googling efforts have been unable to locate it. I'd much appreciate it if someone could point me in the right direction.
Thanks in advance for your time and wisdom.
Wasn't able to find a library with functionality to do what I wanted, so ended up writing a script to do it for myself:
Script:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import matplotlib.patches as patches
import datetime as dt
import csv
import sys
# Number of minutes in one day; formatTime divides by it to get a fraction of a day.
MINUTES_IN_DAY = 1440.0
# Colour handed to plotData as the face colour of the plotted bands.
COLUMN_COLOUR = 'b'
# Graph data using matplotlib visualization
def plotData(data,columnColour,maxDate,minDate):
    """Draw every logged period as a translucent vertical band and show the figure.

    Args:
        data: items exposing ``year``, ``month``, ``day``, ``startTime`` and
            ``stopTime`` (the last two as fractions of a day).
        columnColour: face colour used for the bands.
        maxDate: latest date, upper end of the x range.
        minDate: earliest date, lower end of the x range.
    """
    one_day = dt.timedelta(days=1)
    # Invisible background points, one per day, give the axes their extent.
    x = mpl.dates.drange(minDate, maxDate, one_day)
    # Push the first point towards the top so the y range has the right height.
    x[0] += 0.85
    # Time-of-day part of each point on an arbitrary base date; int() makes
    # the y axis start at midnight.
    times = x % 1 + int(x[0])
    fig = plt.figure()
    fig.suptitle('Daily Sleep Patterns', fontsize=14, fontweight='bold')
    ax = fig.add_subplot(111)
    ax.plot_date(x, times, 'ro', color='w', visible=False)
    ax.yaxis_date()
    fig.autofmt_xdate()
    start, end = ax.get_ylim()
    # One tick per hour, labelled from midnight to midnight.
    hourDivision = 1.0 / 24.0
    ax.yaxis.set_ticks(np.arange(start,end,hourDivision))
    hour_labels = (['Midnight']
                   + ['%dam' % hour for hour in range(1, 12)]
                   + ['Midday']
                   + ['%dpm' % hour for hour in range(1, 12)]
                   + ['Midnight'])
    ax.set_yticklabels(hour_labels)
    for entry in data:
        # Each column is one day wide and centred on the entry's date.
        column_start = dt.datetime(entry.year, entry.month, entry.day)
        column_start -= dt.timedelta(days=0.5)
        column_end = column_start + one_day
        if entry.startTime > entry.stopTime:
            # The period crosses midnight: fill to the top of this column and
            # from the bottom of the next one.
            plt.axvspan(xmin=column_start, xmax=column_end, ymin=entry.startTime, ymax=1, facecolor=columnColour, alpha=0.5)
            next_column_end = column_end + one_day
            plt.axvspan(xmin=column_end, xmax=next_column_end, ymin=0, ymax=entry.stopTime, facecolor=columnColour, alpha=0.5)
        else:
            plt.axvspan(xmin=column_start, xmax=column_end, ymin=entry.startTime, ymax=entry.stopTime, facecolor=columnColour, alpha=0.5)
    ax.set_ylabel('Hours',fontweight='bold')
    #ax.legend()
    ax.grid(True)
    plt.show()
# Read data from csv file
def readDataFromFile(dataFile):
    """Read every row of the CSV file at ``dataFile``.

    Returns:
        A list with one list of strings per row, the header row included.
    """
    # newline='' is the mode the csv module documents for files it reads;
    # the with-block closes the file on every exit path.
    with open(dataFile, 'rt', newline='') as f:
        return list(csv.reader(f))
# Class to store time and date data read from file
class sleepInstance(object):
    """One logged period: a calendar date plus start/stop as fractions of a day."""

    def __init__(self,listOfInputLists):
        """Build the record from one row: [date, start time, stop time]."""
        self.day = self.month = self.year = 0
        self.formatDate(listOfInputLists[0])
        self.startTime = self.formatTime(listOfInputLists[1])
        self.stopTime = self.formatTime(listOfInputLists[2])

    def formatDate(self,unformattedDate):
        """Parse a 'dd/mm/yy' string and store its day, month and year."""
        parsed = dt.datetime.strptime(unformattedDate,"%d/%m/%y")
        self.day = parsed.day
        self.month = parsed.month
        self.year = parsed.year

    def formatTime(self,unformattedTime):
        """Convert an 'HH:MM:SS' string to a fraction of a day, for the graph."""
        parsed = dt.datetime.strptime(unformattedTime,'%H:%M:%S')
        # strptime fills in 1900-01-01 as the date, so subtract that midnight.
        elapsed = parsed - dt.datetime(1900,1,1)
        minutes_elapsed = elapsed.total_seconds() / 60.0
        return minutes_elapsed / MINUTES_IN_DAY
# Formats data read from file as a list of sleepInstance objects
def formatDataForPlot(listOfInputLists):
    """Wrap every row after the first in a sleepInstance and return the list."""
    # Row 0 is skipped, matching the header line of the input file.
    return [sleepInstance(row) for row in listOfInputLists[1:]]
# Extracts earliest (min) and latest (max) dates from data, for use in setting graph limits
def getMaxAndMinDates(plotDataList):
    """Return ``(latest, earliest)`` date among the items, as datetimes.

    Each item must expose ``year``, ``month`` and ``day``.
    """
    dates = [dt.datetime(entry.year, entry.month, entry.day) for entry in plotDataList]
    return max(dates), min(dates)
# Driver: read the CSV, build the sleepInstance list, find the date range, plot.
dataFile = 'sleepData.csv'
listOfInputLists = readDataFromFile(dataFile)
plotDataList = formatDataForPlot(listOfInputLists)
# getMaxAndMinDates returns the latest date first, then the earliest.
maxDate, minDate = getMaxAndMinDates(plotDataList)
plotData(plotDataList,COLUMN_COLOUR,maxDate,minDate)
Input:
Date,Start,Finish
17/03/17,03:15:00,03:55:00
17/03/17,06:20:00,06:35:00
17/03/17,09:00:00,09:40:00
17/03/17,13:10:00,13:35:00
17/03/17,15:45:00,16:30:00
17/03/17,18:45:00,19:25:00
17/03/17,21:15:00,21:35:00
18/03/17,00:30:00,02:00:00
18/03/17,04:50:00,05:05:00
18/03/17,08:20:00,08:40:00
18/03/17,12:30:00,13:10:00
18/03/17,16:30:00,17:00:00
18/03/17,18:45:00,19:00:00
18/03/17,20:30:00,21:00:00
19/03/17,00:00:00,12:00:00
19/03/17,18:00:00,23:59:00
19/03/17,13:00:00,14:00:00
20/03/17,12:00:00,11:00:00
Output:
May fancy it up a bit when I have time: https://github.com/ambidextrous/timeLogGrapher
The classic Python choice would be to use the matplotlib package. Looking at your sample graph it looks like a vertical bar graph.

manipulating a .dat file and plotting cumulative data

I want to plot a quantity from a tedious-to-look-at .dat file, the #time column in the file extends from 0s to 70s, but I need to take a closer look at data (Nuclear Energy, in this case) from 25s to 35s.
I was wondering if there is a way I can manipulate the time column and corresponding other columns to record and plot data only for the required time span.
I already have some code which does the job for me for 0-70s:
import matplotlib
matplotlib.use('Agg')
import os
import numpy as np
import matplotlib.pyplot as plt
import string
import math
# reads from flash.dat
def getQuantity(folder, basename, varlist):
    """Read the selected columns of ``<folder>/<basename>.dat``.

    Args:
        folder: directory containing the file.
        basename: file name without the ``.dat`` extension.
        varlist: column indices to extract.

    Returns:
        One list of floats per entry of ``varlist``, in the same order.
        The column equal to NUCLEAR_ENERGY is returned as a running sum.
    """
    # quantities[0] should contain only the quantities of varlist[0]
    quantities = [[] for _ in varlist]
    with open(folder + "/" + basename + ".dat", 'r') as f:
        for line in f:
            # The first line and restarting lines contain '#' or 'Inf'.
            # `('#' or 'Inf') in line` only ever tested for '#', so both
            # markers are checked explicitly here.
            if '#' in line or 'Inf' in line:
                continue
            fields = line.split()
            if not fields:
                # A blank line has no columns to index.
                continue
            for column, series in zip(varlist, quantities):
                value = float(fields[column])
                if column == NUCLEAR_ENERGY and series:
                    # Accumulate on top of the previous running total.
                    value += series[-1]
                series.append(value)
    return quantities
# end def getQuantity
#create plot
# Build one subplot per requested variable and save the figure to disk.
plt.figure(1)
TIME = 0
NUCLEAR_ENERGY = 18
labels = ["time", "Nuclear Energy"]
flashFolder1 = '/home/trina/Pictures' # should be the flash NOT the flash/object folder.
lab1 = '176'
filename = 'flash' # 'flash' for flash.dat
nHorizontal = 1 # number of Plots in Horizontal Direction. Vertical Direction is set by program.
outputFilename = 'QuantityPlots_Nuclear.png'
variables = [NUCLEAR_ENERGY]
# Scale the default figure size by the subplot grid.
nVertical = math.ceil(float(len(variables))/nHorizontal) # = 6 for 16 = len(variables) & nHorizontal = 3.
F = plt.gcf() #get figure
DPI = F.get_dpi()
DefaultSize = F.get_size_inches()
F.set_size_inches( DefaultSize[0]*nHorizontal, DefaultSize[1]*nVertical ) #build no of subplots in figure
variables.insert(0,TIME) # time as needed as well
data1 = getQuantity(flashFolder1, filename, variables)
time1 = np.array(data1[0]) #time is first column
for n in range(1, len(variables)): #starts at 1
    ax = plt.subplot(nVertical, nHorizontal, n) #for example (6,3,0 to 15) inside loop for 16 variables
    series = data1[n]
    lowest = min(series)
    # Linear axis when the data goes negative or its min/max ratio is at
    # least 1e-2; logarithmic y axis otherwise.
    use_linear = lowest < 0.0 or abs(lowest / max(series)) >= 1.e-2
    draw = plt.plot if use_linear else plt.semilogy
    draw(time1, series, label=lab1) #, label = labels[variables[n]])
    legend = ax.legend(loc='upper right', frameon=False)
plt.savefig(outputFilename)
Here is the figure I can produce from this code:
and for your convenience I am also sharing the .dat file:
https://www.dropbox.com/s/w4jbxmln9e83355/flash.dat?dl=0
Your suggestions are most appreciated.
UPDATE: plot cumulative nuclear energy:
# Keep the rows with 25 <= time <= 35 and index them by time.
x = df.query('25 <= time <= 35').set_index('time')
# Running sum of Nuclear_Energy over that window, stored as a new column.
x['cum_nucl_energy'] = x.Nuclear_Energy.cumsum()
x.cum_nucl_energy.plot(figsize=(12,10))
Old answer:
Using Pandas module
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
fn = r'D:\temp\.data\flash.dat'
# Columns 0 and 18 are read as time and Nuclear_Energy; the first line is
# skipped and 'Infinity' entries are treated as missing values.
# sep is a raw string: '\s' in a normal string literal is an invalid escape.
df = pd.read_csv(fn, sep=r'\s+', usecols=[0, 18], header=None, skiprows=[0], na_values=['Infinity'])
df.columns=['time', 'Nuclear_Energy']
df.query('25 <= time <= 35').set_index('time').plot(figsize=(12,10))
# Save before show(): once the window opened by plt.show() has been closed,
# savefig may have only an empty figure left to write.
plt.savefig('d:/temp/out.png')
plt.show()
Result:
Explanation:
In [43]: pd.options.display.max_rows
Out[43]: 50
In [44]: pd.options.display.max_rows = 12
In [45]: df
Out[45]:
time Nuclear_Energy
0 0.000000e+00 0.000000e+00
1 1.000000e-07 -4.750169e+29
2 2.200000e-07 -5.699325e+29
3 3.640000e-07 -6.838392e+29
4 5.368000e-07 -8.206028e+29
5 7.441600e-07 -9.837617e+29
... ... ...
10210 6.046702e+01 7.160630e+44
10211 6.047419e+01 7.038907e+44
10212 6.048137e+01 6.934600e+44
10213 6.048856e+01 6.847015e+44
10214 6.049577e+01 6.765220e+44
10215 6.050298e+01 6.661930e+44
[10216 rows x 2 columns]
In [46]: df.query('25 <= time <= 35')
Out[46]:
time Nuclear_Energy
4534 25.001663 1.559398e+43
4535 25.006781 1.567793e+43
4536 25.011900 1.575844e+43
4537 25.017021 1.583984e+43
4538 25.022141 1.592015e+43
4539 25.027259 1.600200e+43
... ... ...
6521 34.966427 8.181516e+41
6522 34.972926 8.538806e+41
6523 34.979425 8.913695e+41
6524 34.985925 9.304403e+41
6525 34.992429 9.731310e+41
6526 34.998941 1.019862e+42
[1993 rows x 2 columns]
In [47]: df.query('25 <= time <= 35').set_index('time')
Out[47]:
Nuclear_Energy
time
25.001663 1.559398e+43
25.006781 1.567793e+43
25.011900 1.575844e+43
25.017021 1.583984e+43
25.022141 1.592015e+43
25.027259 1.600200e+43
... ...
34.966427 8.181516e+41
34.972926 8.538806e+41
34.979425 8.913695e+41
34.985925 9.304403e+41
34.992429 9.731310e+41
34.998941 1.019862e+42
[1993 rows x 1 columns]

Categories

Resources