I currently have a script that I am using to plot part of a csv file after converting it to .txt. At this time, it works perfectly, except that when I change the dates in column 0 to ordinal form (I have done this so I can read all values as floats and perform calculations on column 4), Python chops off the hours, minutes and seconds. I still need the hours and minutes, because when I plot the data, it plots all of my points at the beginning of the day. Is there a way I can do this and keep the time as well as the date? I've tried converting the dates to a string and the other column to floats, but it's gotten very messy and confusing. Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
import csv
from numpy import ma
def skip_first(seq,n):
for i, item in enumerate(seq):
if i >= n:
yield item
g = open('soundTransit1_remote_rawMeasurements_15m.txt', 'w')
with open('soundTransit1_remote_rawMeasurements_15m.dat', 'rb') as f:
csvreader = csv.reader(f)
for row in skip_first(csvreader,4):
for row in csv.reader(f,delimiter=',',skipinitialspace=True):
print >>g, "\t".join(row)
g.close()
def date2str(date_str):
date = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
return date.toordinal()
def readfiles(file_list):
""" read <TAB> delemited files as strings
ignoring '# Comment' lines """
data = []
for fname in file_list:
data.append(
np.loadtxt(fname,
usecols=(0,4),
comments='#', # skip comment lines
delimiter='\t',
converters = { 0 : date2str },
dtype=None))
return data
data = readfiles(['soundTransit1_remote_rawMeasurements_15m.txt'])
data_1 = ma.fix_invalid(data, fill_value = 'nan')
column_0 = np.array(data_1)[0][:,0]
airTempRaw = np.array(data_1)[0][:,1]
#Compute Air Temperature
airTempRs_ohms = 23100*(airTempRaw/(1-airTempRaw))
airTemp_degC = -39.17*np.log(airTempRs_ohms) + 410.43
def init_plot(title, yMin=-10, yMax=40):
plt.figure(figsize=(24, 12))
plt.title(title + disclamers)
plt.xlabel(xtext)
plt.ylabel(ytext)
#plt.xlim(xMin,xMax)
plt.ylim(yMin,yMax)
plt.grid()
#plt.xticks(np.arange(xMin,xMax+1))
def end_plot(name=None, cols=5):
plt.legend(bbox_to_anchor=(0, -.1, 1, -0.5), loc=8, ncol=cols,
mode="expand", borderaxespad=-1., scatterpoints=1)
if name:
plt.savefig(name, bbox_inches='tight')
disclamers = ('\nUSGS PROVISIONAL DATA'
'\nSUBJECT TO REVISION'
)
xtext = ('Date & Time')
ytext = ('Air Temperature, deg C')
init_plot('Air Temperature')
plt.plot(column_0, airTemp_degC, linestyle='-', color='b', label='Air Temperature')
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d\n%H:%M'))
plt.gca().xaxis.set_minor_locator(mdates.HourLocator(interval=6))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=1))
end_plot(name='py_airTemp.png')
Thanks in advance for the help!
You don't say what is the format of the date column, but I think the problem lies with your converter in np.loadtxt().
Check this example from matplotlib: http://matplotlib.org/examples/pylab_examples/load_converter.html?highlight=strpdate2num
I believe this should work:
from matplotlib.dates import strpdate2num
def readfiles(file_list):
""" read <TAB> delemited files as strings
ignoring '# Comment' lines """
data = []
for fname in file_list:
data.append(
np.loadtxt(fname,
usecols=(0,4),
comments='#', # skip comment lines
delimiter='\t',
converters = { 0 : strpdate2num('%Y-%m-%d %H:%M:%S') },
dtype=None))
return data
Related
Here I have one csv file with different values and date_time. I wrote the code and run it. Then it gave me an error time data '8/6/2018 6:45' does not match format ' %d/%m/%Y %H:%M:%S'
I want to plot this different three values in one graph. Can anyone help me to solve this problem?
Here is my code:
condition = ""
date_time = []
x1 = []
x2 = []
x3 = []
def convertTime(s):
tm = time.strptime(s, " %d/%m/%Y %H:%M:%S")
return datetime.datetime(date.tm_year,date.tm_mon, date.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec)
with open('data43.csv','r') as csv_file:
csv_data = csv.reader(csv_file, delimiter=',')
row_num = 0
for row in csv_data:
if(row_num == 0):
condition = row[0]
elif(row_num > 2): #Data starts here
if(row[0] != ''):
date_time.append(convertTime(row[0]))
if(row[1] != ''):
x1.append(int(row[1]))
if(row[2] != ''):
x2.append(int(row[2]))
if(row[3] != ''):
x3.append(int(row[3]))
row_num = row_num + 1
fig1 = plt.figure(1)
ax = fig1.add_subplot(2,1,1)
ax.plot(date_time,x1)
ax.stem(date_time,x2,'C1--','C1o',linefmt=None, markerfmt=None, basefmt=None)
ax.stem(date_time,x3,'C2--','C2o',linefmt=None, markerfmt=None, basefmt=None)
ax.legend()
ax.xaxis_date()
ax.get_xaxis().set_major_formatter(DateFormatter('%d/%m/%Y %H:%M:%S'))
plt.xlabel('t')
plt.ylabel('k')
leg = plt.legend( loc = 'upper right')
plt.draw() # Draw the figure so you can find the positon of the legend.
bb = leg.get_bbox_to_anchor().inverse_transformed(ax.transAxes)
xOffset = 0.3
bb.x0 += xOffset
bb.x1 += xOffset
leg.set_bbox_to_anchor(bb, transform = ax.transAxes)
plt.rcParams["figure.figsize"] = [20,20]
ax.plot(style='.-')
plt.show()
error :
I put datetime as :
so I include seconds as :00 after that I read this csv file.
If I changed time into 12 hours reading then what will happened ?
like this
I think you might have an error with the data, not the python file.
8/6/2018 6:45 Does not match the format %d/%m/%Y %H:%M:%S because there are no seconds.
I would test with a :00 added to your time data and test again.
I think the problem is the following steps, but just in case,I will also write the whole body of my code down blow. The most strange thing is, this code can read over 6000 csv files and a graphic can also successfully show, but when I want more files to be read, then occours an error. The screenshot shows the graphic and the content of the csv files. as you can see, the path = r'C:\Users\AK6PRAKT\Desktop\6daten' includes all datas and path = r'C:\Users\AK6PRAKT\Desktop\daten' includes only parts of them.enter image description here
import os
from matplotlib import pyplot as pyplot
from collections import defaultdict
import csv
import numpy as np
path = r'C:\Users\AK6PRAKT\Desktop\6daten'
dirs = os.listdir(path)
s = []
x = []
y = []
names = []
...(ignore some steps for reading the datas from csv files)
print(list_temp1,list_temp2) #list_temp1 is the datas of xaxise, and list_temp2 of yaxise.
y.append(float(list_temp2))
names.append(list_temp1)
x = range(len(names))
pyplot.ylim((0, 40))
my_y_ticks = np.arange(0, 40, 10)
pyplot.plot(x,y, linewidth=2)
pyplot.xticks(x,names,rotation = 90)
fig = pyplot.figure(figsize=(10,10))
pyplot.show()
and then...the whole body, and i must say something to declare: I had no background about computer science before, it's really a little bit hard for me to deal with such many datas at the very beginning. Actually i am now doing Internship in a german company and i started to learn python one week ago. I got an assignment from my mentor, I tried to devide the whole assignment into several steps, and I searched the commands of each of the steps and then combine them together with some revising. So, it may seem that I did a lot of useless work. Please be kind in commends(If you have suggestions about that, always glad to hear that of course)
import os
from matplotlib import pyplot as pyplot
from collections import defaultdict
import csv
import numpy as np
path = r'C:\Users\AK6PRAKT\Desktop\6daten'
dirs = os.listdir(path)
s = []
x = []
y = []
names = []
fig = pyplot.figure()
for i in dirs:
if os.path.splitext(i)[1] == ".csv":
f = open(path+"/"+i)
iter_f = iter(f);
str = ""
for line in iter_f:
str = str + line
s.append(str)
with open(path+"/"+i,'r') as r:
lines=r.readlines()
with open(path+"/"+i,'w') as w:
for row in lines:
if 'Date' not in row:
w.write(row)
columns = defaultdict(list)
with open(path+"/"+i) as f:
reader = csv.reader(f)
for row in reader:
for (i,v) in enumerate(row):
columns[i].append(v)
list_temp1 = columns[0]
list_temp1 = np.array(list_temp1)
list_temp2 = columns[1]
list_temp2 = np.array(list_temp2)
print(list_temp1,list_temp2)
y.append(float(list_temp2))
names.append(list_temp1)
x = range(len(names))
pyplot.ylim((0, 40))
my_y_ticks = np.arange(0, 40, 10)
pyplot.plot(x,y, linewidth=2)
pyplot.xticks(x,names,rotation = 90)
pyplot.yticks(my_y_ticks)
fig = pyplot.figure(figsize=(10,10))
pyplot.show()
the graphic from parts of datas
the graphic can not show while reading all datas
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import Rbf, InterpolatedUnivariateSpline
data = np.genfromtxt('FTIR Data.csv', skip_header=1, delimiter=',', usecols=(1,2,3), names=['Time','Peakat2188cm1', 'water'] )
x=data['Time']
y1=data['Peakat2188cm1']
y2=data['water']
fig=plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twinx()
ius=InterpolatedUnivariateSpline
xs = np.linspace(x.min(), x.max(), 100)
s1=ius(x,y1)
s2=ius(x,y2)
ys1 = s1(xs)
ys2 = s2(xs)
ax2.plot(xs,ys1)
ax2.plot(xs,ys2)
ax1.set_ylabel('Peak at 2188 cm-1')
ax2.set_ylabel('water')
ax1.set_xlabel('RT (mins)')
plt.title('RT Vs Conc')
This is my code for reading data from a csv file which is an export data from my instrument. In excel file, i have manually converted the relative time into Time in minutes and got the right plot. But i want to convert the relative time directly in matplotlib when reading the relative time column in csv file. I have tried from different examples but couldnt get through. I am very new to python so can anyone please help with editing in my code. My actual data is in the following format. (this code is used to plot absolute time i.e. Time, which i already converted in excel before ploting in matplotlib)[enter image description here][1]
Relative Time,Peak at 2188 cm-1,water
00:00:51,0.572157,0.179023
00:02:51,0.520037,0.171217
00:04:51,0.551843,0.221285
00:06:50,0.566279,0.209182
00:09:26,0.022696,0.0161351
00:10:51,-0.00344509,0.0141303
00:12:51,0.555898,0.21082
00:14:51,0.519753,0.179563
00:16:51,0.503512,0.150133
00:18:51,0.498554,0.154512
00:20:51,0.00128343,-0.0129148
00:22:51,0.349077,0.0414234
00:24:50,0.360565,0.0522027
00:26:51,0.403705,0.0667703
Plot
At this moment, the Time column is still a string. You will have to convert this to minutes in some way
pandas.to_timedelta
import pandas as pd
column_names = ['Time','Peakat2188cm1', 'water']
df_orig = pd.read_csv(filename, sep=',')
df_orig.columns = column_names
time_in_minutes = pd.to_timedelta(df_orig['Time']).dt.total_seconds() / 60
semi-manually
time_in_minutes = [sum(int(x) * 60**i for i, x in enumerate(reversed(t.split(':')), -1)) for t in data['Time']]
explanation
This is the same as:
time_in_minutes = []
for t in data['Time']:
minutes = 0
# t = '00:00:51'
h_m_s = t.split(':')
# h_m_s = ['00', '00', '51']
s_m_h = list(enumerate(reversed(h_m_s), -1))
# s_m_h = [(-1, '51'), (0, '00'), (1, '00')]
for i, x in s_m_h:
# i = -1
# x = '51'
minutes += int(x) * 60 ** i
# minutes = 0.85
time_in_minutes.append(minutes)
I have a data-set of sleep-time information and would like to produce a visualisation of it using Python.
The .csv data-set I have looks like the this:
SleepStartDate,SleepStartTime,SleepStopTime
17/03/2017,23:45,07:25
19/03/2017,01:05,09:10
19/03/2017,23:50,08:25
The visualisation I want to produce should be similar to the following:
Image source: http://quantifiedself.com/wp-content/uploads/2015/08/qs2.png
I know this is a really simple visualisation, and imagine that it's built into some already existing library, but my best Googling efforts have been unable to locate it. I'd much appreciate it if someone could point me in the right direction.
Thanks in advance for your time and wisdom.
Wasn't able to find a library with functionality to do what I wanted, so ended up writing a script to do it for myself:
Script:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import matplotlib.patches as patches
import datetime as dt
import csv
import sys
MINUTES_IN_DAY = 1440.0
COLUMN_COLOUR = 'b'
# Graph data using matplotlib visualization
def plotData(data,columnColour,maxDate,minDate):
# Set up an invisible background scatterplot give graph the correct size
# Make a series of events that are one day apart
x = mpl.dates.drange(minDate,maxDate,dt.timedelta(days=1))
# Offset first event to top of graph to give correct height
x[0] += 0.85
# Extract the time using a modulo 1, and adding an arbitrary base date
# int used so that y-axis starts at midnight
times = x % 1 + int(x[0])
fig = plt.figure()
fig.suptitle('Daily Sleep Patterns', fontsize=14, fontweight='bold')
ax = fig.add_subplot(111)
# Set background scatterplot to invisible
ax.plot_date(x, times, 'ro', color='w', visible=False)
ax.yaxis_date()
fig.autofmt_xdate()
start, end = ax.get_ylim()
# Fix division sizes and labels to show hours on y-axis
hourDivision = 1.0 / 24.0
ax.yaxis.set_ticks(np.arange(start,end,hourDivision))
ax.set_yticklabels(['Midnight','1am','2am','3am','4am','5am','6am','7am','8am','9am','10am','11am','Midday','1pm','2pm','3pm','4pm','5pm','6pm','7pm','8pm','9pm','10pm','11pm','Midnight'])
# Iterate through data
for i in range(0,len(data)):
# If period starts and finishes on different days, slit and add to both days
if data[i].startTime > data[i].stopTime:
currentDataItem = data[i]
currentDate = dt.datetime(currentDataItem.year,currentDataItem.month,currentDataItem.day)
currentDate -= dt.timedelta(days=0.5)
tomorrow = currentDate + dt.timedelta(days=1)
plt.axvspan(xmin=currentDate, xmax=tomorrow, ymin=currentDataItem.startTime, ymax=1, facecolor=columnColour, alpha=0.5)
theDayAfterTomorrow = tomorrow + dt.timedelta(days=1)
plt.axvspan(xmin=tomorrow, xmax=theDayAfterTomorrow, ymin=0, ymax=currentDataItem.stopTime, facecolor=columnColour, alpha=0.5)
# Else, add to given day
else:
currentDataItem = data[i]
currentDate = dt.datetime(currentDataItem.year,currentDataItem.month,currentDataItem.day)
currentDate -= dt.timedelta(days=0.5)
tomorrow = currentDate + dt.timedelta(days=1)
plt.axvspan(xmin=currentDate, xmax=tomorrow, ymin=currentDataItem.startTime, ymax=currentDataItem.stopTime, facecolor=columnColour, alpha=0.5)
ax.set_ylabel('Hours',fontweight='bold')
#ax.legend()
ax.grid(True)
plt.show()
# Read data from csv file
def readDataFromFile(dataFile):
f = open(dataFile,'rt')
listOfInputLists = []
try:
reader = csv.reader(f)
for row in reader:
listOfInputLists.append(row)
finally:
f.close()
return listOfInputLists
# Class to store time and date data read from file
class sleepInstance(object):
def __init__(self,listOfInputLists):
self.day = 0
self.month = 0
self.year = 0
self.formatDate(listOfInputLists[0])
self.startTime = self.formatTime(listOfInputLists[1])
self.stopTime = self.formatTime(listOfInputLists[2])
# Extracts date information variables
def formatDate(self,unformattedDate):
date = dt.datetime.strptime(unformattedDate,"%d/%m/%y")
self.day = int(date.strftime("%d"))
self.month = int(date.strftime("%m"))
self.year = int(date.strftime("%Y"))
# Formats time as a decimal fraction of day, for use in graph
def formatTime(self,unformattedTime):
timeSinceMidnight = dt.datetime.strptime(unformattedTime,'%H:%M:%S')
midnight = dt.datetime(1900,1,1)
minutesSinceMidnight = ((timeSinceMidnight - midnight).total_seconds() / 60.0)
fractionOfDay = minutesSinceMidnight / MINUTES_IN_DAY
return fractionOfDay
# Formats data read from file as a list of sleepInstance objects
def formatDataForPlot(listOfInputLists):
sleeps = []
for i in range(1,len(listOfInputLists)):
sleeps.append(sleepInstance(listOfInputLists[i]))
return sleeps
# Extracts earliest (min) and latest (max) dates from data, for use in setting graph limits
def getMaxAndMinDates(plotDataList):
dateTimeList = []
for item in plotDataList:
nextDate = dt.datetime(item.year,item.month,item.day)
dateTimeList.append(nextDate)
maxDate = max(dateTimeList)
minDate = min(dateTimeList)
return maxDate, minDate
dataFile = 'sleepData.csv'
listOfInputLists = readDataFromFile(dataFile)
plotDataList = formatDataForPlot(listOfInputLists)
maxDate, minDate = getMaxAndMinDates(plotDataList)
plotData(plotDataList,COLUMN_COLOUR,maxDate,minDate)
Input:
Date,Start,Finish
17/03/17,03:15:00,03:55:00
17/03/17,06:20:00,06:35:00
17/03/17,09:00:00,09:40:00
17/03/17,13:10:00,13:35:00
17/03/17,15:45:00,16:30:00
17/03/17,18:45:00,19:25:00
17/03/17,21:15:00,21:35:00
18/03/17,00:30:00,02:00:00
18/03/17,04:50:00,05:05:00
18/03/17,08:20:00,08:40:00
18/03/17,12:30:00,13:10:00
18/03/17,16:30:00,17:00:00
18/03/17,18:45:00,19:00:00
18/03/17,20:30:00,21:00:00
19/03/17,00:00:00,12:00:00
19/03/17,18:00:00,23:59:00
19/03/17,13:00:00,14:00:00
20/03/17,12:00:00,11:00:00
Output:
May fancy it up a bit when I have time: https://github.com/ambidextrous/timeLogGrapher
The classic Python choice would be to use the matplotlib package. Looking at your sample graph it looks like a vertical bar graph.
I want to plot a quantity from a tedious-to-look-at .dat file, the #time column in the file extends from 0s to 70s, but I need to take a closer look at data (Nuclear Energy, in this case) from 25s to 35s.
I was wondering if there is a way I can manipulate the time column and corresponding other columns to record and plot data only for the required time span.
I already have some code which does the job for me for 0-70s:
import matplotlib
matplotlib.use('Agg')
import os
import numpy as np
import matplotlib.pyplot as plt
import string
import math
# reads from flash.dat
def getQuantity(folder, basename, varlist):
# quantities[0] should contain only the quantities of varlist[0]
quantities =[]
for i in range(len(varlist)):
quantities.append([])
with open(folder + "/" + basename + ".dat", 'r') as f: # same as f = open(...) but closes the file afterwards.
for line in f:
if not ('#' or 'Inf') in line: # the first line and restarting lines look like this.
for i in range(len(varlist)):
if(varlist[i]==NUCLEAR_ENERGY and len(quantities[i])>0):
quantities[i].append(float(line.split()[varlist[i]])+quantities[i][-1])
else:
quantities[i].append(float(line.split()[varlist[i]]))
return quantities
# end def getQuantity
#create plot
plt.figure(1)
TIME = 0
NUCLEAR_ENERGY = 18
labels = ["time", "Nuclear Energy"]
flashFolder1 = '/home/trina/Pictures' # should be the flash NOT the flash/object folder.
lab1 = '176'
filename = 'flash' # 'flash' for flash.dat
nHorizontal = 1 # number of Plots in Horizontal Direction. Vertical Direction is set by program.
outputFilename = 'QuantityPlots_Nuclear.png'
variables = [NUCLEAR_ENERGY]
#Adjustments to set the size
nVertical = math.ceil(float(len(variables))/nHorizontal) # = 6 for 16 = len(variables) & nHorizontal = 3.
F = plt.gcf() #get figure
DPI = F.get_dpi()
DefaultSize = F.get_size_inches()
F.set_size_inches( DefaultSize[0]*nHorizontal, DefaultSize[1]*nVertical ) #build no of subplots in figure
variables.insert(0,TIME) # time as needed as well
data1 = getQuantity(flashFolder1, filename, variables)
time1 = np.array(data1[0]) #time is first column
for n in [n+1 for n in range(len(variables)-1)]: #starts at 1
ax=plt.subplot(nVertical, nHorizontal, n) #for example (6,3,0 to 15) inside loop for 16 variables
if (min(data1[n])<0.0 or abs((min(data1[n]))/(max(data1[n])))>=1.e-2):
plt.plot(time1, data1[n],label=lab1) #, label = labels[variables[n]])
legend = ax.legend(loc='upper right', frameon=False)
else:
plt.semilogy(time1, data1[n],label=lab1) #, label = labels[variables[n]])
legend = ax.legend(loc='upper right', frameon=False)
plt.savefig(outputFilename)
Here is the figure I can produce from this code:
and for your convenience I am also sharing the .dat file:
https://www.dropbox.com/s/w4jbxmln9e83355/flash.dat?dl=0
Your suggestions are most appreciated.
UPDATE: plot cumulative nuclear energy:
x = df.query('25 <= time <= 35').set_index('time')
x['cum_nucl_energy'] = x.Nuclear_Energy.cumsum()
x.cum_nucl_energy.plot(figsize=(12,10))
Old answer:
Using Pandas module
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
fn = r'D:\temp\.data\flash.dat'
df = pd.read_csv(fn, sep='\s+', usecols=[0, 18], header=None, skiprows=[0], na_values=['Infinity'])
df.columns=['time', 'Nuclear_Energy']
df.query('25 <= time <= 35').set_index('time').plot(figsize=(12,10))
plt.show()
plt.savefig('d:/temp/out.png')
Result:
Explanation:
In [43]: pd.options.display.max_rows
Out[43]: 50
In [44]: pd.options.display.max_rows = 12
In [45]: df
Out[45]:
time Nuclear_Energy
0 0.000000e+00 0.000000e+00
1 1.000000e-07 -4.750169e+29
2 2.200000e-07 -5.699325e+29
3 3.640000e-07 -6.838392e+29
4 5.368000e-07 -8.206028e+29
5 7.441600e-07 -9.837617e+29
... ... ...
10210 6.046702e+01 7.160630e+44
10211 6.047419e+01 7.038907e+44
10212 6.048137e+01 6.934600e+44
10213 6.048856e+01 6.847015e+44
10214 6.049577e+01 6.765220e+44
10215 6.050298e+01 6.661930e+44
[10216 rows x 2 columns]
In [46]: df.query('25 <= time <= 35')
Out[46]:
time Nuclear_Energy
4534 25.001663 1.559398e+43
4535 25.006781 1.567793e+43
4536 25.011900 1.575844e+43
4537 25.017021 1.583984e+43
4538 25.022141 1.592015e+43
4539 25.027259 1.600200e+43
... ... ...
6521 34.966427 8.181516e+41
6522 34.972926 8.538806e+41
6523 34.979425 8.913695e+41
6524 34.985925 9.304403e+41
6525 34.992429 9.731310e+41
6526 34.998941 1.019862e+42
[1993 rows x 2 columns]
In [47]: df.query('25 <= time <= 35').set_index('time')
Out[47]:
Nuclear_Energy
time
25.001663 1.559398e+43
25.006781 1.567793e+43
25.011900 1.575844e+43
25.017021 1.583984e+43
25.022141 1.592015e+43
25.027259 1.600200e+43
... ...
34.966427 8.181516e+41
34.972926 8.538806e+41
34.979425 8.913695e+41
34.985925 9.304403e+41
34.992429 9.731310e+41
34.998941 1.019862e+42
[1993 rows x 1 columns]