I want to create a plot from the following data:
timeArray= ['11:47:46.585', '11:47:46.695', '11:47:46.805', '11:47:46.915', '11:47:47.025', '11:47:47.135', '11:47:47.245', '11:47:47.355', '11:47:47.465', '11:47:47.575', '11:47:47.685', '11:47:47.795', '11:47:47.905', '11:47:48.015', '11:47:48.125', '11:47:48.235', '11:47:48.345', '11:47:48.455', '11:47:48.565', '11:47:48.675', '11:47:48.785', '11:47:48.895', '11:47:49.005', '11:47:49.115', '11:47:49.225', '11:47:49.335', '11:47:49.445', '11:47:49.555', '11:47:49.665', '11:47:49.775', '11:47:49.885', '11:47:49.995', '11:47:50.105', '11:47:50.215', '11:47:50.325', '11:47:50.435', '11:47:50.545', '11:47:50.655', '11:47:50.765', '11:47:50.875', '11:47:50.985', '11:47:51.095', '11:47:51.205', '11:47:51.315', '11:47:51.425', '11:47:51.535', '11:47:51.645', '11:47:51.755', '11:47:51.865', '11:47:51.975', '11:47:52.085', '11:47:52.195', '11:47:52.305', '11:47:52.415']
valueArray = [10382.0, 8372.0, 11117.0, 11804.0, 10164.0, 10221.0, 10488.0, 7910.0, 12911.0, 11422.0, 15361.0, 15424.0, 10629.0, 14993.0, 13827.0, 15164.0, 10514.0, 10356.0, 14638.0, 12272.0, 14980.0, 14391.0, 12984.0, 18967.0, 15792.0, 14753.0, 16205.0, 19187.0, 13922.0, 10787.0, 14500.0, 12918.0, 13985.0, 14695.0, 14014.0, 12087.0, 12163.0, 11424.0, 8598.0, 8573.0, 9986.0, 10315.0, 11449.0, 9146.0, 11160.0, 6861.0, 10211.0, 9097.0, 8443.0, 5446.0, 6354.0, 6829.0, 5786.0, 7860.0]
timeArray will be x-axis, valueArray will be y-axis.
plot line looks like this:
import matplotlib.pyplot as plt
# NOTE: timeArray holds plain strings at this point, not datetime values,
# so presumably every string becomes its own x tick label (hence the overlap).
plt.plot(timeArray,valueArray,'r', label='values over time')
And I'm getting this graph:
I have used: plt.gcf().autofmt_xdate(), but still getting one time over the next.
I have also tried:
# NOTE: timeArray contains strings, so min()/max() return strings and
# np.linspace cannot do arithmetic on them; this call raises a TypeError.
xaxis = np.linspace(min(timeArray),max(timeArray), 10)
plt.xticks(xaxis)
but I got a TypeError: ufunc 'multiply' did not contain a loop with signature matching types dtype('<U32') dtype('<U32') dtype('<U32')
Is there a simple way to keep the data as it is but without showing every single time with microseconds?
I'd suggest you convert your times to datetime objects rather than strings, and then use matplotlib.dates.DateFormatter() with an appropriate date format:
import datetime
import matplotlib.dates as mdates

# Parse the 'HH:MM:SS.fff' strings into datetime objects so the x axis
# carries real time values instead of text labels.
timeArray = [
    datetime.datetime.strptime(stamp, '%H:%M:%S.%f') for stamp in timeArray
]

fig, ax = plt.subplots()
ax.plot(timeArray, valueArray, 'r', label='values over time')

# Tick labels show hours, minutes and whole seconds only.
fmt = mdates.DateFormatter('%H:%M:%S')
ax.xaxis.set_major_formatter(fmt)
The result:
You can do so:
import datetime
import matplotlib.pyplot as plt
import pandas as pd

# Wrap the timestamp strings in a Series and convert them to datetime values
# before handing them to Matplotlib.
time_strings = pd.Series(timeArray)
timeArray = pd.to_datetime(time_strings)

plt.plot(timeArray, valueArray, 'r', label='values over time')
plt.show()
Output:
Or adding some rotation to the ticks:
plt.xticks(rotation=45) # or 90 to be vertical
Related
I have two lists containing the sunset and sunrise times and the corresponding dates.
It looks like:
sunrises = ['06:30', '06:28', '06:27', ...]
dates = ['3.21', '3.22', '3.23', ...]
I want to make a plot of the sunrise times as the Y axis and the dates as the X axis.
Simply using
# NOTE: both lists contain strings here, so neither axis carries numeric
# or datetime values.
ax.plot(dates, sunrises)
# Request a major tick at every multiple of 7 on each axis.
ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(7))
ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(7))
plt.show()
can plot the dates correctly, but the time is wrong:
And actually, the sunrise time isn't supposed to be a straight line.
How do I solve this problem?
You need to transform the datetime in string format to the format that matplotlib can comprehend by using datetime
from matplotlib import pyplot as plt
import matplotlib as mpl
from datetime import datetime
import matplotlib.dates as mdates

dates = ['3.21', '3.22', '3.23']
sunrises = ['06:30', '06:28', '06:27']

# Parse every 'HH:MM' string into a datetime so the y axis is a time axis.
sunrises_dt = [datetime.strptime(clock, '%H:%M') for clock in sunrises]

fig, ax = plt.subplots()
ax.plot(dates, sunrises_dt)

# y axis: show the parsed values as hours and minutes again.
hour_minute = mdates.DateFormatter('%H:%M')
ax.yaxis.set_major_formatter(hour_minute)

# x axis: one major tick per entry.
every_entry = mpl.ticker.MultipleLocator(1)
ax.xaxis.set_major_locator(every_entry)

plt.show()
This is because your sunrises are not numerical. I'm assuming you'd want them in a form such that "6:30" means 6.5. Which is calculated below:
import matplotlib.pyplot as plt


def _to_decimal_hours(hhmm):
    """Convert an 'H:MM' or 'HH:MM' string to hours as a float.

    Splitting on ':' (instead of slicing fixed columns) also accepts
    non-zero-padded hours, e.g. '6:30' -> 6.5.
    """
    hours, minutes = hhmm.split(':')
    return float(hours) + float(minutes) / 60


sunrises = ['06:30', '06:28', '06:27']
# This converts to decimals
sunrises = [_to_decimal_hours(x) for x in sunrises]
dates = ['3.21', '3.22', '3.23']
plt.plot(sunrises, dates)
plt.xlabel('sunrises')
plt.ylabel('dates')
plt.show()
Note, your dates are being treated as decimals. Is this correct?
I have a large Pandas DataFrame that contains three columns: two different dates and one of measurement (floats). I want to plot a 3d figure (eg. trisurf, plot_surface, etc) where the dates are on the x and y axes and measurement is on the z axis. I tried using the suggestions in this post, but it isn't helpful.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as dates
import datetime
import matplotlib.ticker as ticker
import pandas as pd
# Two daily date ranges of equal length; the second starts 14 days later.
first_dates = pd.date_range(start='2018-01-05', end='2018-04-15', freq='1D')
second_dates = pd.date_range(start='2018-01-19', end='2018-04-29', freq='1D')
df = pd.DataFrame({'date1': first_dates, 'date2': second_dates})
# Dummy measurement: a sine curve sampled once per row.
df['mydata'] = np.sin(2 * np.linspace(-1, 1, len(df)))
def format_date(x, pos=None):
    """Format a Matplotlib date number as a 'YYYY-MM-DD' string.

    The (x, pos) signature is the one ticker.FuncFormatter calls with;
    pos is accepted but not used.
    """
    return dates.num2date(x).strftime('%Y-%m-%d') #use FuncFormatter to format dates
from matplotlib import cm  # cm.coolwarm is used below but cm was never imported

fig = plt.figure()  # keep the handle: Axes3D below is constructed from it
ax = Axes3D(fig,rect=[0,0.1,1,1]) #make room for date labels
# NOTE(review): date2 is date1 shifted by a constant 14 days, so all
# (date1, date2) points lie on one straight line. That is a likely cause of
# the "singular input data" qhull error, since a line cannot be triangulated.
ax.plot_trisurf(df.date1, df.date2, df.mydata, cmap=cm.coolwarm, linewidth=0.2)
# NOTE(review): some_dates is not defined anywhere in this snippet.
ax.w_xaxis.set_major_locator(ticker.FixedLocator(some_dates)) # I want all the dates on my xaxis
ax.w_xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
ax.w_yaxis.set_major_locator(ticker.FixedLocator(some_dates))
ax.w_yaxis.set_major_formatter(ticker.FuncFormatter(format_date))
for tl in ax.w_xaxis.get_ticklabels(): # re-create what autofmt_xdate does, but with w_xaxis
    tl.set_ha('right')
    tl.set_rotation(30)
for tl in ax.w_yaxis.get_ticklabels():
    tl.set_ha('right')
    #tl.set_rotation(30)
ax.set_xlabel('date1')
ax.set_ylabel('date2')
ax.set_zlabel('mydata')
plt.show()
I keep getting the error RuntimeError: Error in qhull Delaunay triangulation calculation: singular input data (exitcode=2); use python verbose option (-v) to see original qhull error. What am I doing wrong and how do I resolve it?
I'm basically trying to plot a graph where the x axis represents the month of the year. The data is stored in a numpy.array, with dimensions k x months. Here is a minimal example (my data is not this crazy):
import numpy
import matplotlib
import matplotlib.pyplot as plt
# 18 series with 12 samples each.
data = numpy.random.rand(18,12)
cmap = plt.get_cmap('Set3')
# One colour per plotted row (the original used the undefined name
# `complaints` for this count).
colors = [cmap(i) for i in numpy.linspace(0, 1, data.shape[0])]
# x positions 0..11, one per column of data.
y = range(data.shape[1])
plt.figure(figsize=(15, 7), dpi=200)
for i in range(data.shape[0]):
    plt.plot(y, data[i,:], color=colors[i], linewidth=5)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xticks(numpy.arange(0, 12, 1))
plt.xlabel('Hour of the Day')
plt.ylabel('Number of Complaints')
plt.title('Number of Complaints per Hour in 2015')
I'd like to have the xticks as strings instead of numbers. I'm wondering if I have to create a list of strings, manually, or if there is another way to translate the numbers to months. I have to do the same for weekdays, for example.
I've been looking to these examples:
http://matplotlib.org/examples/pylab_examples/finance_demo.html
http://matplotlib.org/examples/pylab_examples/date_demo2.html
But I'm not using datetime.
Although this answer works well, for this case you can avoid defining your own FuncFormatter by using the pre-defined date formatters from matplotlib.dates rather than matplotlib.ticker:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
# Define a time range covering 12 consecutive months:
# `MS` stands for "month start" frequency.
x_data = pd.date_range('2018-01-01', periods=12, freq='MS')
# Check what these dates look like:
print(x_data)
# One random y value per month.
y_data = np.random.rand(12)
fig, ax = plt.subplots()
ax.plot(x_data, y_data)
# Place a major tick at every month:
ax.xaxis.set_major_locator(mdates.MonthLocator())
# Show only the month name on the x-axis tick labels:
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
# '%b' is the locale's abbreviated month name.
plt.show()
Obtaining:
DatetimeIndex(['2018-01-01', '2018-02-01', '2018-03-01', '2018-04-01',
'2018-05-01', '2018-06-01', '2018-07-01', '2018-08-01',
'2018-09-01', '2018-10-01', '2018-11-01', '2018-12-01'],
dtype='datetime64[ns]', freq='MS')
This is an alternative plotting method, plot_date, which you might want to use if your independent variable is datetime-like, instead of using the more general plot method:
import datetime
import matplotlib.dates as mdates
data = np.random.rand(24)
# a list of times: 00:00:00 to 23:00:00
times = [datetime.datetime.strptime(str(i), '%H') for i in range(24)]
# fmt is the plot format string (marker/line/colour), NOT a date format:
# 'H' selects a hexagon marker and has no effect on the tick labels.
plt.plot_date(times, data, fmt='H')
# The tick-label text is controlled by the axis formatter; '%H' shows only
# the hour (day, year, week, month, etc. are not shown).
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%H'))
# Rotate the labels so they do not overlap.
plt.setp(plt.gca().xaxis.get_majorticklabels(),
         'rotation', 90)
The benefit of it is that now you can easily control the density of xticks, if we want to have a tick every hour, we will insert these lines after plot_date:
##import it if not already imported
#import matplotlib.dates as mdates
plt.gca().xaxis.set_major_locator(mdates.HourLocator())
You can still use formatters to format your results in the way you want. For example, to have month names printed, let us first define a function taking an integer to a month abbreviation:
def getMonthName(month_number):
    """Return the abbreviated month name (e.g. 'Jan') for a month number.

    month_number is passed through int(), so an int, a float tick value such
    as 3.0, or a digit string is accepted. Values outside 1..12 raise
    ValueError (from datetime.date).
    """
    # Local import: other snippets use `from datetime import datetime`, which
    # would shadow the module name this function relies on.
    import datetime

    # Year and day are arbitrary; only the month matters for '%b'.
    testdate = datetime.date(2010, int(month_number), 1)
    return testdate.strftime('%b')
Here, I have created an arbitrary date with the correct month and returned that month's abbreviated name. Check the datetime documentation for available format codes if needed. Whether that is always easier than just writing the list by hand is another question. Now let us plot some monthly test data:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

# Month numbers 1.0 .. 12.0 and some arbitrary data to go with them.
x_data = np.arange(1, 12.5, 1)
y_data = x_data ** 2

plt.plot(x_data, y_data)

x_axis = plt.gca().xaxis
# Put a tick exactly on every month number ...
x_axis.set_major_locator(mtick.FixedLocator(x_data))
# ... and label each tick with the month abbreviation for its value.
x_axis.set_major_formatter(
    mtick.FuncFormatter(lambda value, _pos: getMonthName(value))
)
plt.show()
The message here is that you can use matplotlib.ticker.FuncFormatter to use any function to obtain a tick label. The function takes two arguments (value and position) and returns a string.
I have an array of timestamps in the format (HH:MM:SS.mmmmmm) and another array of floating point numbers, each corresponding to a value in the timestamp array.
Can I plot time on the x axis and the numbers on the y-axis using Matplotlib?
I was trying to, but somehow it was only accepting arrays of floats. How can I get it to plot the time? Do I have to modify the format in any way?
Update:
This answer is outdated since matplotlib version 3.5. The plot function now handles datetime data directly. See https://matplotlib.org/3.5.1/api/_as_gen/matplotlib.pyplot.plot_date.html
The use of plot_date is discouraged. This method exists for historic
reasons and may be deprecated in the future.
datetime-like data should directly be plotted using plot.
If you need to plot plain numeric data as Matplotlib date format or
need to set a timezone, call ax.xaxis.axis_date / ax.yaxis.axis_date
before plot. See Axis.axis_date.
Old, outdated answer:
You must first convert your timestamps to Python datetime objects (use datetime.strptime). Then use date2num to convert the dates to matplotlib format.
Plot the dates and values using plot_date:
import matplotlib.pyplot
import matplotlib.dates
from datetime import datetime
# Three sample timestamps on the same day (12:00, 14:00, 16:00) and their values.
x_values = [datetime(2021, 11, 18, 12), datetime(2021, 11, 18, 14), datetime(2021, 11, 18, 16)]
y_values = [1.0, 3.0, 2.0]
# Convert the datetime objects to Matplotlib's numeric date representation.
dates = matplotlib.dates.date2num(x_values)
# plot_date treats the x values as dates when drawing.
matplotlib.pyplot.plot_date(dates, y_values)
You can also plot the timestamp, value pairs using pyplot.plot (after parsing them from their string representation). (Tested with matplotlib versions 1.2.0 and 1.3.1.)
Example:
import datetime
import random
import matplotlib.pyplot as plt

# Sample data: 12 timestamps one hour apart starting now, and a noisy ramp.
one_hour = datetime.timedelta(hours=1)
x = [datetime.datetime.now() + step * one_hour for step in range(12)]
y = [position + random.gauss(0, 1) for position, _stamp in enumerate(x)]

# Draw the line.
plt.plot(x, y)

# Slant the date labels so they stay readable.
figure = plt.gcf()
figure.autofmt_xdate()

plt.show()
Resulting image:
Here's the same as a scatter plot:
import datetime
import random
import matplotlib.pyplot as plt

# Sample data: 12 hourly timestamps from now on, each with a noisy value.
x = []
for hour in range(12):
    x.append(datetime.datetime.now() + datetime.timedelta(hours=hour))
y = [rank + random.gauss(0, 1) for rank, _when in enumerate(x)]

# Draw the points.
plt.scatter(x, y)

# Slant the date labels so they stay readable.
current_figure = plt.gcf()
current_figure.autofmt_xdate()

plt.show()
Produces an image similar to this:
7 years later and this code has helped me.
However, my times still were not showing up correctly.
Using Matplotlib 2.0.0 and I had to add the following bit of code from Editing the date formatting of x-axis tick labels in matplotlib by Paul H.
import matplotlib.dates as mdates
# '%d' shows only the day of the month on each tick label.
myFmt = mdates.DateFormatter('%d')
# NOTE: ax is assumed to be an existing Axes object; it is not created in this snippet.
ax.xaxis.set_major_formatter(myFmt)
I changed the format to (%H:%M) and the time displayed correctly.
All thanks to the community.
I had trouble with this using matplotlib version: 2.0.2. Running the example from above I got a centered stacked set of bubbles.
I "fixed" the problem by adding another line:
plt.plot([],[])
The entire code snippet becomes:
import datetime
import random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# make up some data: 12 timestamps one minute apart and a noisy ramp
x = [datetime.datetime.now() + datetime.timedelta(minutes=i) for i in range(12)]
y = [i+random.gauss(0,1) for i,_ in enumerate(x)]
# plot
# Workaround reported for Matplotlib 2.0.2: without this empty plot() call
# the scatter points were drawn as a centred, stacked set of bubbles.
plt.plot([],[])
plt.scatter(x,y)
# beautify the x-labels
plt.gcf().autofmt_xdate()
# Show hours and minutes on the tick labels.
myFmt = mdates.DateFormatter('%H:%M')
plt.gca().xaxis.set_major_formatter(myFmt)
plt.show()
plt.close()
This produces an image with the bubbles distributed as desired.
Pandas dataframes haven't been mentioned yet. I wanted to show how these solved my datetime problem. I have datetime values such as 2021-04-01 16:05:37. I am pulling linux/haproxy throughput from /proc, so I can format it however I like. This is nice for feeding data into a live graph animation.
Here's a look at the csv. (Ignore the packets per second column I'm using that in another graph)
head -2 ~/data
date,mbps,pps
2021-04-01 16:05:37,113,9342.00
...
By using print(dataframe.dtype) I can see how the data was read in:
(base) ➜ graphs ./throughput.py
date object
mbps int64
pps float64
dtype: object
Pandas pulls the date string in as "object", which is just type char. Using this as-is in a script:
import matplotlib.pyplot as plt
import pandas as pd
# Read the CSV as-is; the "date" column is left as strings (dtype object).
dataframe = pd.read_csv("~/data")
dates = dataframe["date"]
mbps = dataframe["mbps"]
plt.plot(dates, mbps, label="mbps")
plt.title("throughput")
plt.xlabel("time")
plt.ylabel("mbps")
plt.legend()
# Tilt the tick labels to reduce overlap.
plt.xticks(rotation=45)
plt.show()
Matplotlib renders every single timestamp string. I've added plt.xticks(rotation=45) to tilt the dates, but it's not what I want. I can convert the date "object" column to datetime64[ns], which matplotlib does know how to render.
dataframe["date"] = pd.to_datetime(dataframe["date"])
This time my date is type datetime64[ns]
(base) ➜ graphs ./throughput.py
date datetime64[ns]
mbps int64
pps float64
dtype: object
Same script with 1 line difference.
#!/usr/bin/env python
import matplotlib.pyplot as plt
import pandas as pd
dataframe = pd.read_csv("~/data")
# Convert the "date" column from strings (dtype object) to datetime64[ns]
# so that Matplotlib receives real datetime values for the x axis.
dataframe["date"] = pd.to_datetime(dataframe["date"])
dates = dataframe["date"]
mbps = dataframe["mbps"]
plt.plot(dates, mbps, label="mbps")
plt.title("throughput")
plt.xlabel("time")
plt.ylabel("mbps")
plt.legend()
# Tilt the tick labels to reduce overlap.
plt.xticks(rotation=45)
plt.show()
This might not have been ideal for your usecase but it might help someone else.
My data looks as follows:
2012021305, 65217
2012021306, 82418
2012021307, 71316
2012021308, 66833
2012021309, 69406
2012021310, 76422
2012021311, 94188
2012021312, 111817
2012021313, 127002
2012021314, 141099
2012021315, 147830
2012021316, 136330
2012021317, 122252
2012021318, 118619
2012021319, 115763
2012021320, 121393
2012021321, 130022
2012021322, 137658
2012021323, 139363
The first column is the date in YYYYMMDDHH format. I'm trying to graph the data using the csv2rec module. I can get the data to graph, but the x axis and labels are not showing up the way that I expect them to.
import matplotlib
# Select the Agg backend before pyplot is imported; the figure is written to
# a file at the end rather than shown.
matplotlib.use('Agg')
from matplotlib.mlab import csv2rec
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pylab import *
output_image_name='plot1.png'
input_filename="data.log"
# NOTE(review): the file is opened and closed again without being read;
# csv2rec below is given the filename directly. `input` also shadows the builtin.
input = open(input_filename, 'r')
input.close()
data = csv2rec(input_filename, names=['time', 'count'])
rcParams['figure.figsize'] = 10, 5
rcParams['font.size'] = 8
fig = plt.figure()
# NOTE(review): the same data is plotted twice, once through pyplot here and
# once on the subplot created below.
plt.plot(data['time'], data['count'])
ax = fig.add_subplot(111)
# NOTE(review): data['time'] is passed as read from the file; nothing in this
# script converts the YYYYMMDDHH values to dates.
ax.plot(data['time'], data['count'])
hours = mdates.HourLocator()
# NOTE(review): in strftime codes %M is minutes and %D is not day-of-month;
# '%Y%m%d%H' is presumably what was intended -- confirm.
fmt = mdates.DateFormatter('%Y%M%D%H')
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(fmt)
ax.grid()
plt.ylabel("Count")
plt.title("Count Log Per Hour")
fig.autofmt_xdate(bottom=0.2, rotation=90, ha='left')
plt.savefig(output_image_name)
I assume this has something to do with the date format. Any suggestions?
You need to convert the x-values to datetime objects
Something like:
from datetime import datetime

# Parse each YYYYMMDDHH value into a datetime object. The method is
# strptime; `datetime.strp` does not exist.
time_vec = [datetime.strptime(str(x),'%Y%m%d%H') for x in data['time']]
plot(time_vec,data['count'])
Currently, you are telling python to format integers (2012021305) as a date, which it does not know how to do, so it returns and empty string (although, I suspect that you are getting errors raised someplace).
You should also check your format string mark up.