I want to save a plot in python as a PDF. My problem ist, when I run my code I get a plot that looks like this:
When I now maximise the window I get a plot that looks like this:
Note that the second one looks much better, because it is not so "cramped". If I now save the plot using plt.savefig('floors.pdf') the saved plot looks like the first picture. I need a way to save it so that it looks like in the second picture.
I found this: How to make savefig() save image for 'maximized' window instead of default size but it is not helping because I don't want to save a set resolution in pixels. I want to save it as an PDF, because that way it stays a vector graphic.
My code:
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from uncertainties import ufloat
os.chdir('PlayGround/SAOIF floors')
# data ------------------------------------------
floors = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,20,25,27,35,40,47,48,50,55,61,68,74,75,77,81])
N = np.arange(len(floors))+1
# plot raw data ---------------------------------
fig = plt.figure()
plt.suptitle('SAOIF Floors')
ax1 = plt.subplot(121)
ax1.plot(N,floors,'o-')
ax1.set_title('All Floors')
ax1.set_ylabel('Floor Number')
ax1.set_xticks(np.arange(1, len(N)+1, len(N)//8))
ax1.set_yticks(np.arange(floors[0], floors[-1]+1, (floors[-1] - floors[0])//8))
ax1.grid()
# fit data --------------------------------------
floors = floors[13::]
N = N[13::]
x = N
y = floors
linera_function = lambda x, m, b: m*x+b
popt, pcov = curve_fit(linera_function, x, y)
error = np.sqrt(np.diag(pcov))
m = round(popt[0],2)
b = round(popt[1],2)
dm = round(error[0],2)
db = round(error[1],2)
# plot fit --------------------------------------
ax2 = plt.subplot(122)
ax2.set_title('Floors 14-81')
ax2.plot(x, y,'o', label='data') # plot data
ax2.plot(x, linera_function(x, *popt), label=f'fit: $f(x)=m \cdot x + b$ \n$m={m}\pm{dm}$ \n$b={b}\pm{db}$') # plot fit
# predicting next floor -------------------------
m = ufloat(m, dm)
b = ufloat(b, db)
next_floor = linera_function(N[-1]+1, m, b)
print('m = {:L}'.format(m))
print('b = {:L}'.format(b))
print('Next floor: {:L}'.format(next_floor))
# plot prediction -------------------------------
x = list(x)
y = list(y)
x.append(x[-1]+1)
y.append(next_floor.n)
next_floor_n = int(round(next_floor.n))
next_floor_s = int(round(next_floor.s))
ax2.errorbar(N[-1]+1, next_floor.n, yerr=next_floor.s, label=f'predicted next floor \nnumber$={next_floor_n} \pm {next_floor_s}$', capsize=5, fmt='o')
ax2.plot([N[-1], N[-1]+1.7], [linera_function(N[-1], m, b).n, linera_function(N[-1]+1.7, m, b).n],'--', color='#ff7f0e')
ax2.set_xticks(np.arange(x[0], x[-1]+1, round((x[-1] - x[0])/8)))
ax2.set_yticks(np.arange(y[0], y[-1]+1, round((y[-1] - y[0])/8)))
ax2.legend()
plt.gcf()
fig.supxlabel('common_x')
plt.savefig('floors.pdf')
plt.show()
Related
This is not a duplicate question since other answers only explain how to plot the cross-correlation function and do not explain how you can get the time difference.
Given a sin signal and shifted version, we should be able to get the time delay between them.
I have created a sin signal and shifted it by t_d=0.05. The following is my code and its output:
import numpy as np
import matplotlib.pyplot as plt
fs = 1000
x = np.linspace(0, 1, fs)
f = 5
t_shift = 0.05
y = np.sin(2*np.pi*f*x)
y_shifted = np.sin(2*np.pi*f*(x-t_shift))
fig, ax = plt.subplots()
ax.plot(x, y, x, y_shifted)
plt.show()
By normalizing signals and applying numpy.correlate we get the following:
y_norm = (y-y.mean())/y.std()
y_shifted_norm = (y_shifted - y_shifted.mean())/y_shifted.std()
cc = np.correlate(y_norm, y_shifted_norm, 'full')
fig, ax = plt.subplots()
ax.plot(range(len(cc)), cc)
plt.show()
Question
From the indices of cross-correlation function, how can I get t_shift=0.05?
#Sepide. It seems to me as if you are trying to maximise the correlation between the signal y and a shifted version of y_shifted. This might be accomplished using np.correlate() but it seems nontrivial indeed to recover the time shifts in the signals. In the solution below I manually shift the time series and compute the correlation coefficient using np.corrcoef. As soon as this Pearson correlation coefficient equals 1, the two signals are aligned.
import numpy as np
import matplotlib.pyplot as plt
# Setting
fs = 1000
x = np.linspace(0, 1, fs)
f = 5
t_shift = 0.05
t_step = 1/fs
# Data
y = np.sin(2*np.pi*f*x)
y_shifted = np.sin(2*np.pi*f*(x-t_shift))
# Compute correlation
MaxTimeShift = 200
CorrelationList = np.empty((MaxTimeShift,1));
CorrelationList[:] = np.NaN
# Compute correlation for various shifts
for iter in range(MaxTimeShift):
CorrelationList[iter] = np.corrcoef( y[0:801].T, y_shifted[iter:(801+iter)].T)[0,1]
# Plot 1
plt.figure(1)
plt.plot(x, y, x, y_shifted)
plt.show()
# Plot 2
plt.figure(2)
ShiftList = t_step*np.arange(MaxTimeShift)
plt.plot(ShiftList, CorrelationList)
plt.title("Correlation coefficient")
plt.show()
print("The time shift between the signals is: ", ShiftList[np.argmax(CorrelationList)])
hey how can I plot a 2D heatmap in 3D? Now I create a python script to make a 2D Heatmap Plot with data from CSV (CSV format: x,y,z,v).
For example:
First csv
0,000;-110,000;110,000;0,101
Second csv
0,000;-66,000;110,000;0,104
Third csv
0,000;-22,000;110,000;0,119
....
In this example, it is a heatmap in xz-plane and I create e.g. five more plots, so that I can insert six xz-plane Plots in a 3D room.
In 4D heatmap plot with matplotlib there is a very nice example for doing it. But I don't know how to use it in my case.
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
'Create a list for every parameter'
x = []
y = []
z = []
v = []
file_path = "path/."
'Insert data from csv into lists'
for root, dirs, files in os.walk(file_path, topdown=False):
for name in files:
if name[-4:] != '.csv': continue
with open(os.path.join(root, name)) as data:
data = np.genfromtxt((line.replace(',', '.') for line in data), delimiter=";")
if data[1] == 22:
x.append(data[0])
y.append(data[1])
z.append(data[2])
v.append(data[3])
'Create axis data'
xi = np.linspace(min(x), max(x), 1000)
zi = np.linspace(min(z), max(z), 1000)
vi = griddata((x, z), v, (xi[None,:], zi[:,None]), method='cubic')
'Create the contour plot'
CS = plt.contourf(xi, zi, vi, 20, cmap=plt.cm.rainbow)
plt.title("Heatmap xz-plane", y=1.05,
fontweight="bold")
plt.xlabel("length x in cm")
plt.xticks(np.arange(0, 201, step=40))
plt.ylabel("height z in cm")
plt.yticks(np.arange(110, 251, step=20))
cbar = plt.colorbar()
cbar.set_label("velocity v in m/s", labelpad=10)
plt.savefig('testplot.png', dpi=400)
plt.show()
Satisfying the request of #keepAlive wishing to see the result of his untested answer... :
it actually works great :-)
Disclaimer: I am the author of the cited example, so I think that copying/pasting myself is not really a problem.
Note that your dataset does not look (at least) 3-dimensional. But I will assume there is an unwilling selection bias.
You first need to aggregate your "points" per level of altitude, which I assume is the third component of your vectors. They will be constitutive of your planes once gathered.
# libraries
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import scipy.interpolate as si
from matplotlib import cm
import collections as co # <------------------
import pandas as pd
import numpy as np
planes = co.defaultdict(list)
for root, dirs, files in os.walk(file_path, topdown=False):
# [...]
# [...]
# [...]
# [...]
# [...]
level = data[2] # <------ third component.
planes[level].append(data)
Now, at that stage, we have a list of arrays per level. Let's define our grids_maker function
def grids_maker(arrays_list, colnames=list('xyzg')):
# 0- The idea behind `list('xyzg')` is only to change the order
# of names, not the names as such. In case for example you
# want to use another component than the third to organize
# your planes.
# 1- Instantiate a dataframe so as to minimize the modification
# of the function copied/pasted pasted from
# https://stackoverflow.com/a/54075350/4194079
# 2- Pandas is also going to do some other jobs for us, such as
# stacking arrays, etc....
df = pd.DataFrame(arrays_list, columns=colnames)
# Make things more legible
xy = df.loc[:, ['x', 'y']]
x = xy.x
y = xy.y
z = df.z
g = df.g
reso_x = reso_y = 50
interp = 'cubic' # or 'nearest' or 'linear'
# Convert the 4d-space's dimensions into grids
grid_x, grid_y = np.mgrid[
x.min():x.max():1j*reso_x,
y.min():y.max():1j*reso_y
]
grid_z = si.griddata(
xy, z.values,
(grid_x, grid_y),
method=interp
)
grid_g = si.griddata(
xy, g.values,
(grid_x, grid_y),
method=interp
)
return {
'x' : grid_x,
'y' : grid_y,
'z' : grid_z,
'g' : grid_g,
}
Let's use grids_maker over our list of arrays and get the extrema of each z-level's 4th dimension.
g_mins = []
g_maxs = []
lgrids = {}
for level, arrays_list in planes.items():
lgrids[level] = grids = grids_maker(arrays_list)
g_mins.append(grids['g'].min())
g_maxs.append(grids['g'].max())
Let's create our (all-file unifying) color-scale and show the plot.
# Create the 4th color-rendered dimension
scam = plt.cm.ScalarMappable(
norm=cm.colors.Normalize(min(g_mins), max(g_maxs)),
cmap='jet' # see https://matplotlib.org/examples/color/colormaps_reference.html
)
fig = plt.figure()
ax = fig.gca(projection='3d')
for grids in lgrids.values():
scam.set_array([])
ax.plot_surface(
grids['x'], grids['y'], grids['z'],
facecolors = scam.to_rgba(grids['g']),
antialiased = True,
rstride=1, cstride=1, alpha=None
)
plt.show()
I would be glad to see the result.
I'm trying to plot blackbody wavelength vs flux for 288 Kelvin temperature (the Earth) and 6000 Kelvin temperature (the sun). I want both of these to be on the same plot and know I will need a log x-axis but I keep having issues having both lines appear. This is the code I have so far:
# Import libraries
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Constants
c = 3.0e8 # m/s
h = 6.626e-34 # Js
k = 1.38e-23 # J/K
c1 = 2*np.pi*h*c**2
c2 = (h*c)/k
T1 = 6000
T2 = 288
lam = np.logspace(-8,-3,2000) # Generate x-axis values
F1 = c1/(lam**5*(np.exp(c2/(lam*T1))-1)) # Calculate y-values
F1 = F1/1e9
F2 = c1/(lam**5*(np.exp(c2/(lam*T2))-1)) # Calculate y-values
F2 = F2/1e9
# Create plot
ax = plt.gca()
plt.xlabel(r'$\lambda$ (nm)')
plt.ylabel(r'$F_{BB\lambda}(W\/m^{-2}nm^{-1})$')
plt.text(0.05,.8, 'T = {0:d}K'.format(T1), transform = ax.transAxes, size = 'small')
plt.text(0.05,.3, 'T = {0:d}K'.format(T2), transform = ax.transAxes, size = 'small')
plt.xticks(), plt.yticks()
plt.semilogx(lam*1e9, F1, lam*1e9, F2, color= 'black') # Create figure and axis objects
plt.xlim(10,1e6)
plt.ylim(0,)
plt.show() # Display plot to screen
This plots the attached picture which is correct for 6000K but for some reason it's not plotting the 288K curve and I'm not sure how to fix it.
I have data in the form x-y-z and want to create a power spectrum along x-y. Here is a basic example I am posting to check where I might be going wrong with my actual data:
import numpy as np
from matplotlib import pyplot as plt
fq = 10; N = 20
x = np.linspace(0,8,N); y = x
space = x[1] -x[0]
xx, yy = np.meshgrid(x,y)
fnc = np.sin(2*np.pi*fq*xx)
ft = np.fft.fft2(fnc)
ft = np.fft.fftshift(ft)
freq_x = np.fft.fftfreq(ft.shape[0], d=space)
freq_y = np.fft.fftfreq(ft.shape[1], d=space)
plt.imshow(
abs(ft),
aspect='auto',
extent=(freq_x.min(),freq_x.max(),freq_y.min(),freq_y.max())
)
plt.figure()
plt.imshow(fnc)
This results in the following function & frequency figures with the incorrect frequency. Thanks.
One of your problems is that matplotlib's imshow using a different coordinate system to what you expect. Provide a origin='lower' argument, and the peaks now appear at y=0, as expected.
Another problem that you have is that fftfreq needs to be told your timestep, which in your case is 8 / (N - 1)
import numpy as np
from matplotlib import pyplot as plt
fq = 10; N = 20
x = np.linspace(0,8,N); y = x
xx, yy = np.meshgrid(x,y)
fnc = np.sin(2*np.pi*fq*xx)
ft = np.fft.fft2(fnc)
ft = np.fft.fftshift(ft)
freq_x = np.fft.fftfreq(ft.shape[0], d=8 / (N - 1)) # this takes an argument for the timestep
freq_y = np.fft.fftfreq(ft.shape[1], d=8 / (N - 1))
plt.imshow(
abs(ft),
aspect='auto',
extent=(freq_x.min(),freq_x.max(),freq_y.min(),freq_y.max()),
origin='lower' , # this fixes your problem
interpolation='nearest', # this makes it easier to see what is happening
cmap='viridis' # let's use a better color map too
)
plt.grid()
plt.show()
You may say "but the frequency is 10, not 0.5!" However, if you want to sample a frequency of 10, you need to sample a lot faster than 8/19! Nyquist's theorem says you need to exceed a sampling rate of 20 to have any hope at all
I'm trying to plot some data using a pcolormesh from the matplotlib.pyplot but I'm having some difficulty when saving the output (specifically, in scaling the image appropriately).
I'm using Python v3.4 with matplotlib v1.51 if that makes a difference.
This is what my code currently looks like:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
def GetData(data_entries, num_of_channels):
data_dict = {'timestamp' : np.linspace(1, data_entries*21, data_entries, endpoint=True)}
for chan in range(0, num_of_channels, 1):
data_dict['random%03d'%chan] = np.random.rand(data_entries, 1).flatten()
num_at_each_end_to_highlight = 10
data_dict['random%03d'%chan][0:num_at_each_end_to_highlight] = 1.5
data_dict['random%03d'%chan][-num_at_each_end_to_highlight:] = 1.5
for chan in range(0, num_of_channels, 1):
data_dict['periodic%03d' % chan] = np.zeros(data_entries)#.flatten()
data_dict['periodic%03d' % chan][::65] = 5000
return pd.DataFrame(data_dict)
def GetSubPlotIndex(totalRows, totalCols, row):
return totalRows*100+totalCols*10+row
def PlotData(df, num_of_channels, field_names):
# Calculate the range of data to plot
data_entries = len(df.index)
# Create the x/y mesh that the data will be plotted on
x = df['timestamp']
y = np.linspace(0, num_of_channels - 1, num_of_channels)
X,Y = np.meshgrid(x,y)
# Iterate through all of the field types and produce one plot for each but share the X axis
for idx, field_name in enumerate(field_names):
# Create this sub-plot
subPlotIndex = GetSubPlotIndex(len(field_names), 1, idx + 1)
ax = plt.subplot(subPlotIndex)
if idx is 0:
ax.set_title('Raw Data Time Series')
# Set the axis scale to exactly meet the limits of the data set.
ax.set_autoscale_on(False)
plt.axis([x[0], x[data_entries-1], 0, num_of_channels - 1])
# Set up the colour palette used to render the data.
# Make bad results (those that are masked) invisible so the background shows instead.
palette = plt.cm.get_cmap('autumn')
palette.set_bad(alpha=0.0)
ax.set_axis_bgcolor('black') # Set the background to zero
# Grab the data and transpose it so we can stick it in the time series running along the X axis.
firstFftCol = df.columns.get_loc(field_name + "%03d"%(0))
lastFftCol = df.columns.get_loc(field_name + "%03d"%(num_of_channels - 1))
data = df.ix[:,firstFftCol:lastFftCol]
data = data.T # Transpose so that time runs along the X axis and bin index is on the Y
# Mask off data with zero's so that it doesn't obscure the data we're actually interested in.
data = np.ma.masked_where(data == 0.0, data)
# Actually create the data mesh so we can plot it
z_min, z_max = data.min().min(), data.max().max()
p = ax.pcolormesh(X,Y, data, cmap=palette, vmin=z_min, vmax=z_max)
# Render it
plt.plot()
# Label the plot and add a key
plt.ylabel(field_name)
plt.colorbar(p)
# Label the plot
plt.xlabel('Time (ms)')
# Record the result
plt.savefig('test.png', edgecolor='none', transparent=False)
if __name__ == '__main__':
data_entries = 30000 # Large values here cause issues
num_of_channels = 255
fields_to_plot = ('random', 'periodic')
data = GetData(data_entries, num_of_channels)
width_in_pixels = len(data.index)+200
additional_vertical_space_per_plot = 50
num_of_plots = len(fields_to_plot)
height_in_pixels = (num_of_channels+additional_vertical_space_per_plot)*num_of_plots
dpi = 80 # The default according to the documentation.
fig = plt.figure(1,figsize=(width_in_pixels/dpi, height_in_pixels/dpi), dpi=dpi)
PlotData(data, num_of_channels, fields_to_plot)
With 1000 entries, the result looks fine:
If I increase the number of samples to the sort of size I actually want to plot (30000), the image is the correct size (30200 pixels wide) but I see a lot of dead space. This is a zoomed-out summary of the issues I see:
Is there a way to more accurately fill the image with the data?
Thanks to the prompt from #Dusch, this seems to solve things rather neatly:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
def GetData(data_entries, num_of_channels):
data_dict = {'timestamp' : np.linspace(1, data_entries*21, data_entries, endpoint=True)}
for chan in range(0, num_of_channels, 1):
data_dict['random%03d'%chan] = np.random.rand(data_entries, 1).flatten()
num_at_each_end_to_highlight = 10
data_dict['random%03d'%chan][0:num_at_each_end_to_highlight] = 1.5
data_dict['random%03d'%chan][-num_at_each_end_to_highlight:] = 1.5
for chan in range(0, num_of_channels, 1):
data_dict['periodic%03d' % chan] = np.zeros(data_entries)#.flatten()
data_dict['periodic%03d' % chan][::65] = 5000
return pd.DataFrame(data_dict)
def GetSubPlotIndex(totalRows, totalCols, row):
return totalRows*100+totalCols*10+row
def PlotData(df, num_of_channels, field_names):
# Calculate the range of data to plot
data_entries = len(df.index)
# Create the x/y mesh that the data will be plotted on
x = df['timestamp']
y = np.linspace(0, num_of_channels - 1, num_of_channels)
X,Y = np.meshgrid(x,y)
# Iterate through all of the field types and produce one plot for each but share the X axis
for idx, field_name in enumerate(field_names):
# Create this sub-plot
subPlotIndex = GetSubPlotIndex(len(field_names), 1, idx + 1)
ax = plt.subplot(subPlotIndex)
if idx is 0:
ax.set_title('Raw Data Time Series')
# Set the axis scale to exactly meet the limits of the data set.
ax.set_autoscale_on(False)
plt.axis([x[0], x[data_entries-1], 0, num_of_channels - 1])
# Set up the colour palette used to render the data.
# Make bad results (those that are masked) invisible so the background shows instead.
palette = plt.cm.get_cmap('autumn')
palette.set_bad(alpha=0.0)
ax.set_axis_bgcolor('black') # Set the background to zero
# Grab the data and transpose it so we can stick it in the time series running along the X axis.
firstFftCol = df.columns.get_loc(field_name + "%03d"%(0))
lastFftCol = df.columns.get_loc(field_name + "%03d"%(num_of_channels - 1))
data = df.ix[:,firstFftCol:lastFftCol]
data = data.T # Transpose so that time runs along the X axis and bin index is on the Y
# Mask off data with zero's so that it doesn't obscure the data we're actually interested in.
data = np.ma.masked_where(data == 0.0, data)
# Actually create the data mesh so we can plot it
z_min, z_max = data.min().min(), data.max().max()
p = ax.pcolormesh(X,Y, data, cmap=palette, vmin=z_min, vmax=z_max)
# Render it
plt.plot()
# Label this sub-plot
plt.ylabel(field_name)
# Sort out the color bar
fig = plt.gcf()
image_width = fig.get_size_inches()[0] * fig.dpi # size in pixels
colorbar_padding_width_in_pixels = 20
colorbar_padding = colorbar_padding_width_in_pixels/image_width
plt.colorbar(p, pad=colorbar_padding)
# Label the plot
plt.xlabel('Time (ms)')
# Record the result
plt.savefig('test.png', edgecolor='none', transparent=False, bbox_inches='tight')
plt.tight_layout()
if __name__ == '__main__':
data_entries = 30000 # Large values here cause issues
num_of_channels = 255
fields_to_plot = ('random', 'periodic')
data = GetData(data_entries, num_of_channels)
width_in_pixels = len(data.index)+200
additional_vertical_space_per_plot = 50
num_of_plots = len(fields_to_plot)
height_in_pixels = (num_of_channels+additional_vertical_space_per_plot)*num_of_plots
dpi = 80 # The default according to the documentation.
fig = plt.figure(1,figsize=(width_in_pixels/dpi, height_in_pixels/dpi), dpi=dpi)
PlotData(data, num_of_channels, fields_to_plot)
The secret sauce in the end was:
Add plt.tight_layout() immediately before the plt.savefig call.
Add bbox_inches='tight' to the plt.savefig call.
Add , pad=colorbar_padding after calculating colorbar_padding by checking what proportion of the overall image width a 20 pixel padding equates to.