Matching y axis points together - python

I have successfully read in data from a catalog, and I have graphed what I need. However, I need one more thing. I would like to correspond the different "standard_deviation" values with the "number" via "half-light radius." In the graph shown, the "number" is not an axis on the graph, however, there will in this case be ten "number 9's" for example. I would like a way to match the points of these same-numbered points with some sort of line, as I showed in the image below (I just drew lines randomly to give you an idea of what I want).
In this example, assume that every point one one of the drawn lines is of the same "number." A point of a "number" will have ten different "standard_deviation" values, 1 through 10, and ten different "half_light radius" values, which are the values I would like to match. I've pasted my read/plot code below. How would I do this?
newvid = asciitable.read('user4.cat')
n_new = newvid['n']
re_new = newvid['re']
number = newvid['number']
standard_deviation = newvid['standard_deviation']
plt.title('sersic parameter vs. standard deviation distribution of noise')
plt.xlabel('standard deviation')
plt.ylabel('sersic parameter')
plt.xlim(0,12)
plt.ylim(0,5)
plt.scatter(standard_deviation, n_new)
plt.show()
plt.title('half-light radius vs. standard deviation distribution of noise')
plt.xlabel('standard deviation')
plt.ylabel('half-light radius')
plt.xlim(0,12)
plt.ylim(-2,15)
plt.scatter(standard_deviation,re_new)
plt.show()

To do what I think you want, you'll have to use the plot function instead of scatter in order to connect the lines. Depending on how your data is arranged, you may have to split or sort your data, so that you can plot all points of each number at once, sorted by standard deviation.
Try this:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
newvid = asciitable.read('user4.cat')
n_new = newvid['n']
re_new = newvid['re']
number = newvid['number']
std_dev = newvid['standard_deviation']
n_max = float(number.max()) # for coloring later
plt.figure()
plt.title('sersic parameter vs. standard deviation distribution of noise')
plt.xlabel('standard deviation')
plt.ylabel('sersic parameter')
plt.xlim(0,12)
plt.ylim(0,5)
for n in np.unique(number):
n_mask = number == n # pick out just where n_new is the current n
order = np.argsort(std_dev[n_mask]) # sort by std_dev, so line doesn't zig zag
plt.plot(std_dev[n_mask][order], n_new[n_mask][order],
label=str(n), color=cm.jet(n/n_max)) # label and color by n
plt.legend()
plt.show()
plt.figure()
plt.title('half-light radius vs. standard deviation distribution of noise')
plt.xlabel('standard deviation')
plt.ylabel('half-light radius')
plt.xlim(0,12)
plt.ylim(-2,15)
# do one plot per number
for n in np.unique(number):
n_mask = number == n # pick out just where n_new is the current n
order = np.argsort(std_dev[n_mask]) # sort by std_dev, so line doesn't zig zag
plt.plot(std_dev[n_mask][order], re_new[n_mask][order],
label=str(n), color=cm.jet(n/n_max)) # label and color by n
plt.legend()
plt.show()
With random data:
To do a colorbar instead of a legend:
m = cm.ScalarMappable(cmap=cm.jet)
m.set_array(number)
plt.colorbar(m)

Related

Histogram line of best fit line is jagged and not smooth?

I can't quite seem to figue out how to get my curve to be displayed smoothly instead of having so many sharp turns.
I am hoping to show a boltzmann probability distribution. With a nice smooth curve.
I'll expect it is a simple fix but I can't see it. Can someone please help?
My code is below:
from matplotlib import pyplot as plt
import numpy as np
import scipy.stats
dE = 1
N = 500
n = 10000
# This is creating an array filled with all twos
def Create_Array(N):
Particle_State_List_set = np.ones(N, dtype = int)
Particle_State_List_twos = Particle_State_List_set + 1
return(Particle_State_List_twos)
Array = Create_Array(N)
def Select_Random_index(N):
Seed = np.random.default_rng()
Partcle_Index = Seed.integers(low=0, high= N - 1)
return(Partcle_Index)
def Exchange(N):
Particle_Index_A = Select_Random_index(N) #Selects a particle to be used as particle "a"
Particle_Index_B = Select_Random_index(N) #Selects a particle to be used as particle "b"
# Checks to see if the energy on particle "a" is zero, if so it selects anbother until it isn't.
while Array[Particle_Index_A] == 1:
Particle_Index_A = Select_Random_index(N)
#This loop is making sure that Particle "a" and "b" aren't the same particle, it chooses again until the are diffrent.
while Particle_Index_B == Particle_Index_A:
Particle_Index_B = Select_Random_index(N)
# This assignes variables to the chosen particle's energy values
a = Array[Particle_Index_A]
b = Array[Particle_Index_B]
# This updates the values of the Energy levels of the interacting particles
Array[Particle_Index_A] = a - dE
Array[Particle_Index_B] = b + dE
return (Array[Particle_Index_A], Array[Particle_Index_B])
for i in range(n):
Exchange(N)
# This part is making the histogram the curve will be made from
_, bins, _ = plt.hist(Array, 12, density=1, alpha=0.15, color="g")
# This is using scipy to find the mean and standard deviation in order to plot the curve
mean, std = scipy.stats.norm.fit(Array)
# This part is drawing the best fit line, using the established bins value and the std and mean from before
best_fit = scipy.stats.norm.pdf(bins, mean, std)
# Plotting the best fit curve
plt.plot(bins, best_fit, color="r", linewidth=2.5)
#These are instructions on how python with show the graph
plt.title("Boltzmann Probablitly Curve")
plt.xlabel("Energy Value")
plt.ylabel('Percentage at this Energy Value')
plt.tick_params(top=True, right=True)
plt.tick_params(direction='in', length=6, width=1, colors='0')
plt.grid()
plt.show()
Whats happening is that in these lines:
best_fit = scipy.stats.norm.pdf(bins, mean, std)
plt.plot(bins, best_fit, color="r", linewidth=2.5)
'bins' the histogram bin edges is being used as the x coordinates of the data points forming the best fit line. The resulting plot is jagged because they are so widely spaced. Instead you can define a tighter packed set of x coordinates and use that:
bfX = np.arange(bins[0],bins[-1],.05)
best_fit = scipy.stats.norm.pdf(bfX, mean, std)
plt.plot(bfX, best_fit, color="r", linewidth=2.5)
For me that gives a nice smooth curve, but you can always use a tighter packing than .05 if its not to your liking yet.

How to plot for frequency only?

Question
How can I plot the following scenario, just like shown in the attached image? This is for the purpose of visualising frequency allocation in a network
Scenario
I have a range of frequency values in a list-tuple like so, where the 1st value is the centre frequency, 2nd is total width, 3rd is guard band:
frequencies = [('195.71250000', '59.00000000', '2.50000000'), ('195.78750000', '59.00000000', '2.50000000'), ('195.86250000', '59.00000000', '2.50000000')]
and the range of these values are:
range = [('191.32500000', '196.12500000')]
Note: These are dummy values, the actual data is much larger but follows the same general structure
There are several ways to create this plot. One way is to use ax.vlines to plot the dashed lines for the frequencies and to use ax.bar for the rectangles representing the frequency ranges.
Here is an example where the frequencies are occupied at regular intervals within the range you have given (boundaries included) but with widths of randomly varying size. No guards are computed seeing as they should be automatically apparent thanks to the position of the frequencies and the widths, as far as I understand.
Also, the widths are much smaller compared to the sample data you have provided, else the bars will be very wide and will all overlap with one another, which would look very different from the image you have shared.
import numpy as np # v 1.19.2
import matplotlib.pyplot as plt # v 3.3.2
# Create sample dataset
rng = np.random.default_rng(seed=1) # random number generator
frequencies = np.arange(191.325, 196.125, step=0.3)
widths = rng.uniform(0.05, 0.25, size=frequencies.size)
# Create figure with single Axes and loop through frequencies and widths to plot
# vertical dashed lines for the frequencies and bars for the widths
fig, ax = plt.subplots(figsize=(10,3))
for freq, width in zip(frequencies, widths):
ax.vlines(x=freq, ymin=0, ymax=10, colors='tab:blue', linestyle='--', zorder=1)
ax.bar(x=freq, height=6, width=width, color='tab:blue', zorder=2)
# Additional formatting
ax.set_xlabel('Frequency (THZ)', labelpad=15, size=12)
ax.set_xticks(frequencies[::2])
ax.yaxis.set_visible(False)
for spine in ['top', 'left', 'right']:
ax.spines[spine].set_visible(False)
plt.show()

Matplotlib - Contourf - How to have a non-uniform ticks spacing?

I would like to plot contourf with (lat,depth,temp) and then have similar spacing as in the figure below (the temperature vary more near the surface then at depth, so I want to emphasized this region).
My depth array is not uniform (i.e. depth = [5,15,...,4975,5185,...]. I want to have such non-uniform vertical spacing.
I would like to show yticks = [10,100,500,1000,1500,2000,3000,4000,5000], and depth array does not have those exact values.
z = np.arange(0,50) # I want uniform spacing
pos = ([0,2,5,10,15,20,30,40,48]) # I want some yticks (not all of them)
ax=plt.contourf(lat,z,temp) # temp is a variable with dimensions (lat,depth)
plt.colorbar()
plt.gca().yaxis.set_ticks(pos) # Set some yticks, not all of them
plt.yticks(z[pos],depth[pos].astype(int)) # Replace the dummy values of z-array by something meaningful
plt.gca().invert_yaxis()
plt.grid(linestyle=':')
plt.gca().set(ylabel='depth (m)',xlabel='Latitude')'''
Potential Temperature of the Atlantic Ocean:
Per the matplotlib docs on yticks, you can specify the labels you want to use. In your case, if you want to show the labels [10,100,500,1000,1500,2000,3000,4000,5000] you can simply pass that list as the second argument in plt.yticks(), like so
plt.yticks(z[pos], [10,100,500,1000,1500,2000,3000,4000,5000])
and it will display the yticks accordingly. The issue arises in the specification of the positions - since the depth array does not have points corresponding exactly to the desired ytick values you will need to interpolate in order to find the exact position at which to place the labels. Unless the approximate positions specified in pos are already sufficient, in which case the above suffices.
If the depth data are not uniformly spaced then you can use numpy.interp to perform the interpolation, as shown below
import matplotlib.pyplot as plt
import numpy as np
# Create some depth data that is not uniformly spaced over [0, 5500]
depth = [(np.random.random() - 0.5)*25 + ii for ii in np.linspace(0, 5500, 50)]
lat = np.linspace(-75, 75, 50)
z = np.linspace(0,50, 50)
yticks = [10,100,500,1000,1500,2000,3000,4000,5000]
# Interpolate depths to get z-positions
pos = np.interp(yticks, depth, z)
temp = np.outer(lat, z) # Arbitrarily populate temp for demonstration
ax = plt.contourf(lat,z,temp)
plt.colorbar()
plt.gca().yaxis.set_ticks(pos)
plt.yticks(pos,yticks) # Place yticks at interpolated z-positions
plt.gca().invert_yaxis()
plt.grid(linestyle=':')
plt.gca().set(ylabel='Depth (m)',xlabel='Latitude')
plt.show()
This will find the exact positions where the yticks would fall if the depth array had data at those positions and place them accordingly as shown below.

Plot histogram normalized by fixed parameter

I need to plot a plot a normalized histogram (by normalized I mean divided by a fixed value) using the histtype='step' style.
The issue is that plot.bar() doesn't seem to support that style and if I use instead plot.hist() which does, I can't (or at least don't know how) plot the normalized histogram.
Here's a MWE of what I mean:
import matplotlib.pyplot as plt
import numpy as np
def rand_data():
return np.random.uniform(low=10., high=20., size=(200,))
# Generate data.
x1 = rand_data()
# Define histogram params.
binwidth = 0.25
x_min, x_max = x1.min(), x1.max()
bin_n = np.arange(int(x_min), int(x_max + binwidth), binwidth)
# Obtain histogram.
hist1, edges1 = np.histogram(x1, bins=bin_n)
# Normalization parameter.
param = 5.
# Plot histogram normalized by the parameter defined above.
plt.ylim(0, 3)
plt.bar(edges1[:-1], hist1 / param, width=binwidth, color='none', edgecolor='r')
plt.show()
(notice the normalization: hist1 / param) which produces this:
I can generate a histtype='step' histogram using:
plt.hist(x1, bins=bin_n, histtype='step', color='r')
and get:
but then it wouldn't be normalized by the param value.
The step plot will generate the appearance that you want from a set of bins and the count (or normalized count) in those bins. Here I've used plt.hist to get the counts, then plot them, with the counts normalized. It's necessary to duplicate the first entry in order to get it to actually have a line there.
(a,b,c) = plt.hist(x1, bins=bin_n, histtype='step', color='r')
a = np.append(a[0],a[:])
plt.close()
step(b,a/param,color='r')
This is not quite right, because it doesn't finish the plot correctly. the end of the line is hanging in free space rather than dropping down the x axis.
you can fix that by adding a 0 to the end of 'a' and one more bin point to b
a=np.append(a[:],0)
b=np.append(b,(2*b[-1]-b[-2]))
step(b,a/param,color='r')
lastly, the ax.step mentioned would be used if you had used
fig, ax = plt.subplots()
to give you access to the figure and axis directly. For examples, see http://matplotlib.org/examples/ticks_and_spines/spines_demo_bounds.html
Based on tcaswell's comment (use step) I've developed my own answer. Notice that I need to add elements to both the x (one zero element at the beginning of the array) and y arrays (one zero element at the beginning and another at the end of the array) so that step will plot the vertical lines at the beginning and the end of the bars.
Here's the code:
import matplotlib.pyplot as plt
import numpy as np
def rand_data():
return np.random.uniform(low=10., high=20., size=(5000,))
# Generate data.
x1 = rand_data()
# Define histogram params.
binwidth = 0.25
x_min, x_max = x1.min(), x1.max()
bin_n = np.arange(int(x_min), int(x_max + binwidth), binwidth)
# Obtain histogram.
hist1, edges1 = np.histogram(x1, bins=bin_n)
# Normalization parameter.
param = 5.
# Create arrays adding elements so plt.bar will plot the first and last
# vertical bars.
x2 = np.concatenate((np.array([0.]), edges1))
y2 = np.concatenate((np.array([0.]), (hist1 / param), np.array([0.])))
# Plot histogram normalized by the parameter defined above.
plt.xlim(min(edges1) - (min(edges1) / 10.), max(edges1) + (min(edges1) / 10.))
plt.bar(x2, y2, width=binwidth, color='none', edgecolor='b')
plt.step(x2, y2, where='post', color='r', ls='--')
plt.show()
and here's the result:
The red lines generated by step are equal to those blue lines generated by bar as can be seen.

How can I plot ca. 20 million points as a scatterplot?

I am trying to create a scatterplot with matplotlib that consists of ca. ca. 20 million data points. Even after setting the alpha value to its lowest before ending up with no visible data at all the result is just a completely black plot.
plt.scatter(timedPlotData, plotData, alpha=0.01, marker='.')
The x-axis is a continuous timeline of about 2 months and the y-axis consists of 150k consecutive integer values.
Is there any way to plot all the points so that their distribution over time is still visible?
Thank you for your help.
There's more than one way to do this. A lot of folks have suggested a heatmap/kernel-density-estimate/2d-histogram. #Bucky suggesed using a moving average. In addition, you can fill between a moving min and moving max, and plot the moving mean over the top. I often call this a "chunkplot", but that's a terrible name. The implementation below assumes that your time (x) values are monotonically increasing. If they're not, it's simple enough to sort y by x before "chunking" in the chunkplot function.
Here are a couple of different ideas. Which is best will depend on what you want to emphasize in the plot. Note that this will be rather slow to run, but that's mostly due to the scatterplot. The other plotting styles are much faster.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
np.random.seed(1977)
def main():
x, y = generate_data()
fig, axes = plt.subplots(nrows=3, sharex=True)
for ax in axes.flat:
ax.xaxis_date()
fig.autofmt_xdate()
axes[0].set_title('Scatterplot of all data')
axes[0].scatter(x, y, marker='.')
axes[1].set_title('"Chunk" plot of data')
chunkplot(x, y, chunksize=1000, ax=axes[1],
edgecolor='none', alpha=0.5, color='gray')
axes[2].set_title('Hexbin plot of data')
axes[2].hexbin(x, y)
plt.show()
def generate_data():
# Generate a very noisy but interesting timeseries
x = mdates.drange(dt.datetime(2010, 1, 1), dt.datetime(2013, 9, 1),
dt.timedelta(minutes=10))
num = x.size
y = np.random.random(num) - 0.5
y.cumsum(out=y)
y += 0.5 * y.max() * np.random.random(num)
return x, y
def chunkplot(x, y, chunksize, ax=None, line_kwargs=None, **kwargs):
if ax is None:
ax = plt.gca()
if line_kwargs is None:
line_kwargs = {}
# Wrap the array into a 2D array of chunks, truncating the last chunk if
# chunksize isn't an even divisor of the total size.
# (This part won't use _any_ additional memory)
numchunks = y.size // chunksize
ychunks = y[:chunksize*numchunks].reshape((-1, chunksize))
xchunks = x[:chunksize*numchunks].reshape((-1, chunksize))
# Calculate the max, min, and means of chunksize-element chunks...
max_env = ychunks.max(axis=1)
min_env = ychunks.min(axis=1)
ycenters = ychunks.mean(axis=1)
xcenters = xchunks.mean(axis=1)
# Now plot the bounds and the mean...
fill = ax.fill_between(xcenters, min_env, max_env, **kwargs)
line = ax.plot(xcenters, ycenters, **line_kwargs)[0]
return fill, line
main()
For each day, tally up the frequency of each value (a collections.Counter will do this nicely), then plot a heatmap of the values, one per day. For publication, use a grayscale for the heatmap colors.
My recommendation would be to use a sorting and moving average algorithm on the raw data before you plot it. This should leave the mean and trend intact over the time period of interest while providing you with a reduction in clutter on the plot.
Group values into bands on each day and use a 3d histogram of count, value band, day.
That way you can get the number of occurrences in a given band on each day clearly.

Categories

Resources