I am trying to incorporate a gradient fill with multiple histograms using seaborn facet grid where the gradient is determined by the spread of values under each curve, not just by a sequence of row or col using hue. There are some links below that partly perform somewhat similar functions in python:
How to fill histogram with gradient color fills a diverging gradient but each histogram is independent of the others so comparison between histograms is somewhat void. Using the figure below each histogram should be relative to the others. Furthermore, it does not use the seaborn facet grid, which is the central question here.
How to generate series of histograms doesn't plot histograms. It just fills the area under a curve.
I've found a few images displaying what I'm hoping to execute but they all seem to be generated in R with nothing in python. My assumption is the functionality doesn't exist as yet using seaborn and I'll have to use R but I think this will be applicable for many users.
Using the code below, we can change adjust the gradient using hue to either row or col but this doesn't consider the area under the curve.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(120)
g = np.tile(list("ABCD"), 30)
h = np.tile(list("XYZ"), 40)
# Generate df
df = pd.DataFrame(dict(x = x, g = g, h = h))
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(4, rot = -0.25, light = 0.7)
g = sns.FacetGrid(df, col = 'h', hue = 'h', row = 'g', aspect = 3, height= 1, palette = pal)
# Draw the densities
g = g.map(sns.kdeplot, 'x', shade = True, alpha = 0.8, lw = 1, bw = 0.8)
g = g.map(sns.kdeplot, 'x', color= 'w', lw = 1, bw = 0.8)
g = g.map(plt.axhline, y = 0, lw = 1)
# Adjust title and axis labels directly
g.axes[0,0].set_ylabel('L 1')
g.axes[1,0].set_ylabel('L 2')
g.axes[2,0].set_ylabel('L 3')
g.axes[3,0].set_ylabel('L 4')
g.axes[0,0].set_title('Top 1')
g.axes[0,1].set_title('Top 2')
g.axes[0,2].set_title('Top 3')
g.axes[1,0].set_title('')
g.axes[1,1].set_title('')
g.axes[1,2].set_title('')
g.axes[2,0].set_title('')
g.axes[2,1].set_title('')
g.axes[2,2].set_title('')
g.axes[3,0].set_title('')
g.axes[3,1].set_title('')
g.axes[3,2].set_title('')
g.set_axis_labels(x_var = 'Total Amount')
g.set(yticks = [])
Out:
There is a gradient that can be adjusted for row or col but I'm hoping to pass this gradient to the area underneath each histogram curve. Similar to the figure above. So the area underneath each curve would be lighter when lower than zero and darker when higher than zero.
Even adjusting the area under the curve to the median value may suffice.
You can create an image gradient, and use the histogram itself as a clipping path for the image, so that the only visible part is the part under the curve.
As such, you can play around with any cmaps and normalization that are available when creating images.
Here is a quick example:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(120)
g = np.tile(list("ABCD"), 30)
h = np.tile(list("XYZ"), 40)
# Generate df
df = pd.DataFrame(dict(x = x, g = g, h = h))
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(4, rot = -0.25, light = 0.7)
g = sns.FacetGrid(df, col = 'h', hue = 'h', row = 'g', aspect = 3, height= 1, palette = pal)
# Draw the densities
g = g.map(sns.kdeplot, 'x', shade = True, alpha = 0.8, lw = 1, bw = 0.8)
g = g.map(sns.kdeplot, 'x', color= 'w', lw = 1, bw = 0.8)
g = g.map(plt.axhline, y = 0, lw = 1)
for ax in g.axes.flat:
ax.set_title("")
# Adjust title and axis labels directly
for i in range(4):
g.axes[i,0].set_ylabel('L {:d}'.format(i))
for i in range(3):
g.axes[0,i].set_title('Top {:d}'.format(i))
# generate a gradient
cmap = 'coolwarm'
x = np.linspace(0,1,100)
for ax in g.axes.flat:
im = ax.imshow(np.vstack([x,x]), aspect='auto', extent=[*ax.get_xlim(), *ax.get_ylim()], cmap=cmap, zorder=10)
path = ax.collections[0].get_paths()[0]
patch = matplotlib.patches.PathPatch(path, transform=ax.transData)
im.set_clip_path(patch)
g.set_axis_labels(x_var = 'Total Amount')
g.set(yticks = [])
Related
I have this power spectrum that I am trying to find the Nu_max in. I have done so by using a gaussian filter, however, I have some noise in the start of the signal.
Is there somehow to balance this or remove it from my curve?
Or a way to remove this, or at least make it better?
Since it is obviously affecting Nu_Max.
import matplotlib.pyplot as plt
import pandas as pd
from scipy.ndimage import gaussian_filter[enter image description here][1]
from scipy.signal import find_peaks
import numpy as np
# Importing data
power_list = pd.read_table(r'G:\Downloads\New_power_list_KIC.txt')
frequency_list = pd.read_table(r'G:\Downloads\List_Frequency.txt')
# Gaussian filter
df = gaussian_filter(power_list, sigma=1800)
List = np.linspace(0, 141699, num = int(141699) , endpoint = True)
for i in range(141699):
List[i] = df[i]
# Finding peaks
peaks = find_peaks(List, height =0, threshold = None, distance = None, prominence = 3, width = None)
print(peaks[1])
print(peaks[1].keys() )
height = peaks[1]['peak_heights'] #List of heights
peak_pos = frequency_list.iloc[peaks[0]] #List of peak pos
data_peak = peak_pos
# Plotting
fig = plt.figure()
ax = fig.subplots()
# plt.ylim([0,100])
# plt.xlim([115,120])
# plt.ylim([0,90])
# plt.xlim([70,75])
# plt.ylim([0,120])
# plt.xlim([75,80])
# plt.ylim([0,150])
# plt.xlim([85,90])
# plt.ylim([0,150])
# plt.xlim([95,100])
# plt.ylim([0,80])
# plt.xlim([105,110])
# plt.ylim([0,50])
# plt.xlim([100,120])
plt.xlim([0,130])
plt.ylim([0,100])
plt.xlabel('Frequeny(μHz)')
plt.ylabel('Power density p.p.m')
ax.plot(frequency_list*1E6, df-10, 'k', linewidth=.5)
# ax.scatter(peak_pos*1E6, height, c='r', s = 10, marker = 'D', label = 'Maxima')
ax.legend()
ax.grid()
plt.show()
Smoothened graph
I'm totally new at using Python for Power BI (or anything really).
I would like to add the value of the bar/scatter at the end of the line. (the datalabel)
Also to have a version where I could have the label inside of the scatter bubble would be cool.
Anyone who could help out here ?
All help appreciated
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Create a dataframe
df = pd.DataFrame({'group': dataset.Genre , 'values': dataset.Revenue})
val = list(dataset.SelectedGenre)
# Reorder it following the values:
ordered_df = df.sort_values(by='values')
my_range=range(1,len(df.index)+1)
# Create a color if the group is "B"
my_color=np.where(ordered_df ['group']== val, 'orange', 'skyblue')
my_size=np.where(ordered_df ['group']== val , 150, 150)
# The vertival plot is made using the hline function
# I load the seaborn library only to benefit the nice looking feature
import seaborn as sns
val = ordered_df['values']
plt.hlines(y=my_range, xmin=0, xmax=val, color=my_color, alpha=1 , linewidth=8)
plt.scatter(val, my_range, color=my_color, s=my_size, alpha=1)
# Add title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("What about the B group?", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.box(False) #Turn of Black bx around visual
plt.show()
Found it myself
import matplotlib.pyplot as plt
import numpy as np
# Data
x = dataset.Revenue
y = dataset.Genre
labels = dataset.Revenue
val = list(dataset.SelectedGenre)
# Create the figure and axes objects
fig, ax = plt.subplots(1, figsize=(10, 6))
fig.suptitle('Example Of Labelled Scatterpoints')
my_color=np.where(y == val, 'orange', 'skyblue')
my_size=np.where( y == val , 2000, 2000)
# Plot the scatter points
ax.scatter(x, y,
color= my_color, # Color of the dots
s=1000, # Size of the dots
alpha=1, # Alpha of the dots
linewidths=1) # Size of edge around the dots
ax.hlines(y, xmin=0, xmax=x, color= my_color, alpha=1 , linewidth=8)
def human_format(num):
magnitude = 0
while abs(num) >= 1000:
magnitude += 1
num /= 1000
# add more suffixes if you need them
return '%.0f%s' % (round(num), ['', 'K', 'M', 'G', 'T', 'P'][magnitude])
# Add the participant names as text labels for each point
for x_pos, y_pos, label in zip(x, y, labels):
ax.annotate(
human_format(label), # The label for this point
xy=(x_pos, y_pos), # Position of the corresponding point
xytext=(-8, 0), # Offset text by 7 points to the right
textcoords='offset points', # tell it to use offset points
ha='left', # Horizontally aligned to the left
va='center',
color = 'white') # Vertical alignment is centered
plt.box(False) #Turn of Black bx around visual
# Show the plot
plt.show()
I have tried this and got the result as in the image:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list("", ["red","grey","green"])
df = pd.read_csv('t.csv', header=0)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax = ax1.twiny()
# Scatter plot of positive points, coloured blue (C0)
ax.scatter(np.argwhere(df['real'] > 0), df.loc[df['real'] > 0, 'real'], color='C2')
# Scatter plot of negative points, coloured red (C3)
ax.scatter(np.argwhere(df['real'] < 0), df.loc[df['real'] < 0, 'real'], color='C3')
# Scatter neutral values in grey (C7)
ax.scatter(np.argwhere(df['real'] == 0), df.loc[df['real'] == 0, 'real'], color='C7')
ax.set_ylim([df['real'].min(), df['real'].max()])
index = len(df.index)
ymin = df['prediction'].min()
ymax= df['prediction'].max()
ax1.imshow([np.arange(index),df['prediction']],cmap=cmap,
extent=(0,index-1,ymin, ymax), alpha=0.8)
plt.show()
Image:
I was expecting one output where the color is placed according to the figure. I am getting green color and no reds or greys.
I want to get the image or contours spread as the values are. How I can do that? See the following image, something similar:
Please let me know how I can achieve this. The data I used is here: t.csv
For a live version, have a look at Tensorflow Playground
There are essentially 2 tasks required in a solution like this:
Plot the heatmap as the background;
Plot the scatter data;
Output:
Source code:
import numpy as np
import matplotlib.pyplot as plt
###
# Plot heatmap in the background
###
# Setting up input values
x = np.arange(-6.0, 6.0, 0.1)
y = np.arange(-6.0, 6.0, 0.1)
X, Y = np.meshgrid(x, y)
# plot heatmap colorspace in the background
fig, ax = plt.subplots(nrows=1)
im = ax.imshow(X, cmap=plt.cm.get_cmap('RdBu'), extent=(-6, 6, -6, 6), interpolation='bilinear')
cax = fig.add_axes([0.21, 0.95, 0.6, 0.03]) # [left, bottom, width, height]
fig.colorbar(im, cax=cax, orientation='horizontal') # add colorbar at the top
###
# Plot data as scatter
###
# generate the points
num_samples = 150
theta = np.linspace(0, 2 * np.pi, num_samples)
# generate inner points
circle_r = 2
r = circle_r * np.random.rand(num_samples)
inner_x, inner_y = r * np.cos(theta), r * np.sin(theta)
# generate outter points
circle_r = 4
r = circle_r + np.random.rand(num_samples)
outter_x, outter_y = r * np.cos(theta), r * np.sin(theta)
# plot data
ax.scatter(inner_x, inner_y, s=30, marker='o', color='royalblue', edgecolors='white', linewidths=0.8)
ax.scatter(outter_x, outter_y, s=30, marker='o', color='crimson', edgecolors='white', linewidths=0.8)
ax.set_ylim([-6,6])
ax.set_xlim([-6,6])
plt.show()
To keep things simple, I kept the colorbar range (-6, 6) to match the data range.
I'm sure this code can be changed to suit your specific needs. Good luck!
Here is a possible solution.
A few notes and questions:
What are the 'prediction' values in your data file? They do not seem to correlate with the values in the 'real' column.
Why do you create a second axis? What is represented on the bottom X-axis in your plot? I removed the second axis and labelled the remaining axes (index and real).
When you slice a pandas DataFrame, the index comes with it. You don't need to create a separate index (argwhere and arange(index) in your code). I simplified the first part of the code, where scatterplots are produced.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list("", ["red","grey","green"])
df = pd.read_csv('t.csv', header=0)
print(df)
fig = plt.figure()
ax = fig.add_subplot(111)
# Data limits
xmin = 0
xmax = df.shape[0]
ymin = df['real'].min()
ymax = df['real'].max()
# Scatter plots
gt0 = df.loc[df['real'] > 0, 'real']
lt0 = df.loc[df['real'] < 0, 'real']
eq0 = df.loc[df['real'] == 0, 'real']
ax.scatter(gt0.index, gt0.values, edgecolor='white', color='C2')
ax.scatter(lt0.index, lt0.values, edgecolor='white', color='C3')
ax.scatter(eq0.index, eq0.values, edgecolor='white', color='C7')
ax.set_ylim((ymin, ymax))
ax.set_xlabel('index')
ax.set_ylabel('real')
# We want 0 to be in the middle of the colourbar,
# because gray is defined as df['real'] == 0
if abs(ymax) > abs(ymin):
lim = abs(ymax)
else:
lim = abs(ymin)
# Create a gradient that runs from -lim to lim in N number of steps,
# where N is the number of colour steps in the cmap.
grad = np.arange(-lim, lim, 2*lim/cmap.N)
# Arrays plotted with imshow must be 2D arrays. In this case it will be
# 1 pixel wide and N pixels tall. Set the aspect ratio to auto so that
# each pixel is stretched out to the full width of the frame.
grad = np.expand_dims(grad, axis=1)
im = ax.imshow(grad, cmap=cmap, aspect='auto', alpha=1, origin='bottom',
extent=(xmin, xmax, -lim, lim))
fig.colorbar(im, label='real')
plt.show()
This gives the following result:
What I want is like this:
What I get is this:
So how to merge the markers into one label?
also for the lines, for the lines, of course, u can realize it by not assigning label to the second line while using the same linetype, but for the markers, you can not, since they are of different shapes.
Note that in recent versions of matplotlib you can achieve this using class matplotlib.legend_handler.HandlerTuple as illustrated in this answer and also in this guide:
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
fig, ax1 = plt.subplots(1, 1)
# First plot: two legend keys for a single entry
p2, = ax1.plot([3, 4], [2, 3], 'o', mfc="white", mec="k")
p1, = ax1.plot([1, 2], [5, 6], 's', mfc="gray", mec="gray")
# `plot` returns a list, but we want the handle - thus the comma on the left
p3, = ax1.plot([1, 5], [4, 4], "-k")
p4, = ax1.plot([2, 6], [3, 2], "-k")
# Assign two of the handles to the same legend entry by putting them in a tuple
# and using a generic handler map (which would be used for any additional
# tuples of handles like (p1, p3)).
l = ax1.legend([(p1, p2), p3], ['data', 'models'],
handler_map={tuple: HandlerTuple(ndivide=None)})
plt.savefig("demo.png")
I think it's best to use a full legend - otherwise, how will your readers know the difference between the two models, or the two datasets? I would do it this way:
But, if you really want to do it your way, you can use a custom legend as shown in this guide. You'll need to create your own class, like they do, that defines the legend_artist method, which then adds squares and circles as appropriate. Here is the plot generated and the code used to generate it:
#!/usr/bin/env python
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
# ==================================
# Define the form of the function
# ==================================
def model(x, A=190, k=1):
return A * np.exp(-k*x/50)
# ==================================
# How many data points are generated
# ==================================
num_samples = 15
# ==================================
# Create data for plots
# ==================================
x_model = np.linspace(0, 130, 200)
x_data1 = np.random.rand(num_samples) * 130
x_data1.sort()
x_data2 = np.random.rand(num_samples) * 130
x_data2.sort()
data1 = model(x_data1, k=1) * (1 + np.random.randn(num_samples) * 0.2)
data2 = model(x_data2, k=2) * (1 + np.random.randn(num_samples) * 0.15)
model1 = model(x_model, k=1)
model2 = model(x_model, k=2)
# ==================================
# Plot everything normally
# ==================================
fig = plt.figure()
ax = fig.add_subplot('111')
ax.plot(x_data1, data1, 'ok', markerfacecolor='none', label='Data (k=1)')
ax.plot(x_data2, data2, 'sk', markeredgecolor='0.5', markerfacecolor='0.5', label='Data (k=2)')
ax.plot(x_model, model1, '-k', label='Model (k=1)')
ax.plot(x_model, model2, '--k', label='Model (k=2)')
# ==================================
# Format plot
# ==================================
ax.set_xlabel('Distance from heated face($10^{-2}$ m)')
ax.set_ylabel('Temperature ($^\circ$C)')
ax.set_xlim((0, 130))
ax.set_title('Normal way to plot')
ax.legend()
fig.tight_layout()
plt.show()
# ==================================
# ==================================
# Do it again, but with custom
# legend
# ==================================
# ==================================
class AnyObject(object):
pass
class data_handler(object):
def legend_artist(self, legend, orig_handle, fontsize, handlebox):
scale = fontsize / 22
x0, y0 = handlebox.xdescent, handlebox.ydescent
width, height = handlebox.width, handlebox.height
patch_sq = mpatches.Rectangle([x0, y0 + height/2 * (1 - scale) ], height * scale, height * scale, facecolor='0.5',
edgecolor='0.5', transform=handlebox.get_transform())
patch_circ = mpatches.Circle([x0 + width - height/2, y0 + height/2], height/2 * scale, facecolor='none',
edgecolor='black', transform=handlebox.get_transform())
handlebox.add_artist(patch_sq)
handlebox.add_artist(patch_circ)
return patch_sq
# ==================================
# Plot everything
# ==================================
fig = plt.figure()
ax = fig.add_subplot('111')
d1 = ax.plot(x_data1, data1, 'ok', markerfacecolor='none', label='Data (k=2)')
d2 = ax.plot(x_data2, data2, 'sk', markeredgecolor='0.5', markerfacecolor='0.5', label='Data (k=1)')
m1 = ax.plot(x_model, model1, '-k', label='Model (k=1)')
m2 = ax.plot(x_model, model2, '-k', label='Model (k=2)')
# ax.legend([d1], handler_map={ax.plot: data_handler()})
ax.legend([AnyObject(), m1[0]], ['Data', 'Model'], handler_map={AnyObject: data_handler()})
# ==================================
# Format plot
# ==================================
ax.set_xlabel('Distance from heated face($10^{-2}$ m)')
ax.set_ylabel('Temperature ($^\circ$C)')
ax.set_xlim((0, 130))
ax.set_title('Custom legend')
fig.tight_layout()
plt.show()
I also found this link very useful (code below), it's an easier way to handle this issue. It's basically using a list of legend handles to make one of the markers of the first handle invisible and overplot it with the marker of the second handle. This way, you have both markers next to each other with one label.
fig, ax = plt.subplots()
p1 = ax.scatter([0.1],[0.5],c='r',marker='s')
p2 = ax.scatter([0.3],[0.2],c='b',marker='o')
l = ax.legend([(p1,p2)],['points'],scatterpoints=2)
With the above code, a TupleHandler is used to create legend handles which
simply overplot two handles (there are red squares behind the blue
circles if you look carefylly. What you want to do is make the second
marker of first handle and the first marker of the second handle
invisible. Unfortunately, the TupleHandler is a rather recent addition
and you need a special function to get all the handles. Otherwise, you
can use the Legend.legendHandles attribute (it only show the first
handle for the TupleHandler).
def get_handle_lists(l):
"""returns a list of lists of handles.
"""
tree = l._legend_box.get_children()[1]
for column in tree.get_children():
for row in column.get_children():
yield row.get_children()[0].get_children()
handles_list = list(get_handle_lists(l))
handles = handles_list[0] # handles is a list of two PathCollection.
# The first one is for red squares, and the second
# is for blue circles.
handles[0].set_facecolors(["r", "none"]) # for the fist
# PathCollection, make the
# second marker invisible by
# setting their facecolor and
# edgecolor to "none."
handles[0].set_edgecolors(["k", "none"])
handles[1].set_facecolors(["none", "b"])
handles[1].set_edgecolors(["none", "k"])
fig
Here is a new solution that will plot any collection of markers with the same label. I have not figured out how to make it work with markers from a line plot, but you can probably do a scatter plot on top of a line plot if you need to.
from matplotlib import pyplot as plt
import matplotlib.collections as mcol
import matplotlib.transforms as mtransforms
import numpy as np
from matplotlib.legend_handler import HandlerPathCollection
from matplotlib import cm
class HandlerMultiPathCollection(HandlerPathCollection):
"""
Handler for PathCollections, which are used by scatter
"""
def create_collection(self, orig_handle, sizes, offsets, transOffset):
p = type(orig_handle)(orig_handle.get_paths(), sizes=sizes,
offsets=offsets,
transOffset=transOffset,
)
return p
fig, ax = plt.subplots()
#make some data to plot
x = np.arange(0, 100, 10)
models = [.05 * x, 8 * np.exp(- .1 * x), np.log(x + 1), .01 * x]
tests = [model + np.random.rand(len(model)) - .5 for model in models]
#make colors and markers
colors = cm.brg(np.linspace(0, 1, len(models)))
markers = ['o', 'D', '*', 's']
markersize = 50
plots = []
#plot points and lines
for i in xrange(len(models)):
line, = plt.plot(x, models[i], linestyle = 'dashed', color = 'black', label = 'Model')
plot = plt.scatter(x, tests[i], c = colors[i], s = markersize, marker = markers[i])
plots.append(plot)
#get attributes
paths = []
sizes = []
facecolors = []
edgecolors = []
for plot in plots:
paths.append(plot.get_paths()[0])
sizes.append(plot.get_sizes()[0])
edgecolors.append(plot.get_edgecolors()[0])
facecolors.append(plot.get_facecolors()[0])
#make proxy artist out of a collection of markers
PC = mcol.PathCollection(paths, sizes, transOffset = ax.transData, facecolors = colors, edgecolors = edgecolors)
PC.set_transform(mtransforms.IdentityTransform())
plt.legend([PC, line], ['Test', 'Model'], handler_map = {type(PC) : HandlerMultiPathCollection()}, scatterpoints = len(paths), scatteryoffsets = [.5], handlelength = len(paths))
plt.show()
I have a solution for you if you're willing to use all circles for markers and differentiate by color only. You can use a circle collection to represent the markers, and then have a legend label for the collection as a whole.
Example code:
import matplotlib.pyplot as plt
import matplotlib.collections as collections
from matplotlib import cm
import numpy as np
#make some data to plot
x = np.arange(0, 100, 10)
models = [.05 * x, 8 * np.exp(- .1 * x), np.log(x + 1), .01 * x]
tests = [model + np.random.rand(len(model)) - .5 for model in models]
#make colors
colors = cm.brg(np.linspace(0, 1, len(models)))
markersize = 50
#plot points and lines
for i in xrange(len(models)):
line, = plt.plot(x, models[i], linestyle = 'dashed', color = 'black', label = 'Model')
plt.scatter(x, tests[i], c = colors[i], s = markersize)
#create collection of circles corresponding to markers
circles = collections.CircleCollection([markersize] * len(models), facecolor = colors)
#make the legend -- scatterpoints needs to be the same as the number
#of markers so that all the markers show up in the legend
plt.legend([circles, line], ['Test', 'Model'], scatterpoints = len(models), scatteryoffsets = [.5], handlelength = len(models))
plt.show()
You can do this by plotting data without any label and then adding the label separately:
from matplotlib import pyplot as plt
from numpy import random
xs = range(10)
data = random.rand(10, 2)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
kwargs = {'color': 'r', 'linewidth': 2, 'linestyle': '--'}
ax.plot(xs, data, **kwargs)
ax.plot([], [], label='Model', **kwargs)
ax.legend()
plt.show()
I have this graph so far, it's kind of ugly. Each type of marker is an accuracy for a type of algorithm.
There are two problems with this:
I'd like there to be space between the content and the axises, but only show ticks for y [0, 1] and have the x axis show no negative values (no such thing as negative time).
I'd like to display the x values as their log transform, but keep the original values on the ticks so you can see the actual values.
For #1 I tried playing with xticks as you can see below but without much success.
For #2, it's simple to throw the data into an np.log10() but then the axis ticks are also log transformed. I feel like there should be a simple way to do this log display (seems like a pretty normal thing to do)?
Here's my code so far:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import random
# create fake data
data = {}
data['A'] = []
data['B'] = []
data['C'] = []
n = 5
data['A'] = zip(np.random.uniform(0, 10000, size=n), np.random.uniform(0, 0.6, size=n))
data['B'] = zip(np.random.uniform(0, 200, size=n), np.random.uniform(0, 0.6, size=n))
data['C'] = zip(np.random.uniform(0, 5000, size=n), np.random.uniform(0, 0.6, size=n))
# make graph
markers = ['+', '*', 'x']
colors = ['b', 'r', 'g']
fig = plt.figure()
ax1 = fig.add_subplot(111)
plots = []
labels = []
# extract data
i = 0
for algorithm in ['A', 'B', 'C']:
results = data[algorithm]
testing = np.array([float(x[1]) for x in results if x > 0.0])
ts = np.array([int(x[0]) for x in results if x > 0.0])
color = colors[i]
marker = markers[i]
plot = ax1.scatter(ts, testing, color=color, marker=marker, s=10)
plots.append(plot)
labels.append(algorithm)
i += 1
# set axis and title
ax1.legend(plots, labels, loc='lower right')
ax1.set_xlabel("Time (sec)")
ax1.set_ylabel("Testing Accuracy")
ax1.set_title("Time versus testing accuracy")
# set axis limits
xticks, xticklabels = plt.xticks()
xmin = (3*xticks[0] - xticks[1])/2.
xmax = (3*xticks[-1] - xticks[-2])/2.
plt.xlim(xmin, xmax)
plt.xticks(xticks)
plt.ylim(0.0, 1.0)
# save to disk
plt.savefig("scatter.eps")
why you are not doing this?
ax1.set_ylim( -.1, 1.1 )
ax1.set_yticks( np.linspace(0, 1, 10) )
ax1.set_xscale('log')