Data value of a variable in each marker within subplots - python

I'm developing the following function: extract_name_value() that generates a step chart taking the values of a pandas DataFrame in Python, for now it works fine, but I want to add the values of the variable points_axisyvalue or values_list to it in each marker: Script Here
I tried to use the following examples:Data value at each marker, Matplotlib scatter plot with different text at each data point or How to put individual tags for a matplotlib scatter plot?, which would be something like what I want; also I even tried using plt.annotate(), but the data of the values does not come out the way I want it, plus I think it would cover up the graph a lot and not appreciate well. Below I put the code in which I'm using plt.annotate():
# Function to extract the Name and Value attributes
def extract_name_value(signals_df, rootXML):
# print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
num_axisx = len(signals_df["Name"])
values_list = [value for pos, value in enumerate(signals_df["Value"])]
print(values_list)
points_axisy = signals_df["Value"]
print(len(points_axisy))
colors = ['b', 'g', 'r', 'c', 'm', 'y']
# Creation Graphic
fig, ax = plt.subplots(nrows=num_names_list, figsize=(20, 30), sharex=True)
plt.suptitle(f'File XML: {rootXML}', fontsize=16, fontweight='bold', color='SteelBlue', position=(0.75, 0.95))
plt.xticks(np.arange(-1, num_axisx), color='SteelBlue', fontweight='bold')
labels = ['value: {0}'.format(j) for j in values_list]
print(labels)
i = 1
for pos, name in enumerate(names_list):
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
print(data)
# get color
j = random.randint(0, len(colors) - 1)
# get plots by index = pos
x = np.hstack([-1, data.index.values, len(signals_df) - 1])
y = np.hstack([0, data.values, data.iloc[-1]])
ax[pos].plot(x, y, drawstyle='steps-post', marker='o', color=colors[j], linewidth=3)
ax[pos].set_ylabel(name, fontsize=8, fontweight='bold', color='SteelBlue', rotation=30, labelpad=35)
ax[pos].yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
ax[pos].yaxis.set_tick_params(labelsize=6)
ax[pos].grid(alpha=0.4)
i += 1
for label, x, y in zip(labels, x, y):
plt.annotate(label, xy=(x, y), xytext=(-20, 20), textcoords='offset points', ha='right', va='bottom', bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
plt.show()
What I get is the annotations spliced and in different positions.
But, What does my code need to show each value at each point?
I've also been trying to use the code from the Matplotlib reference and couldn't get it done: Marker Reference. Thank you very much in advance, any comment helps.

You can use plt.annotate function in a loop to solve your problem.
I randomly generated some data and plotted it as a single plot. You can do the same inside a subplot, the function would be the same.
# sample data points for the plot
x=np.arange(1,10)
y=np.linspace(20,40,9)
plt.figure(figsize=[15,5],dpi=200)
plt.plot(x,y,drawstyle='steps-post', marker='o')
# using annotate function to show the changepoints in a loop
for i in range(len(x)):
# I rounded the y values as string and used the same x and y coords as the locations
# next we can give a constant offset points to offset the annotation from each value
# here I used (-20,20) as the offset values
plt.annotate(f"{str(round((y[i])))}",(x[i],y[i]),xycoords='data',
xytext=(-20,20), textcoords='offset points',color="r",fontsize=12,
arrowprops=dict(arrowstyle="->", color='black'))
You can remove the arrowprops if you don't want the arrows.
Edited
I used the example1.xml file in your GitHub repo and edited the function a bit. All I did was add a loop and an if-else condition to your function.
# Initial part is same as yours
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
num_axisx = len(signals_df["Name"])
values_list = [value for pos, value in enumerate(signals_df["Value"])]
points_axisy = signals_df["Value"]
colors = ['b', 'g', 'r', 'c', 'm', 'y']
# start new figure
plt.figure(figsize=[20,28],dpi=200)
#start a loop with the subplots
for i in range(len(names_list)):
# subplot has 14 rows, 1 column and the i+1 represents the i'th plot
plt.subplot(num_names_list,1,i+1)
# choose color
col=np.random.randint(0, len(colors) - 1)
# get the locations of the values with the similar name in your list
locs=signals_df['Name']==names_list[i]
# get the values in those locations
data=signals_df['Value'][locs]
# arrange the x and y coordinates
x = np.hstack([-1, data.index.values, len(signals_df) - 1])
y = np.hstack([0, data.values, data.iloc[-1]])
# plot the values as usual
plt.plot(x, y, drawstyle='steps-post', marker='o', color=colors[col], linewidth=3)
plt.ylabel(names_list[i], fontsize=8, fontweight='bold', color='SteelBlue', rotation=30, labelpad=35)
plt.grid(alpha=0.4)
# this loop is for annotating the values
for j in range(len(x)):
# I found it is better to alternate the position of the annotations
# so that they wont overlap for the adjacent values
if j%2==0:
# In this condition the xytext position is (-20,20)
# this posts the annotation box over the plot value
plt.annotate(f"Val={round((y[j]))}",(x[j],y[j]),xycoords='data',
xytext=(-20,20), textcoords='offset points',color="r",fontsize=8,
arrowprops=dict(arrowstyle="->", color='black'),
bbox=dict(boxstyle='round', pad=0.5, fc='yellow', alpha=0.5))
else:
# In this condition the xytext position is (-20,-20)
# this posts the annotation box under the plot value
plt.annotate(f"Val={round((y[j]))}",(x[j],y[j]),xycoords='data',
xytext=(-20,-20), textcoords='offset points',color="r",fontsize=8,
arrowprops=dict(arrowstyle="->", color='black'),
bbox=dict(boxstyle='round', pad=0.5, fc='yellow', alpha=0.5))
New Function Result
I hope that it is useful.

I think it should be quite close to what you are after. I randomly generate data, then annotate it using matplotlib.text. It's not very pretty, you might want to add some padding and more refinements, but I hope it gives a good idea!
If two points are too close, you might want to annotate one on the left, and the other one on the right, like I am doing for the first point. I have not seen such a situation in the examples that you have given, so it's not handled.
Function place_label(label, xy, position, ax, pad=0.01) places the label where you want it to be. The rest of the code is demonstrating that it works, using randomly generated data.
import random
import numpy as np
import matplotlib.pyplot as plt
# function that places the label give the desired position
def place_label(label, xy, position, ax, pad=0.01):
# annotate in the initial position, xy is the top right corner of the bounding box
t_ = ax.text(x=xy[0], y=xy[1], s=label, fontsize=16)
# find useful values
tbb = t_.get_window_extent(renderer=rend)
abb = ax.get_window_extent(renderer=rend)
a_xlim, a_ylim = ax.get_xlim(), a_.get_ylim()
# now adjust the position if needed
new_xy = [xy[0], xy[1]]
relative_width = tbb.width/abb.width * (a_xlim[1] - a_xlim[0])
pad_x = pad * (a_xlim[1] - a_xlim[0])
assert(position[0] in ['l', 'c', 'r'])
if position[0] == 'c':
new_xy[0] -= relative_width/2
elif position[0] == 'l':
new_xy[0] -= relative_width + pad_x
else:
new_xy[0] += pad_x
relative_height = tbb.height/abb.height * (a_ylim[1] - a_ylim[0])
pad_y = pad * (a_ylim[1] - a_ylim[0])
assert(position[1] in ['b', 'c', 't'])
if position[1] == 'c':
new_xy[1] -= relative_height/2
elif position[1] == 'b':
new_xy[1] -= relative_height + pad_y
else:
new_xy[1] += pad_y
t_.set_position(new_xy)
return t_
# generate data, plot it and annotate it!
axes_qty = 9
axes_gap = 0.035
fig = plt.figure(figsize=(10, 8))
ax = [plt.axes([axes_gap, axes_gap/2 + i*(1/axes_qty), 1-2*axes_gap, 1/axes_qty-axes_gap]) for i in range(axes_qty)]
rend = fig.canvas.get_renderer()
for a_ in ax:
x_ = [random.randint(0, 10) for _ in range(5)]
x_ = np.unique(x_)
y_ = [random.randint(0, 12) for _ in x_]
# as x is shared, we set the limits in advance, otherwise the adjustments won't be accurate
a_.set_xlim([-0.5, 10.5])
# plotting the data
data_ = [[x_[0], y_[0]]]
for i in range(1, len(x_)):
data_ += [[x_[i-1], y_[i]], [x_[i], y_[i]]]
a_.plot([d[0] for d in data_], [d[1] for d in data_])
mid_y = 0.5 * (a_.get_ylim()[0] + a_.get_ylim()[1])
# now let's label it
for i in range(len(x_)):
# decide what point we annotate
if i == 0:
xy = [x_ [0], y_[0]]
else:
xy = [x_[i-1], y_[i]]
# decide its position
position_0 = 'l' if i == 0 else 'r'
position_1 = 'b' if xy[1] > mid_y else 't'
place_label(label=str(xy[1]), xy=xy, position=position_0+position_1, ax=a_)
plt.show()

Related

How to combine multiple lines in a single legend entry for a quantity which depends on multiple variables?

Goal
Calculate two quantities q1 and q2, which both depend on two variables v1 and v2.
Plot both quantities for a select number of values of variable 1, as function of variable 2.
For each selected value for variable 1: list the value, as well as the average value for quantities 1 and 2 (+ error margins) in one legend entry
I have looked at this question, which shows desired output for a single (line + filled area), but could not get it to work with this more extensive problem.
Question
The main question is to create the legend as described above. Thanks!
Code
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
def calc_quant1(x,y):
q1 = x + y
q1err = np.sqrt(x+y)
return q1,q1err
def calc_quant2(x,y):
q2 = x - y
q2err = np.sqrt((x+y)/2)
return q2,q2err
# variables
var1 = np.linspace(-6,10,5)
var2 = np.linspace(10,20,21)
# resulting quantities (preallocation)
quant1 = np.zeros((len(var1),len(var2)))
quant2 = np.zeros((len(var1),len(var2)))
quant1err = np.zeros((len(var1),len(var2)))
quant2err = np.zeros((len(var1),len(var2)))
dy_dx = np.zeros(len(var2))
# plot colors
colors = cm.jet(np.linspace(0,1,len(var1)))
# reference value
xref = 12
# initialize figure
fig = plt.figure(figsize=(8,6))
fig.suptitle('Title')
ax = fig.add_subplot(111)
ax.axvline(x=xref,color='black',linestyle='dashed',label='$x={}$'.format(xref))
# compute and plot
for i in np.arange(len(var1)):
# select "measured" voltage
v1 = var1[i] * np.ones(len(var2))
v1err = var1err[i] * np.ones(len(var2))
# compute heat flux
quant1[i,:], quant1err[i,:] = calc_quant1(v1,var2)
quant2[i,:], quant2err[i,:] = calc_quant2(v1,var2)
# compute gradient near reference point (suboptimal)
x1 = var2[var2<xref][-1]
x2 = var2[var2>xref][0]
y1 = quant2[i][var2<xref][-1]
y2 = quant2[i][var2>xref][0]
dy_dx[i] = (y2-y1)/(x2-x1)
# plot results
label1 = r'$v_{{1}}={:.0f}$ [unit]'.format(var1[i])
label2 = r'$q_{{1}}(10)={:.0f}$ [unit]'.format(quant1[i,0])
ax.plot(var2,quant1[i,:], linestyle='dotted', linewidth=2, color=colors[i])
ax.plot(var2,quant2[i,:], linestyle='solid', linewidth=1, color=colors[i])
ax.fill_between(var2,quant1[i,:]-quant1err[i,:],quant1[i,:]+quant1err[i,:], alpha=0.16, color=colors[i])
ax.fill_between(var2,quant2[i,:]-quant2err[i,:],quant2[i,:]+quant2err[i,:], alpha=0.16, color=colors[i],\
label=label1+'\n'+label2)
# finalize figure
ax.set_xlim(np.min(var2),np.max(var2))
ax.set_xlabel('$v_{2}$ [unit]')
ax.set_ylabel('$q_{2}$ [unit]')
plt.tight_layout()
ax.annotate(text=r'$Q_{1}=v_{1}+v_{2}$',
xy=(0.50,0.77), xycoords='axes fraction',
xytext=(1.05,0.95), textcoords='axes fraction',
arrowprops=dict(arrowstyle='->',connectionstyle='arc3,rad=0.2'),
fontsize=20)
ax.annotate(text=r'$Q_{2}=v_{1}-v_{2}$',
xy=(0.70,0.35), xycoords='axes fraction',
xytext=(1.05,0.85), textcoords='axes fraction',
arrowprops=dict(arrowstyle='->',connectionstyle='arc3,rad=0.1'),
fontsize=20)
leg = ax.legend(loc='upper right', bbox_to_anchor=(1.475,0.775), labelspacing=1)
ax.grid()
plt.show()
Output

How to resize the x-axis and make it different from the y-axis Matplotlib

I have the following development that I am working on with ElementTree, Pandas and Matplotlib modules in Python:
def extract_name_value(signals_df):
#print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
fig = plt.figure(figsize=(18, 20))
plt.suptitle(f'File PXML: {rootXML}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
fig.tight_layout()
i = 1
for name in names_list:
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
x = [n for n in range(len(datax))]
print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
ax = plt.subplot(num_names_list, 1, i)
ax.plot(x, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax.grid(alpha=0.4)
i += 1
plt.show()
I am getting the following error:
I have been looking for the error and I totally understand that the dimensions of x and y must be equal, but there is the possibility of making a graph where the x-axis is greater than the y-axis? and also the x-axis comes from a variable not related to the y-axis? how would this be?
The x-axis is the count of all the values it has in the Signal element of the xml file: I put it here because of how extensive it is and this value is larger than the y-axis, but how to contemplate the 3 values that I bring from the xml that are Singal Name, Signal Value as y-axis and Count of Signals as x-axis. I really appreciate your comments and help.
IIUC, you are trying to plot several stepped values agains their order of appearance (X-index) in XML file. Then you should plot against original dataframe's X values. I haven't changed your code much for style or such, just fixed a little.
import xml.etree.ElementTree as ET
import pandas as pd
from matplotlib import pyplot as plt
import random
file_xml = 'example_un_child4.xml'
def transfor_data_atri(rootXML):
file_xml = ET.parse(rootXML)
data_XML = [
{"Name": signal.attrib["Name"],
"Value": int(signal.attrib["Value"].split(' ')[0])
} for signal in file_xml.findall(".//Signal")
]
signals_df = pd.DataFrame(data_XML)
extract_name_value(signals_df)
def extract_name_value(signals_df):
#print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
#fig = plt.figure(figsize=(18, 20), sharex=True)
fig, ax = plt.subplots(nrows=num_names_list, figsize=(10, 15), sharex=True)
plt.suptitle(f'File PXML: {file_xml}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
#fig.tight_layout()
i = 1
for pos, name in enumerate(names_list):
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
#x = [n for n in range(len(datax))]
#print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
#ax[pos] = plt.subplot(num_names_list, 1, i)
ax[pos].plot(data.index, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax[pos].set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax[pos].grid(alpha=0.4)
i += 1
fig.tight_layout()
plt.show()
transfor_data_atri(file_xml)

Add Datalabel on the end of barchart (lollypop)

I'm totally new at using Python for Power BI (or anything really).
I would like to add the value of the bar/scatter at the end of the line. (the datalabel)
Also to have a version where I could have the label inside of the scatter bubble would be cool.
Anyone who could help out here ?
All help appreciated
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Create a dataframe
df = pd.DataFrame({'group': dataset.Genre , 'values': dataset.Revenue})
val = list(dataset.SelectedGenre)
# Reorder it following the values:
ordered_df = df.sort_values(by='values')
my_range=range(1,len(df.index)+1)
# Create a color if the group is "B"
my_color=np.where(ordered_df ['group']== val, 'orange', 'skyblue')
my_size=np.where(ordered_df ['group']== val , 150, 150)
# The vertival plot is made using the hline function
# I load the seaborn library only to benefit the nice looking feature
import seaborn as sns
val = ordered_df['values']
plt.hlines(y=my_range, xmin=0, xmax=val, color=my_color, alpha=1 , linewidth=8)
plt.scatter(val, my_range, color=my_color, s=my_size, alpha=1)
# Add title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("What about the B group?", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.box(False) #Turn of Black bx around visual
plt.show()
Found it myself
import matplotlib.pyplot as plt
import numpy as np
# Data
x = dataset.Revenue
y = dataset.Genre
labels = dataset.Revenue
val = list(dataset.SelectedGenre)
# Create the figure and axes objects
fig, ax = plt.subplots(1, figsize=(10, 6))
fig.suptitle('Example Of Labelled Scatterpoints')
my_color=np.where(y == val, 'orange', 'skyblue')
my_size=np.where( y == val , 2000, 2000)
# Plot the scatter points
ax.scatter(x, y,
color= my_color, # Color of the dots
s=1000, # Size of the dots
alpha=1, # Alpha of the dots
linewidths=1) # Size of edge around the dots
ax.hlines(y, xmin=0, xmax=x, color= my_color, alpha=1 , linewidth=8)
def human_format(num):
magnitude = 0
while abs(num) >= 1000:
magnitude += 1
num /= 1000
# add more suffixes if you need them
return '%.0f%s' % (round(num), ['', 'K', 'M', 'G', 'T', 'P'][magnitude])
# Add the participant names as text labels for each point
for x_pos, y_pos, label in zip(x, y, labels):
ax.annotate(
human_format(label), # The label for this point
xy=(x_pos, y_pos), # Position of the corresponding point
xytext=(-8, 0), # Offset text by 7 points to the right
textcoords='offset points', # tell it to use offset points
ha='left', # Horizontally aligned to the left
va='center',
color = 'white') # Vertical alignment is centered
plt.box(False) #Turn of Black bx around visual
# Show the plot
plt.show()

How to reuse a color from a cycle?

I have two plot statements in a loop. I would like them both to use the same color from a color cycle and only increment the cycle once. The object is to have each curve and its regression line the same color and only step though the entire cycle once. How can I do so?
colormap = plt.cm.hsv
ax.set_prop_cycle('color', [colormap(i) for i in np.linspace(0.0, 0.9, self.numplots)])
# plot the lines
j = 0
start = 40
incrmnt = 10
for line, rank in sortedSymbols:
series = getattr(self, line)["CLOSE"]
dates = pd.to_datetime(getattr(self, line)["DATE"]).dt.date
name = self.symToNames[line]
ax.plot(dates.iloc[-250:], series.iloc[-250:]/series.iloc[-250] * (start+j), label = name)
y = series.iloc[-1]/series.iloc[-250] * (start+j)
ax.annotate(name, xy=(1,y), xytext=(6,0), xycoords = ax.get_yaxis_transform(),
textcoords="offset points", size=10, va="center")
slope, intercept = np.polyfit(dates.iloc[-50:].index,
series.iloc[-50:]/series.iloc[-250] * (start+j),1)
ax.plot(dates.iloc[-50:], slope*dates.iloc[-50:].index+intercept)#, color = "gray")
j += incrmnt

Why is my line clipping in matplotlib?

I am trying to draw a series of lines. The lines are all the same length, and randomly switch colors for a random length (blue to orange). I am drawing the lines in blue and then overlaying orange on top. You can see from my picture there are clipped parts of the lines where it is grey. I cannot figure out why this is happening. Also related I believe is that my labels are not moving to a left alignment like they should. Any help is greatly appreciated.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import random
plt.close('all')
fig, ax = plt.subplots(figsize=(15,11))
def label(xy, text):
y = xy[1] - 2
ax.text(xy[0], y, text, ha="left", family='sans-serif', size=14)
def draw_chromosome(start, stop, y, color):
x = np.array([start, stop])
y = np.array([y, y])
line = mlines.Line2D(x , y, lw=10., color=color)
ax.add_line(line)
x = 50
y = 100
chr = 1
for i in range(22):
draw_chromosome(x, 120, y, "#1C2F4D")
j = 0
while j < 120:
print j
length = 1
if random.randint(1, 100) > 90:
length = random.randint(1, 120-j)
draw_chromosome(j, j+length, y, "#FA9B00")
j = j+length+1
label([x, y], "Chromosome%i" % chr)
y -= 3
chr += 1
plt.axis('equal')
plt.axis('off')
plt.tight_layout()
plt.show()
You're only drawing the blue background from x = 50 to x = 120.
Replace this line:
draw_chromosome(x, 120, y, "#1C2F4D")
with this:
draw_chromosome(0, 120, y, "#1C2F4D")
To draw the blue line all the way across.
Alternately, if you also want to move your labels to the left, you can just set x=0 instead of setting it to 50.
I suggest using LineCollection for this. Below is a little helper function I wrote based on the example at http://matplotlib.org/examples/pylab_examples/multicolored_line.html (it looks long, but there is a lot of comments + docstrings)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm
from matplotlib.ticker import NullLocator
from collections import OrderedDict
def binary_state_lines(ax, chrom_data, xmin=0, xmax=120,
delta_y=3,
off_color = "#1C2F4D",
on_color = "#FA9B00"):
"""
Draw a whole bunch of chromosomes
Parameters
----------
ax : Axes
The axes to draw stuff to
chrom_data : OrderedDict
The chromosome data as a dict, key on the label with a list of pairs
of where the data is 'on'. Data is plotted top-down
xmin, xmax : float, optional
The minimum and maximum limits for the x values
delta_y : float, optional
The spacing between lines
off_color, on_color : color, optional
The colors to use for the the on/off state
Returns
-------
collections : dict
dictionary of the collections added keyed on the label
"""
# base offset
y_val = 0
# make the color map and norm
cmap = ListedColormap([off_color, on_color])
norm = BoundaryNorm([0, 0.5, 1], cmap.N)
# sort out where the text should be
txt_x = (xmax + xmin) / 2
# dictionary to hold the returned artists
ret = dict()
# loop over the input data draw each collection
for label, data in chrom_data.items():
# increment the y offset
y_val += delta_y
# turn the high windows on to alternating
# high/low regions
x = np.asarray(data).ravel()
# assign the high/low state to each one
state = np.mod(1 + np.arange(len(x)), 2)
# deal with boundary conditions to be off
# at start/end
if x[0] > xmin:
x = np.r_[xmin, x]
state = np.r_[0, state]
if x[-1] < xmax:
x = np.r_[x, xmax]
state = np.r_[state, 0]
# make the matching y values
y = np.ones(len(x)) * y_val
# call helper function to create the collection
coll = draw_segments(ax, x, y, state,
cmap, norm)
ret[label] = coll
# set up the axes limits
ax.set_xlim(xmin, xmax)
ax.set_ylim(0, y_val + delta_y)
# turn off x-ticks
ax.xaxis.set_major_locator(NullLocator())
# make the y-ticks be labeled as per the input
ax.yaxis.set_ticks((1 + np.arange(len(chrom_data))) * delta_y)
ax.yaxis.set_ticklabels(list(chrom_data.keys()))
# invert so that the first data is at the top
ax.invert_yaxis()
# turn off the frame and patch
ax.set_frame_on(False)
# return the added artists
return ret
def draw_segments(ax, x, y, state, cmap, norm, lw=10):
"""
helper function to turn boundary edges into the input LineCollection
expects.
Parameters
----------
ax : Axes
The axes to draw to
x, y, state : array
The x edges, the y values and the state of each region
cmap : matplotlib.colors.Colormap
The color map to use
norm : matplotlib.ticker.Norm
The norm to use with the color map
lw : float, optional
The width of the lines
"""
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
lc = LineCollection(segments, cmap=cmap, norm=norm)
lc.set_array(state)
lc.set_linewidth(lw)
ax.add_collection(lc)
return lc
An example:
synthetic_data = OrderedDict()
for j in range(21):
key = 'data {:02d}'.format(j)
synthetic_data[key] = np.cumsum(np.random.randint(1, 10, 20)).reshape(-1, 2)
fig, ax = plt.subplots(tight_layout=True)
binary_state_lines(ax, synthetic_data, xmax=120)
plt.show()
Separating the plotting logic from everything else will make your code easier to maintain and more reusable.
I also took the liberty of moving your labels from between the lines (where they can be ambiguous) to the yaxis tick labels.

Categories

Resources