Appending sample number to X-Labels in altair - python

I would like to automatically append the sample # (in parentheses) corresponding to the x-labels of an altair figure. I am open to doing this outside of altair, but I thought there may be a way to do it at the figure level using altair/vega-lite. I am pasting the code using an example from the altair/vega website (part of the vega_dataset), but with a hackneyed, manual method in which I rename the labels explicitly for one of the labels. In this case, I have added the sample number of 73 to Europe.
Link to data
import altair as alt
from vega_datasets import data
# Load the cars data and hand-label one category with its sample size.
df = data.cars()
manual_labels = {'Europe': 'Europe (n=73)'}
df['Origin'] = df['Origin'].replace(manual_labels)

# Density channel: stacked around the center, with no title and no tick labels.
density_x = alt.X(
    'density:Q',
    stack='center',
    impute=None,
    title=None,
    axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
)

# One facet column per origin; title and labels are placed below the plot.
origin_column = alt.Column(
    'Origin:N',
    header=alt.Header(
        titleOrient='bottom',
        labelOrient='bottom',
        labelPadding=0,
    ),
)

# Estimate the MPG density separately for every origin.
density_chart = alt.Chart(df).transform_density(
    'Miles_per_Gallon',
    as_=['Miles_per_Gallon', 'density'],
    extent=[5, 50],
    groupby=['Origin'],
)
violins = density_chart.mark_area(orient='horizontal').encode(
    y='Miles_per_Gallon:Q',
    color='Origin:N',
    x=density_x,
    column=origin_column,
)
violins.properties(width=100).configure_facet(spacing=0).configure_view(stroke=None)

You could use pandas to generate the replacement dictionary and assign it to a new dataframe column:
import altair as alt
from vega_datasets import data
df = data.cars()

# Build "<origin> (n=<count>)" labels from the group sizes and store them in a
# separate column so the original 'Origin' values stay available for coloring.
group_sizes = df.groupby('Origin').size()
replace_dict = group_sizes.index + ' (n=' + group_sizes.astype(str) + ')'
df['Origin_with_count'] = df['Origin'].replace(replace_dict)

# Density channel: stacked around the center, with no title and no tick labels.
density_x = alt.X(
    'density:Q',
    stack='center',
    impute=None,
    title=None,
    axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
)

# Facet on the labelled column; header labels go below the plot, without a title.
counted_column = alt.Column(
    'Origin_with_count:N',
    header=alt.Header(
        title=None,
        labelOrient='bottom',
        labelPadding=0,
    ),
)

# Both columns are listed in groupby so each is carried through the transform.
density_chart = alt.Chart(df).transform_density(
    'Miles_per_Gallon',
    as_=['Miles_per_Gallon', 'density'],
    extent=[5, 50],
    groupby=['Origin_with_count', 'Origin'],
)
violins = density_chart.mark_area(orient='horizontal').encode(
    y='Miles_per_Gallon:Q',
    color='Origin:N',
    x=density_x,
    column=counted_column,
)
violins.properties(width=100).configure_facet(spacing=0).configure_view(stroke=None)
You might be able to do something more elegant with labelExpr, not sure.

You could overlay a text mark with the count instead.
I was able to do this with the following code. I was not able to manage the y position of the text (see commented-out line) or use the n datum in the header labelExpr for some reason.
# Violin layer: per-origin density of Miles_per_Gallon drawn as a horizontal,
# center-stacked area.
df = data.cars()
violin = alt.Chart(df).transform_density(
'Miles_per_Gallon',
as_=['Miles_per_Gallon', 'density'],
extent=[5, 50],
groupby=['Origin']
).mark_area(orient='horizontal').encode(
y='Miles_per_Gallon:Q',
color='Origin:N',
x=alt.X(
'density:Q',
stack='center',
impute=None,
title=None,
axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True),
),
).properties(width=100)
# Text layer: count the rows per origin and format the count as "n=<count>".
text = alt.Chart(df).mark_text().transform_aggregate(
cnt='count()',
groupby=["Origin"]
).transform_calculate(
n = "'n=' + datum.cnt",
).encode(
# y=alt.Y('mean(Miles_per_Gallon):Q'),
text=alt.Text('n:N'),
)
# Layer the two charts and facet the result into one column per origin.
# NOTE(review): the header labelExpr references datum.n; the author reports
# that this did not take effect.
(violin + text).facet(
column=alt.Column('Origin:N'),
).configure_header(
labelExpr="[datum.value, datum.n]",
)

Related

Altair Chart Conditional Text Opacity

In the chart below, some of the text values are smaller than 50. I do not want values smaller than 50 to be shown on the chart.
Is it possible to hide, remove or shrink opacity for this?
Code is:
import altair as alt
from vega_datasets import data
import streamlit as st
source = data.barley()

# Stacked bars: total yield per variety, split and colored by site.
bars = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        alt.X('sum(yield):Q', stack='zero'),
        alt.Y('variety:N'),
        alt.Color('site'),
    )
)

# White value labels on the same stacked positions, one per site segment.
text = (
    alt.Chart(source)
    .mark_text(dx=-15, dy=3, color='white')
    .encode(
        alt.X('sum(yield):Q', stack='zero'),
        alt.Y('variety:N'),
        alt.Detail('site:N'),
        alt.Text('sum(yield):Q', format='.1f'),
    )
)

labelled_bars = bars + text
st.altair_chart(labelled_bars, theme="streamlit", use_container_width=True)
You can use an aggregate transform to have access to the aggregated values in a condition and filter based on a threshold:
import altair as alt
from vega_datasets import data
base = alt.Chart(data.barley())

# Stacked bars: total yield per variety, split and colored by site.
bars = base.mark_bar().encode(
    alt.X('sum(yield):Q', stack='zero'),
    alt.Y('variety:N'),
    alt.Color('site'),
)

# Pre-aggregate so the summed yield exists as a real field ('yield_sum') that a
# condition can test.
site_totals = base.mark_text(dx=-2, color='white', align='right').transform_aggregate(
    yield_sum='sum(yield)',
    groupby=['variety', 'site'],
)

# Labels are fully transparent unless the segment total exceeds 50.
label_opacity = alt.condition('datum.yield_sum > 50', alt.value(1), alt.value(0))

text = site_totals.encode(
    alt.X('yield_sum:Q', stack='zero'),
    alt.Y('variety:N'),
    alt.Text('yield_sum:Q', format='.0f'),
    opacity=label_opacity,
    # Needed because transform_aggregate returns the values in a different
    # order than the bar chart uses.
    order='site',
)
bars + text

Merge two legends in altair

I have a scatter plot in altair where I am representing a column using both shape and color. I would like to have a single legend with both pieces of information, but instead I am getting two legends, one for shape and another for color.
The code is as follows. See this notebook for a reproducible example (you will need to enter your google credentials to load the data).
import altair as alt
# Use the FiveThirtyEight theme for all charts.
alt.themes.enable('fivethirtyeight')
# Multi-selection on 'Domain', bound to the legend.
selection = alt.selection_multi(fields=['Domain'], bind='legend')
# Scatter plot of parameter count (log scale) against publication date.
# 'Domain' is encoded twice, as color and as shape.
# NOTE(review): `df` is not defined in this snippet; presumably it is loaded earlier.
chart = alt.Chart(df, width=1100, height=600,
title="Parameter count of ML systems through time")\
.mark_point(size=120, filled=False).encode(
x=alt.X('Publication date:T'),
y=alt.Y('Parameters:Q',
scale=alt.Scale(type='log', domain=(1, 3e13)),
axis=alt.Axis(format=".1e")),
color=alt.Color('Domain',
sort=['Vision', 'Language', 'Games', 'Other'],
legend=alt.Legend(
values = ['Vision', 'Language', 'Games', 'Other'],),),
shape = alt.Shape('Domain'),#, legend=None),
tooltip=['System',
'Reference',
'Publication date',
alt.Tooltip('Parameters', format=".1e"),
'Domain'],
# Points outside the current selection are dimmed to 20% opacity.
opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
)
# Per-domain exponential regression lines derived from the scatter chart;
# drawn dashed and clipped.
regression = chart.transform_regression(
on="Publication date",
regression="Parameters",
method = 'exp',
groupby=["Domain"],
).mark_line(point=False, strokeDash=[10,5], clip=True)
# Layer the points (with the selection attached) and the regression lines,
# then set axis and legend font sizes and symbol/gradient dimensions.
alt.layer(chart.add_selection(selection), regression).configure_axis(
labelFontSize=20,titleFontSize=30).configure_legend(
titleFontSize=20,
labelFontSize =18,
gradientLength=400,
gradientThickness=30,
symbolSize = 130,
)
How can I merge both legends into a single one?
You can set the legend to None in the line chart for shape and color and then use resolve_scale as per the comments on the question:
import altair as alt
from vega_datasets import data
df = data.cars()
selection = alt.selection_multi(fields=['Origin'], bind='legend')

# Points dim to 20% opacity when their origin is not selected.
selected_opacity = alt.condition(selection, alt.value(1), alt.value(0.2))

# Scatter layer: 'Origin' drives both color and shape.
chart = alt.Chart(df).mark_point(filled=False).encode(
    x=alt.X('Acceleration'),
    y=alt.Y(
        'Horsepower',
        scale=alt.Scale(type='log'),
        axis=alt.Axis(format=".1e"),
    ),
    color='Origin',
    shape='Origin',
    opacity=selected_opacity,
)

# Regression layer: one fitted line per origin, with its own color and shape
# legends switched off.
fitted = chart.transform_regression(
    on="Acceleration",
    regression="Horsepower",
    groupby=["Origin"],
)
regression = fitted.mark_line().encode(
    color=alt.Color('Origin', legend=None),
    shape=alt.Shape('Origin', legend=None),
)

# Resolve shape and color scales independently per layer, then attach the
# legend-bound selection.
layered = alt.layer(chart, regression)
layered.resolve_scale(shape='independent', color='independent').add_selection(selection)

Deal with overlapping in multiple x-axes in plotly python

I am trying to create a plot using plotly with multiple axes. And for this, I am using the following code:
#Plotly libraries and options for graphic logic
from plotly.io import to_html
import plotly.io as pio
pio.renderers.default='browser'
import plotly.graph_objects as go
#Generic libraries
import pandas as pd
import numpy as np
from datetime import datetime
# Load the Apple stock sample data.
input_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')
threshold = 2.8
name_yaxis = "Gap"

# Rescale AAPL.High to the range [0, threshold], then shift every value by the
# same single random offset drawn from [0.3, 0.4).
high = input_df["AAPL.High"]
input_df["AAPL.High"] = (
    (high - high.min()) * threshold / (high.max() - high.min())
    + np.random.uniform(0.3, 0.4, 1)
)
ID_TAIL = "ID_1"

# 1 where the rescaled value lies strictly between 0 and the threshold, else 0;
# used as the index into the two-color marker colorscale.
in_range = ((0 < input_df['AAPL.High']) & (input_df['AAPL.High'] < threshold)).astype('int')


def _status_trace(x_values, axis_name):
    """Return the line+marker trace of AAPL.High over *x_values* on *axis_name*."""
    return go.Scatter(
        x=x_values,
        y=input_df['AAPL.High'],
        mode='lines+markers',
        marker_size=12,
        line=dict(color="#C4C4C4"),
        marker=dict(
            color=in_range,
            colorscale=[[0, '#A51890'], [1, '#3BBFFE']],
        ),
        showlegend=False,
        xaxis=axis_name,
        name="",
    )


fig = go.Figure()

# First trace: values against the calendar date on the primary x-axis.
fig.add_trace(_status_trace(input_df['Date'], "x1"))

# Second trace: the same values against synthetic sample IDs on the secondary
# x-axis.  One ID per row is generated (1..n inclusive) so that x and y have
# the same length; the previous range(1, n) left the last y value without an x.
my_x = [f"{ID_TAIL}_{i:04d}_0" for i in range(1, input_df.shape[0] + 1)]
fig.add_trace(_status_trace(my_x, "x2"))

#== Add title boxes ==#
# Add title legend for box status (the bold tag is now properly closed).
fig.add_annotation(
    text="<b>Health status</b>",
    xref="paper",
    yref="paper",
    x=1.02,
    xanchor="left",
    y=0.9,
    yanchor="bottom",  # Same y as legend below
    showarrow=False,
    font=dict(family="Roboto", size=10),
)
#== End ==#
My problem is that as you can see in the following image, the ticks are overlapping:
So, my question is, how to create space between them?
Thanks in advance.
Here's a quick fix. Pop this line at the bottom of your code, and it will move xaxis2 to the top of the graph:
# Move the secondary x-axis to the top, slant its labels, and cap the tick count.
fig.update_layout(xaxis2=dict(side='top', tickangle=45, nticks=50))
Output:
Shifting the secondary xaxis to the top will look like this.
Another Option:
Another approach would be to concatenate the axis titles into a single string, and display the concatenated string on the x-axis. This SO answer demonstrates this logic.
You can reduce the number of ticks by adding the following line
# Apply the same sparse, vertical tick settings to both x-axes.
sparse_ticks = {'nticks': 8, 'tickangle': 90}
fig.update_layout(xaxis=dict(sparse_ticks), xaxis2=dict(sparse_ticks))
Depending on the size of the plot, ticks may still overlap. In that case, you can either further reduce the tick number or hardcode the tick positions:
# Explicit tick positions for the two x-axes.  tickvalsX holds month strings
# for the date axis; tickvalsY holds sample IDs for the secondary x-axis.  The
# IDs are categories, so each must use the same four-digit zero-padded form as
# the generated x values ('ID_1_0100_0', not 'ID_1_00100_0').
tickvalsX = ['2015-07', '2016-01', '2016-07', '2017-01']
tickvalsY = ['ID_1_0001_0', 'ID_1_0100_0', 'ID_1_0200_0', 'ID_1_0300_0', 'ID_1_0400_0', 'ID_1_0500_0']
fig.update_layout(
    xaxis={'tickmode': 'array', 'tickangle': 90, 'tickvals': tickvalsX},
    xaxis2={'tickmode': 'array', 'tickangle': 90, 'tickvals': tickvalsY},
)
You can find further styling options for the axes in the Plotly reference.

Rolling average on a layered faceted chart in Altair

I successfully got layers to work in faceted charts and a rolling average to work in layered charts. I now want to combine the two, i.e., have a rolling average in a layered faceted chart.
Intuitively combining the two gives me an error -
Javascript Error: Cannot read property 'concat' of undefined
This usually means there's a typo in your chart specification. See the javascript console for the full traceback.
Code (gives the above error):
# Data Preparation
df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
idf = df[df['Country/Region'] == 'India']
idf = idf[df.columns[4:]]
idf = idf.T
idf = idf.reset_index()
idf.columns = ['day', 'case']
idf['country'] = 'india'
gdf = df[df['Country/Region'] == 'Germany']
gdf = gdf[df.columns[4:]]
gdf = gdf.T
gdf = gdf.reset_index()
gdf.columns = ['day', 'case']
gdf['country'] = 'germany'
fdf = pd.concat([idf,gdf])
# Charting
a = alt.Chart().mark_bar(opacity=0.5).encode(
x='day:T',
y='case:Q'
)
c = alt.Chart().mark_line().transform_window(
rolling_mean='mean(case:Q)',
frame=[-7, 0]
).encode(
x='day:T',
y='rolling_mean:Q'
)
alt.layer(a, c, data=fdf).facet(alt.Column('country', sort=alt.EncodingSortField('case', op='max', order='descending')))
If you remove the transform_window and replace y='rolling_mean:Q' with y='case:Q', you'd get a layered faceted chart. It is this chart on which I want a 7 day rolling average.
You should replace your window transform with this:
.transform_window(
rolling_mean='mean(case)',
frame=[-7, 0],
groupby=['country']
)
There were two issues with your original transform:
type shorthands are only used in encodings, never in transforms. When you wrote mean(case:Q), you were specifying a rolling mean of the field named "case:Q", which does not exist.
since you are faceting by country, you need to group by country when computing the rolling mean.
The result looks like this:
Try using transform_window with sort=[{'field': 'date'}] (substitute the name of your own date field):
https://vega.github.io/vega-lite/docs/window.html#cumulative-frequency-distribution
Or:
https://altair-viz.github.io/gallery/scatter_marginal_hist.html
https://altair-viz.github.io/gallery/layered_chart_with_dual_axis.html#layered-chart-with-dual-axis
https://altair-viz.github.io/gallery/parallel_coordinates.html#parallel-coordinates-example
import altair as alt
from vega_datasets import data
source = data.iris()

# Measurement columns folded into key/value pairs.
measurements = ['petalLength', 'petalWidth', 'sepalLength', 'sepalWidth']

# Attach a running count to each row as 'index' before folding; it is used as
# the detail channel so each line is grouped by that value.
indexed = alt.Chart(source).transform_window(index='count()')
folded = indexed.transform_fold(measurements)

lines = folded.mark_line().encode(
    x='key:N',
    y='value:Q',
    color='species:N',
    detail='index:N',
    opacity=alt.value(0.5),
)
lines.properties(width=500)
https://altair-viz.github.io/user_guide/compound_charts.html?highlight=repeat#horizontal-concatenation
import altair as alt
from vega_datasets import data
iris = data.iris.url

# Scatter plot of petal width against petal length, colored by species.
scatter = alt.Chart(iris).mark_point().encode(
    alt.X('petalLength:Q'),
    alt.Y('petalWidth:Q'),
    alt.Color('species:N'),
)
chart1 = scatter.properties(height=300, width=300)

# Bar chart of counts per petal-width bin (at most 30 bins), colored by species.
histogram = alt.Chart(iris).mark_bar().encode(
    alt.X('count()'),
    alt.Y('petalWidth:Q', bin=alt.Bin(maxbins=30)),
    alt.Color('species:N'),
)
chart2 = histogram.properties(height=300, width=100)

Keep altair sliders with plots when concatenating

When concatenating 2 charts with their own sliders, the sliders are grouped together at the end. Is there a way to have the sliders remain with each plot?
Here is an example, modified from the docs
import altair.vegalite.v3 as alt
import pandas as pd
import numpy as np
# Reproducible random-walk data: 100 points, seeded with 42.
rand = np.random.RandomState(42)
df = pd.DataFrame({"xval": range(100), "yval": rand.randn(100).cumsum()})

# Two independent slider-bound selections, each starting at a cutoff of 50.
slider1 = alt.binding_range(min=0, max=100, step=1, name="cutoff1:")
selector1 = alt.selection_single(
    name="SelectorName1",
    fields=["cutoff1"],
    bind=slider1,
    init={"cutoff1": 50},
)
slider2 = alt.binding_range(min=0, max=100, step=1, name="cutoff2:")
selector2 = alt.selection_single(
    name="SelectorName2",
    fields=["cutoff2"],
    bind=slider2,
    init={"cutoff2": 50},
)

# Points left of the first cutoff are red, the rest blue.
below_cutoff1 = alt.datum.xval < selector1.cutoff1
ch_base = alt.Chart(df).mark_point().encode(
    x="xval",
    y="yval",
    color=alt.condition(below_cutoff1, alt.value("red"), alt.value("blue")),
)
ch1 = ch_base.add_selection(selector1)

# The second chart reuses the base but colors against the second cutoff.
below_cutoff2 = alt.datum.xval < selector2.cutoff2
recolored = ch_base.encode(
    color=alt.condition(below_cutoff2, alt.value("red"), alt.value("blue"))
)
ch2 = recolored.add_selection(selector2)

ch1 & ch2
As seen in the image, the sliders are by default grouped next to each other:
Sliders always appear at the bottom of the full chart. There is currently no way to change this.
If you would like this feature to exist in the future, I would suggest submitting a feature request in Vega-Lite.
As a workaround, you can create two charts, and embed them in a single document using vega-embed, although when you do this it is not trivial to pass signals between the two charts.

Categories

Resources