I have a scatter plot in altair where I am representing a column using both shape and color. I would like to have a single legend with both pieces of information, but instead I am getting two legends, one for shape and another for color.
The code is as follows. See this notebook for a reproducible example (you will need to enter your google credentials to load the data).
import altair as alt
# Enable the 'fivethirtyeight' theme.
alt.themes.enable('fivethirtyeight')
# Multi-selection over the 'Domain' field, bound to the legend.
selection = alt.selection_multi(fields=['Domain'], bind='legend')
# Unfilled point chart built from `df`, which is defined outside this snippet.
chart = alt.Chart(df, width=1100, height=600,
title="Parameter count of ML systems through time")\
.mark_point(size=120, filled=False).encode(
x=alt.X('Publication date:T'),
# Log-scaled y axis with an explicit domain; tick labels use the ".1e" format.
y=alt.Y('Parameters:Q',
scale=alt.Scale(type='log', domain=(1, 3e13)),
axis=alt.Axis(format=".1e")),
# 'Domain' is encoded as colour here and as shape just below; the asker
# reports that this combination produces two separate legends.
color=alt.Color('Domain',
sort=['Vision', 'Language', 'Games', 'Other'],
legend=alt.Legend(
values = ['Vision', 'Language', 'Games', 'Other'],),),
shape = alt.Shape('Domain'),#, legend=None),
tooltip=['System',
'Reference',
'Publication date',
alt.Tooltip('Parameters', format=".1e"),
'Domain'],
# Points matching `selection` get opacity 1, all others 0.2.
opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
)
# Exponential regression of Parameters on Publication date, fitted per 'Domain'
# and derived from `chart`, drawn as a dashed, clipped line without point markers.
regression = chart.transform_regression(
on="Publication date",
regression="Parameters",
method = 'exp',
groupby=["Domain"],
).mark_line(point=False, strokeDash=[10,5], clip=True)
# Layer the points (with the selection attached) under the regression lines,
# then configure axis and legend font/symbol sizes for the whole layered chart.
alt.layer(chart.add_selection(selection), regression).configure_axis(
labelFontSize=20,titleFontSize=30).configure_legend(
titleFontSize=20,
labelFontSize =18,
gradientLength=400,
gradientThickness=30,
symbolSize = 130,
)
How can I merge both legends into a single one?
You can set the legend to None in the line chart for shape and color and then use resolve_scale as per the comments on the question:
import altair as alt
from vega_datasets import data
# Load the cars example dataset from vega_datasets.
df = data.cars()

# Multi-selection over the 'Origin' field, bound to the legend.
selection = alt.selection_multi(fields=['Origin'], bind='legend')

# Scatter layer: 'Origin' drives both colour and shape; points outside the
# selection are faded to opacity 0.2.
log_horsepower = alt.Y(
    'Horsepower',
    scale=alt.Scale(type='log'),
    axis=alt.Axis(format=".1e"),
)
fade_unselected = alt.condition(selection, alt.value(1), alt.value(0.2))
chart = (
    alt.Chart(df)
    .mark_point(filled=False)
    .encode(
        x=alt.X('Acceleration'),
        y=log_horsepower,
        color='Origin',
        shape='Origin',
        opacity=fade_unselected,
    )
)

# Regression layer: one fitted line per 'Origin'. Its colour and shape
# legends are switched off (legend=None) so only the scatter layer's remain.
regression = (
    chart
    .transform_regression(
        on="Acceleration", regression="Horsepower", groupby=["Origin"]
    )
    .mark_line()
    .encode(
        color=alt.Color('Origin', legend=None),
        shape=alt.Shape('Origin', legend=None),
    )
)

# Layer both charts, resolve the shape and colour scales independently per
# layer, and attach the legend-bound selection to the layered chart.
layered = alt.layer(chart, regression)
layered = layered.resolve_scale(shape='independent', color='independent')
layered.add_selection(selection)
Related
In the image below, some values are smaller than 50. I want values smaller than 50 not to appear on the chart.
Is it possible to hide, remove or shrink opacity for this?
Code is:
import altair as alt
from vega_datasets import data
import streamlit as st
# Load the barley example dataset from vega_datasets.
source=data.barley()
# Stacked horizontal bars: summed yield per variety, coloured by site.
bars = alt.Chart(source).mark_bar().encode(
x=alt.X('sum(yield):Q', stack='zero'),
y=alt.Y('variety:N'),
color=alt.Color('site')
)
# White text labels showing the summed yield (one decimal place), split per
# site via the `detail` channel and stacked the same way as the bars.
text = alt.Chart(source).mark_text(dx=-15, dy=3, color='white').encode(
x=alt.X('sum(yield):Q', stack='zero'),
y=alt.Y('variety:N'),
detail='site:N',
text=alt.Text('sum(yield):Q', format='.1f')
)
# Render the layered bars + labels through Streamlit.
st.altair_chart(bars + text, theme="streamlit", use_container_width=True)
You can use an aggregate transform to have access to the aggregated values in a condition and filter based on a threshold:
import altair as alt
from vega_datasets import data
# Shared base chart over the barley example dataset.
base = alt.Chart(data.barley())

# Stacked horizontal bars: summed yield per variety, coloured by site.
bars = base.mark_bar().encode(
    x=alt.X('sum(yield):Q', stack='zero'),
    y=alt.Y('variety:N'),
    color=alt.Color('site'),
)

# Label layer. The sum is computed with an explicit aggregate transform so it
# exists as the field `yield_sum`, which the opacity condition can test.
labels = base.mark_text(dx=-2, color='white', align='right')
labels = labels.transform_aggregate(
    yield_sum='sum(yield)',
    groupby=['variety', 'site'],
)
# Labels are fully transparent unless the summed yield exceeds 50.
show_above_threshold = alt.condition(
    'datum.yield_sum > 50', alt.value(1), alt.value(0)
)
text = labels.encode(
    x=alt.X('yield_sum:Q', stack='zero'),
    y=alt.Y('variety:N'),
    text=alt.Text('yield_sum:Q', format='.0f'),
    opacity=show_above_threshold,
    # Needed because transform_aggregate returns the values in a different
    # order than the bar chart uses.
    order='site',
)

bars + text
I would like to have two legends via Altair just like the picture below.
I have created the legend of "Count of actors", but I don't know how to generate the other one. My code is below:
# Bubble chart: circles positioned by TYPE (x) and index (y), sized by the
# record count. `base` and `movies_order` are defined outside this snippet.
plot = base.mark_circle(
    opacity=0.8,
    stroke='black',
    strokeWidth=1,
).encode(
    alt.X('TYPE:O'),
    alt.Y('index:N', sort=movies_order),
    alt.Size(
        'count(index):Q',
        scale=alt.Scale(range=[0, 4500]),
        legend=alt.Legend(title='Count of actors', symbolFillColor='white'),
    ),
    # legend=None switches off the legend for the GENDER colour encoding.
    alt.Color('GENDER', legend=None),
    #complete this
).properties(
    width=350,
    height=880,
)  # this closing parenthesis was missing, leaving the statement unterminated
And the chart I created is like this:
This is the default behavior in Altair, but you have disabled the color legend. Change alt.Color('GENDER', legend=None) to alt.Color('GENDER').
Here is a modified example from the Altair gallery with two legends:
import altair as alt
from vega_datasets import data
# Load the cars example dataset from vega_datasets.
source = data.cars()

# Circle scatter plot with two legend-producing channels: colour ('Origin')
# and size ('Cylinders'). Neither legend is disabled.
scatter = alt.Chart(source).mark_circle()
scatter.encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color='Origin',
    size='Cylinders',
)
I was trying to add text labels to an Altair chart whose x-axis domain is linked to an interval selected in another chart. I noticed that the text drawn by mark_text() is not shown completely at the last points of the chart, where the x-axis domain is set to the selected interval. I also don't know how to specify the format so that the dates are shown as yyyy-mm or month-year only (I don't want to display the day).
Another thing I noticed is that the tooltip doesn't show at all when the x-axis domain of the chart is linked to an interval selected in another chart; that's why I used mark_text().
the code I'm using is the following
import altair as alt
from vega_datasets import data
# Single selection that picks the nearest point on mouseover, projected over
# the x and y encodings; empty='none' means nothing is selected initially.
nearest = alt.selection_single(nearest=True, on='mouseover',
encodings=['x','y'], empty='none')
# Interval (brush) selection restricted to the x encoding.
interval = alt.selection_interval(encodings=['x'])
weather = data.seattle_weather()
# Shared base chart with date on the x axis; each layer below overrides the mark.
base = alt.Chart(weather).mark_rule(size=2).encode(
x='date:T')
# Main line chart whose x-scale domain follows the interval selection.
chart = base.mark_line().encode(
x=alt.X('date:T', scale=alt.Scale(domain=interval.ref())),
y='temp_max:Q',).properties(
width=800,
height=300)
# Text layer standing in for a tooltip: shows the calculated `label` field for
# the point picked by `nearest`, and a single space otherwise.
# NOTE(review): format() is called with an empty format string; the asker
# states they do not know which format to pass here.
text=base.mark_text(align='left', dx=5, dy=5).encode(
y='temp_max:Q',
text=alt.condition(nearest, 'label:N', alt.value(' '))
).transform_calculate(label='"Date: " + format(datum.date, "") '
).properties(selection=nearest,width=800,
height=300)
# Point layer that is only visible (opacity 1) for the point picked by `nearest`.
point=base.mark_point().encode(y='temp_max:Q',opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
# Short overview strip that carries the interval selection.
view = base.mark_line().add_selection(
interval).properties(width=800, height=20)
# Layer point, text and line, then place the overview strip underneath.
(point+text+chart) &view
It looks like you're trying to create a tooltip using a layer, and this is the cause of many of the problems you're having. Have you considered using the tooltip encoding?
import altair as alt
from vega_datasets import data
# Single selection that picks the nearest point on mouseover, projected over
# the x and y encodings; empty='none' means nothing is selected initially.
nearest = alt.selection_single(
    nearest=True,
    on='mouseover',
    encodings=['x', 'y'],
    empty='none',
)
# Interval (brush) selection restricted to the x encoding.
interval = alt.selection_interval(encodings=['x'])

weather = data.seattle_weather()

# Detail line chart: its x-scale domain follows the interval selection.
brushed_x = alt.X('date:T', scale=alt.Scale(domain=interval))
line = (
    alt.Chart(weather)
    .mark_line()
    .encode(x=brushed_x, y='temp_max:Q')
    .properties(width=800, height=200)
)

# Point layer derived from the line chart: visible only for the point picked
# by `nearest`, with a tooltip that shows the date truncated to year-month.
hover_opacity = alt.condition(nearest, alt.value(1), alt.value(0))
point = (
    line.mark_point()
    .encode(tooltip='yearmonth(date):N', opacity=hover_opacity)
    .add_selection(nearest)
)

# Short overview strip that carries the interval selection.
view = (
    alt.Chart(weather)
    .mark_line()
    .encode(x='date:T')
    .properties(width=800, height=20)
    .add_selection(interval)
)

# Layer the points over the line and place the overview strip underneath.
(point + line) & view
I successfully got layers to work in faceted charts and a rolling average to work in layered charts. I now want to combine the two, i.e., have a rolling average in a layered faceted chart.
Intuitively combining the two gives me an error -
Javascript Error: Cannot read property 'concat' of undefined
This usually means there's a typo in your chart specification. See the javascript console for the full traceback.
Code (gives the above error):
# Data Preparation
# NOTE(review): `pd` and `alt` are not imported in this snippet; presumably
# they were imported earlier — confirm.
# Load the confirmed-cases time series from the CSSEGISandData COVID-19 repository.
df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
# India: keep the matching row, drop the first four columns, transpose so each
# remaining column becomes a row, then name the columns day / case and tag the
# country. (Presumably the first four columns are region metadata and the rest
# are per-day counts — confirm against the CSV.)
idf = df[df['Country/Region'] == 'India']
idf = idf[df.columns[4:]]
idf = idf.T
idf = idf.reset_index()
idf.columns = ['day', 'case']
idf['country'] = 'india'
# Germany: same reshaping as for India.
gdf = df[df['Country/Region'] == 'Germany']
gdf = gdf[df.columns[4:]]
gdf = gdf.T
gdf = gdf.reset_index()
gdf.columns = ['day', 'case']
gdf['country'] = 'germany'
# Stack both countries into one long-form frame.
fdf = pd.concat([idf,gdf])
# Charting
# Bar layer: case over day at 50% opacity. No data is attached here; it is
# supplied to alt.layer below.
a = alt.Chart().mark_bar(opacity=0.5).encode(
x='day:T',
y='case:Q'
)
# Line layer: rolling mean over the frame [-7, 0].
# NOTE: this is the transform the asker reports as triggering the JavaScript
# error; it is kept exactly as posted.
c = alt.Chart().mark_line().transform_window(
rolling_mean='mean(case:Q)',
frame=[-7, 0]
).encode(
x='day:T',
y='rolling_mean:Q'
)
# Layer both charts over `fdf` and facet into one column per country, sorted
# by the maximum of `case` in descending order.
alt.layer(a, c, data=fdf).facet(alt.Column('country', sort=alt.EncodingSortField('case', op='max', order='descending')))
If you remove the transform_window and replace y='rolling_mean:Q' with y='case:Q', you'd get a layered faceted chart. It is this chart on which I want a 7 day rolling average.
You should replace your window transform with this:
.transform_window(
rolling_mean='mean(case)',
frame=[-7, 0],
groupby=['country']
)
There were two issues with your original transform:
type shorthands are only used in encodings, never in transforms. When you wrote mean(case:Q), you were specifying a rolling mean of the field named "case:Q", which does not exist.
since you are faceting by country, you need to group by country when computing the rolling mean.
The result looks like this:
Try using transform_window with sort=[{'field': 'date'}]:
https://vega.github.io/vega-lite/docs/window.html#cumulative-frequency-distribution
Or:
https://altair-viz.github.io/gallery/scatter_marginal_hist.html
https://altair-viz.github.io/gallery/layered_chart_with_dual_axis.html#layered-chart-with-dual-axis
https://altair-viz.github.io/gallery/parallel_coordinates.html#parallel-coordinates-example
import altair as alt
from vega_datasets import data
# Load the iris example dataset from vega_datasets.
source = data.iris()

# The four measurement columns that are folded into key/value pairs.
measurements = ['petalLength', 'petalWidth', 'sepalLength', 'sepalWidth']

# Add an `index` field via a count() window, then fold the measurement
# columns into the `key` / `value` fields used by the encodings below.
folded = alt.Chart(source).transform_window(index='count()')
folded = folded.transform_fold(measurements)

# One half-transparent line per `index`, coloured by species, across the
# folded keys.
folded.mark_line().encode(
    x='key:N',
    y='value:Q',
    color='species:N',
    detail='index:N',
    opacity=alt.value(0.5),
).properties(width=500)
https://altair-viz.github.io/user_guide/compound_charts.html?highlight=repeat#horizontal-concatenation
import altair as alt
from vega_datasets import data
# URL of the iris example dataset from vega_datasets.
iris = data.iris.url

# Scatter plot of petal length vs. petal width, coloured by species.
chart1 = alt.Chart(iris).mark_point().encode(
    x='petalLength:Q',
    y='petalWidth:Q',
    color='species:N'
).properties(
    height=300,
    width=300
)

# Histogram of petal width (up to 30 bins) on the y axis, coloured by species.
chart2 = alt.Chart(iris).mark_bar().encode(
    x='count()',
    y=alt.Y('petalWidth:Q', bin=alt.Bin(maxbins=30)),
    color='species:N'
).properties(
    height=300,
    width=100
)

# The snippet built both charts but never combined them, so nothing was
# displayed. Place them side by side (horizontal concatenation).
chart1 | chart2
For example you might want data like:
DATE,KEY,VALUE
2019-01-01,REVENUE,100
2019-01-01,COST,100.1
...
plotted as a time series BAR chart with little space in between the bars and no labels except for dates. The popup or legend would show you what the REV,COST cols were.
Basic bar chart with alt.Column, alt.X, alt.Y works but the labels and grouping are wrong. Is it possible to make the Column groups correspond to the x-axis and hide the X axis labels?
EDIT:
Latest best:
import altair as alt
import numpy as np  # was missing: np.random.randn below raised NameError
import pandas as pd

# Number of daily samples to generate.
m = 100

# Long-form frame: random REVENUE and COST columns melted into KEY / VALUE
# rows, one pair of rows per DATE.
data = pd.DataFrame({
    'DATE': pd.date_range('2019-01-01', freq='D', periods=m),
    'REVENUE': np.random.randn(m),
    'COST': np.random.randn(m),
}).melt('DATE', var_name='KEY', value_name='VALUE')

# One narrow bar chart per facet: KEY on a hidden x axis, VALUE on y.
bars = alt.Chart(data, width=10).mark_bar().encode(
    y=alt.Y('VALUE:Q', title=None),
    x=alt.X('KEY:O', axis=None),
    color=alt.Color('KEY:O', scale=alt.Scale(scheme='category20')),
    tooltip=['DATE', 'KEY', 'VALUE'],
)

# Facet into one column per calendar day with no spacing between facets; the
# rotated facet headers at the bottom stand in for x-axis date labels.
(bars).facet(
    column=alt.Column(
        'yearmonthdate(DATE):T',
        header=alt.Header(
            labelOrient="bottom",
            labelAngle=-45,
            format='%b %d %Y',
        ),
    ),
    align="none",
    spacing=0,
).configure_header(
    title=None
).configure_axis(
    grid=False
).configure_view(
    strokeOpacity=0
)
Another post because I can't seem to add multiple images to the original one.
This is another way with another flaw: the bars are overlapping. Notice the dates however are handled properly because this is using an actual axis.
import altair as alt
import pandas as pd
import numpy as np
# Number of daily samples to generate.
m = 250
# Long-form frame: random REVENUE and COST columns melted into KEY / VALUE
# rows, one pair of rows per DATE.
data = pd.DataFrame({
'DATE': pd.date_range('2019-01-01', freq='D', periods=m),
'REVENUE': np.random.randn(m),
'COST': np.random.randn(m),
}).melt('DATE', var_name='KEY', value_name='VALUE')
# Single selection that picks the nearest point on mouseover, keyed on the
# 'REVENUE' field.
# NOTE(review): after the melt, 'REVENUE' is a value of KEY rather than a
# column, and `nearest` is not referenced again in this snippet — confirm
# whether it is needed.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
fields=['REVENUE'], empty='none')
# Bar chart of VALUE over DATE on a temporal x axis, coloured by KEY, with bar
# opacity configured to 0.5.
# NOTE(review): `interpolate='basis'` is passed to mark_bar; presumably a
# leftover from a line-chart example — confirm.
line = alt.Chart(data).mark_bar(interpolate='basis').encode(
x='DATE:T',
y='VALUE:Q',
color='KEY:N'
).configure_bar(opacity=0.5)
line
You can create a grouped bar chart using a combination of encodings and facets, and you can adjust the axis titles and scales to customize the appearance. Here is an example (replicating https://vega.github.io/editor/#/examples/vega/grouped-bar-chart in Altair, as you mentioned in your comment):
import altair as alt
import pandas as pd
# One record per (category, position) pair with its value.
records = [
    {"category": "A", "position": 0, "value": 0.1},
    {"category": "A", "position": 1, "value": 0.6},
    {"category": "A", "position": 2, "value": 0.9},
    {"category": "A", "position": 3, "value": 0.4},
    {"category": "B", "position": 0, "value": 0.7},
    {"category": "B", "position": 1, "value": 0.2},
    {"category": "B", "position": 2, "value": 1.1},
    {"category": "B", "position": 3, "value": 0.8},
    {"category": "C", "position": 0, "value": 0.6},
    {"category": "C", "position": 1, "value": 0.1},
    {"category": "C", "position": 2, "value": 0.2},
    {"category": "C", "position": 3, "value": 0.7},
]
data = pd.DataFrame(records)

# Text layer: the value printed in white, shifted 10px left, positioned by
# value (x) and position (y, axis hidden).
text = (
    alt.Chart(data)
    .mark_text(dx=-10, color='white')
    .encode(
        x=alt.X('value:Q', title=None),
        y=alt.Y('position:O', axis=None),
        text='value:Q',
    )
)

# Bar layer reuses the text layer's encodings and adds a per-position colour
# with its legend disabled.
position_color = alt.Color(
    'position:O',
    legend=None,
    scale=alt.Scale(scheme='category20'),
)
bars = text.mark_bar().encode(color=position_color)

# One facet row per category, with the facet header title removed.
grouped = (bars + text).facet(row='category:N')
grouped.configure_header(title=None)
original answer:
I had trouble parsing from your question exactly what you're trying to do (in the future please consider including a code snippet demonstrating what you've tried and pointing out why the result is not sufficient), but here is an example of a bar chart with data of this form, that has x axis labeled by only date, with a tooltip and legend showing the revenue and cost:
import altair as alt
import pandas as pd
# Wide frame: four consecutive days with one REVENUE and one COST figure each.
daily = pd.DataFrame({
    'DATE': pd.date_range('2019-01-01', freq='D', periods=4),
    'REVENUE': [100, 200, 150, 50],
    'COST': [150, 125, 75, 80],
})
# Melt to long form: one row per (DATE, KEY) with its VALUE.
data = daily.melt('DATE', var_name='KEY', value_name='VALUE')

# Bar chart with the date (truncated to year-month-date) as an ordinal x axis,
# coloured by KEY, with KEY and VALUE shown in the tooltip.
bar_chart = alt.Chart(data).mark_bar()
bar_chart.encode(
    x='yearmonthdate(DATE):O',
    y='VALUE',
    color='KEY',
    tooltip=['KEY', 'VALUE'],
)