How to get seaborn attributes to reset to default?

I cannot for the life of me get seaborn to go back to default settings.
I will include the code that I believe caused this issue, but I would recommend against running it unless you know how to fix it.
The culprit, I believe, is this line in the last chunk:
sns.set(font_scale = 4)
Before this question gets deleted because it has already been asked: I have tried the other posted solutions with no success. Just to name a quick few: resetting with sns.set(), sns.set_style(), and sns.reset_defaults(). I have also tried resetting the matplotlib settings to their defaults. This setting persists across all my files, so I can't even open a new file, delete the line of code that caused it, or run any past programs without it applying to those graphs too.
My seaborn version is 0.10.1. I have tried to update it, but I can't get the update to go through. I am using Anaconda's Spyder IDE.
The documentation says that for versions after 0.8 the styles/themes must be explicitly invoked to take effect, but if I try the documented sns.set_theme() I get an error saying that the module has no such attribute.
I'm sure this persistence is considered a feature, but I desperately need it to go away!
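For reference, these are the reset calls that do exist in seaborn 0.10.x and matplotlib (sns.set_theme() only arrived in 0.11); a minimal sketch, assuming the oversized settings live in the current interpreter session rather than in a config file:

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(font_scale=4)          # reproduce the oversized styling

sns.reset_defaults()           # back to seaborn's own defaults (note: reset_defaults, not restore_defaults)
sns.reset_orig()               # or back to whatever rcParams were active before seaborn changed them

mpl.rcParams.update(mpl.rcParamsDefault)   # matplotlib-level reset
plt.style.use('default')

The full script that caused the problem is below.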
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

if __name__ == '__main__':
    # Data prep
    data_path = './assets/'
    out_path = './output'

    # Scraping JavaScript map data via the CDC endpoint
    endpoint = "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData"
    data = requests.get(endpoint, params={"id": "US_MAP_DATA"}).json()

    # Convert to DataFrame and export raw data as CSV
    df = pd.DataFrame(data["US_MAP_DATA"])
    path = os.path.join(out_path, 'Raw_CDC_Data.csv')
    df.to_csv(path)

    # Remove last data point (Total USA)
    df.drop(df.tail(1).index, inplace=True)

    # Create DataFrame of just the 50 states (plus DC)
    state_abbr = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
                  "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
                  "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
                  "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
                  "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
    states = df[df['abbr'].isin(state_abbr)].copy()  # copy to avoid SettingWithCopyWarning

    # Adding NYC to state of NY
    # FILL THIS IN LATER

    # Graphing
    plt.style.use('default')
    sns.set()

    # Add new survival_rate column and save
    states['survival_rate'] = states['tot_cases'] - states['tot_death']
    states = states.drop(df.columns[[0]], axis=1)
    states = states.reset_index(drop=True)
    path = os.path.join(out_path, 'CDC_Data_By_State.csv')
    states.to_csv(path)

    # Stacked bar plot
    fig, ax = plt.subplots()
    colors = ['#e5c5b5', '#a8dda8']
    r = range(0, len(states.index))
    plt.bar(r, states['survival_rate'], color=colors[0])
    # ax = stacked['survival_rate','tot_death'].plot.bar(stacked=True, color=colors, ax=ax)

    fig, ax = plt.subplots()
    plt.figure(figsize=(20, 35))
    sns.set(font_scale=4)
    ax = sns.barplot(x='tot_cases', y='abbr', data=states)
    ax.set(title='USA Covid-19 Cases by State', ylabel='State', xlabel='Confirmed Cases')
    path = os.path.join(out_path, 'Total_Deaths_Bar.png')
    plt.savefig(path)

Related

Is there a way to control which vertices connect in a plotly.express.line_geo map?

I'm trying to make a connection map that has the option to use an animation_frame to show different months/years. Plotly.express has this option, but the plotly.express.line_geo maps seem to just attach the vertices of the network at random. I was looking at these examples from https://plotly.com/python/lines-on-maps/.
import plotly.express as px

df = px.data.gapminder().query("year == 2007")
fig = px.line_geo(df, locations="iso_alpha",
                  color="continent",  # "continent" is one of the columns of gapminder
                  projection="orthographic")
fig.show()
Plotly.graph_objects allows you to map actual connections between vertices, but doesn't seem to have an animation option.
import plotly.graph_objects as go
import pandas as pd

df_airports = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_february_us_airport_traffic.csv')
df_airports.head()

df_flight_paths = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_february_aa_flight_paths.csv')
df_flight_paths.head()

fig = go.Figure()

flight_paths = []
for i in range(len(df_flight_paths)):
    fig.add_trace(
        go.Scattergeo(
            locationmode='USA-states',
            lon=[df_flight_paths['start_lon'][i], df_flight_paths['end_lon'][i]],
            lat=[df_flight_paths['start_lat'][i], df_flight_paths['end_lat'][i]],
            mode='lines',
            line=dict(width=1, color='red'),
            opacity=float(df_flight_paths['cnt'][i]) / float(df_flight_paths['cnt'].max()),
        )
    )

fig.show()
Does anyone know of a way that I could make a map like the flight-path map, but allow an animation option to look at the flight maps for different months/years?
You can animate any trace type using frames. Taking the sample flight-path data used in the question, I have split it into groups based on the first letter of the start airport. There is no need to create a trace per flight; instead, create pairs of start/end locations in arrays separated by None. With this it is simple to create a frame with a trace for each group, then build the figure from the frames plus a default trace, and add a play button and slider.
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

df_flight_paths = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/2011_february_aa_flight_paths.csv"
)

frames = []
# let's split the data based on the first letter of the start airport
# and create a frame for each grouping
bins = 6
for color, df in df_flight_paths.groupby(
    pd.qcut(
        df_flight_paths["airport1"].str[0].apply(ord),
        q=bins,
        labels=px.colors.qualitative.Plotly[:bins],
    )
):
    name = f'{df["airport1"].str[0].min()}-{df["airport1"].str[0].max()}'
    frames.append(
        go.Frame(
            name=name,
            data=go.Scattergeo(
                lon=df.assign(nan=None)[["start_lon", "end_lon", "nan"]].values.ravel(),
                lat=df.assign(nan=None)[["start_lat", "end_lat", "nan"]].values.ravel(),
                mode="lines",
                line=dict(width=1, color=color),
            ),
        )
    )

# now create the figure and add a play button and slider
go.Figure(
    data=frames[0].data,
    frames=frames,
    layout={
        "updatemenus": [
            {
                "type": "buttons",
                "buttons": [{"label": "Play", "method": "animate", "args": [None]}],
            }
        ],
        "sliders": [
            {
                "active": 0,
                "steps": [
                    {
                        "label": f.name,
                        "method": "animate",
                        "args": [[f.name]],
                    }
                    for f in frames
                ],
            }
        ],
    },
).update_geos(
    scope="north america",
)

How to deal with extreme values on a Folium map

I have very little experience with folium maps.
I need to make a map with the number of establishments in each department. The problem is that the capital has far more establishments than the interior, so when I create the colour layer the capital comes out dark blue and everything else gets the same lighter colour, which makes the map not very useful...
How could I solve that? I thought of maybe dividing the value by the population, but it would be better to use the original value.
In the documentation, I did not find a way to parameterize the color.
import pandas as pd
import geopandas
import folium

df1 = pd.DataFrame({'code': ['75', '77', '78', '91', '92', '93', '94', '95'],
                    'value': ['13000', '2000', '2500', '2300', '2150', '2600', '1630', '1300']})

dep_geo = geopandas.read_file('./dep.json', driver="JSON")  # geodata taken from https://github.com/gregoiredavid/france-geojson/blob/master/departements.geojson
departments = {'75', '77', '78', '91', '92', '93', '94', '95'}
dep_geo = dep_geo[dep_geo['code'].isin(departments)]
df_map = dep_geo.merge(df1, how="left", left_on=['code'], right_on=['code'])

my_map = folium.Map(location=[48.856614, 2.3522219],
                    zoom_start=9, tiles='cartodbpositron')

folium.Choropleth(
    geo_data=df_map,
    data=df_map,
    columns=['code', "value"],
    key_on="feature.properties.code",
    fill_color='YlGnBu',
    fill_opacity=0.5,
    line_opacity=0.2,
    legend_name="value",
    smooth_factor=0,
    highlight=True,
    line_color="black",
    name="value",
    show=False,
    overlay=True,
    nan_fill_color="White"
).add_to(my_map)
Result: (screenshot of the resulting map omitted)
Thank you for your help!
It's as simple as using the vmax argument; I've set it to the 85th percentile. I have also used geopandas' explore() to generate the folium map.
import geopandas as gpd
import pandas as pd
import folium

df1 = pd.DataFrame(
    {
        "code": ["75", "77", "78", "91", "92", "93", "94", "95"],
        "value": ["13000", "2000", "2500", "2300", "2150", "2600", "1630", "1300"],
    }
)
# dep_geo = geopandas.read_file('./dep.json', driver="JSON")
dep_geo = gpd.read_file(
    "https://github.com/gregoiredavid/france-geojson/raw/master/departements.geojson"
)  # geodata taken from https://github.com/gregoiredavid/france-geojson/blob/master/departements.geojson
departments = {"75", "77", "78", "91", "92", "93", "94", "95"}
dep_geo = dep_geo[dep_geo["code"].isin(departments)]
df_map = dep_geo.merge(df1, how="left", left_on=["code"], right_on=["code"])
df_map["value"] = pd.to_numeric(df_map["value"])

df_map.explore(
    column="value",
    cmap="YlGnBu",
    vmax=df_map["value"].quantile(0.85),
    style_kwds=dict(
        color="rgba(0,0,0,.2)",
    ),
    location=[48.856614, 2.3522219],
    zoom_start=9,
    tiles="cartodbpositron",
)
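If you would rather keep the original folium.Choropleth call, its bins argument accepts explicit break points, which gives a similar effect. A rough sketch, reusing df_map from the snippet above (with 'value' already converted to numeric) and capping the linear part of the scale at the 85th percentile:

import numpy as np
import folium

my_map = folium.Map(location=[48.856614, 2.3522219], zoom_start=9, tiles="cartodbpositron")

# linear breaks up to the 85th percentile, plus one wide top bin that absorbs the Paris outlier
capped = df_map["value"].quantile(0.85)
bin_edges = [float(b) for b in np.linspace(df_map["value"].min(), capped, 5)] + [float(df_map["value"].max())]

folium.Choropleth(
    geo_data=df_map,
    data=df_map,
    columns=["code", "value"],
    key_on="feature.properties.code",
    fill_color="YlGnBu",
    bins=bin_edges,  # explicit breaks instead of an evenly spread linear scale
    fill_opacity=0.5,
    line_opacity=0.2,
).add_to(my_map)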

Continuous Color Scale Adjusts on Filtering

I am working on a presidential elections project that involves filtering a choropleth map. My data is at the county level, and I have a drop down box that allows a user to select a state. The counties are colored by a blue to red continuous color scale representing the lean from democrat to republican. The variable I use for the color scale is the margin between the vote of both parties.
If the margin is positive, the county should be colored a shade of blue; if the margin is negative, the county should be colored a shade of red.
However, when I filter to a particular state and all counties in that state voted for one party, the scale finds the lowest margin value and assigns that a color on the blue end of the spectrum even if that county voted more for the Republican.
Is there a way to fix the color scale when filtering so the counties are colored correctly?
Here is some example code:
import pandas as pd
import dash
import os
from urllib.request import urlopen
import json
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px

with urlopen(
    "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
) as response:
    counties = json.load(response)

data = [
    ["Delaware", "Kent County", 10001, 0.467, 0.517, -75.513210, 39.156876],
    ["Delaware", "New Castle County", 10003, 0.322, 0.663, -75.513210, 39.156876],
    ["Delaware", "Sussex County", 10005, 0.559, 0.428, -75.513210, 39.156876],
    ["District of Columbia", "District of Columbia", 11001, 0.0712, 0.913, -77.014468, 38.910270],
    ["Rhode Island", "Bristol County", 44001, 0.2429, 0.7352, -71.41572, 41.65665],
    ["Rhode Island", "Kent County", 44003, 0.45117, 0.5275, -71.41572, 41.65665],
    ["Rhode Island", "Newport County", 44005, 0.3406, 0.6389, -71.41572, 41.65665],
    ["Rhode Island", "Providence County", 44007, 0.3761, 0.605177, -71.41572, 41.65665],
    ["Rhode Island", "Washington County", 44009, 0.392032, 0.5857, -71.41572, 41.65665],
]
data = pd.DataFrame(
    data,
    columns=[
        "State",
        "County",
        "fips_code",
        "perc_gop",
        "perc_dem",
        "lon",
        "lat",
    ],
)
state_choices = data["State"].sort_values().unique()
data["margin_perc"] = data["perc_dem"] - data["perc_gop"]

app = dash.Dash(__name__, assets_folder=os.path.join(os.curdir, "assets"))
server = app.server

app.layout = html.Div([
    html.Div([
        dcc.Dropdown(
            id="dropdown1",
            options=[{"label": i, "value": i} for i in state_choices],
            value=state_choices[0],
        )
    ], style={"width": "100%", "display": "inline-block", "text-align": "center"}),
    # State map with county choropleth
    html.Div([
        dcc.Graph(id="state_map")],
        style={"width": "100%", "display": "inline-block", "text-align": "center"},
    )
])

@app.callback(Output("state_map", "figure"), Input("dropdown1", "value"))
def update_figure3(state_select):
    new_df = data[data["State"] == state_select]
    avg_lat = new_df["lat"].mean()
    avg_lon = new_df["lon"].mean()
    fig = px.choropleth_mapbox(
        new_df,
        geojson=counties,
        locations="fips_code",
        color="margin_perc",
        color_continuous_scale="balance",
        mapbox_style="carto-positron",
        zoom=6,
        center={"lat": avg_lat, "lon": avg_lon},
        opacity=0.5,
        labels={
            "State": "State",
            "County": "County",
            "perc_gop": "% Republican",
            "perc_dem": "% Democratic",
            "margin_perc": "% Margin",
        },
        hover_data={
            "fips_code": False,
            "State": True,
            "County": True,
            "perc_gop": True,
            "perc_dem": True,
        },
    )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    return fig

app.run_server(host="0.0.0.0", port="8051")
Figured it out --> I needed to read the documentation more carefully :/
The color_continuous_midpoint argument came in handy. I just calculated the midpoint of the color variable over the entire dataset and used that as the fixed midpoint of the scale.
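For illustration, a rough sketch of how that might slot into the callback above, reusing data, new_df, counties, avg_lat and avg_lon from the question's code; the exact midpoint calculation isn't shown in the answer, so taking the median of the full dataset (or simply 0 for a dem-minus-gop margin) is an assumption:

# computed once on the FULL dataset, outside the callback, so filtering never recentres the scale
midpoint = data["margin_perc"].median()   # or simply 0 for a symmetric dem-vs-gop margin

fig = px.choropleth_mapbox(
    new_df,
    geojson=counties,
    locations="fips_code",
    color="margin_perc",
    color_continuous_scale="balance",
    color_continuous_midpoint=midpoint,   # pins the blue/red centre regardless of the filter
    mapbox_style="carto-positron",
    zoom=6,
    center={"lat": avg_lat, "lon": avg_lon},
    opacity=0.5,
)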

Python DataReader - Update with new information

import pandas as pd
from pandas_datareader import data as wb
tickers = ["MMM", "ABT", "ABBV", "ABMD", "ACN", "ATVI", "ADBE", "AMD", "AAP", "AES", "AFL", "A", "APD", "AKAM", "ALK", "ALB", "ARE", "ALXN", "ALGN", "ALLE", "LNT", "ALL", "GOOGL", "GOOG", "MO", "AMZN", "AMCR", "AEE", "AAL", "AEP", "AXP", "AIG", "AMT", "AWK", "AMP", "ABC", "AME", "AMGN", "APH", "ADI", "ANSS", "ANTM", "AON", "AOS", "APA", "AAPL", "AMAT", "APTV", "ADM", "ANET", "AJG", "AIZ", "T", "ATO", "ADSK", "ADP", "AZO", "AVB", "AVY", "BKR", "BLL", "BAC", "BK", "BAX", "BDX", "BBY", "BIO", "BIIB", "BLK", "BA", "BKNG", "BWA", "BXP", "BSX", "BMY", "AVGO", "BR", "CHRW", "COG", "CDNS", "CZR", "CPB", "COF", "CAH", "KMX", "CCL", "CARR", "CTLT", "CAT", "CBOE", "CBRE", "CDW", "CE", "CNC", "CNP", "CERN", "CF", "SCHW", "CHTR", "CVX", "CMG", "CB", "CHD", "CI", "CINF", "CTAS", "CSCO", "C", "CFG", "CTXS", "CLX", "CME", "CMS", "KO", "CTSH", "CL", "CMCSA", "CMA", "CAG", "COP", "ED", "STZ", "COO", "CPRT", "GLW", "CTVA", "COST", "CCI", "CSX", "CMI", "CVS", "DHI", "DHR", "DRI", "DVA", "DE", "DAL", "XRAY", "DVN", "DXCM", "FANG", "DLR", "DFS", "DISCA", "DISCK", "DISH", "DG", "DLTR", "D", "DPZ", "DOV", "DOW", "DTE", "DUK", "DRE", "DD", "DXC", "EMN", "ETN", "EBAY", "ECL", "EIX", "EW", "EA", "EMR", "ENPH", "ETR", "EOG", "EFX", "EQIX", "EQR", "ESS", "EL", "ETSY", "EVRG", "ES", "RE", "EXC", "EXPE", "EXPD", "EXR", "XOM", "FFIV", "FB", "FAST", "FRT", "FDX", "FIS", "FITB", "FE", "FRC", "FISV", "FLT", "FLIR", "FMC", "F", "FTNT", "FTV", "FBHS", "FOXA", "FOX", "BEN", "FCX", "GPS", "GRMN", "IT", "GNRC", "GD", "GE", "GIS", "GM", "GPC", "GILD", "GL", "GPN", "GS", "GWW", "HAL", "HBI", "HIG", "HAS", "HCA", "PEAK", "HSIC", "HSY", "HES", "HPE", "HLT", "HFC", "HOLX", "HD", "HON", "HRL", "HST", "HWM", "HPQ", "HUM", "HBAN", "HII", "IEX", "IDXX", "INFO", "ITW", "ILMN", "INCY", "IR", "INTC", "ICE", "IBM", "IP", "IPG", "IFF", "INTU", "ISRG", "IVZ", "IPGP", "IQV", "IRM", "JKHY", "J", "JBHT", "SJM", "JNJ", "JCI", "JPM", "JNPR", "KSU", "K", "KEY", "KEYS", "KMB", "KIM", "KMI", "KLAC", "KHC", "KR", "LB", "LHX", "LH", "LRCX", "LW", "LVS", "LEG", "LDOS", "LEN", "LLY", "LNC", "LIN", "LYV", "LKQ", "LMT", "L", "LOW", "LUMN", "LYB", "MTB", "MRO", "MPC", "MKTX", "MAR", "MMC", "MLM", "MAS", "MA", "MKC", "MXIM", "MCD", "MCK", "MDT", "MRK", "MET", "MTD", "MGM", "MCHP", "MU", "MSFT", "MAA", "MHK", "TAP", "MDLZ", "MPWR", "MNST", "MCO", "MS", "MOS", "MSI", "MSCI", "NDAQ", "NTAP", "NFLX", "NWL", "NEM", "NWSA", "NWS", "NEE", "NLSN", "NKE", "NI", "NSC", "NTRS", "NOC", "NLOK", "NCLH", "NOV", "NRG", "NUE", "NVDA", "NVR", "NXPI", "ORLY", "OXY", "ODFL", "OMC", "OKE", "ORCL", "OTIS", "PCAR", "PKG", "PH", "PAYX", "PAYC", "PYPL", "PENN", "PNR", "PBCT", "PEP", "PKI", "PRGO", "PFE", "PM", "PSX", "PNW", "PXD", "PNC", "POOL", "PPG", "PPL", "PFG", "PG", "PGR", "PLD", "PRU", "PEG", "PSA", "PHM", "PVH", "QRVO", "PWR", "QCOM", "DGX", "RL", "RJF", "RTX", "O", "REG", "REGN", "RF", "RSG", "RMD", "RHI", "ROK", "ROL", "ROP", "ROST", "RCL", "SPGI", "CRM", "SBAC", "SLB", "STX", "SEE", "SRE", "NOW", "SHW", "SPG", "SWKS", "SNA", "SO", "LUV", "SWK", "SBUX", "STT", "STE", "SYK", "SIVB", "SYF", "SNPS", "SYY", "TMUS", "TROW", "TTWO", "TPR", "TGT", "TEL", "TDY", "TFX", "TER", "TSLA", "TXN", "TXT", "TMO", "TJX", "TSCO", "TT", "TDG", "TRV", "TRMB", "TFC", "TWTR", "TYL", "TSN", "UDR", "ULTA", "USB", "UAA", "UA", "UNP", "UAL", "UNH", "UPS", "URI", "UHS", "UNM", "VLO", "VAR", "VTR", "VRSN", "VRSK", "VZ", "VRTX", "VFC", "VIAC", "VTRS", "V", "VNO", "VMC", "WRB", "WAB", "WMT", "WBA", "DIS", "WM", "WAT", "WEC", "WFC", "WELL", "WST", "WDC", "WU", "WRK", "WY", "WHR", "WMB", "WLTW", "WYNN", 
"XEL", "XLNX", "XYL", "YUM", "ZBRA", "ZBH", "ZION", "ZTS"]
financial_data = pd.DataFrame()
for t in tickers:
    financial_data[t] = wb.DataReader(t, data_source='yahoo', start='1995-1-1')["Adj Close"]

financial_data.to_excel("Financial Data.xlsx")
I am using DataReader to gather some stock info. I am grabbing a lot of data (from 1995 to 2021) and then exporting it to Excel. I was wondering if there is a way, say tomorrow, to speed up the update instead of running the whole script from top to bottom, since tomorrow my goal would just be to add a single new row to the Excel file. If I simply re-run the script, it overwrites the Excel file and adds the new row of data. This seems pretty inefficient, and I was wondering if there is a way to tell the script that I only want tomorrow's data, instead of grabbing everything again starting from 1995.
Thanks.
I don't know exactly how pandas works internally, but I would say it does lazy, fast loading, and that part is not very computationally expensive; the costly thing is operating on each piece of loaded data. In your case, if the data is ordered by date in increasing order, it should be enough to have a variable called timestamp_toStart, initialised to '1995-1-1' for the first run and updated after each execution to the last date read. You could save this value to a file and reload it every time you rerun the script.
financial_data = pd.DataFrame()
# load timestamp_toStart from the file here
for t in tickers:
    financial_data[t] = wb.DataReader(t, data_source='yahoo', start=timestamp_toStart)["Adj Close"]

# the index of the downloaded data is the date, so its last value is the
# most recent date fetched; persist it as the start of the next run
timestamp_toStart = financial_data.index[-1]
# save timestamp_toStart to the file here

financial_data.to_excel("Financial Data.xlsx")
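To end up with a single workbook that only grows, a possible follow-up (a sketch, assuming the "Financial Data.xlsx" file name from above and that its first column holds the date index) is to concatenate the freshly fetched rows onto the previously saved data before writing:

import os
import pandas as pd

if os.path.exists("Financial Data.xlsx"):
    previous = pd.read_excel("Financial Data.xlsx", index_col=0)
    combined = pd.concat([previous, financial_data])
    # if a date was fetched twice, keep the newest copy
    combined = combined[~combined.index.duplicated(keep="last")].sort_index()
else:
    combined = financial_data

combined.to_excel("Financial Data.xlsx")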

Why is my choropleth map in python empty?

So I am trying to learn to plot choropleth maps. I used a sample dataset, but the map comes out empty. Could you please look at it and tell me what is wrong? I have cross-checked the syntax and do not understand why it does not show my data. I have included the dataset head and the code I wrote, as seen below:
import plotly.graph_objects as go
from plotly.offline import iplot

# df is the sample DataFrame whose head is shown above (state Names and coverage columns)
datia = dict(type='choropleth',
             locations=df['Names'],
             locationmode='USA-states',
             colorscale='Portland',
             text=df['Names'],
             z=df['2010-11'],
             colorbar={'title': 'Vaccine Coverage in percentage'})

layout = dict(title='Vaccine Coverage in percentage',
              geo={'scope': 'usa'})

choromap = go.Figure(data=[datia], layout=layout)
iplot(choromap, validate=False)
You need to give state abbreviations (DC, AL, ...) instead of names. I faced the same issue, and it worked after changing that. You can use the following code to convert names to abbreviations:
def convert_state_name(name):
    cd = str(name)
    us_state_abbrev = {
        "Alabama": "AL",
        "Alaska": "AK",
        "Arizona": "AZ",
        "Arkansas": "AR",
        "California": "CA",
        "Colorado": "CO",
        "Connecticut": "CT",
        "Delaware": "DE",
        "Washington DC": "DC",
        "Florida": "FL",
        "Georgia": "GA",
        "Hawaii": "HI",
        "Idaho": "ID",
        "Illinois": "IL",
        "Indiana": "IN",
        "Iowa": "IA",
        "Kansas": "KS",
        "Kentucky": "KY",
        "Louisiana": "LA",
        "Maine": "ME",
        "Maryland": "MD",
        "Massachusetts": "MA",
        "Michigan": "MI",
        "Minnesota": "MN",
        "Mississippi": "MS",
        "Missouri": "MO",
        "Montana": "MT",
        "Nebraska": "NE",
        "Nevada": "NV",
        "New Hampshire": "NH",
        "New Jersey": "NJ",
        "New Mexico": "NM",
        "New York": "NY",
        "North Carolina": "NC",
        "North Dakota": "ND",
        "Ohio": "OH",
        "Oklahoma": "OK",
        "Oregon": "OR",
        "Pennsylvania": "PA",
        "Rhode Island": "RI",
        "South Carolina": "SC",
        "South Dakota": "SD",
        "Tennessee": "TN",
        "Texas": "TX",
        "Utah": "UT",
        "Vermont": "VT",
        "Virginia": "VA",
        "Washington": "WA",
        "West Virginia": "WV",
        "Wisconsin": "WI",
        "Wyoming": "WY",
    }
    return us_state_abbrev[cd]

df['STATE_CD'] = df.Names.apply(lambda x: convert_state_name(x))
Finally, you can replace locations=df['Names'] with locations=df['STATE_CD'].
Hope this helps.
