Related
I think it has something to do with my `wb.DataReader` call. I've made sure the ticker and the start date are correct. Yahoo Finance shows "Adj Close" as "Adj Close**", and I've tried both, with correct spelling and capitalization.
import locale
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
# Monte Carlo simulation of future prices for one ticker, using drift and
# volatility estimated from historical daily log returns.
ticker = "BHIL"
data = pd.DataFrame()
# Download the price history and keep only the adjusted close column.
data[ticker] = wb.DataReader(ticker, data_source = 'yahoo', start = '2021-2-17')['Adj Close']
#Plot
data.plot(figsize=(15,6))
# Daily log returns: log(1 + percentage change).
log_return = np.log(1 + data.pct_change())
#Plot
# The first row is skipped because pct_change has no previous value there.
sns.distplot(log_return.iloc[1:])
plt.xlabel("Daily Return")
plt.ylabel("Frequency")
u = log_return.mean()
var = log_return.var()
# Drift of the log-price process: mean return minus half the variance.
drift = u - (0.5*var)
stdev = log_return.std()
days = 50
trials = 10000
# One standard-normal shock per (day, trial), drawn by inverse-CDF sampling.
Z = norm.ppf(np.random.rand(days, trials)) #days, trials
daily_returns = np.exp(drift.values + stdev.values * Z)
price_paths = np.zeros_like(daily_returns)
# Every simulated path starts from the most recent observed price.
price_paths[0] = data.iloc[-1]
for t in range(1, days):
    # Compound the previous day's price by that day's simulated gross return.
    price_paths[t] = price_paths[t-1]*daily_returns[t]
Traceback (most recent call last):
File "/Users/gknight/Desktop/Benson Hill CFA/Monte_Carlo.py", line 11, in <module>
data[ticker] = wb.DataReader(ticker, data_source = 'yahoo', start = '2022-10-10')['Adj Close**']
File "/Users/gknight/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py", line 207, in wrapper
return func(*args, **kwargs)
File "/Users/gknight/opt/anaconda3/lib/python3.9/site-packages/pandas_datareader/data.py", line 370, in DataReader
return YahooDailyReader(
File "/Users/gknight/opt/anaconda3/lib/python3.9/site-packages/pandas_datareader/base.py", line 253, in read
df = self._read_one_data(self.url, params=self._get_params(self.symbols))
File "/Users/gknight/opt/anaconda3/lib/python3.9/site-packages/pandas_datareader/yahoo/daily.py", line 153, in _read_one_data
data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
TypeError: string indices must be integers
(base) gknight@GK Benson Hill CFA %
You need to use `yfinance` and call `yf.pdr_override()` so that it overrides the Yahoo reader in pandas_datareader. So change your code to:
import locale
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
import yfinance as yf
yf.pdr_override()
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
# Monte Carlo simulation of future prices for one ticker, using drift and
# volatility estimated from historical daily log returns.
ticker = "BHIL"
data = pd.DataFrame()
# pandas_datareader.data is imported as `wb`; after yf.pdr_override() its
# get_data_yahoo is served by yfinance.
data[ticker] = wb.get_data_yahoo(ticker, start = '2021-2-17')['Adj Close']
#Plot
data.plot(figsize=(15,6))
# Daily log returns: log(1 + percentage change).
log_return = np.log(1 + data.pct_change())
#Plot
# The first row is skipped because pct_change has no previous value there.
sns.distplot(log_return.iloc[1:])
plt.xlabel("Daily Return")
plt.ylabel("Frequency")
u = log_return.mean()
var = log_return.var()
# Drift of the log-price process: mean return minus half the variance.
drift = u - (0.5*var)
stdev = log_return.std()
days = 50
trials = 10000
# One standard-normal shock per (day, trial), drawn by inverse-CDF sampling.
Z = norm.ppf(np.random.rand(days, trials)) #days, trials
daily_returns = np.exp(drift.values + stdev.values * Z)
price_paths = np.zeros_like(daily_returns)
# Every simulated path starts from the most recent observed price.
price_paths[0] = data.iloc[-1]
for t in range(1, days):
    # Compound the previous day's price by that day's simulated gross return.
    price_paths[t] = price_paths[t-1]*daily_returns[t]
Note that `distplot` is a deprecated function and will be removed in seaborn v0.14.0, so update your version of seaborn, or change the `distplot` call to `sns.histplot(log_return.iloc[1:])`
to get
I have been mainly working in VS code to create a bokeh dashboard and I now need to get it to run within a Jupyter notebook. I know that some transformations in the code are required to push the code to a Jupyter notebook and for it to update interactively with widgets.
I have referred to this documentation:-
https://docs.bokeh.org/en/latest/docs/user_guide/jupyter.html#userguide-jupyter-notebook
But either it is too simplistic for my code, or I have not used the push_notebook commands properly (or both).
Here is the code that I am trying to run in the notebook:-
################################### Code chunk 1##########################
from ipywidgets import interact
import pandas as pd
import numpy as np
import math
from bokeh.models import HoverTool
from bokeh.io import curdoc, output_notebook, push_notebook
from bokeh.plotting import figure, ColumnDataSource
from bokeh.layouts import layout, row, column, gridplot
from bokeh.models.widgets import RangeSlider
#https://discourse.bokeh.org/t/interactive-histograms-not-updating-with-sliders/3779/25
#clustering packages
from operator import index
from bokeh.models.widgets.markups import Div
import numpy as np
from numpy.lib import source
import pandas as pd
from bokeh.io import curdoc
from bokeh.layouts import column, row, gridplot, Column, Row
from bokeh.models import ColumnDataSource, Select, Slider, BoxSelectTool, LassoSelectTool, Tabs, Panel, LinearColorMapper, ColorBar, BasicTicker, PrintfTickFormatter, MultiSelect, DataTable, TableColumn
from bokeh.plotting import figure, curdoc, show
from bokeh.palettes import viridis, gray, cividis, Category20
from bokeh.transform import factor_cmap
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, r2_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.decomposition import PCA
#tables
from time import time
output_notebook()
################################# Code chunk 2 ###########################################
#define the categorical variable
category_a = ['A','B','C']
category_b = ['X','Y','Z']
print("step 2")
# Synthetic 100-row data set: one row per day with five numeric sensor
# readings and two categorical labels.
df = pd.DataFrame({
    'id': np.arange(0, 100),
    'date': pd.date_range(start='1/1/2021', periods=100, freq='D'),
    # randint excludes its upper bound, so 13 is needed for December to occur.
    'month': np.random.randint(1, 13, 100),
    'sensor_1': np.random.uniform(0, 1, 100),
    'sensor_2': np.random.uniform(10, 150, 100),
    'sensor_3': np.random.randint(0, 90, 100),
    'sensor_4': np.random.randint(0, 450, 100),
    'sensor_5': np.random.randint(0, 352, 100),
    # p gives the sampling probability of each category, in list order.
    'categorya': np.random.choice(category_a, 100, p=[0.2, 0.4, 0.4]),
    'categoryb': np.random.choice(category_b, 100, p=[0.6, 0.2, 0.2]),
})
source = ColumnDataSource(data=df)
################################### Code chunk 3##############################
class hist_data:
    """Histogram bins for one column of a sensor DataFrame, with range filtering.

    Each instance keeps a lower/upper bound for all five sensor columns
    (initialised to the observed min/max), the column it bins, and a
    ColumnDataSource holding the current histogram.

    Args:
        df: DataFrame with columns ``sensor_1`` .. ``sensor_5``.
        col: Name of the column to bin.
        n_bins: Number of histogram bins.
        bin_range: ``[low, high]`` range passed to ``np.histogram``.
    """

    def __init__(self, df, col, n_bins, bin_range):
        # Bounds start at the full observed range, so the initial filter
        # keeps every row.
        self.sensor1_lwr = min(df['sensor_1'])#duration millisecond
        self.sensor1_upr = max(df['sensor_1'])#duration millisecond
        self.sensor2_lwr = min(df['sensor_2'])#count watch
        self.sensor2_upr = max(df['sensor_2'])#count_watch
        self.sensor3_lwr = min(df['sensor_3'])#count idle
        self.sensor3_upr = max(df['sensor_3'])#count idle
        self.sensor4_lwr = min(df['sensor_4'])#count inter and watch
        self.sensor4_upr = max(df['sensor_4'])#count inter and watch
        self.sensor5_lwr = min(df['sensor_5'])#count inter
        self.sensor5_upr = max(df['sensor_5'])#count inter
        self.col = col
        self.n_bins = n_bins
        self.bin_range = bin_range
        self.original_df = df
        self.source = ColumnDataSource(self.create_hist_data(df))

    def filt_df(self):
        """Return a new ColumnDataSource binned from the rows inside all bounds.

        A row is kept when every sensor value lies within its inclusive
        ``[lwr, upr]`` bound. The current bounds are printed for debugging.
        """
        frame = self.original_df
        bounds = (
            ('sensor_1', self.sensor1_lwr, self.sensor1_upr),
            ('sensor_2', self.sensor2_lwr, self.sensor2_upr),
            ('sensor_3', self.sensor3_lwr, self.sensor3_upr),
            ('sensor_4', self.sensor4_lwr, self.sensor4_upr),
            ('sensor_5', self.sensor5_lwr, self.sensor5_upr),
        )
        keep = pd.Series(True, index=frame.index)
        for column, lower, upper in bounds:
            # Series.between is inclusive on both ends by default.
            keep &= frame[column].between(lower, upper)
        filt = frame[keep]
        print(f'{self.sensor1_lwr} {self.sensor1_upr} {self.sensor2_lwr} {self.sensor2_upr} {self.sensor3_lwr} {self.sensor3_upr} {self.sensor4_lwr} {self.sensor4_upr} {self.sensor5_lwr} {self.sensor5_upr}')
        return ColumnDataSource(self.create_hist_data(filt))

    def create_hist_data(self, df):
        """Bin ``df[self.col]`` and return one row per bin.

        Returns:
            DataFrame with ``count``, ``left``, ``right`` and the
            tooltip-ready strings ``f_count`` and ``f_interval``.
        """
        arr_hist, edges = np.histogram(df[self.col], bins=self.n_bins, range=self.bin_range)
        arr_df = pd.DataFrame({'count': arr_hist, 'left': edges[:-1], 'right': edges[1:]})
        arr_df['f_count'] = ['%d' % count for count in arr_df['count']]
        # %g keeps fractional bin edges readable; %d would truncate them to
        # whole numbers (every sub-unit interval would read "0 to 0").
        arr_df['f_interval'] = ['%.4g to %.4g ' % (left, right)
                                for left, right in zip(arr_df['left'], arr_df['right'])]
        return arr_df
########################histograms
def _build_hist(column):
    """Create the hist_data for *column*: floor(sqrt(#unique)) bins over its full range."""
    values = df[column]
    n_bins = math.floor(math.sqrt(values.nunique()))
    return hist_data(df, column, n_bins, [min(values), max(values)])


hist_data_A = _build_hist('sensor_1')
hist_data_B = _build_hist('sensor_2')
hist_data_C = _build_hist('sensor_3')
hist_data_D = _build_hist('sensor_4')
hist_data_E = _build_hist('sensor_5')


############################slider
def _build_slider(column, step=1):
    """Create a RangeSlider spanning the observed range of *column*."""
    values = df[column]
    low, high = min(values), max(values)
    return RangeSlider(start=low, end=high, value=(low, high), step=step, title=column)


# sensor_1 is drawn from [0, 1); a step of 1 would leave only the two end
# stops selectable, so it gets a finer step.
A_Slider = _build_slider('sensor_1', step=0.01)
B_Slider = _build_slider('sensor_2')
C_Slider = _build_slider('sensor_3')
D_Slider = _build_slider('sensor_4')
E_Slider = _build_slider('sensor_5')


# Bokeh calls on_change handlers as callback(attr, old, new); `new` is the
# slider's (lower, upper) tuple after the change.
# NOTE(review): Python on_change callbacks only run when a Bokeh server backs
# the document, and push_notebook expects the handle returned by
# show(..., notebook_handle=True) — confirm how this layout is displayed.
def callback_A(attr, old, new):
    """Apply the sensor_1 range and rebuild the first plot of Graphs1."""
    hist_data_A.sensor1_lwr = new[0]
    hist_data_A.sensor1_upr = new[1]
    hist_data_A.source = hist_data_A.filt_df()
    Graphs1.children[0] = plot_data_A()
    push_notebook(handle=grid)


def callback_B(attr, old, new):
    """Apply the sensor_2 range and rebuild the second plot of Graphs1."""
    hist_data_B.sensor2_lwr = new[0]
    hist_data_B.sensor2_upr = new[1]
    hist_data_B.source = hist_data_B.filt_df()
    Graphs1.children[1] = plot_data_B()
    push_notebook(handle=grid)


def callback_C(attr, old, new):
    """Apply the sensor_3 range and rebuild the third plot of Graphs1."""
    hist_data_C.sensor3_lwr = new[0]
    hist_data_C.sensor3_upr = new[1]
    hist_data_C.source = hist_data_C.filt_df()
    Graphs1.children[2] = plot_data_C()
    push_notebook(handle=grid)


def callback_D(attr, old, new):
    """Apply the sensor_4 range and rebuild the first plot of Graphs2."""
    hist_data_D.sensor4_lwr = new[0]
    hist_data_D.sensor4_upr = new[1]
    hist_data_D.source = hist_data_D.filt_df()
    Graphs2.children[0] = plot_data_D()
    push_notebook(handle=grid)


def callback_E(attr, old, new):
    """Apply the sensor_5 range and rebuild the second plot of Graphs2."""
    hist_data_E.sensor5_lwr = new[0]
    hist_data_E.sensor5_upr = new[1]
    hist_data_E.source = hist_data_E.filt_df()
    Graphs2.children[1] = plot_data_E()
    push_notebook(handle=grid)


A_Slider.on_change("value", callback_A)
B_Slider.on_change("value", callback_B)
C_Slider.on_change("value", callback_C)
D_Slider.on_change("value", callback_D)
E_Slider.on_change("value", callback_E)
# Histogram
def interactive_histogram(hist_data, title, x_axis_label, x_tooltip):
    """Build a 400x400 histogram figure drawn as quads, with a hover tooltip.

    Args:
        hist_data: Data source providing the columns ``count``, ``left``,
            ``right``, ``f_count`` and ``f_interval``.
        title: Figure title.
        x_axis_label: Label for the x axis.
        x_tooltip: Label shown next to the bin interval in the tooltip.

    Returns:
        The configured Bokeh figure.
    """
    source = hist_data
    # 'hover' is left out of the tool list on purpose: the customised
    # HoverTool added below would otherwise be a second hover tool.
    toollist = ['lasso_select', 'tap', 'reset', 'save', 'crosshair',
                'wheel_zoom', 'pan', 'box_select']
    p = figure(plot_width=400,
               plot_height=400,
               title=title,
               x_axis_label=x_axis_label,
               y_axis_label='Count', tools=toollist)
    # One quad per bin, from the x axis up to the bin count.
    p.quad(bottom=0,
           top='count',
           left='left',
           right='right',
           source=source,
           fill_color='red',
           hover_fill_alpha=0.7,
           hover_fill_color='blue',
           line_color='black')
    # Add style to the plot
    p.title.align = 'center'
    p.title.text_font_size = '18pt'
    p.xaxis.axis_label_text_font_size = '12pt'
    p.xaxis.major_label_text_font_size = '12pt'
    p.yaxis.axis_label_text_font_size = '12pt'
    p.yaxis.major_label_text_font_size = '12pt'
    # Tooltip fields reference data-source columns with the '@' prefix.
    hover = HoverTool(tooltips=[(x_tooltip, '@f_interval'),
                                ('Count', '@f_count')])
    # Add the hover tool to the graph
    p.add_tools(hover)
    return p
#binsize = 10
binzise = 100


def plot_data_A():
    """Return a fresh sensor_1 histogram built from hist_data_A's current source."""
    label = 'sensor_1'
    return interactive_histogram(hist_data_A.source, label, label, label)


def plot_data_B():
    """Return a fresh sensor_2 histogram built from hist_data_B's current source."""
    label = 'sensor_2'
    return interactive_histogram(hist_data_B.source, label, label, label)


def plot_data_C():
    """Return a fresh sensor_3 histogram built from hist_data_C's current source."""
    label = 'sensor_3'
    return interactive_histogram(hist_data_C.source, label, label, label)


def plot_data_D():
    """Return a fresh sensor_4 histogram built from hist_data_D's current source."""
    label = 'sensor_4'
    return interactive_histogram(hist_data_D.source, label, label, label)


def plot_data_E():
    """Return a fresh sensor_5 histogram built from hist_data_E's current source."""
    label = 'sensor_5'
    return interactive_histogram(hist_data_E.source, label, label, label)


# Two rows of plots; the slider column sits to the left of the first row.
first_row_plots = [plot_data_A(), plot_data_B(), plot_data_C()]
second_row_plots = [plot_data_D(), plot_data_E()]
Graphs1 = row(first_row_plots)
Graphs2 = row(second_row_plots)
Controls1 = column([A_Slider, B_Slider, C_Slider, D_Slider, E_Slider])
#grid = gridplot([[Graphs1],
# [Controls1]])
grid = gridplot([
    [Controls1, Graphs1],
    [None, Graphs2],
])
show(grid)
Now, it brings up the plots:-
But the widgets do not update the plots. Can someone kindly show me what I am missing?
I want to mosaic/merge multiple swaths of the MODIS dataset (MOD06_L2) using python. I used the example (http://hdfeos.org/zoo/MORE/LAADS/MOD/MOD04_L2_merge.py) to read multiple files and merge. But I am getting an error while doing so, how to correct this error?
I would also like to know whether there is a better way than this to merge/mosaic MODIS HDF files into one.
import os
import glob
import matplotlib as mpl
import matplotlib.pyplot as plt
# import cartopy.crs as ccrs
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
# The first file in 3 swath files.
FILE_NAME = 'MOD06_L2.A2017126.0655.061.2019226193408.hdf'
GEO_FILE_NAME ='MOD06_L2.A2017126.0655.061.2019226193408.hdf'
DATAFIELD_NAME = 'Brightness_Temperature'
# Layer to plot when the data field carries an extra leading axis.
BAND_INDEX = 0
from pyhdf.SD import SD, SDC

# Per-swath pieces, stacked once after the loop.
data_parts = []
lat_parts = []
lon_parts = []
units = ''
for file in sorted(glob.glob('MOD06*.hdf')):
    hdf = SD(file, SDC.READ)
    try:
        # Read dataset.
        data2D = hdf.select(DATAFIELD_NAME)
        data = data2D.get().astype(np.double)
        # Read geolocation from the same swath file, so every swath is
        # plotted at its own position rather than at the first file's.
        latitude = hdf.select('Latitude').get()
        longitude = hdf.select('Longitude').get()
        # Retrieve attributes; with full=1 each entry is a tuple whose first
        # item is the attribute value.
        attrs = data2D.attributes(full=1)
    finally:
        # Close the HDF file even if a dataset or attribute is missing.
        hdf.end()

    long_name = attrs["long_name"][0]
    add_offset = attrs["add_offset"][0]
    _FillValue = attrs["_FillValue"][0]
    scale_factor = attrs["scale_factor"][0]
    valid_min, valid_max = attrs["valid_range"][0]
    units = attrs["units"][0]

    if data.ndim == 3:
        # The reported scatter error shows 7x more data values than lat/lon
        # points, i.e. the field has an extra axis. Keep one 2-D layer so
        # the colours line up with the coordinates.
        # NOTE(review): assumes axis 0 is the band axis — confirm against
        # the dataset's dimension names.
        data = data[BAND_INDEX]

    # Blank out fill and out-of-range values before applying scale/offset.
    invalid = np.logical_or(data > valid_max, data < valid_min)
    invalid = np.logical_or(invalid, data == _FillValue)
    data[invalid] = np.nan
    data = (data - add_offset) * scale_factor
    datam = np.ma.masked_array(data, np.isnan(data))

    data_parts.append(datam)
    lat_parts.append(latitude)
    lon_parts.append(longitude)

if not data_parts:
    raise FileNotFoundError("no files matching 'MOD06*.hdf' in the current directory")

# Stack the swaths along the first axis; np.ma.vstack keeps the masks.
data_m = np.ma.vstack(data_parts)
latitude_m = np.vstack(lat_parts)
longitude_m = np.vstack(lon_parts)

m = Basemap(projection='cyl', resolution='l',
            llcrnrlat=-90, urcrnrlat=90,
            llcrnrlon=-180, urcrnrlon=180)
m.drawcoastlines(linewidth=0.5)
m.drawparallels(np.arange(-90, 91, 45))
m.drawmeridians(np.arange(-180, 180, 45), labels=[True,False,False,True])
sc = m.scatter(longitude_m, latitude_m, c=data_m, s=0.1, cmap=plt.cm.jet,
               edgecolors=None, linewidth=0)
cb = m.colorbar()
cb.set_label(units)
# Put title using the first file.
basename = os.path.basename(FILE_NAME)
plt.title('{0}\n{1}'.format(basename, DATAFIELD_NAME))
fig = plt.gcf()
# Save image.
pngfile = "{0}.py.png".format(basename)
fig.savefig(pngfile)
It shows this error:
ValueError: 'c' argument has 4604040 elements, which is inconsistent with 'x' and 'y' with size 657720.
I am trying to generate an animation for a large dataset with a dynamic grid (ocean waves). I have managed to write a script that is functional, but it is time- and resource-consuming. I was hoping someone could see what I can improve in my code to help speed it up.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import animation as anim
import xarray as xr
import PySimpleGUI as sg
import sys
#file importing mechanism
jan = xr.open_dataset('Model/Bar_mig/xboutput_equi.nc')

########################## labeling the variables in the data-set ##############
nx = jan.variables['globalx']
globaltime = jan.variables['globaltime']
# Index 0 is taken on the second-to-last axis of every field.
zb = jan.variables['zb'][:, 0, :]
zs = jan.variables['zs'][:, 0, :]
ccz = jan.variables['ccz'][:, :, 0, :]
uz = jan.variables['uz'][:, :, 0, :]

# Integer index axes: 100 levels, and one index per globalx entry.
nz = np.arange(100)
nx = np.arange(nx.size)

# Number of animation frames = number of entries in globaltime.
globaltime_ar = np.array(globaltime)
conc = np.vstack(globaltime_ar)
newdf = pd.DataFrame(conc)
itr = newdf.shape[0]

uz1 = np.flip(uz, axis=0)
uz2 = np.flip(np.flip(uz1, axis=1), axis=0)

# Bed level as a DataFrame, without the columns that are NaN in every row.
depth1 = zb
a = np.array(depth1)
b = pd.DataFrame(a)
depth = b.dropna(axis=1, how='all')

# DataFrame copies of zb and of the sign-flipped zs.
zba1 = np.array(zb)
zsa1 = np.array(-zs)
zba = pd.DataFrame(zba1)
zsa = pd.DataFrame(zsa1)
This is how I am setting up the dynamic grid (example of the output):
#dynamic grid
# For every time step w, build a (levels x columns) grid whose column j runs
# linearly from -zsa[w, j] to depth[w, j]. The result is stored as the
# module-level name df_<w>, which the animation code looks up by that name.
n_levels = nz.shape[-1]
depth_values = depth.to_numpy()
top_values = -zsa.to_numpy()
for w in range(0, itr):
    # Only as many leading columns as this row has non-NaN depth values.
    n_cols = int(depth.iloc[w].notna().sum())
    # One broadcast linspace fills every column at once (axis=0 puts the
    # levels on the rows), replacing a per-column Python loop.
    grid = np.linspace(top_values[w, :n_cols], depth_values[w, :n_cols],
                       n_levels, endpoint=True, axis=0)
    globals()['df_{}'.format(w)] = pd.DataFrame(grid)
    sg.OneLineProgressMeter('My meter title', w, itr-1, 'key')
from matplotlib import animation as anim
fig = plt.figure(figsize=(15,7.5)) # Create a dummy figure
ax = plt.axes() # Set the axis rigid
mywriter = anim.FFMpegWriter()
scale=1

# Frame-invariant values, computed once instead of on every frame.
# x repeats the cross-shore index once per vertical level.
x = np.tile(nx, (nz.size, 1))
point = 350
_zb_first = np.asarray(zb[0])
_fill_floor = np.nanmin(_zb_first) - 1
_y_limits = (np.nanmin(_zb_first[point:point+1]), np.nanmax(_zb_first[point:]))


def animate(w):
    """Draw frame *w*: the concentration field on that step's grid plus the bed.

    Args:
        w: Frame index supplied by FuncAnimation; multiplied by ``scale`` to
            get the time-step index.

    Returns:
        A one-element tuple holding the mesh artist.
    """
    w = w*scale
    plt.clf()
    plt.title(str(w) + 'hr')
    y = globals()['df_{}'.format(w)]
    data2 = np.flip(ccz[w*1,:,:],0)
    # pcolormesh draws the same quadrilateral grid as pcolor but is much
    # faster for large arrays.
    # NOTE(review): pcolormesh needs finite x/y coordinates — confirm the
    # grid for every step is NaN-free.
    cont = plt.pcolormesh(x, y.to_numpy(), np.array(data2), cmap = 'jet',
                          vmin = 0, vmax = 0.03
                          )
    plt.colorbar(label='Concentration Profile ($m^3/m^3$)')
    plt.fill_between(nx, _fill_floor, zb[w], color = 'yellow')
    plt.xlim(point, nx.shape[-1])
    plt.ylim(*_y_limits)
    plt.xlabel('Cross shore distance (m)')
    plt.ylabel('Depth (m)')
    fig.tight_layout()
    return cont,


ani = anim.FuncAnimation(fig, animate, interval = 1, frames=itr)
ani.save('Sed_Con.mp4', writer=mywriter)
I am new to python and have created this tiny class "myclass" which is inside a module called linear_regression_example.py. It prints out a regression summary and a density plot:
import statsmodels.api as sm
import sklearn.datasets as skld
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
class myclass:
    """Fit an OLS regression on the Boston housing data and plot its residuals.

    Args:
        result: Pass 1 (the default) to print the regression summary; any
            other value skips the printout.
    """

    def __init__(self, result=1):
        self.result = result

    def myregression(self):
        """Fit the model, optionally print its summary, and show the residual plot."""
        # NOTE(review): load_boston may not exist in newer scikit-learn
        # releases — confirm against the installed version.
        y_X = skld.load_boston()
        y = pd.DataFrame(y_X['target'])
        X = pd.DataFrame(y_X['data'], columns=y_X['feature_names'])
        # Add the intercept column expected by statsmodels OLS.
        X = sm.add_constant(X)
        mod = sm.OLS(y, X)
        fit = mod.fit()
        if self.result == 1:
            print(fit.summary())
        # Residuals: observed target minus in-sample prediction.
        pred = pd.DataFrame(mod.predict(fit.params))
        errors = y - pred
        sns.distplot(errors)
        plt.show()
I also have another file, called test.py:
import linear_regression_example as lre
# Build the regression helper with its default argument (result=1) and run it.
test = lre.myclass()
test.myregression()
Running test.py in pycharm results in the output "Process finished with exit code 0" but no summary or plot is shown. Maybe someone here knows where the problem lies.
Best regards
Dominik
linear_regression_example.py
import statsmodels.api as sm
import sklearn.datasets as skld
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
class myclass:
    """Fit an OLS regression on the Boston housing data and plot its residuals.

    Args:
        result: Pass 1 (the default) to print the regression summary; any
            other value skips the printout.
    """

    def __init__(self, result=1):
        self.result = result

    def myregression(self):
        """Fit the model, optionally print its summary, and show the residual plot."""
        # NOTE(review): load_boston may not exist in newer scikit-learn
        # releases — confirm against the installed version.
        y_X = skld.load_boston()
        y = pd.DataFrame(y_X['target'])
        X = pd.DataFrame(y_X['data'], columns=y_X['feature_names'])
        # Add the intercept column expected by statsmodels OLS.
        X = sm.add_constant(X)
        mod = sm.OLS(y, X)
        fit = mod.fit()
        if self.result == 1:
            print(fit.summary())
        # Residuals: observed target minus in-sample prediction.
        pred = pd.DataFrame(mod.predict(fit.params))
        errors = y - pred
        sns.distplot(errors)
        plt.show()
if __name__ == '__main__':
    # Run the demo only when this module is executed directly, not when it
    # is imported by another script.
    test = myclass()
    test.myregression()
test.py
import linear_regression_example as lre
# Build the regression helper with its default argument (result=1) and run it.
test = lre.myclass()
test.myregression()
OUTPUT (from test.py)