How to plot efficient frontier with pypfopt plotting - python

Hey guys, could someone help me?
I'm new here and also new to Python coding, so I'm at a beginner level.
I was trying to plot my efficient frontier using the pypfopt library, and I ran into trouble with the parameters of the Plotting.plot_efficient_frontier command that is used to plot the graph.
I added the picture of my code in the post
# Import the libraries
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import datetime as dt
# NOTE(review): this binds the top-level `matplotlib` package to the name
# `plt`, not `matplotlib.pyplot` -- confirm this is what was intended.
import matplotlib as plt
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
# Define the analysis period (1 Jan 2019 to 31 Dec 2019)
start = dt.datetime(2019,1,1)
end = dt.datetime(2019,12,31)
# Asset tickers
tickers = ["ITUB4.SA","B3SA3.SA","PETR4.SA","ABEV3.SA","ELET3.SA"]
# Download the data for every ticker from the "yahoo" source
carteira = web.DataReader(tickers,"yahoo",start,end)
# Create another DataFrame holding only the adjusted closing prices
prices = carteira['Adj Close']
# Expected returns (mean historical return) and
# covariance matrix (Ledoit-Wolf shrinkage estimate)
e_r = mean_historical_return(prices)
cov_matrix = CovarianceShrinkage(prices).ledoit_wolf()
from pypfopt.efficient_frontier import EfficientFrontier
# Set up the efficient-frontier problem and
# select the portfolio with the best Sharpe ratio
ef = EfficientFrontier(e_r, cov_matrix)
weights = ef.max_sharpe()
# NOTE(review): this capitalised import is the line the question is about;
# the answer below this snippet suggests the lowercase module name `plotting`.
from pypfopt import Plotting
Plotting.plot_efficient_frontier(ef ,points=100 , show_assets = True)
![screenshot of the code][1]
[1]: https://i.stack.imgur.com/NaeJI.png

Try the module name in lowercase, like:
from pypfopt import plotting

Related

How to plot scatterplot using matplotlib from arrays (using strings)? Python

I have been trying to plot a 3D scatterplot from a pandas array (I have tried to convert the data over to numpy arrays and strings to put into the system). However, the error ValueError: s must be a scalar, or float array-like with the same size as x and y keeps popping up. My data for Patient ID is in the format of EMR-001, EMR-002 etc after blanking it out. My data for Discharge Date is converted to become a string of numbers like 20200120. My data for Diagnosis Code is a mix of characters like 001 or 10B.
I have also looked online at some other examples but have not been able to identify the problem. Could I ask your advice on anything I missed, or on code I could add?
I'm using Python 3.9, UTF-8. Thank you in advance!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the CSV data with pandas
A = pd.read_csv('input.csv') #import file for current master list
# Pull out the three columns that are plotted below
Diagnosis_Des = A["Diagnosis Code"]
Discharge_Date = A["Discharge Date"]
Patient_ID = A["Patient ID"]
# Convert each column to a NumPy array
B = Diagnosis_Des.to_numpy()
#B1 = np.array2string(B)
#print(B.shape)
C = Discharge_Date.to_numpy() # author's TODO: needs converting (presumably to a date format)
#C1 = np.array2string(C)
#print(C1)
D = Patient_ID.to_numpy()
#D1 = np.array2string(D)
#print(D.shape)
from matplotlib import pyplot
# NOTE(review): Axes3D is imported but no 3-D axes are created in this snippet.
from mpl_toolkits.mplot3d import Axes3D
# x = patient IDs, y = diagnosis codes, z = discharge dates
sequence_containing_x_vals = D
sequence_containing_y_vals = B
print(type(sequence_containing_y_vals))
sequence_containing_z_vals = C
print(type(sequence_containing_z_vals))
# NOTE(review): pyplot's scatter is the 2-D call; its third positional
# argument is the marker size `s`, which matches the
# "s must be a scalar, or float array-like ..." ValueError quoted in the question.
plt.scatter(sequence_containing_x_vals, sequence_containing_y_vals, sequence_containing_z_vals)
pyplot.show()

'numpy.timedelta64' object is not iterable

import numpy as np
# `dsloc` is created elsewhere; it is not defined in this snippet.
base=dsloc.time.values
# Add every step offset to the base time. The comprehension iterates over
# dsloc.step.values -- presumably this is where the quoted
# "'numpy.timedelta64' object is not iterable" TypeError is raised.
time=np.array([base+np.timedelta64(step) for step in dsloc.step.values])
I was trying to run the time-series notebook at https://github.com/enyfeo/efas/blob/master/work/5_Timeseries.ipynb
I got the following error on the lines shown above: TypeError: 'numpy.timedelta64' object is not iterable
Can you help me? Thanks.
Edit;
import pandas as pd
import xarray as xr
import numpy as np
from random import sample
#%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plot
# Register pandas' converters with matplotlib
pd.plotting.register_matplotlib_converters()
# Read the station table from the Excel workbook
stations = pd.read_excel('C:/Users/90531/Desktop/Lisflood/KONYA_LONG_LAT_4digit.xlsx')
#station = stations.sample(n=1) # We can randomly choose a station
# Keep only the rows whose 'stname' column equals 300
station=stations[stations['stname'] == 300 ] # We have chosen a station for consistency
# Bare expression: displays the selection when run in a notebook cell
station
That works for me:
# Everything this snippet imports, gathered in one place.
from random import sample

import numpy as np
import pandas as pd
import xarray as xr

# Read the station table and keep the rows whose 'stname' is 300, so the
# same station is used on every run. (A random pick would be
# stations.sample(n=1).)
stations = pd.read_excel('KONYA_LONG_LAT_4digit.xlsx')
is_chosen = stations['stname'] == 300
station = stations[is_chosen]

# Open the NetCDF file and pick the grid point nearest to the station,
# using the LISFLOOD coordinates stored in the station table.
ds = xr.open_dataset('snow.nc')
dsloc = ds.sel(
    x=station.lat.values,
    y=station.long.values,
    method='nearest',
)

# Build the time axis: base time plus each step offset in turn.
base = dsloc.time.values
shifted = []
for offset in dsloc.step.values:
    shifted.append(base + np.timedelta64(offset))
time = np.array(shifted)
print(time)
output:
['2019-04-01T00:00:00.000000000' '2019-04-01T06:00:00.000000000'
'2019-04-01T12:00:00.000000000' '2019-04-01T18:00:00.000000000'
'2019-04-02T00:00:00.000000000' '2019-04-02T06:00:00.000000000'
'2019-04-02T12:00:00.000000000' '2019-04-02T18:00:00.000000000'
'2019-04-03T00:00:00.000000000' '2019-04-03T06:00:00.000000000'
'2019-04-03T12:00:00.000000000' '2019-04-03T18:00:00.000000000'
'2019-04-04T00:00:00.000000000' '2019-04-04T06:00:00.000000000'
'2019-04-04T12:00:00.000000000' '2019-04-04T18:00:00.000000000'
'2019-04-05T00:00:00.000000000' '2019-04-05T06:00:00.000000000'
'2019-04-05T12:00:00.000000000' '2019-04-05T18:00:00.000000000'
'2019-04-06T00:00:00.000000000' '2019-04-06T06:00:00.000000000'
'2019-04-06T12:00:00.000000000' '2019-04-06T18:00:00.000000000'
'2019-04-07T00:00:00.000000000' '2019-04-07T06:00:00.000000000'
'2019-04-07T12:00:00.000000000' '2019-04-07T18:00:00.000000000'
'2019-04-08T00:00:00.000000000' '2019-04-08T06:00:00.000000000'
'2019-04-08T12:00:00.000000000' '2019-04-08T18:00:00.000000000'
'2019-04-09T00:00:00.000000000' '2019-04-09T06:00:00.000000000'
'2019-04-09T12:00:00.000000000' '2019-04-09T18:00:00.000000000'
'2019-04-10T00:00:00.000000000' '2019-04-10T06:00:00.000000000'
'2019-04-10T12:00:00.000000000' '2019-04-10T18:00:00.000000000'
'2019-04-11T00:00:00.000000000']
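So where is the problem? In your NetCDF file, dsloc.step.values is not a vector but a single numpy.timedelta64, so it cannot be iterated; the list comprehension only works when it is a vector.
How to fix it? Check the type of steps first. Try this: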
from random import sample

import numpy as np
import pandas as pd
import xarray as xr

stations = pd.read_excel('KONYA_LONG_LAT_4digit.xlsx')
# station = stations.sample(n=1)  # alternative: choose a station at random
station = stations[stations['stname'] == 300]  # fixed station, for consistency

# ds = xr.open_dataset('snow.nc')
ds = xr.open_dataset('adaptor.efas_mars.external-1615983508.657324-23066-19-648c63b0-a6b0-4568-8970-d0f966ff16a2.nc')
# Extract the data for the selected point in the NetCDF file using the
# LISFLOOD coordinates of the station.
dsloc = ds.sel(x=station.lat.values, y=station.long.values, method='nearest')

base = dsloc.time.values
steps = dsloc.step.values
if isinstance(steps, np.timedelta64):
    # A single step comes back as one numpy.timedelta64, which cannot be
    # iterated; wrap the one resulting timestamp in a list instead.
    time = np.array([base + np.timedelta64(steps)])
else:
    # Several steps: add each offset to the base time.
    time = np.array([base + np.timedelta64(step) for step in steps])
print(time)

How to know the order of eigen values in PCA

I performed a PCA in Python and got the eigenvalues, but I don't know which variables of my dataset are represented in the components.
Is there a way to know which component represents each variable of my data?
For example: 4.669473069609005 corresponds to sillas, etc.
here is the file:
https://storage.googleapis.com/min_ambiente/servi_acc/datos.csv
here is the code:
# Some of these libraries are only needed for other methods I implemented here.
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from google.cloud import bigquery
from sklearn.preprocessing import StandardScaler
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
from factor_analyzer.factor_analyzer import calculate_kmo
from factor_analyzer import FactorAnalyzer
# IPython/Jupyter magic (not plain Python): render figures inline
%matplotlib inline
# Load the CSV: upload it through Colab's file dialog, then read it
from google.colab import files
uploaded = files.upload()
data = pd.read_csv("datos.csv")
# Replace missing values with 0, in place
data.fillna(0, inplace=True)
# a = number of rows, b = number of columns
a,b = data.shape
# X = every column except the last one
X= data.iloc[:,0:b-1]
X.head()
enter image description here
#####################################################
### Standardize and build the covariance matrix   ###
#####################################################
# Standardize the features by removing the mean and scaling to unit
# variance; fit_transform both fits the scaler and returns the
# transformed data set.
X_std = StandardScaler().fit_transform(X)
mean_vec = np.mean(X_std, axis=0)
# Covariance matrix computed by hand ...
cov_mat = (X_std - mean_vec).T.dot((X_std - mean_vec)) / (X_std.shape[0]-1)
### Eigenvalues and eigenvectors obtained from the covariance matrix
# ... and immediately overwritten by NumPy's own covariance estimate.
cov_mat = np.cov(X_std.T)
eig_vals, eig_vecs = np.linalg.eig(cov_mat)
# NOTE(review): lst1 and lst2 are not defined anywhere in this snippet.
dictionary = dict(zip(lst2, lst1))
print(dictionary)
### Print from the highest to the lowest
# NOTE(review): eig_pairs is not defined anywhere in this snippet either;
# presumably a list of (eigenvalue, eigenvector) pairs -- confirm.
eig_pairs.sort()
eig_pairs.reverse()
print('eigenvalues in descending order :')
for i in eig_pairs:
print(i[0])

How to Identify p (lag order) for ARIMA Model in Python

here is my auto correlation plot.
Generated by the following python code.
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA
def display_corelation(series):
    """Draw the autocorrelation plot of *series* and show the figure.

    Args:
        series: the time series handed to pandas' ``autocorrelation_plot``.
    """
    # Imported here because the snippet never brings pyplot into scope.
    import matplotlib.pyplot as plt

    autocorrelation_plot(series)
    plt.show()
I know I can pass 1 or 2 as p for the ARIMA model by looking at the plot.
My question is how I can compute the p value:
how can I calculate the lag order of a series with pandas or another library, instead of reading it off a plot as I do at the moment?
# statsmodels' ARIMA takes order=(p, d, q); the lag order p is hard-coded to 1 here.
model = ARIMA(history, order=(1,1,0))
I got it with the following code:
import pandas as pd


def find_best_lag(series, max_lag=9):
    """Return ``(lag, correlation)`` for the lag with the highest autocorrelation.

    Lags ``1..max_lag`` are scanned with ``Series.autocorr``. Only a
    correlation strictly greater than the best one seen so far replaces it,
    so ties keep the smallest lag.

    Args:
        series: a pandas Series holding the time series.
        max_lag: largest lag to try (inclusive); 9 matches the original
            hard-coded ``range(1, 10)``.

    Returns:
        ``(0, 0)`` when no lag has a positive autocorrelation, otherwise the
        winning lag and its correlation.
    """
    best_lag = 0
    best_corr = 0
    for lag in range(1, max_lag + 1):
        corr = series.autocorr(lag=lag)
        # A NaN correlation compares False here and is therefore skipped.
        if corr > best_corr:
            best_corr = corr
            best_lag = lag
    return best_lag, best_corr


# Same module-level result names as the original snippet.
# NOTE(review): ARIMA's p is usually read from the partial autocorrelation
# (PACF) rather than the raw autocorrelation -- confirm this heuristic fits.
k, highestCorr = find_best_lag(series)

Inference from PCA (Using Python) on Oil Stock Returns

I was interested in finding out if there are a few common factors that have been driving the returns in individual stocks in the U.S. Oil sector, sourcing the daily closing prices from Yahoo!. Below is the (fairly simple) Python code I used, with the final result (the graph) presented all the way down below showing the top 2 PCs (as they seem to explain most of the variance). Problem is I can't make heads or tails of the plot. Not sure what can be inferred at all from this! Anyone willing to take a stab and/or point out the ugliness in my implementation? :-)
Sincerely.
import pandas as pd
from pandas_datareader import data as web
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statistics as st
%matplotlib inline
# Tickers of the U.S. oil-sector stocks to analyse.
symbols = [
    'XOM', 'CVX', 'SLB', 'PXD', 'EOG', 'OXY', 'HAL', 'KMI', 'SE', 'PSX',
    'VLO', 'COP', 'APC', 'TSO', 'WMB', 'BHI', 'APA', 'COG', 'DVN', 'MPC',
    'NBL', 'CXO', 'NOV', 'HES', 'MRO', 'EQT', 'XEC', 'FTI', 'RRC', 'OKE',
    'SWN', 'NFX', 'HP', 'MUR', 'CHK', 'RIG', 'DO',
]

# Use the prices cached in the local HDF5 store when they are available;
# otherwise download them and write the cache for the next run.
try:
    # The context manager closes the store even when the lookup fails, so the
    # fallback below never reopens a file that is still open.
    with pd.HDFStore('port.h13') as h13:
        data = h13['norm']
except Exception:
    # Reading the cache is best effort: any failure (missing key, unreadable
    # file, ...) falls back to a fresh download. Unlike a bare `except:`,
    # this does not swallow KeyboardInterrupt or SystemExit.
    data = pd.DataFrame()
    for sym in symbols:
        data[sym] = web.DataReader(sym, data_source='yahoo', start='01/01/2016')['Adj Close']
    # Keep only the dates on which every symbol has a price.
    data = data.dropna()
    with pd.HDFStore('port.h13') as h13:
        h13['norm'] = data
# Rebase every price series to 100 at the first observation and plot them.
# (.iloc[0] replaces .ix[0], which no longer exists in current pandas.)
(data / data.iloc[0] * 100).plot(figsize=(8, 6), grid=True)

# Daily log returns; the first row is NaN (no previous price) and is dropped.
log_returns = np.log(data / data.shift(1))
log_returns.hist(bins=50, figsize=(9, 6))
lr = log_returns.iloc[1:]
sigma = np.cov(lr, rowvar=False)

# Standardize each stock's returns: subtract the column mean and divide by
# the column's sample standard deviation (ddof=1, as statistics.stdev did).
lr_list = lr.T.values
myu_list = lr.mean(axis=0).tolist()
std_list = lr.std(axis=0, ddof=1).tolist()
# .values replaces .as_matrix(), which no longer exists in current pandas.
lr_norm = (lr.values - myu_list) / std_list

# Covariance of the standardized returns (columns are the stocks).
lr_cov = np.cov(lr_norm, rowvar=False, ddof=0)

# The covariance matrix is symmetric, so use eigh: real eigenvalues, with the
# eigenvectors as the COLUMNS of eig_vec. Reorder so that the largest
# eigenvalue (most variance explained) comes first.
eig_val, eig_vec = np.linalg.eigh(lr_cov)
order = np.argsort(eig_val)[::-1]
eig_val = eig_val[order]
eig_vec = eig_vec[:, order]

# Project the standardized returns onto the leading principal components.
# The original sliced eig_vec[0:3], i.e. the first three ROWS, which are not
# eigenvectors. Three components are kept as in the original slice;
# NOTE(review): the question text mentions the top two -- adjust if wanted.
n_components = 3
plt.plot(lr_norm.dot(eig_vec[:, :n_components]))

Categories

Resources