I have this code for nsga3(evolutionary algorithm) but I get the error 'numpy.ndarray' object has no attribute 'fitness'.Generates reference points for NSGA-III selection. This code is based onjMetal NSGA-III implementation <https://github.com/jMetal/jMetal>_. Please help to remove this error
import copy
import random
import numpy as np
from deap import tools
class ReferencePoint(list): # A reference point exists in objective space an has a set of individuals associated with it
def __init__(self, *args):
list.__init__(self, *args)
self.associations_count = 0
self.associations = []
def generate_reference_points(num_objs, num_divisions_per_obj):
def gen_refs_recursive(work_point, num_objs, left, total, depth):
if depth == num_objs - 1:
work_point[depth] = left/total
ref = ReferencePoint(copy.deepcopy(work_point))
return [ref]
else:
res = []
for i in range(left):
work_point[depth] = i/total
res = res + gen_refs_recursive(work_point, num_objs, left-i, total, depth+1)
return res
print(gen_refs_recursive([0]*num_objs, num_objs, num_objs*num_divisions_per_obj,
num_objs*num_divisions_per_obj, 0))
def find_ideal_point(individuals):
'Finds the ideal point from a set individuals.'
current_ideal = [np.infty] * len(individuals[0].fitness.values) # Here th error is coming
for ind in individuals:
# Use wvalues to accomodate for maximization and minimization problems.
current_ideal = np.minimum(current_ideal,
np.multiply(ind.fitness.wvalues, -1))
print("Ideal POint is\n",current_ideal)
global individulas
individulas=np.random.rand(10,4)
generate_reference_points(2, 4)
find_ideal_point(individulas)
You can check how to prepare an input to find_ideal_point in this jupyter notebook. The implementation deals with records from deap.tools.Logbook which is "evolution records as a chronological list of dictionaries" not NumPy arrays.
Related
I tried to check the length of my training data to train the model but I got this error. I am implementing this in PyTorch. I have 3 main functions. dataset, extract beat and extract signal. can someone help to fix this issue, please?
This is my dataset class
class MyDataset(Dataset):
def __init__(self, patient_ids,bih2aami=True):#This method runs once when we call this class, and we pass the data or its references here with the label data.
self.patient_ids = patient_ids # list of patients ID
self.directory="C:\\Users\\User\\Downloads\\list\mit-bih-arrhythmia-database-1.0.0\\" # path
self.nb_qrs = 99 #number of beats extracted for each patient, found that each recording had at least 99 normal beats
self.idx_tuples = flatten([[(patient_idx, rpeak_idx) for rpeak_idx in range(self.nb_qrs)]
for patient_idx in range(len(patient_ids))])
self.bih2aami=bih2aami
#if bih2aami==True:
# self.y = self.bih2aami(self.y)
def __len__(self):#returns the size of the data set.
return len(self.idx_tuples) # length of the dataset
def __getitem__(self, idx): # get one sample from the dataset
patient_idx, rpeak_idx = self.idx_tuples[idx]
patient_id = self.patient_ids[patient_idx]
file = self.directory + patient_id
signal, normal_qrs_pos = get_signal(file)
qrs_pos = normal_qrs_pos[rpeak_idx]
beat, label = extract_beat(signal, qrs_pos)
#sample = {'signal': torch.tensor(beat).float(),
# 'label': torch.tensor(label).float()}
print(patient_id, patient_idx, beat.shape,label.shape) # bug : what if label null ??
X, y = torch.tensor(beat).float(), torch.tensor(label).float()
return X,y
Get signal function
def get_signal(file):
record = wfdb.rdrecord(file, channels=[0])
df = pd.DataFrame(record.p_signal, columns=record.sig_name)
lead = df.columns[0]
signal = df[lead] #getting the 1D signal
annotation = wfdb.rdann(file, 'atr') #getting the annotation
relabeled_ann = bih2lamedo(annotation.symbol)
annotations = pd.DataFrame(relabeled_ann,annotation.sample)
normal_qrs_pos = list(annotations[annotations[0]=='N'].index) #normal beats
#normal_qrs_pos = list(annotations[annotations[0]!='O'].index) #beats
#normal_qrs_pos = list(annotations.index) #normal beats
return signal, normal_qrs_pos
Get beat function
def extract_beat(signal, win_pos, qrs_positions, win_msec=40, fs=360, start_beat=36, end_beat=108):
"""
win_pos position at which you place the window of your beat
qrs_positions (list) the qrs indices from the annotations (read them from the atr file)-->obtained from annotation.sample
win_msec in milliseconds
"""
#extract signal
signal = np.array(signal)
#print(signal.shape)
#beat_array = np.zeros(start_beat+end_beat)#number of channels
start = int(max(win_pos-start_beat,0))
stop=start+start_beat+end_beat
#print(beat_array.shape,signal.shape)
beat = signal[start:stop]
#compute the nearest neighbor of win_pos among qrs_positions
tolerance = fs*win_msec//1000 #samples at a distance <tolrance are matched
nbr = NearestNeighbors(n_neighbors=1).fit(qrs_positions)
distances, indices = nbr.kneighbors(np.array([[win_pos]]).reshape(-1,1))
#label
if distances[0][0] <= tolerance:
label = 1
else:
label = 0
print(distances[0],tolerance,label)
return beat, label
I am attempting to look at conglomerated outlier information, utilizing several different SKLearn, HDBScan, and custom outlier detection classes. However, for some reason I am consistently running into an error where any class utilizing HDBScan cannot be iterated over. All other Sklearn and Custom classes can. The issue I am getting seems to consistently occur on the second pass of the HDBScan class and instantly happens upon algorithm.fit(tmp). Upon debugging the script, it looks like the error is thrown before even getting to the first line of the Class.
Any help? Below is the minimum viable reproduction:
import numpy as np
import pandas as pd
import hdbscan
from sklearn.datasets import make_blobs
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
class DBClass():
def __init__(self, random = None):
self.random = random
def fit(self, data):
self.train_data = data
cluster = hdbscan.HDBSCAN()
cluster.fit(self.train_data)
self.fit = cluster
def predict(self, data):
self.predict_data = data
if self.train_data.equals(self.predict_data):
return self.fit.probabilities_
def OutlierEnsemble(df, anomaly_algorithms = None, num_slices = 5, num_columns = 7, outliers_fraction = 0.05):
if isinstance(df, np.ndarray):
df = pd.DataFrame(df)
assert isinstance(df, pd.DataFrame)
if not anomaly_algorithms:
anomaly_algorithms = [
("Robust covariance",
EllipticEnvelope(contamination=outliers_fraction)),
("One-Class SVM",
OneClassSVM(nu=outliers_fraction,
kernel="rbf")),
("Isolation Forest",
IsolationForest(contamination=outliers_fraction)),
("HDBScan LOF",
DBClass()),
]
data = []
for i in range(1, num_slices + 1):
data.append(df.sample(n = num_columns, axis = 1, replace = False))
predictions = []
names = []
for tmp in data:
counter = 0
for name, algorithm in anomaly_algorithms:
algorithm.fit(tmp)
predictions.append(algorithm.predict(tmp))
counter += 1
names.append(f"{name}{counter}")
return predictions
blobs, labels = make_blobs(n_samples=3000, n_features=12)
OutlierEnsemble(blobs)
The error provided is not the most helpful.
Traceback (most recent call last):
File "<ipython-input-4-e1d4b63cfccd>", line 75, in <module>
OutlierEnsemble(blobs)
File "<ipython-input-4-e1d4b63cfccd>", line 66, in OutlierEnsemble
algorithm.fit(tmp)
TypeError: 'HDBSCAN' object is not callable
In your DBClass.fit, DBClass.fit is unintentionally redefined.
You could perhaps use something like,
class DBClass():
def __init__(self, random = None):
self.random = random
def fit(self, data):
self.train_data = data
cluster = hdbscan.HDBSCAN()
cluster.fit(self.train_data)
self.myfit = cluster # save calculated cluster
def predict(self, data):
self.predict_data = data
if self.train_data.equals(self.predict_data):
return self.myfit.probabilities_ # use calculated cluster
I am trying to run a NEAT algorithm using this python implementation. This is the original file from the library that is relevant for my question:
from neat.graphs import feed_forward_layers
class FeedForwardNetwork(object):
def __init__(self, inputs, outputs, node_evals):
self.input_nodes = inputs
self.output_nodes = outputs
self.node_evals = node_evals
self.values = dict((key, 0.0) for key in inputs + outputs)
def activate(self, inputs):
if len(self.input_nodes) != len(inputs):
raise RuntimeError("Expected {0:n} inputs, got {1:n}".format(len(self.input_nodes), len(inputs)))
for k, v in zip(self.input_nodes, inputs):
self.values[k] = v
for node, act_func, agg_func, bias, response, links in self.node_evals:
node_inputs = []
for i, w in links:
node_inputs.append(self.values[i] * w)
s = agg_func(node_inputs)
self.values[node] = act_func(bias + response * s)
return [self.values[i] for i in self.output_nodes]
#staticmethod
def create(genome, config):
""" Receives a genome and returns its phenotype (a FeedForwardNetwork). """
# Gather expressed connections.
connections = [cg.key for cg in genome.connections.values() if cg.enabled]
layers = feed_forward_layers(config.genome_config.input_keys, config.genome_config.output_keys, connections)
node_evals = []
for layer in layers:
for node in layer:
inputs = []
node_expr = [] # currently unused
for conn_key in connections:
inode, onode = conn_key
if onode == node:
cg = genome.connections[conn_key]
inputs.append((inode, cg.weight))
node_expr.append("v[{}] * {:.7e}".format(inode, cg.weight))
ng = genome.nodes[node]
aggregation_function = config.genome_config.aggregation_function_defs.get(ng.aggregation)
activation_function = config.genome_config.activation_defs.get(ng.activation)
node_evals.append((node, activation_function, aggregation_function, ng.bias, ng.response, inputs))
return FeedForwardNetwork(config.genome_config.input_keys, config.genome_config.output_keys, node_evals)
Since I evaluate the performance of my neural networks on a large dataset, I wanted to speed up the activate method using numba jit. In order to not fall back into numbas object mode I had to update the implementation of the activate method (and hence also the fields of the FeedForwardNetwork class) using only datatypes supported by numba. This is what I came up with (create is the same as before):
from neat.graphs import feed_forward_layers
from neat.six_util import itervalues
import numba
from numba import jit, njit
from numba.typed import List, Dict
import numpy as np
import math
#jit(nopython=True)
def activate(input_nodes, output_nodes, node_evals_node, node_evals_bias, node_evals_resp, node_evals_ins_nodes, node_evals_ins_conns, values, inputs):
for i in range(input_nodes.size):
values[input_nodes[i]] = inputs[i]
for node in range(len(node_evals_node)):
s = 0
for pred in range(len(node_evals_ins_nodes[node])):
s += values[node_evals_ins_nodes[node][pred]] * node_evals_ins_conns[node][pred]
values[node_evals_node[node]] = math.tanh(node_evals_bias[node] + node_evals_resp[node] * s)
return [values[output_nodes[i]] for i in range(output_nodes.size)]
class FeedForwardNetwork(object):
def __init__(self, inputs, outputs, node_evals):
self.input_nodes = np.array(inputs)
self.output_nodes = np.array(outputs)
# NODE_EVALS decomposition
self.node_evals_node = np.reshape(np.array(node_evals)[:, 0:1], (len(node_evals),)).astype(np.int64)
self.node_evals_bias = np.reshape(np.array(node_evals)[:, 3:4], (len(node_evals),)).astype(np.float64)
self.node_evals_resp = np.reshape(np.array(node_evals)[:, 4:5], (len(node_evals),)).astype(np.float64)
temp = np.array(node_evals)[:, 5:6]
self.node_evals_ins_nodes = List()
self.node_evals_ins_conns = List()
for node in range(temp.size):
l = List()
m = List()
for predecessor in range(len(temp[node])):
l.append(temp[0][node][predecessor][0])
m.append(temp[0][node][predecessor][1])
self.node_evals_ins_nodes.append(l)
self.node_evals_ins_conns.append(m)
self.values = Dict()
# Set types of dict
self.values[0] = float(1)
self.values.pop(0)
This is the code I call the create and activate method in:
def eval_single_genome(genome, config, thread_id, result):
net = neat.nn.FeedForwardNetwork.create(genome, config)
error_sum = 0
for i, row in PRICES.iterrows():
prediction = feed_forward.activate(net.input_nodes, net.output_nodes, net.node_evals_node, net.node_evals_bias, net.node_evals_resp, net.node_evals_ins_nodes, net.node_evals_ins_conns, net.values, np.array([0]))
error_sum += (prediction - PRICES.iloc[i]['open']) ** 2
result[thread_id] = error_sum
The code compiles and runs without errors or warnings which (as far as I've understood) indicates that numba should be able to optimize my implementation. But adding/removing the #jit(nopython=True)decorator doesn't change the runtime at all.
Did I overlook something? Or is there just nothing that numba can improve in my case?
I have set up zipline locally on PyCharm. The simulations work, moreover, I have access to premium data from quandl (which automatically updated when I entered my API key). However, now my question is, how do I make a pipeline locally using zipline.
Zipline's documentation is challenging. Zipline.io (as of 2021-0405) is also down. Fortunately, Blueshift has documentation and sample code that shows how to make a pipeline that can be run locally:
Blueshift sample pipeline code is here. (Pipelines library here.)
Zipline documentation can be accessed from MLTrading (archive documentation here) since though challenging it is still useful.
Full code of the pipeline sample code from Blueshift, but modified to run locally through PyCharm, is below the line. Please note as I'm sure you're already aware, the strategy is a bad strategy and you shouldn't trade on it. It does show local instantiations of pipelines though.
"""
Title: Classic (Pedersen) time-series momentum (equal weights)
Description: This strategy uses past returns and go long (short)
the positive (negative) n-percentile
Style tags: Momentum
Asset class: Equities, Futures, ETFs, Currencies
Dataset: All
"""
"""
Sources:
Overall Algorithm here:
https://github.com/QuantInsti/blueshift-demo-strategies/blob/master/factors/time_series_momentum.py
Custom (Ave Vol Filter, Period Returns) Functions Here:
https://github.com/QuantInsti/blueshift-demo-strategies/blob/master/library/pipelines/pipelines.py
"""
import numpy as np
from zipline.pipeline import CustomFilter, CustomFactor, Pipeline
from zipline.pipeline.data import EquityPricing
from zipline.api import (
order_target_percent,
schedule_function,
date_rules,
time_rules,
attach_pipeline,
pipeline_output,
)
def average_volume_filter(lookback, amount):
"""
Returns a custom filter object for volume-based filtering.
Args:
lookback (int): lookback window size
amount (int): amount to filter (high-pass)
Returns:
A custom filter object
Examples::
# from library.pipelines.pipelines import average_volume_filter
pipe = Pipeline()
volume_filter = average_volume_filter(200, 1000000)
pipe.set_screen(volume_filter)
"""
class AvgDailyDollarVolumeTraded(CustomFilter):
inputs = [EquityPricing.close, EquityPricing.volume]
def compute(self, today, assets, out, close_price, volume):
dollar_volume = np.mean(close_price * volume, axis=0)
high_volume = dollar_volume > amount
out[:] = high_volume
return AvgDailyDollarVolumeTraded(window_length=lookback)
def period_returns(lookback):
"""
Returns a custom factor object for computing simple returns over
period.
Args:
lookback (int): lookback window size
Returns:
A custom factor object.
Examples::
# from library.pipelines.pipelines import period_returns
pipe = Pipeline()
momentum = period_returns(200)
pipe.add(momentum,'momentum')
"""
class SignalPeriodReturns(CustomFactor):
inputs = [EquityPricing.close]
def compute(self, today, assets, out, close_price):
start_price = close_price[0]
end_price = close_price[-1]
returns = end_price / start_price - 1
out[:] = returns
return SignalPeriodReturns(window_length=lookback)
def initialize(context):
'''
A function to define things to do at the start of the strategy
'''
# The context variables can be accessed by other methods
context.params = {'lookback': 12,
'percentile': 0.1,
'min_volume': 1E7
}
# Call rebalance function on the first trading day of each month
schedule_function(strategy, date_rules.month_start(),
time_rules.market_close(minutes=1))
# Set up the pipe-lines for strategies
attach_pipeline(make_strategy_pipeline(context),
name='strategy_pipeline')
def strategy(context, data):
generate_signals(context, data)
rebalance(context, data)
def make_strategy_pipeline(context):
pipe = Pipeline()
# get the strategy parameters
lookback = context.params['lookback'] * 21
v = context.params['min_volume']
# Set the volume filter
volume_filter = average_volume_filter(lookback, v)
# compute past returns
momentum = period_returns(lookback)
pipe.add(momentum, 'momentum')
pipe.set_screen(volume_filter)
return pipe
def generate_signals(context, data):
try:
pipeline_results = pipeline_output('strategy_pipeline')
except:
context.long_securities = []
context.short_securities = []
return
p = context.params['percentile']
momentum = pipeline_results
long_candidates = momentum[momentum > 0].dropna().sort_values('momentum')
short_candidates = momentum[momentum < 0].dropna().sort_values('momentum')
n_long = len(long_candidates)
n_short = len(short_candidates)
n = int(min(n_long, n_short) * p)
if n == 0:
print("{}, no signals".format(data.current_dt))
context.long_securities = []
context.short_securities = []
context.long_securities = long_candidates.index[-n:]
context.short_securities = short_candidates.index[:n]
def rebalance(context, data):
# weighing function
n = len(context.long_securities)
if n < 1:
return
weight = 0.5 / n
# square off old positions if any
for security in context.portfolio.positions:
if security not in context.long_securities and \
security not in context.short_securities:
order_target_percent(security, 0)
# Place orders for the new portfolio
for security in context.long_securities:
order_target_percent(security, weight)
for security in context.short_securities:
order_target_percent(security, -weight)
I have a custom class in my Python code, that handles k-means clustering. The class takes some arguments to customize the clustering, however when subtracting two values from a list passed to the class, I get the following error:
Traceback (most recent call last):
File "/home/dev/PycharmProjects/KMeans/KMeansApplication.py", line 22, in <module>
application()
File "/home/dev/PycharmProjects/KMeans/KMeansApplication.py", line 16, in application
opt_num_clusters = cluster_calculator.calculate_optimum_clusters()
File "/home/dev/PycharmProjects/KMeans/ClusterCalculator.py", line 19, in calculate_optimum_clusters
self.init_opt_line()
File "/home/dev/PycharmProjects/KMeans/ClusterCalculator.py", line 33, in init_opt_line
self. m = (self.sum_squared_dist[0] - self.sum_squared_dist[1]) / (1 - self.calc_border)
TypeError: unsupported operand type(s) for -: 'KMeans' and 'KMeans'
Here is the code of my custom class:
import KMeansClusterer
from math import sqrt, fabs
from matplotlib import pyplot as plp
class ClusterCalculator:
m = 0
b = 0
sum_squared_dist = []
derivates = []
distances = []
line_coordinates = []
def __init__(self, calc_border, data):
self.calc_border = calc_border
self.data = data
def calculate_optimum_clusters(self):
self.calculate_squared_dist()
self.init_opt_line()
self.calc_distances()
self.calc_line_coordinates()
opt_clusters = self.get_optimum_clusters()
print("Evaluated", opt_clusters, "as optimum number of clusters")
return opt_clusters
def calculate_squared_dist(self):
for k in range(1, self.calc_border):
kmeans = KMeansClusterer.KMeansClusterer(k, self.data)
self.sum_squared_dist.append(kmeans.calc_custom_params(self.data, k))
def init_opt_line(self):
#here the error is thrown
self. m = (self.sum_squared_dist[0] - self.sum_squared_dist[1]) / (1 - self.calc_border)
self.b = (1 * self.sum_squared_dist[0] - self.calc_border*self.sum_squared_dist[0]) / (1 - self.calc_border)
def calc_y_value(self, x_calc):
return self.m * x_calc + self.b
def calc_line_coordinates(self):
for i in range(1, self.calc_border):
self.line_coordinates.append(self.calc_y_value(i))
def calc_distances(self):
for i in range(1, self.calc_border):
self.distances.append(sqrt(fabs(self.calc_y_value(i))))
print("For border", self.calc_border, ", calculated the following distances: \n", self.distances)
def get_optimum_clusters(self):
return self.distances.index((max(self.distances)))
def plot_results(self):
plp.plot(range(1, self.calc_border), self.sum_squared_dist, "bx-")
plp.plot(range(1, self.calc_border), self.line_coordinates, "bx-")
plp.xlabel("Number of clusters")
plp.ylabel("Sum of squared distances")
plp.show()
I append the KMeansClusterer as well, because sum_squared_dist is filled with values of there:
from sklearn.cluster import KMeans
from matplotlib import pyplot as plp
class KMeansClusterer:
def __init__(self, clusters, data):
self.clusters = clusters
self.data = data
def cluster(self):
kmeans = KMeans(n_clusters=self.cluster(), random_state=0).fit(self.data)
print("Clustered", len(kmeans.labels_), "GTINs")
for i, cluster_center in enumerate(kmeans.cluster_centers_):
plp.plot(cluster_center, label="Center {0}".format(i))
plp.legend(loc="best")
plp.show()
def calc_custom_params(self, data_frame, clusters):
kmeans = KMeans(n_clusters=clusters, random_state=0).fit(data_frame)
return kmeans
def cluster_without_plot(self):
return KMeans(n_clusters=self.cluster(), random_state=0).fit(self.data)
I cannot imagine why '-' should be unsupported, I trie to subtract two list values of type integer and 1 and a integer variable.
Python cannot automatically subtract classes. You need to implement the __sub__ method on your class for python to know how to handle subtracting these classes. You can find the full reference here https://docs.python.org/3/library/operator.html
KMeans.fit() returns a class instance, which implies calc_custom_params() returns a class instance, so your list sum_squared_dist does not contain integers, the elements are objects of the sklearn.cluster.KMeans class.