I'm new to Python. I'm trying to tokenize and stem tweets to create a model, then use GridSearchCV to find the optimal hyperparameters. I'm open to any kind of feedback.
This is my code:
import nltk
nltk.download("stopwords")
from nltk.corpus import stopwords
spanish_stopwords = stopwords.words('spanish')
from string import punctuation
non_words = list(punctuation)
# add Spanish punctuation marks and the digits 0-9 to the characters we strip
non_words.extend(['¿', '¡'])
non_words.extend(map(str, range(10)))
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
stemmer = SnowballStemmer('spanish')
def stem_tokens(tokens, stemmer):
    stemmed = []
    for item in tokens:
        stemmed.append(stemmer.stem(item))
    return stemmed

def tokenize(text):
    # remove non-letters
    text = ''.join([c for c in text if c not in non_words])
    # tokenize
    tokens = word_tokenize(text)
    # stem
    try:
        stems = stem_tokens(tokens, stemmer)
    except Exception as e:
        print(e)
        print(text)
        stems = ['']
    return stems
from sklearn.cross_validation import cross_val_score
from sklearn.svm import LinearSVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
tweets_corpus = tweets_corpus[tweets_corpus.polarity != 'NEU']
tweets_corpus['polarity_bin'] = 0
tweets_corpus.polarity_bin[tweets_corpus.polarity.isin(['P', 'P+'])] = 1
print(tweets_corpus.polarity_bin.value_counts(normalize=True))
if __name__ == '__main__':
    import tokenize
    vectorizer = CountVectorizer(
        analyzer='word',
        tokenizer=tokenize,
        lowercase=True,
        stop_words=spanish_stopwords)
    pipeline = Pipeline([
        ('vect', vectorizer),
        ('cls', LinearSVC()),
    ])
    parameters = {
        'vect__max_df': (0.5, 1.9),
        'vect__min_df': (10, 20, 50),
        'vect__max_features': (500, 1000),
        'vect__ngram_range': ((1, 1), (1, 2)),  # unigrams or bigrams
        'cls__C': (0.2, 0.5, 0.7),
        'cls__loss': ('hinge', 'squared_hinge'),
        'cls__max_iter': (500, 1000)
    }
    from time import time
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, scoring='roc_auc')
    print("Performing grid search...")
    print("pipeline:", [name for name, _ in pipeline.steps])
    grid_search.fit(tweets_corpus.content, tweets_corpus.polarity_bin)
    print(grid_search.best_params_)
    t0 = time()
    print("done in %0.3fs" % (time() - t0))
This is a sample of the data I'm working with:
Name: polarity_bin, dtype: float64
agreement \
270 NaN
208 NaN
902 NaN
31056 NaN
1158 NaN
content \
270 #revolucion2017 #Pablo_Iglesias_ Cultura es reflexionar sobre algo q ha dicho alguien y si te gusta hacerlo tuyo.pq no?
208 #_UnaOpinionMas_ #PPopular En eso estoi de acuerdo por lo menos al PP se le ve que hace cosas y contara d nuevo cn mi voto como siempre.
902 "Grande Casillas : ""Esta victoria no solo es nuestra sino también de Jesé ."""
31056 ¿Querían que Contador analizara cualquier cosa que fuera a tomar o que la vomitara meses después para mandarla al puto laboratorio?
1158 Eliminados de champion , van terceros en la Liga y pierden la final copa del Rey , PURO REAL MADRID
polarity polarity_bin
270 P 1
208 P 1
902 P 1
31056 N 0
1158 N 0
And this is the error:
TypeError Traceback (most recent call last)
<ipython-input-9-7c9b6a1bac93> in <module>()
201 print("Performing grid search...")
202 print("pipeline:", [name for name, _ in pipeline.steps])
--> 203 grid_search.fit(tweets_corpus.content, tweets_corpus.polarity_bin)
204 print(grid_search.best_params_)
205 t0 = time()
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\grid_search.py in fit(self, X, y)
802
803 """
--> 804 return self._fit(X, y, ParameterGrid(self.param_grid))
805
806
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\grid_search.py in _fit(self, X, y, parameter_iterable)
539 n_candidates * len(cv)))
540
--> 541 base_estimator = clone(self.estimator)
542
543 pre_dispatch = self.pre_dispatch
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
49 new_object_params = estimator.get_params(deep=False)
50 for name, param in six.iteritems(new_object_params):
---> 51 new_object_params[name] = clone(param, safe=False)
52 new_object = klass(**new_object_params)
53 params_set = new_object.get_params(deep=False)
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in <listcomp>(.0)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in <listcomp>(.0)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
49 new_object_params = estimator.get_params(deep=False)
50 for name, param in six.iteritems(new_object_params):
---> 51 new_object_params[name] = clone(param, safe=False)
52 new_object = klass(**new_object_params)
53 params_set = new_object.get_params(deep=False)
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
---> 42 return copy.deepcopy(estimator)
43 else:
44 raise TypeError("Cannot clone object '%s' (type %s): "
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
180 raise Error(
181 "un(deep)copyable object of type %s" % cls)
--> 182 y = _reconstruct(x, rv, 1, memo)
183
184 # If is its own copy, don't memoize.
C:\Users\Miguel\Anaconda3\lib\copy.py in _reconstruct(x, info, deep, memo)
296 if state:
297 if deep:
--> 298 state = deepcopy(state, memo)
299 if hasattr(y, '__setstate__'):
300 y.__setstate__(state)
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
C:\Users\Miguel\Anaconda3\lib\copy.py in _deepcopy_dict(x, memo)
242 memo[id(x)] = y
243 for key, value in x.items():
--> 244 y[deepcopy(key, memo)] = deepcopy(value, memo)
245 return y
246 d[dict] = _deepcopy_dict
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
180 raise Error(
181 "un(deep)copyable object of type %s" % cls)
--> 182 y = _reconstruct(x, rv, 1, memo)
183
184 # If is its own copy, don't memoize.
C:\Users\Miguel\Anaconda3\lib\copy.py in _reconstruct(x, info, deep, memo)
296 if state:
297 if deep:
--> 298 state = deepcopy(state, memo)
299 if hasattr(y, '__setstate__'):
300 y.__setstate__(state)
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
C:\Users\Miguel\Anaconda3\lib\copy.py in _deepcopy_dict(x, memo)
242 memo[id(x)] = y
243 for key, value in x.items():
--> 244 y[deepcopy(key, memo)] = deepcopy(value, memo)
245 return y
246 d[dict] = _deepcopy_dict
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
172 reductor = getattr(x, "__reduce_ex__", None)
173 if reductor:
--> 174 rv = reductor(4)
175 else:
176 reductor = getattr(x, "__reduce__", None)
TypeError: cannot serialize '_io.TextIOWrapper' object
Thanks for your time.
By the way, I'm working on Windows 10 and have all the tools up to date.
Related
After creating a classification model I need to use k-fold cross-validation, but I keep getting this error: AttributeError: 'Adam' object has no attribute 'build'.
from scikeras.wrappers import KerasClassifier
keras_clf = KerasClassifier(model = model, optimizer="adam", epochs=100, verbose=0)
model_kResults = cross_validation(keras_clf, X, y, 5)
print(model_kResults)
print("Mean Validation Accuracy:", model_kResults["Mean Validation Accuracy"])
print("Mean Validation F1 Score:",model_kResults["Mean Validation F1 Score"])
How can I resolve this? You can find the full error below:
in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
265 # independent, and that it is pickle-able.
266 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
--> 267 results = parallel(
268 delayed(_fit_and_score)(
269 clone(estimator),
/usr/local/lib/python3.8/dist-packages/joblib/parallel.py in __call__(self, iterable)
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1087
/usr/local/lib/python3.8/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
871 big_batch_size = batch_size * n_jobs
872
--> 873 islice = list(itertools.islice(iterator, big_batch_size))
874 if len(islice) == 0:
875 return False
/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py in <genexpr>(.0)
267 results = parallel(
268 delayed(_fit_and_score)(
--> 269 clone(estimator),
270 X,
271 y,
/usr/local/lib/python3.8/dist-packages/sklearn/base.py in clone(estimator, safe)
84 new_object_params = estimator.get_params(deep=False)
85 for name, param in new_object_params.items():
---> 86 new_object_params[name] = clone(param, safe=False)
87 new_object = klass(**new_object_params)
88 params_set = new_object.get_params(deep=False)
/usr/local/lib/python3.8/dist-packages/sklearn/base.py in clone(estimator, safe)
65 elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
66 if not safe:
---> 67 return copy.deepcopy(estimator)
68 else:
69 if isinstance(estimator, type):
/usr/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
151 copier = getattr(x, "__deepcopy__", None)
152 if copier is not None:
--> 153 y = copier(memo)
154 else:
155 reductor = dispatch_table.get(cls)
/usr/local/lib/python3.8/dist-packages/scikeras/_saving_utils.py in deepcopy_model(model, memo)
81 def deepcopy_model(model: keras.Model, memo: Dict[Hashable, Any]) -> keras.Model:
82 _, (model_bytes,) = pack_keras_model(model)
---> 83 new_model = unpack_keras_model(model_bytes)
84 memo[model] = new_model
85 return new_model
/usr/local/lib/python3.8/dist-packages/scikeras/_saving_utils.py in unpack_keras_model(packed_keras_model)
51 model: keras.Model = load_model(temp_dir)
52 model.load_weights(temp_dir)
---> 53 model.optimizer.build(model.trainable_variables)
54 return model
55
/usr/local/lib/python3.8/dist-packages/keras/optimizer_v2/optimizer_v2.py in __getattribute__(self, name)
843 if name in self._hyper:
844 return self._get_hyper(name)
--> 845 raise e
846
847 def __dir__(self):
/usr/local/lib/python3.8/dist-packages/keras/optimizer_v2/optimizer_v2.py in __getattribute__(self, name)
833 """Overridden to support hyperparameter access."""
834 try:
--> 835 return super(OptimizerV2, self).__getattribute__(name)
836 except AttributeError as e:
837 # Needed to avoid infinite recursion with __setattr__.
It seems that the program is trying to create a deep copy of a Keras model with copy.deepcopy, but the model doesn't have the __deepcopy__ attribute, and that is the reason for the error. But I cannot understand what I'm missing, because it worked until today...
Change your TensorFlow version to 2.11.0.
I did, and that fixed it.
This appears to be an issue with how Keras is imported.
First, make sure you've got TensorFlow version 2.11.0 and that you're importing Keras from there.
>>> !pip install tensorflow==2.11.0
>>> import tensorflow as tf
Then pass the Adam optimizer from tf.keras as the optimizer argument to the KerasClassifier class:
keras_clf = KerasClassifier(model = model, optimizer=tf.keras.optimizers.Adam(), epochs=100, verbose=0)
ETA: This is an answer to a similar question, and includes a solution that works with Tensorflow 2.9
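For completeness, here's a minimal sketch of how those pieces fit together, assuming a simple binary-classification model and using sklearn's cross_val_score in place of the custom cross_validation helper from the question (build_model, X and y are placeholders, not names from the original code):

import tensorflow as tf
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score

def build_model():
    # placeholder architecture; the real model comes from the question
    return tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

# pass an optimizer *instance* from tf.keras instead of the string "adam"
keras_clf = KerasClassifier(
    model=build_model,
    optimizer=tf.keras.optimizers.Adam(),
    loss="binary_crossentropy",
    epochs=100,
    verbose=0,
)

# cross_val_score clones the estimator for every fold, which is the step
# that was failing with "'Adam' object has no attribute 'build'"
scores = cross_val_score(keras_clf, X, y, cv=5)
print("Mean validation accuracy:", scores.mean())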
Anytime I use .predict or fit_predict, I encounter the same error. The following code works for others, and I'm guessing the problem has to do with my environment or my particular packages. I'd really appreciate any pointers. I have tried the following so far:
restarting the computer, rebooting the kernel multiple times, and checking the package installations:
import numpy as np
from sklearn.mixture import GaussianMixture

# X is the feature matrix defined earlier in the notebook
n_components = np.arange(1, 21)
BIC_scores = np.zeros(n_components.shape)
for i, n in enumerate(n_components):
    model = GaussianMixture(n_components=n, random_state=0)
    model.fit_predict(X)
    BIC_scores[i] = model.bic(X)
AttributeError Traceback (most recent call last)
<ipython-input-43-1521cb235579> in <module>
4 for i, n in enumerate(n_components):
5 model = GaussianMixture(n_components=n, random_state=0)
----> 6 model.fit_predict(X)
7 BIC_scores[i] = model.bic(X)
~\Anaconda3\lib\site-packages\sklearn\mixture\_base.py in fit_predict(self, X, y)
249
250 if do_init:
--> 251 self._initialize_parameters(X, random_state)
252
253 lower_bound = -np.inf if do_init else self.lower_bound_
~\Anaconda3\lib\site-packages\sklearn\mixture\_base.py in _initialize_parameters(self, X, random_state)
141 resp = np.zeros((n_samples, self.n_components))
142 label = (
--> 143 cluster.KMeans(
144 n_clusters=self.n_components, n_init=1, random_state=random_state
145 )
~\Anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py in fit(self, X, y, sample_weight)
1169 if self._algorithm == "full":
1170 kmeans_single = _kmeans_single_lloyd
-> 1171 self._check_mkl_vcomp(X, X.shape[0])
1172 else:
1173 kmeans_single = _kmeans_single_elkan
~\Anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py in _check_mkl_vcomp(self, X, n_samples)
1026 active_threads = int(np.ceil(n_samples / CHUNK_SIZE))
1027 if active_threads < self._n_threads:
-> 1028 modules = threadpool_info()
1029 has_vcomp = "vcomp" in [module["prefix"] for module in modules]
1030 has_mkl = ("mkl", "intel") in [
~\Anaconda3\lib\site-packages\sklearn\utils\fixes.py in threadpool_info()
323 return controller.info()
324 else:
--> 325 return threadpoolctl.threadpool_info()
326
327
~\Anaconda3\lib\site-packages\threadpoolctl.py in threadpool_info()
122 In addition, each module may contain internal_api specific entries.
123 """
--> 124 return _ThreadpoolInfo(user_api=_ALL_USER_APIS).todicts()
125
126
~\Anaconda3\lib\site-packages\threadpoolctl.py in __init__(self, user_api, prefixes, modules)
338
339 self.modules = []
--> 340 self._load_modules()
341 self._warn_if_incompatible_openmp()
342 else:
~\Anaconda3\lib\site-packages\threadpoolctl.py in _load_modules(self)
371 self._find_modules_with_dyld()
372 elif sys.platform == "win32":
--> 373 self._find_modules_with_enum_process_module_ex()
374 else:
375 self._find_modules_with_dl_iterate_phdr()
~\Anaconda3\lib\site-packages\threadpoolctl.py in _find_modules_with_enum_process_module_ex(self)
483
484 # Store the module if it is supported and selected
--> 485 self._make_module_from_path(filepath)
486 finally:
487 kernel_32.CloseHandle(h_process)
~\Anaconda3\lib\site-packages\threadpoolctl.py in _make_module_from_path(self, filepath)
513 if prefix in self.prefixes or user_api in self.user_api:
514 module_class = globals()[module_class]
--> 515 module = module_class(filepath, prefix, user_api, internal_api)
516 self.modules.append(module)
517
~\Anaconda3\lib\site-packages\threadpoolctl.py in __init__(self, filepath, prefix, user_api, internal_api)
604 self.internal_api = internal_api
605 self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
--> 606 self.version = self.get_version()
607 self.num_threads = self.get_num_threads()
608 self._get_extra_info()
~\Anaconda3\lib\site-packages\threadpoolctl.py in get_version(self)
644 lambda: None)
645 get_config.restype = ctypes.c_char_p
--> 646 config = get_config().split()
647 if config[0] == b"OpenBLAS":
648 return config[1].decode("utf-8")
AttributeError: 'NoneType' object has no attribute 'split'
I am attempting to assign binary values to 10 labels using 3 features: the headline of an article, a summary of the article, and the id of whoever created the labels. I'm stuck trying to create a model that can accept all 3 fields as input; currently it only works if I pass just one field. I know I am likely messing something up with the TfidfVectorizer, but I can't quite figure it out. Any help would be appreciated. The error I receive (full traceback below) is usually:
ValueError: Found input variables with inconsistent numbers of samples: [3, 75897].
screenshot of dataframe
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
import re
import string
df = pd.read_csv('../data/homework_clean.csv')
emotion_cols = ['emotion_0', 'emotion_1', 'emotion_2', 'emotion_3', 'emotion_4', 'emotion_5', 'emotion_6', 'emotion_7', 'emotion_8', 'emotion_9']
def removeStopWords(sentence):
    global re_stop_words
    return re_stop_words.sub(" ", sentence)

def stemming(sentence):
    stemSentence = ""
    for word in sentence.split():
        stem = stemmer.stem(word)
        stemSentence += stem
        stemSentence += " "
    stemSentence = stemSentence.strip()
    return stemSentence
df['headline'] = df['headline'].str.lower()
df['headline'] = df['headline'].str.replace(r'[^\w\s]+', '')
df['summary'] = df['summary'].str.lower()
df['summary'] = df['summary'].str.replace(r'[^\w\s]+', '')
stop_words = set(stopwords.words('english'))
re_stop_words = re.compile(r"\b(" + "|".join(stop_words) + ")\\W", re.I)
df['headline'] = df['headline'].apply(removeStopWords)
df['summary'] = df['summary'].apply(removeStopWords)
stemmer = SnowballStemmer('english')
df['headline'] = df['headline'].apply(stemming)
df['summary'] = df['summary'].apply(stemming)
from sklearn.model_selection import train_test_split
train, test = train_test_split(df, random_state = 42, test_size = .2, shuffle = True)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import FeatureUnion
vectorizer = FeatureUnion([
    ('headline', TfidfVectorizer(strip_accents='unicode', analyzer='word', ngram_range=(1, 2), norm='l2')),
    ('summary', TfidfVectorizer(strip_accents='unicode', analyzer='word', ngram_range=(1, 2), norm='l2'))])
x_train = train[['headline', 'summary', 'worker_id']]
y_train = train.drop(labels = ['headline', 'summary', 'worker_id'], axis=1)
x_test = test[['headline', 'summary', 'worker_id']]
y_test = test.drop(labels = ['headline', 'summary', 'worker_id'], axis=1)
# IF I only use one feature it works fine.
# x_train = train['headline']
# y_train = train.drop(labels = ['headline', 'summary', 'worker_id'], axis=1)
# x_test = test['headline']
# y_test = test.drop(labels = ['headline', 'summary', 'worker_id'], axis=1)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss
from sklearn.multiclass import OneVsRestClassifier
OneVsRest_pipeline = Pipeline(steps=[
    ('featureunion', vectorizer),
    ('clf', OneVsRestClassifier(LogisticRegression(solver='sag'))),
])
OneVsRest_pipeline.fit(x_train, y_train)
predictions = OneVsRest_pipeline.predict(x_test)
prediction_prob = OneVsRest_pipeline.predict_proba(x_test)
Full Traceback
ValueError Traceback (most recent call last)
<ipython-input-27-6394288c65f8> in <module>
4 ])
5
----> 6 OneVsRest_pipeline.fit(x_train, y_train)
7 predictions = OneVsRest_pipeline.predict(x_test)
8 prediction_prob = OneVsRest_pipeline.predict_proba(x_test)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
354 self._log_message(len(self.steps) - 1)):
355 if self._final_estimator != 'passthrough':
--> 356 self._final_estimator.fit(Xt, y, **fit_params)
357 return self
358
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\multiclass.py in fit(self, X, y)
214 "not %s" % self.label_binarizer_.classes_[i],
215 self.label_binarizer_.classes_[i]])
--> 216 for i, column in enumerate(columns))
217
218 return self
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
C:\ProgramData\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
C:\ProgramData\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\multiclass.py in _fit_binary(estimator, X, y, classes)
78 else:
79 estimator = clone(estimator)
---> 80 estimator.fit(X, y)
81 return estimator
82
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py in fit(self, X, y, sample_weight)
1530
1531 X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C",
-> 1532 accept_large_sparse=solver != 'liblinear')
1533 check_classification_targets(y)
1534 self.classes_ = np.unique(y)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
727 y = y.astype(np.float64)
728
--> 729 check_consistent_length(X, y)
730
731 return X, y
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
203 if len(uniques) > 1:
204 raise ValueError("Found input variables with inconsistent numbers of"
--> 205 " samples: %r" % [int(l) for l in lengths])
206
207
This is a follow-up to this question. I am trying to utilize 8 GPUs for training and am using multi_gpu_model from Keras. I specified a batch size of 128, which is split among the 8 GPUs, resulting in 16 per GPU. Now, with this configuration, I get the following error:
Train on 6120 samples, validate on 323 samples
Epoch 1/100
6120/6120 [==============================] - 42s 7ms/step - loss: 0.0996 - mean_iou: 0.6919 - val_loss: 0.0969 - val_mean_iou: 0.7198
Epoch 00001: val_loss improved from inf to 0.09686, saving model to test.h5
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-16-00e92d5b765a> in <module>()
3 checkpointer = ModelCheckpoint('test.h5', verbose=1, save_best_only=True)
4 results = parallel_model.fit(X_train, Y_train, validation_split=0.05, batch_size = 128, verbose=1, epochs=100,
----> 5 callbacks=[earlystopper, checkpointer])
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1703 initial_epoch=initial_epoch,
1704 steps_per_epoch=steps_per_epoch,
-> 1705 validation_steps=validation_steps)
1706
1707 def evaluate(self, x=None, y=None,
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
1254 for l, o in zip(out_labels, val_outs):
1255 epoch_logs['val_' + l] = o
-> 1256 callbacks.on_epoch_end(epoch, epoch_logs)
1257 if callback_model.stop_training:
1258 break
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/callbacks.py in on_epoch_end(self, epoch, logs)
75 logs = logs or {}
76 for callback in self.callbacks:
---> 77 callback.on_epoch_end(epoch, logs)
78
79 def on_batch_begin(self, batch, logs=None):
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/callbacks.py in on_epoch_end(self, epoch, logs)
445 self.model.save_weights(filepath, overwrite=True)
446 else:
--> 447 self.model.save(filepath, overwrite=True)
448 else:
449 if self.verbose > 0:
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/topology.py in save(self, filepath, overwrite, include_optimizer)
2589 """
2590 from ..models import save_model
-> 2591 save_model(self, filepath, overwrite, include_optimizer)
2592
2593 def save_weights(self, filepath, overwrite=True):
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py in save_model(model, filepath, overwrite, include_optimizer)
124 f.attrs['model_config'] = json.dumps({
125 'class_name': model.__class__.__name__,
--> 126 'config': model.get_config()
127 }, default=get_json_type).encode('utf8')
128
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/topology.py in get_config(self)
2430 model_outputs.append([layer.name, new_node_index, tensor_index])
2431 config['output_layers'] = model_outputs
-> 2432 return copy.deepcopy(config)
2433
2434 #classmethod
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_list(x, memo, deepcopy)
213 append = y.append
214 for a in x:
--> 215 append(deepcopy(a, memo))
216 return y
217 d[list] = _deepcopy_list
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_tuple(x, memo, deepcopy)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in <listcomp>(.0)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_tuple(x, memo, deepcopy)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in <listcomp>(.0)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
167 reductor = getattr(x, "__reduce_ex__", None)
168 if reductor:
--> 169 rv = reductor(4)
170 else:
171 reductor = getattr(x, "__reduce__", None)
TypeError: can't pickle module objects
When I specify a batch size of 256, the network won't even run (see the other linked question), but individual GPUs are able to handle a batch size of 32. I'm not able to pinpoint what's going wrong here or how to fix this error. Is it just the batch size? It seems more like a parallelization problem to me.
If you use the ModelCheckpoint function in the callbacks, you should add the parameter save_weights_only=True to the ModelCheckpoint call:
from keras.callbacks import ModelCheckpoint
callbacks_list = [ModelCheckpoint(top_weights_path, monitor='val_loss',
                                  verbose=1, save_best_only=True, save_weights_only=True)]
Hope this is useful.
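For example, a sketch of how that would slot into the question's training loop (parallel_model, earlystopper, X_train and Y_train come from the question). With save_weights_only=True the checkpoint calls save_weights instead of save, so Keras never tries to deep-copy and serialize the full model config, which is where the pickling failed:

from keras.callbacks import ModelCheckpoint

# save only the weights; no model config serialization is attempted
checkpointer = ModelCheckpoint('test.h5', monitor='val_loss', verbose=1,
                               save_best_only=True, save_weights_only=True)

parallel_model.fit(X_train, Y_train, validation_split=0.05, batch_size=128,
                   epochs=100, callbacks=[earlystopper, checkpointer])

# after training, restore the best weights into the same model that was fit
parallel_model.load_weights('test.h5')

Note that because only the weights are written to disk, you need an already-built model (here the same parallel_model) to load them back into.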
I'm doing a deepcopy of a list of objects, but I keep getting the following error:
deepcopy __deepcopy__() takes 1 positional argument but 2 were given
and the following traceback:
TypeError Traceback (most recent call last)
<ipython-input-4-66b9ee5521c7> in <module>()
2
3 import copy
----> 4 regions_copy = copy.deepcopy(regions)
5 regions[0].A = 15
6 print(regions[0].A)
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in _deepcopy_list(x, memo)
216 memo[id(x)] = y
217 for a in x:
--> 218 y.append(deepcopy(a, memo))
219 return y
220 d[list] = _deepcopy_list
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
180 raise Error(
181 "un(deep)copyable object of type %s" % cls)
--> 182 y = _reconstruct(x, rv, 1, memo)
183
184 # If is its own copy, don't memoize.
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in _reconstruct(x, info, deep, memo)
295 if state is not None:
296 if deep:
--> 297 state = deepcopy(state, memo)
298 if hasattr(y, '__setstate__'):
299 y.__setstate__(state)
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in _deepcopy_dict(x, memo)
241 memo[id(x)] = y
242 for key, value in x.items():
--> 243 y[deepcopy(key, memo)] = deepcopy(value, memo)
244 return y
245 d[dict] = _deepcopy_dict
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
164 copier = getattr(x, "__deepcopy__", None)
165 if copier:
--> 166 y = copier(memo)
167 else:
168 reductor = dispatch_table.get(cls)
TypeError: __deepcopy__() takes 1 positional argument but 2 were given
The problem also occurs when I copy a single object. Any idea what could be the cause?
I suppose it might be in my class implementation, because deepcopying a list like [object(), object(), object()] works fine. Although that would be very strange...
I found that the problem was in fact in the definition of the variable regions. It is a list of AreaRegion instances, whose __init__ assigned a matplotlib Path into the instance __dict__:
from matplotlib.path import Path
...
class AreaRegion:
    def __init__(self):
        ...
        self.path = Path(verts, codes, closed=True)
        ...
    ...
Apparently deepcopy didn't like this, so I've moved the Path construction into a getter instead.
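For illustration, a minimal sketch of what that change could look like (the real AreaRegion has more state; verts and codes stand in for whatever defines the region):

from matplotlib.path import Path

class AreaRegion:
    def __init__(self, verts, codes):
        # keep only plain, deep-copyable data on the instance
        self.verts = verts
        self.codes = codes

    @property
    def path(self):
        # build the Path on demand instead of storing it in __dict__,
        # so copy.deepcopy never has to copy the matplotlib object
        return Path(self.verts, self.codes, closed=True)

With the Path out of the instance state, copy.deepcopy(regions) no longer hits matplotlib's __deepcopy__ at all.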