keras multiple_gpu_model causes "Can't pickle module object" error - python

This is a follow up of this question. I am trying to utilize 8 GPUs for training and am using the multiple_gpu_model from Keras. I specified a batch size of 128 which will be split amongst the 8 GPUs resulting in 16 per GPU. Now, with this configuration, I get the following error:
Train on 6120 samples, validate on 323 samples
Epoch 1/100
6120/6120 [==============================] - 42s 7ms/step - loss: 0.0996 - mean_iou: 0.6919 - val_loss: 0.0969 - val_mean_iou: 0.7198
Epoch 00001: val_loss improved from inf to 0.09686, saving model to test.h5
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-16-00e92d5b765a> in <module>()
3 checkpointer = ModelCheckpoint('test.h5', verbose=1, save_best_only=True)
4 results = parallel_model.fit(X_train, Y_train, validation_split=0.05, batch_size = 128, verbose=1, epochs=100,
----> 5 callbacks=[earlystopper, checkpointer])
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1703 initial_epoch=initial_epoch,
1704 steps_per_epoch=steps_per_epoch,
-> 1705 validation_steps=validation_steps)
1706
1707 def evaluate(self, x=None, y=None,
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
1254 for l, o in zip(out_labels, val_outs):
1255 epoch_logs['val_' + l] = o
-> 1256 callbacks.on_epoch_end(epoch, epoch_logs)
1257 if callback_model.stop_training:
1258 break
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/callbacks.py in on_epoch_end(self, epoch, logs)
75 logs = logs or {}
76 for callback in self.callbacks:
---> 77 callback.on_epoch_end(epoch, logs)
78
79 def on_batch_begin(self, batch, logs=None):
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/callbacks.py in on_epoch_end(self, epoch, logs)
445 self.model.save_weights(filepath, overwrite=True)
446 else:
--> 447 self.model.save(filepath, overwrite=True)
448 else:
449 if self.verbose > 0:
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/topology.py in save(self, filepath, overwrite, include_optimizer)
2589 """
2590 from ..models import save_model
-> 2591 save_model(self, filepath, overwrite, include_optimizer)
2592
2593 def save_weights(self, filepath, overwrite=True):
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py in save_model(model, filepath, overwrite, include_optimizer)
124 f.attrs['model_config'] = json.dumps({
125 'class_name': model.__class__.__name__,
--> 126 'config': model.get_config()
127 }, default=get_json_type).encode('utf8')
128
~/anaconda/envs/dl/lib/python3.6/site-packages/keras/engine/topology.py in get_config(self)
2430 model_outputs.append([layer.name, new_node_index, tensor_index])
2431 config['output_layers'] = model_outputs
-> 2432 return copy.deepcopy(config)
2433
2434 @classmethod
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_list(x, memo, deepcopy)
213 append = y.append
214 for a in x:
--> 215 append(deepcopy(a, memo))
216 return y
217 d[list] = _deepcopy_list
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_tuple(x, memo, deepcopy)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in <listcomp>(.0)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
~/anaconda/envs/dl/lib/python3.6/copy.py in _deepcopy_tuple(x, memo, deepcopy)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in <listcomp>(.0)
218
219 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 220 y = [deepcopy(a, memo) for a in x]
221 # We're not going to put the tuple in the memo, but it's still important we
222 # check for it, in case the tuple contains recursive mutable structures.
~/anaconda/envs/dl/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
167 reductor = getattr(x, "__reduce_ex__", None)
168 if reductor:
--> 169 rv = reductor(4)
170 else:
171 reductor = getattr(x, "__reduce__", None)
TypeError: can't pickle module objects
When I specify a batch size of 256, the network won't even run (see the other linked question). But individual GPUs are able to handle a batch size of 32. I'm not able to pinpoint what's going wrong here and how to fix this error. Is it just the batch size? It seems more like a parallelization problem to me.

If you use the ModelCheckpoint callback, you should pass the parameter 'save_weights_only=True' to ModelCheckpoint:
from keras.callbacks import ModelCheckpoint
callbacks_list = [ModelCheckpoint(top_weights_path, monitor='val_loss',
verbose=1, save_best_only=True, save_weights_only=True)]
hope useful

Related

Python debugging, deepcopy and missing information from Tracebacks

I'm working on a particularly messy bit of code which I had written some years ago.
I had, at the time, lazily used copy.deepcopy() to create a copy of something that needed to be independently manipulated and then thrown away. This has worked fine, but some specific instances now fail.
While normally I would litter the code with print / log statements to figure out what the problem is, since this is largely hidden behind deepcopy operating on a pretty big hierarchy of classes, this option is not viable.
I'm using ipython since it provides a slightly more verbose traceback for me to follow.
I see the following hints, but I'd like to get more information on what the precise problem is. Any suggestions for ways to configure ipython, alternate tools, or any other options to instrument the code itself would be greatly appreciated. Note that such instrumentation should not need to be added to every class. There are far too many of them. However, something that I can slip into the top level object I want to copy is perfectly fine.
I see that the main 'exception', so to speak, is fairly obvious.
TypeError: __init__() takes at least 4 arguments (1 given)
I do not know why this is happening only with some instances of the class, while other instances copy just fine. While there is nothing special about the failing instances that I can see, I'm assuming for the moment that they trigger the inclusion of some poorly written class by composition. I would like to know precisely which class this init belongs to. If it's something non-critical, I'll just make it accept 1 argument and move on.
In the traceback, I see many lines like:
raise Error(
189 "un(deep)copyable object of type %s" % cls)
--> 190 y = _reconstruct(x, rv, 1, memo)
I presume this "Error" is getting caught somewhere within deepcopy's call chain, and the formatted string never gets printed in the traceback. I wish to see this string. Specifically, I want to see the value of cls so I can figure out where to look. Is there any way I can add this to the printed traceback?
I have been having a lot of trouble finding a good reference guide to modify the behaviour of python's deepcopy. The module documentation says to look at python's pickle module's documentation, but I'm not really sure where. If you can point to any reasonable resources which talk about how to customize the behaviour of deepcopy, I'd be very grateful. Specifically, assuming I have a python class, I want to modify deepcopy's behaviour to ignore certain class/instance attributes, but behave exactly as usual for all the rest.
Full Traceback
In [72]: deepcopy(p2.bom)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-72-8eba7c5b3494> in <module>()
----> 1 deepcopy(p2.bom)
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in deepcopy(x, memo, _nil)
188 raise Error(
189 "un(deep)copyable object of type %s" % cls)
--> 190 y = _reconstruct(x, rv, 1, memo)
191
192 memo[d] = y
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in _reconstruct(x, info,
deep, memo)
332 if state:
333 if deep:
--> 334 state = deepcopy(state, memo)
335 if hasattr(y, '__setstate__'):
336 y.__setstate__(state)
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in deepcopy(x, memo, _nil)
161 copier = _deepcopy_dispatch.get(cls)
162 if copier:
--> 163 y = copier(x, memo)
164 else:
165 try:
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in _deepcopy_dict(x, memo)
255 memo[id(x)] = y
256 for key, value in x.iteritems():
--> 257 y[deepcopy(key, memo)] = deepcopy(value, memo)
258 return y
259 d[dict] = _deepcopy_dict
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in deepcopy(x, memo, _nil)
188 raise Error(
189 "un(deep)copyable object of type %s" % cls)
--> 190 y = _reconstruct(x, rv, 1, memo)
191
192 memo[d] = y
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in _reconstruct(x, info,
deep, memo)
332 if state:
333 if deep:
--> 334 state = deepcopy(state, memo)
335 if hasattr(y, '__setstate__'):
336 y.__setstate__(state)
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in deepcopy(x, memo, _nil)
161 copier = _deepcopy_dispatch.get(cls)
162 if copier:
--> 163 y = copier(x, memo)
164 else:
165 try:
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in _deepcopy_dict(x, memo)
255 memo[id(x)] = y
256 for key, value in x.iteritems():
--> 257 y[deepcopy(key, memo)] = deepcopy(value, memo)
258 return y
259 d[dict] = _deepcopy_dict
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in deepcopy(x, memo, _nil)
161 copier = _deepcopy_dispatch.get(cls)
162 if copier:
--> 163 y = copier(x, memo)
164 else:
165 try:
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in _deepcopy_list(x, memo)
228 memo[id(x)] = y
229 for a in x:
--> 230 y.append(deepcopy(a, memo))
231 return y
232 d[list] = _deepcopy_list
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in deepcopy(x, memo, _nil)
188 raise Error(
189 "un(deep)copyable object of type %s" % cls)
--> 190 y = _reconstruct(x, rv, 1, memo)
191
192 memo[d] = y
/home/tendril/.pyenv/versions/2.7.6/lib/python2.7/copy.pyc in _reconstruct(x, info,
deep, memo)
327 if deep:
328 args = deepcopy(args, memo)
--> 329 y = callable(*args)
330 memo[id(x)] = y
331
TypeError: __init__() takes at least 4 arguments (1 given)

AttributeError: 'Adam' object has no attribute 'build'

After creating a classification model I need to use the k-Cross Fold Validation but I keep getting this error: AttributeError: 'Adam' object has no attribute 'build'.
from scikeras.wrappers import KerasClassifier
keras_clf = KerasClassifier(model = model, optimizer="adam", epochs=100, verbose=0)
model_kResults = cross_validation(keras_clf, X, y, 5)
print(model_kResults)
print("Mean Validation Accuracy:", model_kResults["Mean Validation Accuracy"])
print("Mean Validation F1 Score:",model_kResults["Mean Validation F1 Score"])
How can I resolve this? You can find below the full error:
in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
265 # independent, and that it is pickle-able.
266 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
--> 267 results = parallel(
268 delayed(_fit_and_score)(
269 clone(estimator),
/usr/local/lib/python3.8/dist-packages/joblib/parallel.py in __call__(self, iterable)
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1087
/usr/local/lib/python3.8/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
871 big_batch_size = batch_size * n_jobs
872
--> 873 islice = list(itertools.islice(iterator, big_batch_size))
874 if len(islice) == 0:
875 return False
/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py in <genexpr>(.0)
267 results = parallel(
268 delayed(_fit_and_score)(
--> 269 clone(estimator),
270 X,
271 y,
/usr/local/lib/python3.8/dist-packages/sklearn/base.py in clone(estimator, safe)
84 new_object_params = estimator.get_params(deep=False)
85 for name, param in new_object_params.items():
---> 86 new_object_params[name] = clone(param, safe=False)
87 new_object = klass(**new_object_params)
88 params_set = new_object.get_params(deep=False)
/usr/local/lib/python3.8/dist-packages/sklearn/base.py in clone(estimator, safe)
65 elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
66 if not safe:
---> 67 return copy.deepcopy(estimator)
68 else:
69 if isinstance(estimator, type):
/usr/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
151 copier = getattr(x, "__deepcopy__", None)
152 if copier is not None:
--> 153 y = copier(memo)
154 else:
155 reductor = dispatch_table.get(cls)
/usr/local/lib/python3.8/dist-packages/scikeras/_saving_utils.py in deepcopy_model(model, memo)
81 def deepcopy_model(model: keras.Model, memo: Dict[Hashable, Any]) -> keras.Model:
82 _, (model_bytes,) = pack_keras_model(model)
---> 83 new_model = unpack_keras_model(model_bytes)
84 memo[model] = new_model
85 return new_model
/usr/local/lib/python3.8/dist-packages/scikeras/_saving_utils.py in unpack_keras_model(packed_keras_model)
51 model: keras.Model = load_model(temp_dir)
52 model.load_weights(temp_dir)
---> 53 model.optimizer.build(model.trainable_variables)
54 return model
55
/usr/local/lib/python3.8/dist-packages/keras/optimizer_v2/optimizer_v2.py in __getattribute__(self, name)
843 if name in self._hyper:
844 return self._get_hyper(name)
--> 845 raise e
846
847 def __dir__(self):
/usr/local/lib/python3.8/dist-packages/keras/optimizer_v2/optimizer_v2.py in __getattribute__(self, name)
833 """Overridden to support hyperparameter access."""
834 try:
--> 835 return super(OptimizerV2, self).__getattribute__(name)
836 except AttributeError as e:
837 # Needed to avoid infinite recursion with __setattr__.
It seems that the program is trying to create a deep copy of a Keras model with 'copy.deepcopy' but the model doesn't have the 'deepcopy' attribute and this is the reason for the error. But I cannot understand what I'm missing because it worked until today...
Change your TensorFlow version to 2.11.0.
I did, and that worked.
This appears to be an issue with how Keras is imported.
First, make sure you've got Tensorflow version 2.11.0, and that you're importing Keras from there.
>>> !pip install tensorflow==2.11.0
>>> import tensorflow as tf
Then pass the Adam optimizer from tf.keras as the optimizer argument to the KerasClassifier class
keras_clf = KerasClassifier(model = model, optimizer=tf.keras.optimizers.Adam(), epochs=100, verbose=0)
ETA: This is an answer to a similar question, and includes a solution that works with Tensorflow 2.9

GridSearchCV - TypeError: an integer is required

I am trying to find the best hyperparameters for my SVM using Grid Search. When doing it the following way:
from sklearn.model_selection import GridSearchCV
param_grid = {'coef0': [10, 5, 0.5, 0.001], 'C': [100, 50, 1, 0.001]}
poly_svm_search = SVC(kernel="poly", degree="2")
grid_search = GridSearchCV(poly_svm_search, param_grid, cv=5, scoring='f1')
grid_search.fit(train_data, train_labels)
I get this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-72-dadf5782618c> in <module>
8
----> 9 grid_search.fit(train_data, train_labels)
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
981 # remaining jobs.
982 self._iterating = False
--> 983 if self.dispatch_one_batch(iterator):
984 self._iterating = self._original_iterator is not None
985
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
823 return False
824 else:
--> 825 self._dispatch(tasks)
826 return True
827
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
780 with self._lock:
781 job_idx = len(self._jobs)
--> 782 job = self._backend.apply_async(batch, callback=cb)
783 # A job can complete so quickly than its callback is
784 # called before we get here, causing self._jobs to
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
543 # Don't delay the application, to avoid keeping the input
544 # arguments in memory
--> 545 self.results = batch()
546
547 def get(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
526 estimator.fit(X_train, **fit_params)
527 else:
--> 528 estimator.fit(X_train, y_train, **fit_params)
529
530 except Exception as e:
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
210
211 seed = rnd.randint(np.iinfo('i').max)
--> 212 fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
213 # see comment on the other call to np.iinfo in this file
214
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
291 sample_weight, self.nu, self.cache_size, self.epsilon,
292 int(self.shrinking), int(self.probability), self.max_iter,
--> 293 random_seed)
294
295 self._warn_from_fit_status()
sklearn/svm/libsvm_sparse.pyx in sklearn.svm.libsvm_sparse.libsvm_sparse_train()
TypeError: an integer is required
My train_labels variable contains a list of booleans, so I have a binary classification problem. train_data is a <class'scipy.sparse.csr.csr_matrix'>, basically containing all scaled and One-Hot encoded features.
What did I do wrong? It's hard for me to track down what the issue is here. I thank you for any help in advance ;).
When you initialize the SVC using this line:
poly_svm_search = SVC(kernel="poly", degree="2")
You are supplying degree param with a string, due to inverted commas around it. But according to the documentation, degree takes an integer as value.
degree : int, optional (default=3) Degree of the polynomial kernel
function (‘poly’). Ignored by all other kernels.
So you need to do this:
poly_svm_search = SVC(kernel="poly", degree=2)
Notice how I did not use inverted commas here.

TypeError: __deepcopy__() takes 1 positional argument but 2 were given error?

I'm doing a deepcopy for a list of objects, but I keep getting following error:
deepcopy __deepcopy__() takes 1 positional argument but 2 were given
and following traceback:
TypeError Traceback (most recent call last)
<ipython-input-4-66b9ee5521c7> in <module>()
2
3 import copy
----> 4 regions_copy = copy.deepcopy(regions)
5 regions[0].A = 15
6 print(regions[0].A)
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in _deepcopy_list(x, memo)
216 memo[id(x)] = y
217 for a in x:
--> 218 y.append(deepcopy(a, memo))
219 return y
220 d[list] = _deepcopy_list
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
180 raise Error(
181 "un(deep)copyable object of type %s" % cls)
--> 182 y = _reconstruct(x, rv, 1, memo)
183
184 # If is its own copy, don't memoize.
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in _reconstruct(x, info, deep, memo)
295 if state is not None:
296 if deep:
--> 297 state = deepcopy(state, memo)
298 if hasattr(y, '__setstate__'):
299 y.__setstate__(state)
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in _deepcopy_dict(x, memo)
241 memo[id(x)] = y
242 for key, value in x.items():
--> 243 y[deepcopy(key, memo)] = deepcopy(value, memo)
244 return y
245 d[dict] = _deepcopy_dict
/home/michal/Bin/anaconda/envs/tensorflow/lib/python3.5/copy.py in deepcopy(x, memo, _nil)
164 copier = getattr(x, "__deepcopy__", None)
165 if copier:
--> 166 y = copier(memo)
167 else:
168 reductor = dispatch_table.get(cls)
TypeError: __deepcopy__() takes 1 positional argument but 2 were given
The problem seems to be also when I copy a single object. Any idea what could be the cause?
I suppose it might be in my class implementation, because deepcopying a list like [object(), object(), object()] is fine. Although that would be very strange...
I found the problem was in fact in the definition of the variable regions. It is a list of classes AreaRegion, which contained assignment into class __dict__:
from matplotlib.path import Path
...
class AreaRegion:
def __init__(self):
...
self.path = Path(verts, codes, closed=True)
...
...
Apparently it didn't like this, so I've moved Path into a getter instead.

GridSearchCV TypeError

Well, I'm new to Python. I'm trying to tokenize and stem tweets to create a model, then use grid search to find the optimal hyperparameters. I'm open to any kind of feedback.
This is my code:
import nltk
nltk.download("stopwords")
from nltk.corpus import stopwords
spanish_stopwords = stopwords.words('spanish')
from string import punctuation
non_words = list(punctuation)
#we add spanish punctuation
non_words.extend(['¿', '¡'])
non_words.extend(map(str,range(10)))
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
stemmer = SnowballStemmer('spanish')
def stem_tokens(tokens, stemmer):
    """Return the stem of every token, in order.

    Args:
        tokens: Iterable of tokens to stem.
        stemmer: Object exposing a ``stem(token)`` method.

    Returns:
        A new list holding ``stemmer.stem(item)`` for each item of *tokens*.
    """
    return [stemmer.stem(item) for item in tokens]
def tokenize(text):
    """Strip unwanted characters from *text*, tokenize it and stem the tokens.

    Uses the module-level ``non_words``, ``word_tokenize``, ``stemmer`` and
    ``stem_tokens``.

    Args:
        text: The raw string to process.

    Returns:
        The list of stems, or ``['']`` when stemming raised an exception.
    """
    # remove non letters
    text = ''.join([c for c in text if c not in non_words])
    # tokenize
    tokens = word_tokenize(text)
    # stem
    try:
        stems = stem_tokens(tokens, stemmer)
    except Exception as e:
        # Best effort: report the failure and the offending text, then fall
        # back to a single empty stem instead of aborting the caller.
        print(e)
        print(text)
        stems = ['']
    return stems
from sklearn.cross_validation import cross_val_score
from sklearn.svm import LinearSVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
# Drop the rows whose polarity is 'NEU', then build a binary target column:
# polarity_bin starts at 0 and is set to 1 where polarity is 'P' or 'P+'.
tweets_corpus = tweets_corpus[tweets_corpus.polarity != 'NEU']
tweets_corpus['polarity_bin'] = 0
# NOTE(review): the next line assigns through attribute access followed by
# boolean indexing (chained assignment). Presumably tweets_corpus is a pandas
# DataFrame, where `.loc[mask, 'polarity_bin'] = 1` is the reliable form —
# confirm.
tweets_corpus.polarity_bin[tweets_corpus.polarity.isin(['P', 'P+'])] = 1
# Print the normalized frequency of each polarity_bin value.
print(tweets_corpus.polarity_bin.value_counts(normalize=True))
# Script entry point: builds a CountVectorizer + LinearSVC pipeline and runs a
# grid search over its hyperparameters.
# NOTE(review): the original indentation is lost, so how many of the following
# lines belong to the `if` body cannot be determined from here.
if __name__ == '__main__':
# NOTE(review): this import rebinds the name `tokenize` to a module, replacing
# the tokenize(text) function defined above. The `tokenizer = tokenize`
# argument below therefore receives the module, not the function — presumably
# this is the object that cannot be deep-copied when the estimator is cloned;
# confirm by removing the import.
import tokenize
vectorizer = CountVectorizer(
analyzer = 'word',
tokenizer = tokenize,
lowercase = True,
stop_words = spanish_stopwords)
# Two-step pipeline: 'vect' (vectorizer) feeds 'cls' (LinearSVC).
pipeline = Pipeline([
('vect', vectorizer),
('cls', LinearSVC()),
])
# Candidate values per parameter; keys use the '<step>__<param>' form matching
# the step names 'vect' and 'cls' above.
# NOTE(review): 1.9 for vect__max_df looks like a typo (0.9?) — verify against
# the CountVectorizer documentation for the accepted range.
parameters = {
'vect__max_df': (0.5, 1.9),
'vect__min_df': (10, 20,50),
'vect__max_features': (500, 1000),
'vect__ngram_range': ((1, 1), (1, 2)), # unigrams or bigrams
'cls__C': (0.2, 0.5, 0.7),
'cls__loss': ('hinge', 'squared_hinge'),
'cls__max_iter': (500, 1000)
}
from time import time
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1,scoring='roc_auc')
print("Performing grid search...")
print("pipeline:", [name for name, _ in pipeline.steps])
grid_search.fit(tweets_corpus.content, tweets_corpus.polarity_bin)
print(grid_search.best_params_)
# NOTE(review): t0 is taken after fit() has already returned, so the elapsed
# time printed below does not include the grid search itself.
t0 = time()
print("done in %0.3fs" % (time() - t0))
This is a sample of the data I'm trying to upgrade:
Name: polarity_bin, dtype: float64
agreement \
270 NaN
208 NaN
902 NaN
31056 NaN
1158 NaN
content \
270 #revolucion2017 #Pablo_Iglesias_ Cultura es reflexionar sobre algo q ha dicho alguien y si te gusta hacerlo tuyo.pq no?
208 #_UnaOpinionMas_ #PPopular En eso estoi de acuerdo por lo menos al PP se le ve que hace cosas y contara d nuevo cn mi voto como siempre.
902 "Grande Casillas : ""Esta victoria no solo es nuestra sino también de Jesé ."""
31056 ¿Querían que Contador analizara cualquier cosa que fuera a tomar o que la vomitara meses después para mandarla al puto laboratorio?
1158 Eliminados de champion , van terceros en la Liga y pierden la final copa del Rey , PURO REAL MADRID
polarity polarity_bin
270 P 1
208 P 1
902 P 1
31056 N 0
1158 N 0
And this is the error:
TypeError Traceback (most recent call last)
<ipython-input-9-7c9b6a1bac93> in <module>()
201 print("Performing grid search...")
202 print("pipeline:", [name for name, _ in pipeline.steps])
--> 203 grid_search.fit(tweets_corpus.content, tweets_corpus.polarity_bin)
204 print(grid_search.best_params_)
205 t0 = time()
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\grid_search.py in fit(self, X, y)
802
803 """
--> 804 return self._fit(X, y, ParameterGrid(self.param_grid))
805
806
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\grid_search.py in _fit(self, X, y, parameter_iterable)
539 n_candidates * len(cv)))
540
--> 541 base_estimator = clone(self.estimator)
542
543 pre_dispatch = self.pre_dispatch
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
49 new_object_params = estimator.get_params(deep=False)
50 for name, param in six.iteritems(new_object_params):
---> 51 new_object_params[name] = clone(param, safe=False)
52 new_object = klass(**new_object_params)
53 params_set = new_object.get_params(deep=False)
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in <listcomp>(.0)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in <listcomp>(.0)
37 # XXX: not handling dictionaries
38 if estimator_type in (list, tuple, set, frozenset):
---> 39 return estimator_type([clone(e, safe=safe) for e in estimator])
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
49 new_object_params = estimator.get_params(deep=False)
50 for name, param in six.iteritems(new_object_params):
---> 51 new_object_params[name] = clone(param, safe=False)
52 new_object = klass(**new_object_params)
53 params_set = new_object.get_params(deep=False)
C:\Users\Miguel\Anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
40 elif not hasattr(estimator, 'get_params'):
41 if not safe:
---> 42 return copy.deepcopy(estimator)
43 else:
44 raise TypeError("Cannot clone object '%s' (type %s): "
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
180 raise Error(
181 "un(deep)copyable object of type %s" % cls)
--> 182 y = _reconstruct(x, rv, 1, memo)
183
184 # If is its own copy, don't memoize.
C:\Users\Miguel\Anaconda3\lib\copy.py in _reconstruct(x, info, deep, memo)
296 if state:
297 if deep:
--> 298 state = deepcopy(state, memo)
299 if hasattr(y, '__setstate__'):
300 y.__setstate__(state)
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
C:\Users\Miguel\Anaconda3\lib\copy.py in _deepcopy_dict(x, memo)
242 memo[id(x)] = y
243 for key, value in x.items():
--> 244 y[deepcopy(key, memo)] = deepcopy(value, memo)
245 return y
246 d[dict] = _deepcopy_dict
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
180 raise Error(
181 "un(deep)copyable object of type %s" % cls)
--> 182 y = _reconstruct(x, rv, 1, memo)
183
184 # If is its own copy, don't memoize.
C:\Users\Miguel\Anaconda3\lib\copy.py in _reconstruct(x, info, deep, memo)
296 if state:
297 if deep:
--> 298 state = deepcopy(state, memo)
299 if hasattr(y, '__setstate__'):
300 y.__setstate__(state)
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
153 copier = _deepcopy_dispatch.get(cls)
154 if copier:
--> 155 y = copier(x, memo)
156 else:
157 try:
C:\Users\Miguel\Anaconda3\lib\copy.py in _deepcopy_dict(x, memo)
242 memo[id(x)] = y
243 for key, value in x.items():
--> 244 y[deepcopy(key, memo)] = deepcopy(value, memo)
245 return y
246 d[dict] = _deepcopy_dict
C:\Users\Miguel\Anaconda3\lib\copy.py in deepcopy(x, memo, _nil)
172 reductor = getattr(x, "__reduce_ex__", None)
173 if reductor:
--> 174 rv = reductor(4)
175 else:
176 reductor = getattr(x, "__reduce__", None)
TypeError: cannot serialize '_io.TextIOWrapper' object
Thanks for your time.
BTW, I'm working on Windows 10 and have all the tools updated.

Categories

Resources