UnicodeEncodeError in LatentDirichletAllocation - python

When I tried to perform LatentDirichletAllocation on an array, it kept showing:
UnicodeEncodeError Traceback (most recent call last)
<timed exec> in <module>
D:\Anacondo\lib\site-packages\sklearn\decomposition\_lda.py in fit(self, X, y)
624 last_bound = None
625 n_jobs = effective_n_jobs(self.n_jobs)
--> 626 with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:
627 for i in range(max_iter):
628 if learning_method == "online":
D:\Anacondo\lib\site-packages\joblib\parallel.py in __enter__(self)
723 def __enter__(self):
724 self._managed_backend = True
--> 725 self._initialize_backend()
726 return self
727
D:\Anacondo\lib\site-packages\joblib\parallel.py in _initialize_backend(self)
733 """Build a process or thread pool and return the number of workers"""
734 try:
--> 735 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
736 **self._backend_args)
737 if self.timeout is not None and not self._backend.supports_timeout:
D:\Anacondo\lib\site-packages\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, prefer, require, idle_worker_timeout, **memmappingexecutor_args)
492 SequentialBackend(nesting_level=self.nesting_level))
493
--> 494 self._workers = get_memmapping_executor(
495 n_jobs, timeout=idle_worker_timeout,
496 env=self._prepare_worker_env(n_jobs=n_jobs),
D:\Anacondo\lib\site-packages\joblib\executor.py in get_memmapping_executor(n_jobs, **kwargs)
18
19 def get_memmapping_executor(n_jobs, **kwargs):
---> 20 return MemmappingExecutor.get_memmapping_executor(n_jobs, **kwargs)
21
22
D:\Anacondo\lib\site-packages\joblib\executor.py in get_memmapping_executor(cls, n_jobs, timeout, initializer, initargs, env, temp_folder, context_id, **backend_args)
40 _executor_args = executor_args
41
---> 42 manager = TemporaryResourcesManager(temp_folder)
43
44 # reducers access the temporary folder in which to store temporary
D:\Anacondo\lib\site-packages\joblib\_memmapping_reducer.py in __init__(self, temp_folder_root, context_id)
529 # exposes exposes too many low-level details.
530 context_id = uuid4().hex
--> 531 self.set_current_context(context_id)
532
533 def set_current_context(self, context_id):
D:\Anacondo\lib\site-packages\joblib\_memmapping_reducer.py in set_current_context(self, context_id)
533 def set_current_context(self, context_id):
534 self._current_context_id = context_id
--> 535 self.register_new_context(context_id)
536
537 def register_new_context(self, context_id):
D:\Anacondo\lib\site-packages\joblib\_memmapping_reducer.py in register_new_context(self, context_id)
558 new_folder_name, self._temp_folder_root
559 )
--> 560 self.register_folder_finalizer(new_folder_path, context_id)
561 self._cached_temp_folders[context_id] = new_folder_path
562
D:\Anacondo\lib\site-packages\joblib\_memmapping_reducer.py in register_folder_finalizer(self, pool_subfolder, context_id)
588 # semaphores and pipes
589 pool_module_name = whichmodule(delete_folder, 'delete_folder')
--> 590 resource_tracker.register(pool_subfolder, "folder")
591
592 def _cleanup():
D:\Anacondo\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in register(self, name, rtype)
189 '''Register a named resource, and increment its refcount.'''
190 self.ensure_running()
--> 191 self._send('REGISTER', name, rtype)
192
193 def unregister(self, name, rtype):
D:\Anacondo\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
202
203 def _send(self, cmd, name, rtype):
--> 204 msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii')
205 if len(name) > 512:
206 # posix guarantees that writes to a pipe of less than PIPE_BUF
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-19: ordinal not in range(128)
The following code was provided by the instructor and I have made no changes to it.
import numpy as np  # needed below for np.array
import pandas as pd
df = pd.read_csv("android.csv", sep=",", thousands=",")
df["Number of ratings"] = df["Number of ratings"].astype(int) # fix data type
df = df.drop_duplicates(subset=["App"]).reset_index(drop=True)
df.head(n=3)
permission_columns = list(df.columns[10:])
app_names = list(df["App"])
app_ratings = np.array(df["Number of ratings"])
df_perms = df[permission_columns]
X = df_perms.values
Below is my code to use TF-IDF and LatentDirichletAllocation to fit the data.
from sklearn.feature_extraction.text import TfidfTransformer
transformer = TfidfTransformer()
X_tfidf = transformer.fit_transform(X)
# convert sparse matrix to a dense numpy array (LDA also accepts the sparse matrix directly)
X_tfidf = X_tfidf.toarray()
%%time
from sklearn.decomposition import LatentDirichletAllocation
n_topics = 10
lda = LatentDirichletAllocation(n_components=n_topics, max_iter=10,
                                learning_method='online',
                                n_jobs=-1, random_state=3)
lda.fit(X_tfidf)
However, it kept showing UnicodeEncodeError. I tried adding
df = pd.read_csv("android.csv", sep=",", thousands=",", engine='python', encoding='utf-8-sig')
but it doesn't work. I tried several different encodings; none of them work either.
Is there any way I can solve the issue? I think there might be something wrong with the X_tfidf array, but I can't tell.
Any help will be appreciated!
I tried with another array that is not taken from this dataset. It still doesn't work and shows the same error and traceback.
Edit: this works fine in Google Colab. There might be something wrong with my local Jupyter Notebook settings.
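For what it's worth, the traceback bottoms out in joblib's resource tracker calling .encode('ascii') on the temporary folder path it registers for the worker pool, so the usual culprit is a non-ASCII character somewhere in that path (on Windows, often the user name inside %TEMP%); Colab runs on Linux with ASCII-only temp paths, which would explain why it works there. A minimal diagnostic sketch, assuming that is the cause:
import tempfile

tmp = tempfile.gettempdir()  # on Windows, the root joblib uses for its pool subfolders by default
print(tmp)
print(tmp.isascii())  # False means the encode('ascii') in _send() will fail
If it prints False, running with n_jobs=1 sidesteps the resource tracker entirely, or point joblib at an ASCII-only folder (see the JOBLIB_TEMP_FOLDER sketch at the end of this page).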

Related

LoweringError: Failed in nopython mode pipeline (step: native lowering)

The following is the code I am trying to run. It used to work, but I made changes to some installations (I don't remember which, unfortunately: scipy or scikit-learn? My k-means function also stopped working.)
from umap import UMAP
umap_2d_lv = UMAP(n_components=2, random_state=0).fit(lv_data, y=cluster_num)
proj_2d_lv = umap_2d_lv.embedding_
This is how I tried to fix the error, based on suggestions online:
pip install umap-learn>=0.5.1 & pip install numba==0.53.0
I also tried this:
pip install umap-learn
and then
import umap.umap_ as UMAP
This is the error that comes out:
AttributeError Traceback (most recent call last)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\errors.py in new_error_context(fmt_, *args, **kwargs)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_block(self, block)
234 """
--> 235 Create CPython wrapper(s) around this function (or generator).
236 """
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_inst(self, inst)
379
--> 380 elif isinstance(inst, ir.SetItem):
381 signature = self.fndesc.calltypes[inst]
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_assign(self, ty, inst)
581
--> 582 def cast_result(res):
583 return self.context.cast(self.builder, res,
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in incref(self, typ, val)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\runtime\context.py in incref(self, builder, typ, value)
217 """
--> 218 self._call_incref_decref(builder, typ, value, "NRT_incref")
219
~\AppData\Roaming\Python\Python38\site-packages\numba\core\runtime\context.py in _call_incref_decref(self, builder, typ, value, funcname)
206 mod = builder.module
--> 207 fn = mod.get_or_insert_function(incref_decref_ty, name=funcname)
208 # XXX "nonnull" causes a crash in test_dyn_array: can this
AttributeError: 'Module' object has no attribute 'get_or_insert_function'
During handling of the above exception, another exception occurred:
LoweringError Traceback (most recent call last)
<timed exec> in <module>
~\anaconda3\lib\site-packages\umap\__init__.py in <module>
1 from warnings import warn, catch_warnings, simplefilter
----> 2 from .umap_ import UMAP
3
4 try:
5 with catch_warnings():
~\anaconda3\lib\site-packages\umap\umap_.py in <module>
30 import umap.distances as dist
31
---> 32 import umap.sparse as sparse
33
34 from umap.utils import (
~\anaconda3\lib\site-packages\umap\sparse.py in <module>
10 import numpy as np
11
---> 12 from umap.utils import norm
13
14 locale.setlocale(locale.LC_NUMERIC, "C")
~\anaconda3\lib\site-packages\umap\utils.py in <module>
39
40 #numba.njit("i4(i8[:])")
---> 41 def tau_rand_int(state):
42 """A fast (pseudo)-random number generator.
43
~\AppData\Roaming\Python\Python38\site-packages\numba\core\decorators.py in wrapper(func)
224
225 return wrapper
--> 226
227
228 def generated_jit(function=None, target='cpu', cache=False,
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in compile(self, sig)
977 else:
978 return dict((sig, self.overloads[sig].metadata) for sig in self.signatures)
--> 979
980 def get_function_type(self):
981 """Return unique function type of dispatcher when possible, otherwise
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in compile(self, args, return_type)
139
140 def _get_implementation(self, args, kws):
--> 141 impl = self.py_func(*args, **kws)
142 # Check the generating function and implementation signatures are
143 # compatible, otherwise compiling would fail later.
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in _compile_cached(self, args, return_type)
153 pyparam.kind != implparam.kind or
154 (implparam.default is not implparam.empty and
--> 155 implparam.default != pyparam.default)):
156 ok = False
157 if not ok:
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in _compile_core(self, args, return_type)
166 '_CompileStats', ('cache_path', 'cache_hits', 'cache_misses'))
167
--> 168
169 class _CompilingCounter(object):
170 """
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in compile_extra(self, func)
426 """The default compiler
427 """
--> 428
429 def define_pipelines(self):
430 # this maintains the objmode fallback behaviour
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in _compile_bytecode(self)
490 pm.add_pass(AnnotateTypes, "annotate types")
491
--> 492 # strip phis
493 pm.add_pass(PreLowerStripPhis, "remove phis nodes")
494
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in _compile_core(self)
469 return pm
470
--> 471 #staticmethod
472 def define_nopython_lowering_pipeline(state, name='nopython_lowering'):
473 pm = PassManager(name)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in _compile_core(self)
460 pm.passes.extend(untyped_passes.passes)
461
--> 462 typed_passes = dpb.define_typed_pipeline(state)
463 pm.passes.extend(typed_passes.passes)
464
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in run(self, state)
341 def dependency_analysis(self):
342 """
--> 343 Computes dependency analysis
344 """
345 deps = dict()
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in run(self, state)
332 raise BaseException("Legacy pass in use")
333 except _EarlyPipelineCompletion as e:
--> 334 raise e
335 except Exception as e:
336 msg = "Failed in %s mode pipeline (step: %s)" % \
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_lock.py in _acquire_compile_lock(*args, **kwargs)
33 def _acquire_compile_lock(*args, **kwargs):
34 with self:
---> 35 return func(*args, **kwargs)
36 return _acquire_compile_lock
37
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in _runPass(self, index, pss, internal_state)
287 mutated |= check(pss.run_initialization, internal_state)
288 with SimpleTimer() as pass_time:
--> 289 mutated |= check(pss.run_pass, internal_state)
290 with SimpleTimer() as finalize_time:
291 mutated |= check(pss.run_finalizer, internal_state)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in check(func, compiler_state)
260
261 def check(func, compiler_state):
--> 262 mangled = func(compiler_state)
263 if mangled not in (True, False):
264 msg = ("CompilerPass implementations should return True/False. "
~\AppData\Roaming\Python\Python38\site-packages\numba\core\typed_passes.py in run_pass(self, state)
394 else:
395 if isinstance(restype,
--> 396 (types.Optional, types.Generator)):
397 pass
398 else:
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower(self)
136 self.lower_normal_function(self.fndesc)
137 else:
--> 138 self.genlower = self.GeneratorLower(self)
139 self.gentype = self.genlower.gentype
140
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_normal_function(self, fndesc)
190 entry_block_tail = self.lower_function_body()
191
--> 192 # Close tail of entry block
193 self.builder.position_at_end(entry_block_tail)
194 self.builder.branch(self.blkmap[self.firstblk])
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_function_body(self)
219
220 def lower_block(self, block):
--> 221 """
222 Lower the given block.
223 """
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_block(self, block)
233 def create_cpython_wrapper(self, release_gil=False):
234 """
--> 235 Create CPython wrapper(s) around this function (or generator).
236 """
237 if self.genlower:
~\anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
129 value = type()
130 try:
--> 131 self.gen.throw(type, value, traceback)
132 except StopIteration as exc:
133 # Suppress StopIteration *unless* it's the same exception that
~\AppData\Roaming\Python\Python38\site-packages\numba\core\errors.py in new_error_context(fmt_, *args, **kwargs)
LoweringError: Failed in nopython mode pipeline (step: native lowering)
'Module' object has no attribute 'get_or_insert_function'
File "..\..\..\anaconda3\lib\site-packages\umap\utils.py", line 53:
def tau_rand_int(state):
<source elided>
"""
state[0] = (((state[0] & 4294967294) << 12) & 0xFFFFFFFF) ^ (
^
During: lowering "state = arg(0, name=state)" at C:\Users\User\anaconda3\lib\site-packages\umap\utils.py (53)
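No answer was posted here, but two things stand out in the traceback. First, "'Module' object has no attribute 'get_or_insert_function'" is the classic symptom of a numba/llvmlite version mismatch. Second, the frames mix numba loaded from the user site-packages (~\AppData\Roaming\Python\Python38\...) with umap loaded from ~\anaconda3\..., which suggests a stale pip install --user copy of numba is shadowing the one in the conda environment. A minimal diagnostic sketch, under that assumption:
import importlib.util
import llvmlite
import numba

print(numba.__version__, numba.__file__)
print(llvmlite.__version__, llvmlite.__file__)
# find_spec locates umap without importing it (importing is what crashes):
print(importlib.util.find_spec("umap").origin)
If numba resolves to AppData\Roaming while umap resolves to anaconda3, uninstalling the user-site numba and reinstalling matched numba/llvmlite versions into the conda environment is the usual fix.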

AttributeError with: "import umap.umap_ as UMAP"

Using JupyterLab with numba = 0.55.1 and umap-learn = 0.5.2. Does it matter that umap has "pypi" as its channel and numba doesn't? Both are in anaconda3. I've already tried several solutions shown here.
So, with the following code:
import umap.umap_ as UMAP
I get the following errors:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\errors.py in new_error_context(fmt_, *args, **kwargs)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_block(self, block)
234 """
--> 235 Create CPython wrapper(s) around this function (or generator).
236 """
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_inst(self, inst)
379
--> 380 elif isinstance(inst, ir.SetItem):
381 signature = self.fndesc.calltypes[inst]
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_assign(self, ty, inst)
581
--> 582 def cast_result(res):
583 return self.context.cast(self.builder, res,
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in incref(self, typ, val)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\runtime\context.py in incref(self, builder, typ, value)
217 """
--> 218 self._call_incref_decref(builder, typ, value, "NRT_incref")
219
~\AppData\Roaming\Python\Python38\site-packages\numba\core\runtime\context.py in _call_incref_decref(self, builder, typ, value, funcname)
206 mod = builder.module
--> 207 fn = mod.get_or_insert_function(incref_decref_ty, name=funcname)
208 # XXX "nonnull" causes a crash in test_dyn_array: can this
AttributeError: 'Module' object has no attribute 'get_or_insert_function'
During handling of the above exception, another exception occurred:
LoweringError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_12580/4057111716.py in <module>
----> 1 import umap.umap_ as UMAP
~\anaconda3\lib\site-packages\umap\umap_.py in <module>
30 import umap.distances as dist
31
---> 32 import umap.sparse as sparse
33
34 from umap.utils import (
~\anaconda3\lib\site-packages\umap\sparse.py in <module>
10 import numpy as np
11
---> 12 from umap.utils import norm
13
14 locale.setlocale(locale.LC_NUMERIC, "C")
~\anaconda3\lib\site-packages\umap\utils.py in <module>
39
40 #numba.njit("i4(i8[:])")
---> 41 def tau_rand_int(state):
42 """A fast (pseudo)-random number generator.
43
~\AppData\Roaming\Python\Python38\site-packages\numba\core\decorators.py in wrapper(func)
224
225 return wrapper
--> 226
227
228 def generated_jit(function=None, target='cpu', cache=False,
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in compile(self, sig)
977 else:
978 return dict((sig, self.overloads[sig].metadata) for sig in self.signatures)
--> 979
980 def get_function_type(self):
981 """Return unique function type of dispatcher when possible, otherwise
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in compile(self, args, return_type)
139
140 def _get_implementation(self, args, kws):
--> 141 impl = self.py_func(*args, **kws)
142 # Check the generating function and implementation signatures are
143 # compatible, otherwise compiling would fail later.
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in _compile_cached(self, args, return_type)
153 pyparam.kind != implparam.kind or
154 (implparam.default is not implparam.empty and
--> 155 implparam.default != pyparam.default)):
156 ok = False
157 if not ok:
~\AppData\Roaming\Python\Python38\site-packages\numba\core\dispatcher.py in _compile_core(self, args, return_type)
166 '_CompileStats', ('cache_path', 'cache_hits', 'cache_misses'))
167
--> 168
169 class _CompilingCounter(object):
170 """
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in compile_extra(self, func)
426 """The default compiler
427 """
--> 428
429 def define_pipelines(self):
430 # this maintains the objmode fallback behaviour
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in _compile_bytecode(self)
490 pm.add_pass(AnnotateTypes, "annotate types")
491
--> 492 # strip phis
493 pm.add_pass(PreLowerStripPhis, "remove phis nodes")
494
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in _compile_core(self)
469 return pm
470
--> 471 #staticmethod
472 def define_nopython_lowering_pipeline(state, name='nopython_lowering'):
473 pm = PassManager(name)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler.py in _compile_core(self)
460 pm.passes.extend(untyped_passes.passes)
461
--> 462 typed_passes = dpb.define_typed_pipeline(state)
463 pm.passes.extend(typed_passes.passes)
464
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in run(self, state)
341 def dependency_analysis(self):
342 """
--> 343 Computes dependency analysis
344 """
345 deps = dict()
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in run(self, state)
332 raise BaseException("Legacy pass in use")
333 except _EarlyPipelineCompletion as e:
--> 334 raise e
335 except Exception as e:
336 msg = "Failed in %s mode pipeline (step: %s)" % \
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_lock.py in _acquire_compile_lock(*args, **kwargs)
33 def _acquire_compile_lock(*args, **kwargs):
34 with self:
---> 35 return func(*args, **kwargs)
36 return _acquire_compile_lock
37
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in _runPass(self, index, pss, internal_state)
287 mutated |= check(pss.run_initialization, internal_state)
288 with SimpleTimer() as pass_time:
--> 289 mutated |= check(pss.run_pass, internal_state)
290 with SimpleTimer() as finalize_time:
291 mutated |= check(pss.run_finalizer, internal_state)
~\AppData\Roaming\Python\Python38\site-packages\numba\core\compiler_machinery.py in check(func, compiler_state)
260
261 def check(func, compiler_state):
--> 262 mangled = func(compiler_state)
263 if mangled not in (True, False):
264 msg = ("CompilerPass implementations should return True/False. "
~\AppData\Roaming\Python\Python38\site-packages\numba\core\typed_passes.py in run_pass(self, state)
394 else:
395 if isinstance(restype,
--> 396 (types.Optional, types.Generator)):
397 pass
398 else:
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower(self)
136 self.lower_normal_function(self.fndesc)
137 else:
--> 138 self.genlower = self.GeneratorLower(self)
139 self.gentype = self.genlower.gentype
140
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_normal_function(self, fndesc)
190 entry_block_tail = self.lower_function_body()
191
--> 192 # Close tail of entry block
193 self.builder.position_at_end(entry_block_tail)
194 self.builder.branch(self.blkmap[self.firstblk])
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_function_body(self)
219
220 def lower_block(self, block):
--> 221 """
222 Lower the given block.
223 """
~\AppData\Roaming\Python\Python38\site-packages\numba\core\lowering.py in lower_block(self, block)
233 def create_cpython_wrapper(self, release_gil=False):
234 """
--> 235 Create CPython wrapper(s) around this function (or generator).
236 """
237 if self.genlower:
~\anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
129 value = type()
130 try:
--> 131 self.gen.throw(type, value, traceback)
132 except StopIteration as exc:
133 # Suppress StopIteration *unless* it's the same exception that
~\AppData\Roaming\Python\Python38\site-packages\numba\core\errors.py in new_error_context(fmt_, *args, **kwargs)
LoweringError: Failed in nopython mode pipeline (step: native lowering)
'Module' object has no attribute 'get_or_insert_function'
File "..\..\..\anaconda3\lib\site-packages\umap\utils.py", line 53:
def tau_rand_int(state):
<source elided>
"""
state[0] = (((state[0] & 4294967294) << 12) & 0xFFFFFFFF) ^ (
^
During: lowering "state = arg(0, name=state)" at C:\Users\User\anaconda3\lib\site-packages\umap\utils.py (53)
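This second report is the same failure mode as the question above: a numba calling get_or_insert_function against a Module object that no longer provides it, with the traceback again mixing a user-site numba (~\AppData\Roaming\...) and an anaconda3 umap. A hedged cleanup sketch; the llvmlite pin reflects numba 0.55.x's published requirement (llvmlite 0.38.x) rather than anything stated in the post, so verify it against the numba release notes:
pip uninstall numba llvmlite
pip install "numba==0.55.1" "llvmlite==0.38.0" "umap-learn==0.5.2"
Running these from the same interpreter that owns the AppData\Roaming site-packages (i.e. with python -m pip) ensures the stale copy is the one removed.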

Is it possible to use both MLPRegressor() and make_pipeline() within GridSearchCV()?

I'm trying to utilize make_pipeline() from scikit-learn along with GridSearchCV(). The pipeline is simple and only includes two steps, a StandardScaler() and an MLPRegressor(). The GridSearchCV() is also pretty simple, with the slight wrinkle that I'm using TimeSeriesSplit() for cross-validation.
The error I'm getting is as follows:
ValueError: Invalid parameter MLPRegressor for estimator Pipeline(steps=[('standardscaler', StandardScaler()),('mlpregressor', MLPRegressor())]). Check the list of available parameters with estimator.get_params().keys().
Can someone help me understand how to rectify this problem so I can use the make_pipeline() framework with both GridSearchCV() and MLPRegressor()?
from sklearn.neural_network import MLPRegressor
...: from sklearn.preprocessing import StandardScaler
...: from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
...: from sklearn.pipeline import make_pipeline
...: import numpy as np
In [2]: tscv = TimeSeriesSplit(n_splits = 5)
In [3]: pipe = make_pipeline(StandardScaler(), MLPRegressor())
In [4]: param_grid = {'MLPRegressor__hidden_layer_sizes': [(16,16,), (64,64,), (128,128,)],
   ...:               'MLPRegressor__activation': ['identity', 'logistic', 'tanh', 'relu'],
   ...:               'MLPRegressor__solver': ['adam', 'sgd']}
In [5]: grid = GridSearchCV(pipe, param_grid = param_grid, cv = tscv)
In [6]: features = np.random.random([1000,10])
In [7]: target = np.random.normal(0,10,1000)
In [8]: grid.fit(features, target)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-7233f9f2005e> in <module>
----> 1 grid.fit(features, target)
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
839 return results
840
--> 841 self._run_search(evaluate_candidates)
842
843 # multimetric is determined here because in the case of a callable
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1294 def _run_search(self, evaluate_candidates):
1295 """Search all candidates in param_grid"""
-> 1296 evaluate_candidates(ParameterGrid(self.param_grid))
1297
1298
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params, cv, more_results)
793 n_splits, n_candidates, n_candidates * n_splits))
794
--> 795 out = parallel(delayed(_fit_and_score)(clone(base_estimator),
796 X, y,
797 train=train, test=test,
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/joblib/parallel.py in __call__(self, iterable)
1039 # remaining jobs.
1040 self._iterating = False
-> 1041 if self.dispatch_one_batch(iterator):
1042 self._iterating = self._original_iterator is not None
1043
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
857 return False
858 else:
--> 859 self._dispatch(tasks)
860 return True
861
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/joblib/parallel.py in _dispatch(self, batch)
775 with self._lock:
776 job_idx = len(self._jobs)
--> 777 job = self._backend.apply_async(batch, callback=cb)
778 # A job can complete so quickly than its callback is
779 # called before we get here, causing self._jobs to
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/joblib/parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/joblib/parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/utils/fixes.py in __call__(self, *args, **kwargs)
220 def __call__(self, *args, **kwargs):
221 with config_context(**self.config):
--> 222 return self.function(*args, **kwargs)
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
584 cloned_parameters[k] = clone(v, safe=False)
585
--> 586 estimator = estimator.set_params(**cloned_parameters)
587
588 start_time = time.time()
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/pipeline.py in set_params(self, **kwargs)
148 self
149 """
--> 150 self._set_params('steps', **kwargs)
151 return self
152
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/utils/metaestimators.py in _set_params(self, attr, **params)
52 self._replace_estimator(attr, name, params.pop(name))
53 # 3. Step parameters and other initialisation arguments
---> 54 super().set_params(**params)
55 return self
56
~/opt/miniconda3/envs/practice/lib/python3.9/site-packages/sklearn/base.py in set_params(self, **params)
228 key, delim, sub_key = key.partition('__')
229 if key not in valid_params:
--> 230 raise ValueError('Invalid parameter %s for estimator %s. '
231 'Check the list of available parameters '
232 'with `estimator.get_params().keys()`.' %
ValueError: Invalid parameter MLPRegressor for estimator Pipeline(steps=[('standardscaler', StandardScaler()),
('mlpregressor', MLPRegressor())]). Check the list of available parameters with `estimator.get_params().keys()`.
Solution
Yes. Make the pipeline first. Then treat the pipeline as your model and pass it to GridSearchCV.
Your problem is in the param_grid keys (you had the step name mislabeled):
Replace the MLPRegressor__ prefix with mlpregressor__.
The Fix:
The pipeline named_step for the MLPRegressor estimator was mislabeled as MLPRegressor__ in the param_grid; make_pipeline() names each step after the lowercased class name.
Changing the prefix to mlpregressor__ fixes the problem.
You may run and check it in this colab notebook.
# INCORRECT
param_grid = {
    'MLPRegressor__hidden_layer_sizes': [(16, 16,), (64, 64,), (128, 128,)],
    'MLPRegressor__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'MLPRegressor__solver': ['adam', 'sgd'],
}
# CORRECTED
param_grid = {
    'mlpregressor__hidden_layer_sizes': [(16, 16,), (64, 64,), (128, 128,)],
    'mlpregressor__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'mlpregressor__solver': ['adam', 'sgd'],
}
Note
The key to understanding what was wrong here was to observe the last two lines of the error stack, which print the pipeline's actual step names.
ValueError: Invalid parameter MLPRegressor for estimator Pipeline(steps=[('standardscaler', StandardScaler()),
('mlpregressor', MLPRegressor())]). Check the list of available parameters with `estimator.get_params().keys()`.
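A hedged way to discover the correct prefixes yourself, using only the standard scikit-learn API the error message already points at: build the pipeline and list its parameter keys.
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

pipe = make_pipeline(StandardScaler(), MLPRegressor())
# make_pipeline() lowercases each class name to build the step name, so the
# valid grid keys all start with 'mlpregressor__':
print(sorted(k for k in pipe.get_params() if k.startswith('mlpregressor__')))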

UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-23: ordinal not in range(128)

I tried to fit the model but got one weird error.
My setup: Windows 10 (64-bit), Python 3.7.
This is my code:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

clf = LogisticRegression(solver='saga')
param_grid = {
    'C': np.arange(1, 5),
    'penalty': ['l1', 'l2'],
}
search = GridSearchCV(clf, param_grid, n_jobs=-1, cv=5, refit=True, scoring='accuracy')
search.fit(feature_matrix, labels)
And this is the traceback:
---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-13-93a1aa3c1ec2> in <module>
12
13
---> 14 search.fit(feature_matrix, labels)
15
C:\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
71 FutureWarning)
72 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73 return f(**kwargs)
74 return inner_f
75
C:\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
693 verbose=self.verbose)
694 results = {}
--> 695 with parallel:
696 all_candidate_params = []
697 all_out = []
C:\Anaconda3\lib\site-packages\joblib\parallel.py in __enter__(self)
709 def __enter__(self):
710 self._managed_backend = True
--> 711 self._initialize_backend()
712 return self
713
C:\Anaconda3\lib\site-packages\joblib\parallel.py in _initialize_backend(self)
720 try:
721 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
--> 722 **self._backend_args)
723 if self.timeout is not None and not self._backend.supports_timeout:
724 warnings.warn(
C:\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, prefer, require, idle_worker_timeout, **memmappingexecutor_args)
495 n_jobs, timeout=idle_worker_timeout,
496 env=self._prepare_worker_env(n_jobs=n_jobs),
--> 497 context_id=parallel._id, **memmappingexecutor_args)
498 self.parallel = parallel
499 return n_jobs
C:\Anaconda3\lib\site-packages\joblib\executor.py in get_memmapping_executor(n_jobs, **kwargs)
18
19 def get_memmapping_executor(n_jobs, **kwargs):
---> 20 return MemmappingExecutor.get_memmapping_executor(n_jobs, **kwargs)
21
22
C:\Anaconda3\lib\site-packages\joblib\executor.py in get_memmapping_executor(cls, n_jobs, timeout, initializer, initargs, env, temp_folder, context_id, **backend_args)
40 _executor_args = executor_args
41
---> 42 manager = TemporaryResourcesManager(temp_folder)
43
44 # reducers access the temporary folder in which to store temporary
C:\Anaconda3\lib\site-packages\joblib\_memmapping_reducer.py in __init__(self, temp_folder_root, context_id)
529 # exposes exposes too many low-level details.
530 context_id = uuid4().hex
--> 531 self.set_current_context(context_id)
532
533 def set_current_context(self, context_id):
C:\Anaconda3\lib\site-packages\joblib\_memmapping_reducer.py in set_current_context(self, context_id)
533 def set_current_context(self, context_id):
534 self._current_context_id = context_id
--> 535 self.register_new_context(context_id)
536
537 def register_new_context(self, context_id):
C:\Anaconda3\lib\site-packages\joblib\_memmapping_reducer.py in register_new_context(self, context_id)
558 new_folder_name, self._temp_folder_root
559 )
--> 560 self.register_folder_finalizer(new_folder_path, context_id)
561 self._cached_temp_folders[context_id] = new_folder_path
562
C:\Anaconda3\lib\site-packages\joblib\_memmapping_reducer.py in register_folder_finalizer(self, pool_subfolder, context_id)
588 # semaphores and pipes
589 pool_module_name = whichmodule(delete_folder, 'delete_folder')
--> 590 resource_tracker.register(pool_subfolder, "folder")
591
592 def _cleanup():
C:\Anaconda3\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in register(self, name, rtype)
189 '''Register a named resource, and increment its refcount.'''
190 self.ensure_running()
--> 191 self._send('REGISTER', name, rtype)
192
193 def unregister(self, name, rtype):
C:\Anaconda3\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
202
203 def _send(self, cmd, name, rtype):
--> 204 msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii')
205 if len(name) > 512:
206 # posix guarantees that writes to a pipe of less than PIPE_BUF
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-23: ordinal not in range(128)
I tried to fix _send by removing .encode('ascii'), but it didn't help; it generates another error (a bytes-like object is required, not 'str') for the msg variable from the traceback above.
I'm looking for advice about this. Thanks a lot.
Try encoding using utf-8.
msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('utf-8')
These messages usually mean that you're either mixing Unicode strings with 8-bit byte strings, or trying to write Unicode strings to an output file or device that only handles ASCII.
When you do this, Python will usually assume that the 8-bit string contains ASCII data only, and will raise an error if this is not the case.
The best way to avoid this on input is to convert all incoming strings to Unicode, do the processing in Unicode, and then convert back to encoded byte strings on the way out.
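A minimal sketch of that decode-early, encode-late pattern in plain Python (the path below is hypothetical, chosen only because it contains a non-ASCII character):
raw = "C:\\Users\\Müller\\Temp".encode("utf-8")  # bytes arriving at the boundary
text = raw.decode("utf-8")                       # convert to str once, on the way in
msg = "REGISTER:{0}:folder\n".format(text)       # do all processing on str
payload = msg.encode("utf-8")                    # encode once on the way out
# msg.encode("ascii") would raise UnicodeEncodeError here, exactly as in the traceback
That said, hand-editing _send() in site-packages is fragile and gets lost on upgrade; keeping joblib's temp folder on an ASCII-only path (see the answer to the next question) avoids the failing encode without patching anything.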

sklearn error when n_jobs is not 1 using VS Code

I'm trying to fit a support vector machine with GridSearchCV using VS Code; code as below:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
parameters = {'C': (1, 10), 'kernel': ("rbf",), 'gamma': (1, 10, 100, 1000)}
svc = SVC(probability = True)
svc_cv = GridSearchCV(svc, param_grid = parameters, refit = True, n_jobs= -1)
svc_cv.fit(x_train, y_train)
The problem is that when I pass an n_jobs that is not 1 (n_jobs = -1, 2, or anything else), this error occurs:
---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
<ipython-input> in <module>
----> 1 svc_cv.fit(x_train, y_train)
C:\Python38\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
71 FutureWarning)
72 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73 return f(**kwargs)
74 return inner_f
75
C:\Python38\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
693 verbose=self.verbose)
694 results = {}
--> 695 with parallel:
696 all_candidate_params = []
697 all_out = []
C:\Python38\lib\site-packages\joblib\parallel.py in __enter__(self)
709 def __enter__(self):
710 self._managed_backend = True
--> 711 self._initialize_backend()
712 return self
713
C:\Python38\lib\site-packages\joblib\parallel.py in _initialize_backend(self)
719 """Build a process or thread pool and return the number of workers"""
720 try:
--> 721 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
722 **self._backend_args)
723 if self.timeout is not None and not self._backend.supports_timeout:
C:\Python38\lib\site-packages\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, prefer, require, idle_worker_timeout, **memmappingexecutor_args)
490 SequentialBackend(nesting_level=self.nesting_level))
491
--> 492 self._workers = get_memmapping_executor(
493 n_jobs, timeout=idle_worker_timeout,
494 env=self._prepare_worker_env(n_jobs=n_jobs),
C:\Python38\lib\site-packages\joblib\executor.py in get_memmapping_executor(n_jobs, **kwargs)
18
19 def get_memmapping_executor(n_jobs, **kwargs):
---> 20 return MemmappingExecutor.get_memmapping_executor(n_jobs, **kwargs)
21
22
C:\Python38\lib\site-packages\joblib\executor.py in get_memmapping_executor(cls, n_jobs, timeout, initializer, initargs, env, temp_folder, context_id, **backend_args)
40 _executor_args = executor_args
41
---> 42 manager = TemporaryResourcesManager(temp_folder)
43
44 # reducers access the temporary folder in which to store temporary
C:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in __init__(self, temp_folder_root, context_id)
529 # exposes exposes too many low-level details.
530 context_id = uuid4().hex
--> 531 self.set_current_context(context_id)
532
533 def set_current_context(self, context_id):
C:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in set_current_context(self, context_id)
533 def set_current_context(self, context_id):
534 self._current_context_id = context_id
--> 535 self.register_new_context(context_id)
536
537 def register_new_context(self, context_id):
C:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in register_new_context(self, context_id)
558 new_folder_name, self._temp_folder_root
559 )
--> 560 self.register_folder_finalizer(new_folder_path, context_id)
561 self._cached_temp_folders[context_id] = new_folder_path
562
C:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in register_folder_finalizer(self, pool_subfolder, context_id)
588 # semaphores and pipes
589 pool_module_name = whichmodule(delete_folder, 'delete_folder')
--> 590 resource_tracker.register(pool_subfolder, "folder")
591
592 def _cleanup():
C:\Python38\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in register(self, name, rtype)
189 '''Register a named resource, and increment its refcount.'''
190 self.ensure_running()
--> 191 self._send('REGISTER', name, rtype)
192
193 def unregister(self, name, rtype):
C:\Python38\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
202
203 def _send(self, cmd, name, rtype):
--> 204 msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii')
205 if len(name) > 512:
206 # posix guarantees that writes to a pipe of less than PIPE_BUF
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-19: ordinal not in range(128)
However, the same code runs perfectly fine with n_jobs = -1 in a browser-based Jupyter notebook, so I'm wondering what's wrong.
It looks like when you ask for more than one job, sklearn (via joblib) spawns worker processes, and registering their temporary folder fails because what it sends over the pipe to the resource tracker is not pure ASCII. The traceback shows the failing string is the pool subfolder path, so make sure the path that ends up in msg contains only ASCII characters.
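A hedged workaround along those lines, assuming the non-ASCII bytes come from the default temp folder path (for example a Windows user name under %TEMP%): joblib honours the JOBLIB_TEMP_FOLDER environment variable when choosing its memmapping folder, so pointing it at an ASCII-only directory avoids the failing encode. The folder below is hypothetical; create it before running.
import os
os.environ["JOBLIB_TEMP_FOLDER"] = r"C:\joblib_tmp"  # hypothetical ASCII-only folder

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

parameters = {'C': (1, 10), 'kernel': ("rbf",), 'gamma': (1, 10, 100, 1000)}
svc = SVC(probability=True)
svc_cv = GridSearchCV(svc, param_grid=parameters, refit=True, n_jobs=-1)
svc_cv.fit(x_train, y_train)  # x_train / y_train as defined in the question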
