Error in NLTK file - python

I have installed Anaconda3-4.2.0 for Windows (64-bit) and nltk-3.2.1. When I run the following code in a Jupyter Notebook:
para = "Hello World. It's good to see you. Thanks for buying this book."
import nltk.data
tokenizer = nltk.data.load('tokenizers/punkt/PY3/english.pickle')
tokenizer.tokenize(para)
I am getting the following error:
OSError Traceback (most recent call last)
<ipython-input-1-a87e01558cc4> in <module>()
1 para = "Hello World. It's good to see you. Thanks for buying this book."
2 import nltk.data
----> 3 tokenizer = nltk.data.load('tokenizers/punkt/PY3/english.pickle')
4 tokenizer.tokenize(para)
C:\Anaconda3\lib\site-packages\nltk\data.py in load(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)
799
800 # Load the resource.
--> 801 opened_resource = _open(resource_url)
802
803 if format == 'raw':
C:\Anaconda3\lib\site-packages\nltk\data.py in _open(resource_url)
917
918 if protocol is None or protocol.lower() == 'nltk':
--> 919 return find(path_, path + ['']).open()
920 elif protocol.lower() == 'file':
921 # urllib might not use mode='rb', so handle this one ourselves:
C:\Anaconda3\lib\site-packages\nltk\data.py in find(resource_name, paths)
607 return GzipFileSystemPathPointer(p)
608 else:
--> 609 return FileSystemPathPointer(p)
610 else:
611 p = os.path.join(path_, url2pathname(zipfile))
C:\Anaconda3\lib\site-packages\nltk\compat.py in _decorator(*args, **kwargs)
559 def _decorator(*args, **kwargs):
560 args = (args[0], add_py3_data(args[1])) + args[2:]
--> 561 return init_func(*args, **kwargs)
562 return wraps(init_func)(_decorator)
563
C:\Anaconda3\lib\site-packages\nltk\data.py in __init__(self, _path)
298 _path = os.path.abspath(_path)
299 if not os.path.exists(_path):
--> 300 raise IOError('No such file or directory: %r' % _path)
301 self._path = _path
302
OSError: No such file or directory: 'C:\\nltk_data\\tokenizers\\punkt\\PY3\\PY3\\english.pickle'
I have downloaded the punkt tokenizer in NLTK. Why am I seeing this error?

It seems the tokenizers/punkt/PY3/english.pickle file does not exist; you need to check that. NLTK can download the pickle file using its download function:
import nltk
nltk.download()
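Building on that, here is a minimal sketch of the whole fix, assuming the punkt data lands in the default C:\nltk_data location. Note that the error path contains PY3 twice: the traceback goes through add_py3_data in nltk\compat.py, which already appends PY3 on Python 3, so the resource path should omit it.
import nltk

# One-time download of the Punkt sentence tokenizer data.
nltk.download('punkt')

# On Python 3, NLTK appends the PY3 subdirectory itself (add_py3_data),
# so leave it out of the resource path to avoid the doubled PY3\PY3.
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
print(tokenizer.tokenize("Hello World. It's good to see you."))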


OSError: dlopen(/opt/anaconda3/lib/python3.8/site-packages/wntr/epanet/Darwin/libepanet22_win32.dylib, 6): image not found

I am trying to use https://wntr.readthedocs.io/. I updated billiard, celery, kombu, and amqp; nothing worked. Please help me resolve this.
OSError Traceback (most recent call last)
<ipython-input-9-6ccee6a8a438> in <module>
1 # Simulate hydraulics
2 sim = wntr.sim.EpanetSimulator(wn)
----> 3 results = sim.run_sim()
/opt/anaconda3/lib/python3.8/site-packages/wntr/sim/epanet.py in run_sim(self, file_prefix, save_hyd, use_hyd, hydfile, version)
94 inpfile = file_prefix + '.inp'
95 self._wn.write_inpfile(inpfile, units=self._wn.options.hydraulic.inpfile_units, version=version)
---> 96 enData = wntr.epanet.toolkit.ENepanet(version=version)
97 rptfile = file_prefix + '.rpt'
98 outfile = file_prefix + '.bin'
/opt/anaconda3/lib/python3.8/site-packages/wntr/epanet/toolkit.py in __init__(self, inpfile, rptfile, binfile, version)
155 except Exception as E1:
156 if lib == libnames[-1]:
--> 157 raise E1
158 pass
159 return
/opt/anaconda3/lib/python3.8/site-packages/wntr/epanet/toolkit.py in __init__(self, inpfile, rptfile, binfile, version)
148 elif sys.platform in ['darwin']:
149 libepanet = resource_filename(epanet_toolkit,'Darwin/lib%s.dylib' % lib)
--> 150 self.ENlib = ctypes.cdll.LoadLibrary(libepanet)
151 else:
152 libepanet = resource_filename(epanet_toolkit,'Linux/lib%s.so' % lib)
/opt/anaconda3/lib/python3.8/ctypes/__init__.py in LoadLibrary(self, name)
457
458 def LoadLibrary(self, name):
--> 459 return self._dlltype(name)
460
461 cdll = LibraryLoader(CDLL)
/opt/anaconda3/lib/python3.8/ctypes/__init__.py in __init__(self, name, mode, handle, use_errno, use_last_error, winmode)
379
380 if handle is None:
--> 381 self._handle = _dlopen(self._name, mode)
382 else:
383 self._handle = handle
OSError: dlopen(/opt/anaconda3/lib/python3.8/site-packages/wntr/epanet/Darwin/libepanet22_win32.dylib, 6): image not found
Everything worked earlier. I am using macOS High Sierra 10.13.6.
I came across the same issue with both the OWA-epanet and epanet-toolkit Python libraries.
I solved it by putting the EPANET .dylib file in ~/lib/.
That way macOS is able to find it without messing with the standard dylib search paths.
The other option is to place it within your virtual environment folder.
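For illustration, a minimal sketch of that copy step, assuming dyld's default fallback search path still includes ~/lib; the source path below is hypothetical, so adjust it to wherever your EPANET dylib actually lives:
import os
import shutil

# Hypothetical source path; point it at your actual EPANET dylib.
src = '/opt/anaconda3/lib/python3.8/site-packages/wntr/epanet/Darwin/libepanet2.dylib'
dst = os.path.expanduser('~/lib')

os.makedirs(dst, exist_ok=True)  # create ~/lib if it does not exist yet
shutil.copy(src, dst)            # dyld falls back to ~/lib when resolving dylibs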

Gensim -- [Errno 2] No such file or directory: 'model.wv'

I've got a question while following the simple gensim tutorial on the gensim website:
>>> from gensim.test.utils import common_texts, get_tmpfile
>>> from gensim.models import Word2Vec
>>>
>>> path = get_tmpfile("word2vec.model")
>>>
>>> model = Word2Vec(common_texts, size=100, window=5, min_count=1, workers=4)
>>> model.save("word2vec.model")
>>> model = Word2Vec.load("word2vec.model")
>>> model.train([["hello", "world"]], total_examples=1, epochs=1)
>>> from gensim.models import KeyedVectors
>>>
>>> path = get_tmpfile("wordvectors.kv")
>>>
And when I tried the following,
>>> model.wv.save(path)
>>> wv = KeyedVectors.load("model.wv", mmap='r')
I got the following error:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-81-eee6865b677b> in <module>
1 path = get_tmpfile('wordvectors.kv')
2 model.wv.save(path)
----> 3 KeyedVectors.load("model.wv",mmap='r')
/anaconda3/lib/python3.7/site-packages/gensim/models/keyedvectors.py in load(cls, fname_or_handle, **kwargs)
210 #classmethod
211 def load(cls, fname_or_handle, **kwargs):
--> 212 return super(BaseKeyedVectors, cls).load(fname_or_handle, **kwargs)
213
214 def similarity(self, entity1, entity2):
/anaconda3/lib/python3.7/site-packages/gensim/utils.py in load(cls, fname, mmap)
420 compress, subname = SaveLoad._adapt_by_suffix(fname)
421
--> 422 obj = unpickle(fname)
423 obj._load_specials(fname, mmap, compress, subname)
424 logger.info("loaded %s", fname)
/anaconda3/lib/python3.7/site-packages/gensim/utils.py in unpickle(fname)
1356
1357 """
-> 1358 with smart_open(fname, 'rb') as f:
1359 # Because of loading from S3 load can't be used (missing readline in smart_open)
1360 if sys.version_info > (3, 0):
/anaconda3/lib/python3.7/site-packages/smart_open/smart_open_lib.py in smart_open(uri, mode, **kw)
179 raise TypeError('mode should be a string')
180
--> 181 fobj = _shortcut_open(uri, mode, **kw)
182 if fobj is not None:
183 return fobj
/anaconda3/lib/python3.7/site-packages/smart_open/smart_open_lib.py in _shortcut_open(uri, mode, **kw)
299 #
300 if six.PY3:
--> 301 return open(parsed_uri.uri_path, mode, buffering=buffering, **open_kwargs)
302 elif not open_kwargs:
303 return open(parsed_uri.uri_path, mode, buffering=buffering)
FileNotFoundError: [Errno 2] No such file or directory: 'model.wv'
Does anyone know the reason for this message? How can I check whether I actually have a 'model.wv' file?
Thank you in advance!
Change it from:
wv = KeyedVectors.load("model.wv", mmap='r')
to:
wv = KeyedVectors.load(path, mmap='r')
You should be loading the file 'wordvectors.kv'.
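To make the point concrete, here is a self-contained sketch using the question's gensim 3.x API (size= became vector_size= in gensim 4): get_tmpfile returns an absolute path in a temp directory, so a bare name like "model.wv" never existed on disk, and save and load must use the same path.
from gensim.models import Word2Vec, KeyedVectors
from gensim.test.utils import common_texts, get_tmpfile

model = Word2Vec(common_texts, size=100, window=5, min_count=1, workers=4)

path = get_tmpfile("wordvectors.kv")    # absolute path in a temp directory
model.wv.save(path)                     # vectors are written to this path...
wv = KeyedVectors.load(path, mmap='r')  # ...so load from the very same path
print(wv.most_similar('computer'))      # 'computer' is in common_texts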

Error with asammdf: "error: unpack requires a buffer of 56 bytes"

I am using the asammdf package to load MDF files. The code below works for only some of my MDF files; many files give me the error below. I am able to open the same files in a desktop app (CANape) with no issues.
from asammdf import MDF
mdf = MDF(r'\\Stnafddco123.us123.corpintra.net\veh_test\Vehicles_Data_and_Truck_Info\APTIV_logger_data\1FUJHLDR8KLXXXXXX_Trip-Detail_2019-07-16 15-18-25.mf4')
mdf
error Traceback (most recent call last)
<ipython-input-16-c9001bcab2a9> in <module>()
1 from asammdf import MDF
----> 2 mdf = MDF(r'\\Stnafddco123.us123.corpintra.net\veh_test\Vehicles_Data_and_Truck_Info\APTIV_logger_data\1FUJHLDR8KLXXXXXX_Trip-Detail_2019-07-16 15-18-25.mf4')
3 mdf
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\mdf.py in __init__(self, name, version, **kwargs)
124 self._mdf = MDF3(name, **kwargs)
125 elif version in MDF4_VERSIONS:
--> 126 self._mdf = MDF4(name, **kwargs)
127 elif version in MDF2_VERSIONS:
128 self._mdf = MDF2(name, **kwargs)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\mdf_v4.py in __init__(self, name, version, **kwargs)
247 self._file = mmap.mmap(x.fileno(), 0, access=mmap.ACCESS_READ)
248 self._from_filelike = False
--> 249 self._read(mapped=True)
250
251 self._file.close()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\mdf_v4.py in _read(self, mapped)
415 # Read channels by walking recursively in the channel group
416 # starting from the first channel
--> 417 self._read_channels(ch_addr, grp, stream, dg_cntr, ch_cntr, mapped=mapped)
418
419 cg_addr = channel_group.next_cg_addr
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\mdf_v4.py in _read_channels(self, ch_addr, grp, stream, dg_cntr, ch_cntr, channel_composition, mapped)
760 at_map=self._attachments_map,
761 use_display_names=self._use_display_names,
--> 762 mapped=mapped,
763 )
764
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\v4_blocks.py in __init__(self, **kwargs)
606 conv = ChannelConversion(
607 raw_bytes=raw_bytes, stream=stream, address=address,
--> 608 mapped=mapped,
609 )
610 cc_map[raw_bytes] = conv
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\v4_blocks.py in __init__(self, **kwargs)
2010 self.min_phy_value,
2011 self.max_phy_value,
-> 2012 ) = v4c.CONVERSION_NONE_INIT_u(block)
2013
2014 elif conv == v4c.CONVERSION_TYPE_LIN:
error: unpack requires a buffer of 56 bytes
Do you see any errors when you open the file with MDFValidator?
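In the meantime, a hedged diagnostic sketch that may help narrow things down: loop over the logger files, try each one with asammdf, and print the ones that fail to parse, so those can be checked in MDFValidator. The folder path is taken from the snippet above; adjust as needed.
from pathlib import Path
from asammdf import MDF

# Folder from the question; point this at your logger-data directory.
folder = Path(r'\\Stnafddco123.us123.corpintra.net\veh_test\Vehicles_Data_and_Truck_Info\APTIV_logger_data')

for f in sorted(folder.glob('*.mf4')):
    try:
        MDF(str(f)).close()      # parse the file, then release it
        print(f'{f.name}: OK')
    except Exception as exc:     # e.g. error: unpack requires a buffer of 56 bytes
        print(f'{f.name}: FAILED -- {exc}')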

Word2Vec error when loading in GoogleNews data

I am following a tutorial here: https://towardsdatascience.com/multi-class-text-classification-model-comparison-and-selection-5eb066197568
I am at the part "Word2vec and Logistic Regression". I have downloaded the "GoogleNews-vectors-negative300.bin.gz" file and I am trying to apply it to my own text data. However, when I get to the following code:
%%time
import gensim
from gensim.models import Word2Vec
wv = gensim.models.KeyedVectors.load_word2vec_format("/data/users/USERS/File_path/classifier/GoogleNews_Embedding/GoogleNews-vectors-negative300.bin.gz", binary=True)
wv.init_sims(replace=True)
I run into the following error:
/data/users/msmith/env/lib64/python3.6/site-packages/smart_open/smart_open_lib.py:398: UserWarning: This function is deprecated, use smart_open.open instead. See the migration notes for details: https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst#migrating-to-the-new-open-function
'See the migration notes for details: %s' % _MIGRATION_NOTES_URL
---------------------------------------------------------------------------
EOFError Traceback (most recent call last)
<timed exec> in <module>
~/env/lib64/python3.6/site-packages/gensim/models/keyedvectors.py in load_word2vec_format(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)
1492 return _load_word2vec_format(
1493 cls, fname, fvocab=fvocab, binary=binary, encoding=encoding, unicode_errors=unicode_errors,
-> 1494 limit=limit, datatype=datatype)
1495
1496 def get_keras_embedding(self, train_embeddings=False):
~/env/lib64/python3.6/site-packages/gensim/models/utils_any2vec.py in _load_word2vec_format(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)
383 with utils.ignore_deprecation_warning():
384 # TODO use frombuffer or something similar
--> 385 weights = fromstring(fin.read(binary_len), dtype=REAL).astype(datatype)
386 add_word(word, weights)
387 else:
/usr/lib64/python3.6/gzip.py in read(self, size)
274 import errno
275 raise OSError(errno.EBADF, "read() on write-only GzipFile object")
--> 276 return self._buffer.read(size)
277
278 def read1(self, size=-1):
/usr/lib64/python3.6/_compression.py in readinto(self, b)
66 def readinto(self, b):
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
70 return len(data)
/usr/lib64/python3.6/gzip.py in read(self, size)
480 break
481 if buf == b"":
--> 482 raise EOFError("Compressed file ended before the "
483 "end-of-stream marker was reached")
484
EOFError: Compressed file ended before the end-of-stream marker was reached
Any idea what's gone wrong / how to overcome this issue?
Thanks in advance!
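For what it's worth, that EOFError is raised by Python's gzip module and means the compressed stream ended mid-file, i.e. the .gz archive is truncated (typically an interrupted download). A minimal check, with a hypothetical shortened path, that decompresses the archive end to end to confirm:
import gzip

# Hypothetical path; use the full path from the snippet above.
path = "GoogleNews-vectors-negative300.bin.gz"

try:
    with gzip.open(path, 'rb') as f:
        while f.read(1 << 20):   # read 1 MiB at a time until EOF
            pass
    print('archive decompresses cleanly')
except EOFError:
    print('archive is truncated -- re-download the file')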

Problems with PLY LEX and YACC

I am trying to run the first part of a simple PLY example, but I encounter a strange error. When I run the following code, it gives me an error regarding lex.lex().
Does anyone know what the problem is?
import ply.lex as lex
tokens = [ 'NAME','NUMBER','PLUS','MINUS','TIMES', 'DIVIDE', 'EQUALS' ]
t_ignore = '\t'
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t
lex.lex() # Build the lexer
This is the error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-e527bd224769> in <module>()
14 return t
15
---> 16 ply.lex.lex() # Build the lexer
c:\python27\lib\site-packages\ply\lex.pyc in lex(module, object, debug, optimize, lextab, reflags, nowarn, outputdir, debuglog, errorlog)
904 linfo.get_all()
905 if not optimize:
--> 906 if linfo.validate_all():
907 raise SyntaxError("Can't build lexer")
908
c:\python27\lib\site-packages\ply\lex.pyc in validate_all(self)
578 self.validate_tokens()
579 self.validate_literals()
--> 580 self.validate_rules()
581 return self.error
582
c:\python27\lib\site-packages\ply\lex.pyc in validate_rules(self)
820
821 for module in self.modules:
--> 822 self.validate_module(module)
823
824 # -----------------------------------------------------------------------------
c:\python27\lib\site-packages\ply\lex.pyc in validate_module(self, module)
831
832 def validate_module(self, module):
--> 833 lines, linen = inspect.getsourcelines(module)
834
835 fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
c:\python27\lib\inspect.pyc in getsourcelines(object)
688 original source file the first line of code was found. An IOError is
689 raised if the source code cannot be retrieved."""
--> 690 lines, lnum = findsource(object)
691
692 if ismodule(object): return lines, 0
c:\python27\lib\inspect.pyc in findsource(object)
524 is raised if the source code cannot be retrieved."""
525
--> 526 file = getfile(object)
527 sourcefile = getsourcefile(object)
528 if not sourcefile and file[:1] + file[-1:] != '<>':
c:\python27\lib\inspect.pyc in getfile(object)
401 if hasattr(object, '__file__'):
402 return object.__file__
--> 403 raise TypeError('{!r} is a built-in module'.format(object))
404 if isclass(object):
405 object = sys.modules.get(object.__module__)
TypeError: <module '__main__' (built-in)> is a built-in module
You are trying to run PLY from some kind of REPL (IPython, at a guess).
That won't work: PLY insists that the grammar be in a module, which means it must live in a file. The error indicates precisely that no source file is associated with the grammar.
It turned out that the issue was that I was running the code in an IPython Notebook, which PLY didn't like for some reason. I saved the code as a regular .py file, ran it from a command prompt, and no errors occurred!
P.S. I'd appreciate it if anyone could elaborate on why the code doesn't run in an IPython Notebook environment.
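On the P.S.: the traceback itself shows why. PLY's validate_module calls inspect.getsourcelines on the module that defines the grammar; in a notebook that module is __main__, which has no source file behind it, so inspect raises the TypeError above. As a sketch, save the token definitions in a regular .py file (name hypothetical, e.g. calclex.py), append a small driver like the one below, and run it from the command line rather than a notebook cell:
# Append to the same .py file as the token definitions above.
lexer = lex.lex()                # builds the lexer from this module's rules
lexer.input('x = 3 + 4 * 10')
for tok in lexer:                # the lexer yields LexToken objects
    print(tok.type, tok.value)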
