I would like to define my own namespace "http://example.org/" in rdflib, but apparently that can't be done. Can't figure out what is the proper way to do it...
In [1]: import rdflib
INFO:rdflib:RDFLib Version: 4.2.2
In [2]: g = rdflib.Graph()
In [3]: from rdflib import Namespace
In [4]: n1 = Namespace("http://example.org/")
In [5]: u1 = n1['1']
In [6]: u1
Out[6]: rdflib.term.URIRef(u'http://example.org/1')
In [7]: g.bind('ex', n1)
In [8]: g.add((u1, u1, u1)
...: )
In [9]: g.serialize()
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-9-25a09aa9a7b5> in <module>()
----> 1 g.serialize()
/usr/local/lib/python2.7/site-packages/rdflib/graph.pyc in serialize(self, destination, format, base, encoding, **args)
937 if destination is None:
938 stream = BytesIO()
--> 939 serializer.serialize(stream, base=base, encoding=encoding, **args)
940 return stream.getvalue()
941 if hasattr(destination, "write"):
/usr/local/lib/python2.7/site-packages/rdflib/plugins/serializers/rdfxml.pyc in serialize(self, stream, base, encoding, **args)
64 # assert(
65 # namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
---> 66 bindings = list(self.__bindings())
67 bindings.sort()
68
/usr/local/lib/python2.7/site-packages/rdflib/plugins/serializers/rdfxml.pyc in __bindings(self)
31
32 for predicate in set(store.predicates()):
---> 33 prefix, namespace, name = nm.compute_qname(predicate)
34 bindings[prefix] = URIRef(namespace)
35
/usr/local/lib/python2.7/site-packages/rdflib/namespace.pyc in compute_qname(self, uri, generate)
328
329 if not uri in self.__cache:
--> 330 namespace, name = split_uri(uri)
331 namespace = URIRef(namespace)
332 prefix = self.store.prefix(namespace)
/usr/local/lib/python2.7/site-packages/rdflib/namespace.pyc in split_uri(uri)
500 return (ns, ln)
501 break
--> 502 raise Exception("Can't split '%s'" % uri)
Exception: Can't split 'http://example.org/1'
Related
I have been using the ic-brown for my synset similarity, however I have training data now and wish to find similarity measures of words using my own corpus.
I have followed the code from : Create Information content corpora to be used by webnet from a custom dump
and implemented like this :
reader_bnc = nltk.corpus.reader.BNCCorpusReader(root='', fileids ="all_text_train.txt")
bnc_ic = wn.ic(reader_bnc, False, 0.0)
def is_root(synset_x):
    """Return True if ``synset_x`` is its own first root hypernym.

    The check compares the first element returned by
    ``synset_x.root_hypernyms()`` with ``synset_x`` itself, so the argument
    must expose a ``root_hypernyms()`` method that returns a non-empty
    sequence.
    """
    return synset_x.root_hypernyms()[0] == synset_x
def generate_ic_file(IC, output_filename):
    """Write the IC counts held in ``IC`` to ``output_filename``.

    The expected format of IC is a dict
    {'v':defaultdict, 'n':defaultdict, 'a':defaultdict, 'r':defaultdict}
    Only the 'v' and 'n' entries are read. Each entry with a non-zero key
    becomes one line "<key><pos> <value>", with " ROOT" appended when
    ``is_root`` reports the looked-up synset as a root.
    """
    with codecs.open(output_filename, 'w', encoding='utf-8') as out:
        # Header line: hash code of WordNet 3.0.
        out.write("wnver::eOS9lXC6GvMWznF1wkZofDdtbBU"+"\n")
        # Only verbs and nouns are written because those are the only POS
        # tags supported by the wordnet.ic() function.
        for pos in ['v', 'n']:
            for offset, count in IC[pos].items():
                if offset == 0:
                    # Key 0 is never written to the file.
                    continue
                # Synset lookup uses the key zero-padded to 8 digits plus
                # the POS letter.
                synset = wn.of2ss(of="{:08d}".format(offset)+pos)
                ending = " ROOT\n" if is_root(synset) else "\n"
                out.write(str(offset)+pos+" "+str(count)+ending)
    print("Done")
generate_ic_file(bnc_ic, "holmes.dat")
custom_ic = wordnet_ic.ic('holmes.dat')
I am getting this error
OSError Traceback (most recent call last)
<ipython-input-39-488657fa3717> in <module>
----> 1 custom_ic = wordnet_ic.ic('holmes.dat')
~\anaconda3\lib\site-packages\nltk\corpus\reader\wordnet.py in ic(self, icfile)
2068 ic[NOUN] = defaultdict(float)
2069 ic[VERB] = defaultdict(float)
-> 2070 for num, line in enumerate(self.open(icfile)):
2071 if num == 0: # skip the header
2072 continue
~\anaconda3\lib\site-packages\nltk\corpus\reader\api.py in open(self, file)
206 """
207 encoding = self.encoding(file)
--> 208 stream = self._root.join(file).open(encoding)
209 return stream
210
~\anaconda3\lib\site-packages\nltk\data.py in join(self, fileid)
333 def join(self, fileid):
334 _path = os.path.join(self._path, fileid)
--> 335 return FileSystemPathPointer(_path)
336
337 def __repr__(self):
~\anaconda3\lib\site-packages\nltk\compat.py in _decorator(*args, **kwargs)
39 def _decorator(*args, **kwargs):
40 args = (args[0], add_py3_data(args[1])) + args[2:]
---> 41 return init_func(*args, **kwargs)
42
43 return wraps(init_func)(_decorator)
~\anaconda3\lib\site-packages\nltk\data.py in __init__(self, _path)
311 _path = os.path.abspath(_path)
312 if not os.path.exists(_path):
--> 313 raise IOError("No such file or directory: %r" % _path)
314 self._path = _path
315
OSError: No such file or directory: 'C:\\Users\\the-e\\AppData\\Roaming\\nltk_data\\corpora\\wordnet_ic\\holmes.dat'
and I'm not sure where it's going wrong. Any help would be highly appreciated.
Thank you
Trying to Access a Shared Folder using the following code :
# Login credentials (user_name is defined elsewhere).
credentials = Credentials(username=user_name, password="secret")
# Explicit server configuration, needed because autodiscover is disabled below.
config = Configuration(
    server='outlook.office365.com',
    credentials=credentials,
    auth_type=NTLM,
)
# Open the shared mailbox with delegate access. The address must be a regular
# SMTP address with an '@' between local part and domain.
account = Account(
    primary_smtp_address='shared_mail@domain.com',
    credentials=credentials,
    autodiscover=False,
    config=config,
    access_type=DELEGATE,
)
The above three lines of Code work perfectly but we are unable to get the root,
the following code : account.root.tree() or account.root throws the following error:
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\cached_property.py in __get__(self, obj, cls)
68 # check if the value was computed before the lock was acquired
---> 69 return obj_dict[name]
70
KeyError: 'root'
During handling of the above exception, another exception occurred:
ErrorNonExistentMailbox Traceback (most recent call last)
<ipython-input-46-a90a4f76ca21> in <module>
2 logging.basicConfig(level=logging.DEBUG)
3
----> 4 account.root.tree()
~\anaconda3\lib\site-packages\cached_property.py in __get__(self, obj, cls)
71 except KeyError:
72 # if not, do the calculation and release the lock
---> 73 return obj_dict.setdefault(name, self.func(obj))
74
75
~\anaconda3\lib\site-packages\exchangelib\account.py in root(self)
268 #threaded_cached_property
269 def root(self):
--> 270 return Root.get_distinguished(account=self)
271
272 #threaded_cached_property
~\anaconda3\lib\site-packages\exchangelib\folders\roots.py in get_distinguished(cls, account)
107 return cls.resolve(
108 account=account,
--> 109 folder=cls(account=account, name=cls.DISTINGUISHED_FOLDER_ID, is_distinguished=True)
110 )
111 except ErrorFolderNotFound:
~\anaconda3\lib\site-packages\exchangelib\folders\base.py in resolve(cls, account, folder)
485 def resolve(cls, account, folder):
486 # Resolve a single folder
--> 487 folders = list(FolderCollection(account=account, folders=[folder]).resolve())
488 if not folders:
489 raise ErrorFolderNotFound('Could not find folder %r' % folder)
~\anaconda3\lib\site-packages\exchangelib\folders\collections.py in resolve(self)
254 additional_fields = self.get_folder_fields(target_cls=self._get_target_cls(), is_complex=None)
255 for f in self.__class__(account=self.account, folders=resolveable_folders).get_folders(
--> 256 additional_fields=additional_fields
257 ):
258 yield f
~\anaconda3\lib\site-packages\exchangelib\folders\collections.py in get_folders(self, additional_fields)
317 folders=self.folders,
318 additional_fields=additional_fields,
--> 319 shape=ID_ONLY,
320 ):
321 yield f
~\anaconda3\lib\site-packages\exchangelib\services\get_folder.py in call(self, folders, additional_fields, shape)
32 **dict(
33 additional_fields=additional_fields,
---> 34 shape=shape,
35 )
36 )):
~\anaconda3\lib\site-packages\exchangelib\services\common.py in _pool_requests(self, payload_func, items, **kwargs)
538 for i, chunk in enumerate(chunkify(items, self.chunk_size), start=1):
539 log.debug('Processing %s chunk %s containing %s items', self.__class__.__name__, i, len(chunk))
--> 540 for elem in self._get_elements(payload=payload_func(chunk, **kwargs)):
541 yield elem
542
~\anaconda3\lib\site-packages\exchangelib\services\common.py in _get_elements_in_response(self, response)
401 def _get_elements_in_response(self, response):
402 for msg in response:
--> 403 container_or_exc = self._get_element_container(message=msg, name=self.element_container_name)
404 if isinstance(container_or_exc, (bool, Exception)):
405 yield container_or_exc
~\anaconda3\lib\site-packages\exchangelib\services\common.py in _get_element_container(self, message, response_message, name)
360 # rspclass == 'Error', or 'Success' and not 'NoError'
361 try:
--> 362 raise self._get_exception(code=response_code, text=msg_text, msg_xml=msg_xml)
363 except self.ERRORS_TO_CATCH_IN_RESPONSE as e:
364 return e
ErrorNonExistentMailbox: Mailbox does not exist.
The same code seems to be working here : https://medium.com/@theamazingexposure/accessing-shared-mailbox-using-exchangelib-python-f020e71a96ab
Also checked this thread https://github.com/ecederstrand/exchangelib/issues/391 and tried almost all the solutions but facing the same error.
I want to use ggplot2 within Jupyter Notebook. However, when I try to make an R magic cell and introduce a variable, I get an error.
Here is the code (one paragraph indicates one cell):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rpy2
%matplotlib inline
from rpy2.robjects import pandas2ri
pandas2ri.activate()
%load_ext rpy2.ipython
%%R
library(ggplot2)
data = pd.read_csv('train_titanic.csv')
%%R -i data -w 900 -h 480 -u px
With this last cell, I get the following error (incl traceback):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
54 try:
---> 55 od[name] = conversion.py2rpy(values)
56 except Exception as e:
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasseries(obj)
125 if type(x) is not homogeneous_type:
--> 126 raise ValueError('Series can only be of one type, or None.')
127 # TODO: Could this be merged with obj.type.name == 'O' case above ?
ValueError: Series can only be of one type, or None.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
367 try:
--> 368 mv = memoryview(obj)
369 res = cls.from_memoryview(mv)
TypeError: memoryview: a bytes-like object is required, not 'Series'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-14-75e210679e4a> in <module>
----> 1 get_ipython().run_cell_magic('R', '-i data -w 900 -h 480 -u px', '\n\n')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2360 with self.builtin_trap:
2361 args = (magic_arg_s, cell)
-> 2362 result = fn(*args, **kwargs)
2363 return result
2364
</home/morgan/anaconda3/envs/catenv/lib/python3.7/site-packages/decorator.py:decorator-gen-130> in R(self, line, cell, local_ns)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
721 raise NameError("name '%s' is not defined" % input)
722 with localconverter(converter) as cv:
--> 723 ro.r.assign(input, val)
724
725 tmpd = self.setup_graphics(args)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
190 kwargs[r_k] = v
191 return (super(SignatureTranslatedFunction, self)
--> 192 .__call__(*args, **kwargs))
193
194
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in <listcomp>(.0)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
838 '1 positional argument')
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
842 funcname = getattr(func, '__name__', 'singledispatch function')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
59 'The error is: %s'
60 % (name, str(e)))
---> 61 od[name] = StrVector(values)
62
63 return DataFrame(od)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/vectors.py in __init__(self, obj)
382
383 def __init__(self, obj):
--> 384 super().__init__(obj)
385 self._add_rops()
386
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in __init__(self, obj)
286 super().__init__(obj)
287 elif isinstance(obj, collections.abc.Sized):
--> 288 super().__init__(type(self).from_object(obj).__sexp__)
289 else:
290 raise TypeError('The constructor must be called '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
370 except (TypeError, ValueError):
371 try:
--> 372 res = cls.from_iterable(obj)
373 except ValueError:
374 msg = ('The class methods from_memoryview() and '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
26 def _cdata_res_to_rinterface(function):
27 def _(*args, **kwargs):
---> 28 cdata = function(*args, **kwargs)
29 # TODO: test cdata is of the expected CType
30 return _cdata_to_rinterface(cdata)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_iterable(cls, iterable, populate_func)
317 if populate_func is None:
318 cls._populate_r_vector(iterable,
--> 319 r_vector)
320 else:
321 populate_func(iterable, r_vector)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(cls, iterable, r_vector)
300 r_vector,
301 cls._R_SET_VECTOR_ELT,
--> 302 cls._CAST_IN)
303
304 #classmethod
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(iterable, r_vector, set_elt, cast_value)
237 def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
238 for i, v in enumerate(iterable):
--> 239 set_elt(r_vector, i, cast_value(v))
240
241
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _as_charsxp_cdata(x)
430 return x.__sexp__._cdata
431 else:
--> 432 return conversion._str_to_charsxp(x)
433
434
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_charsxp(val)
118 s = rlib.R_NaString
119 else:
--> 120 cchar = _str_to_cchar(val)
121 s = rlib.Rf_mkCharCE(cchar, _CE_UTF8)
122 return s
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_cchar(s, encoding)
97 def _str_to_cchar(s, encoding: str = 'utf-8'):
98 # TODO: use isStrinb and installTrChar
---> 99 b = s.encode(encoding)
100 return ffi.new('char[]', b)
101
AttributeError: 'float' object has no attribute 'encode'
So I find that it is not possible to even start an R magic cell while importing my pandas dataframe object. However, I have tried creating R vectors inside the cell, and find I can plot these using ggplot2 with no issues.
I am using Python 3.7.6, rpy2 3.1.0 and jupyter-notebook 6.0.3, on Ubuntu 18.04.2 LTS under Windows Subsystem for Linux.
The problem is most likely with one (or more) columns having more than one type - therefore it is impossible to transfer the data into an R vector (which can hold only one data type). The traceback may be overwhelming, but here is the relevant part:
ValueError: Series can only be of one type, or None.
Which column is it? Difficult to say without looking at the dataset that you load, but my general solution is to check the types in the columns:
types = data.applymap(type).apply(set)
types[types.apply(len) > 1]
Anything returned by the snippet above would be a candidate culprit. There are many different ways of dealing with the problem, depending on the exact nature of the data. Workarounds that I frequently use include:
calling data = data.infer_objects() - helps if the pandas did not catch up with a dtype change and still stores the data with (suboptimal) Python objects
filling NaN with an empty string or a string constant if you have missing values in a string column (e.g. str_columns = str_columns.fillna(''))
dates.apply(pd.to_datetime, axis=1) if you have datetime objects but the dtype is object
using df.applymap(lambda x: datetime.combine(x, datetime.min.time()) if not isinstance(x, datetime) else x) if you have a mixture of date and datetime objects
In some very rare cases pandas stores the data differently than expected by rpy2 (following certain manipulations); then writing the dataframe to a CSV file and reading it back from disk helps - but this is likely not what you are facing here, as you start from a newly read dataframe.
I just noticed there might be an even simpler reason for the problem. For some reason, pandas2ri requires you to call pandas2ri.activate() after importing it. This solved the problem for me.
I'm parsing a site and the following fails with an error (I'm not able to resolve with Google):
from lxml.html.soupparser import fromstring
# etree.LXML_VERSION = (4, 1, 1, 0)
# www.hbs-info.de /produkte /schweisselemente.html
fromstring(open(r"HBS Schweißelemente.htm").read())
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-caba4799682e> in <module>()
1 from lxml.html.soupparser import fromstring
----> 2 fromstring(open(r"HBS Schweißelemente.htm").read())
\lib\site-packages\lxml\html\soupparser.py in fromstring(data, beautifulsoup, makeelement, **bsargs)
31 used.
32 """
---> 33 return _parse(data, beautifulsoup, makeelement, **bsargs)
34
35
\lib\site-packages\lxml\html\soupparser.py in _parse(source, beautifulsoup, makeelement, **bsargs)
77 bsargs['features'] = 'html.parser' # use Python html parser
78 tree = beautifulsoup(source, **bsargs)
---> 79 root = _convert_tree(tree, makeelement)
80 # from ET: wrap the document in a html root element, if necessary
81 if len(root) == 1 and root[0].tag == "html":
\lib\site-packages\lxml\html\soupparser.py in _convert_tree(beautiful_soup_tree, makeelement)
153 prev = res_root
154 for e in reversed(pre_root):
--> 155 converted = convert_node(e)
156 if converted is not None:
157 prev.addprevious(converted)
\lib\site-packages\lxml\html\soupparser.py in convert_node(bs_node, parent)
214 if handler is None:
215 return None
--> 216 return handler(bs_node, parent)
217
218 def map_attrs(bs_attrs):
\lib\site-packages\lxml\html\soupparser.py in convert_pi(bs_node, parent)
271 # interpreted it as being SGML style (<?as df>). Fix.
272 bs_node = bs_node[:-1]
--> 273 res = etree.ProcessingInstruction(*bs_node.split(' ', 1))
274 if parent is not None:
275 parent.append(res)
src/lxml/etree.pyx in lxml.etree.ProcessingInstruction (src\lxml\etree.c:79300)()
ValueError: Invalid PI name 'b'xml''
What could be the cause?
I am using inbuilt python template library..
from string import Template
feature_names = ['f1','f2']
load_identifiers = ["foo","bar"]
fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$1) ? null : BagToTuple($load_identifier.\$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
But I am getting the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-177-ddbfd32ef884> in <module>()
----> 1 fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$1) ? null : BagToTuple($load_identifier.\$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
<ipython-input-177-ddbfd32ef884> in <lambda>((load_identifier, feature_name))
----> 1 fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$1) ? null : BagToTuple($load_identifier.\$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
/anaconda/lib/python2.7/string.pyc in substitute(self, *args, **kws)
170 raise ValueError('Unrecognized named group in pattern',
171 self.pattern)
--> 172 return self.pattern.sub(convert, self.template)
173
174 def safe_substitute(self, *args, **kws):
/anaconda/lib/python2.7/string.pyc in convert(mo)
167 return self.delimiter
168 if mo.group('invalid') is not None:
--> 169 self._invalid(mo)
170 raise ValueError('Unrecognized named group in pattern',
171 self.pattern)
/anaconda/lib/python2.7/string.pyc in _invalid(self, mo)
144 lineno = len(lines)
145 raise ValueError('Invalid placeholder in string: line %d, col %d' %
--> 146 (lineno, colno))
147
148 def substitute(self, *args, **kws):
ValueError: Invalid placeholder in string: line 1, col 36
OP here.. Answering the question:
There is a literal '$' in the pattern, which needs to be escaped (written as '$$'), because '$var' is used for variable substitution.
So,
fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$$1) ? null : BagToTuple($load_identifier.\$$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
works
No Template import needed.
feature_names = ['f1', 'f2']
load_identifiers = ["foo", "bar"]
# Plain %-formatting leaves the literal "$1" untouched, so no escaping is needed.
ts = "FLATTEN((IsEmpty(%(load_identifier)s.$1) ? null : BagToTuple(%(load_identifier)s.$1))) AS %(feature_name)s"
# One formatted expression per (feature name, load identifier) pair, in order.
fleh = [
    ts % {'feature_name': feature_name, 'load_identifier': load_identifier}
    for feature_name, load_identifier in zip(feature_names, load_identifiers)
]