I am using the built-in Python template library (string.Template).
from string import Template
feature_names = ['f1','f2']
load_identifiers = ["foo","bar"]
fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$1) ? null : BagToTuple($load_identifier.\$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
But I am getting the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-177-ddbfd32ef884> in <module>()
----> 1 fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$1) ? null : BagToTuple($load_identifier.\$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
<ipython-input-177-ddbfd32ef884> in <lambda>((load_identifier, feature_name))
----> 1 fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$1) ? null : BagToTuple($load_identifier.\$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
/anaconda/lib/python2.7/string.pyc in substitute(self, *args, **kws)
170 raise ValueError('Unrecognized named group in pattern',
171 self.pattern)
--> 172 return self.pattern.sub(convert, self.template)
173
174 def safe_substitute(self, *args, **kws):
/anaconda/lib/python2.7/string.pyc in convert(mo)
167 return self.delimiter
168 if mo.group('invalid') is not None:
--> 169 self._invalid(mo)
170 raise ValueError('Unrecognized named group in pattern',
171 self.pattern)
/anaconda/lib/python2.7/string.pyc in _invalid(self, mo)
144 lineno = len(lines)
145 raise ValueError('Invalid placeholder in string: line %d, col %d' %
--> 146 (lineno, colno))
147
148 def substitute(self, *args, **kws):
ValueError: Invalid placeholder in string: line 1, col 36
OP here. Answering my own question:
There is a literal '$' in the pattern, which needs to be escaped (as '$$') because '$var' is used for variable substitution.
So,
fleh = map(lambda (load_identifier,feature_name):Template("FLATTEN((IsEmpty($load_identifier.\$$1) ? null : BagToTuple($load_identifier.\$$1))) AS $feature_name ").substitute(load_identifier=load_identifier, feature_name=feature_name), zip(load_identifiers, feature_names))
works
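For clarity, here is a minimal sketch of the escaping rule on its own (toy names, not the original query): Template treats '$$' as a literal dollar sign, so it survives substitution untouched.
from string import Template
# '$name' and '$alias' are placeholders; '$$1' renders as the literal '$1'
t = Template("BagToTuple($name.$$1) AS $alias")
print(t.substitute(name="foo", alias="f1"))
# -> BagToTuple(foo.$1) AS f1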
No Template import needed.
feature_names = ['f1','f2']
load_identifiers = ["foo","bar"]
ts = "FLATTEN((IsEmpty(%(load_identifier)s.$1) ? null : BagToTuple(%(load_identifier)s.$1))) AS %(feature_name)s"
fleh = [ts % d for d in [{'feature_name': fn, 'load_identifier': li}
for fn, li in zip(feature_names, load_identifiers)]]
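For reference, the list this comprehension produces is:
['FLATTEN((IsEmpty(foo.$1) ? null : BagToTuple(foo.$1))) AS f1',
 'FLATTEN((IsEmpty(bar.$1) ? null : BagToTuple(bar.$1))) AS f2']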
I have been using ic-brown for my synset similarity; however, I now have training data and wish to compute similarity measures for words using my own corpus.
I have followed the code from: Create Information content corpora to be used by webnet from a custom dump
and implemented it like this:
import codecs
import nltk
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic
reader_bnc = nltk.corpus.reader.BNCCorpusReader(root='', fileids="all_text_train.txt")
bnc_ic = wn.ic(reader_bnc, False, 0.0)
def is_root(synset_x):
if synset_x.root_hypernyms()[0] == synset_x:
return True
return False
def generate_ic_file(IC, output_filename):
"""Dump in output_filename the IC counts.
The expected format of IC is a dict
{'v':defaultdict, 'n':defaultdict, 'a':defaultdict, 'r':defaultdict}"""
with codecs.open(output_filename, 'w', encoding='utf-8') as fid:
# Hash code of WordNet 3.0
fid.write("wnver::eOS9lXC6GvMWznF1wkZofDdtbBU"+"\n")
# We only stored nouns and verbs because those are the only POS tags
# supported by wordnet.ic() function
for tag_type in ['v', 'n']:#IC:
for key, value in IC[tag_type].items():
if key != 0:
synset_x = wn.of2ss(of="{:08d}".format(key)+tag_type)
if is_root(synset_x):
fid.write(str(key)+tag_type+" "+str(value)+" ROOT\n")
else:
fid.write(str(key)+tag_type+" "+str(value)+"\n")
print("Done")
generate_ic_file(bnc_ic, "holmes.dat")
custom_ic = wordnet_ic.ic('holmes.dat')
I am getting this error:
OSError Traceback (most recent call last)
<ipython-input-39-488657fa3717> in <module>
----> 1 custom_ic = wordnet_ic.ic('holmes.dat')
~\anaconda3\lib\site-packages\nltk\corpus\reader\wordnet.py in ic(self, icfile)
2068 ic[NOUN] = defaultdict(float)
2069 ic[VERB] = defaultdict(float)
-> 2070 for num, line in enumerate(self.open(icfile)):
2071 if num == 0: # skip the header
2072 continue
~\anaconda3\lib\site-packages\nltk\corpus\reader\api.py in open(self, file)
206 """
207 encoding = self.encoding(file)
--> 208 stream = self._root.join(file).open(encoding)
209 return stream
210
~\anaconda3\lib\site-packages\nltk\data.py in join(self, fileid)
333 def join(self, fileid):
334 _path = os.path.join(self._path, fileid)
--> 335 return FileSystemPathPointer(_path)
336
337 def __repr__(self):
~\anaconda3\lib\site-packages\nltk\compat.py in _decorator(*args, **kwargs)
39 def _decorator(*args, **kwargs):
40 args = (args[0], add_py3_data(args[1])) + args[2:]
---> 41 return init_func(*args, **kwargs)
42
43 return wraps(init_func)(_decorator)
~\anaconda3\lib\site-packages\nltk\data.py in __init__(self, _path)
311 _path = os.path.abspath(_path)
312 if not os.path.exists(_path):
--> 313 raise IOError("No such file or directory: %r" % _path)
314 self._path = _path
315
OSError: No such file or directory: 'C:\\Users\\the-e\\AppData\\Roaming\\nltk_data\\corpora\\wordnet_ic\\holmes.dat'
and I'm not sure where it's going wrong. Any help would be highly appreciated.
Thank you
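One hedged reading of the traceback, with a possible workaround (the directory layout below is taken from the error path and is an assumption, not a verified fix): wordnet_ic.ic() resolves its argument relative to the nltk_data/corpora/wordnet_ic corpus directory, while generate_ic_file wrote holmes.dat to the current working directory, so copying the file into that corpus directory should let the loader find it.
import os
import shutil
import nltk
from nltk.corpus import wordnet_ic
# assumption: nltk.data.path[0] is the per-user nltk_data directory shown in the traceback
ic_dir = os.path.join(nltk.data.path[0], 'corpora', 'wordnet_ic')
os.makedirs(ic_dir, exist_ok=True)
shutil.copy('holmes.dat', ic_dir)  # the file written by generate_ic_file above
custom_ic = wordnet_ic.ic('holmes.dat')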
I want to use ggplot2 within Jupyter Notebook. However, when I try to make an R magic cell and introduce a variable, I get an error.
Here is the code (one paragraph indicates one cell):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rpy2
%matplotlib inline
from rpy2.robjects import pandas2ri
pandas2ri.activate()
%load_ext rpy2.ipython
%%R
library(ggplot2)
data = pd.read_csv('train_titanic.csv')
%%R -i data -w 900 -h 480 -u px
With this last cell, I get the following error (incl traceback):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
54 try:
---> 55 od[name] = conversion.py2rpy(values)
56 except Exception as e:
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasseries(obj)
125 if type(x) is not homogeneous_type:
--> 126 raise ValueError('Series can only be of one type, or None.')
127 # TODO: Could this be merged with obj.type.name == 'O' case above ?
ValueError: Series can only be of one type, or None.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
367 try:
--> 368 mv = memoryview(obj)
369 res = cls.from_memoryview(mv)
TypeError: memoryview: a bytes-like object is required, not 'Series'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-14-75e210679e4a> in <module>
----> 1 get_ipython().run_cell_magic('R', '-i data -w 900 -h 480 -u px', '\n\n')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2360 with self.builtin_trap:
2361 args = (magic_arg_s, cell)
-> 2362 result = fn(*args, **kwargs)
2363 return result
2364
</home/morgan/anaconda3/envs/catenv/lib/python3.7/site-packages/decorator.py:decorator-gen-130> in R(self, line, cell, local_ns)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
721 raise NameError("name '%s' is not defined" % input)
722 with localconverter(converter) as cv:
--> 723 ro.r.assign(input, val)
724
725 tmpd = self.setup_graphics(args)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
190 kwargs[r_k] = v
191 return (super(SignatureTranslatedFunction, self)
--> 192 .__call__(*args, **kwargs))
193
194
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in <listcomp>(.0)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
838 '1 positional argument')
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
842 funcname = getattr(func, '__name__', 'singledispatch function')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
59 'The error is: %s'
60 % (name, str(e)))
---> 61 od[name] = StrVector(values)
62
63 return DataFrame(od)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/vectors.py in __init__(self, obj)
382
383 def __init__(self, obj):
--> 384 super().__init__(obj)
385 self._add_rops()
386
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in __init__(self, obj)
286 super().__init__(obj)
287 elif isinstance(obj, collections.abc.Sized):
--> 288 super().__init__(type(self).from_object(obj).__sexp__)
289 else:
290 raise TypeError('The constructor must be called '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
370 except (TypeError, ValueError):
371 try:
--> 372 res = cls.from_iterable(obj)
373 except ValueError:
374 msg = ('The class methods from_memoryview() and '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
26 def _cdata_res_to_rinterface(function):
27 def _(*args, **kwargs):
---> 28 cdata = function(*args, **kwargs)
29 # TODO: test cdata is of the expected CType
30 return _cdata_to_rinterface(cdata)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_iterable(cls, iterable, populate_func)
317 if populate_func is None:
318 cls._populate_r_vector(iterable,
--> 319 r_vector)
320 else:
321 populate_func(iterable, r_vector)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(cls, iterable, r_vector)
300 r_vector,
301 cls._R_SET_VECTOR_ELT,
--> 302 cls._CAST_IN)
303
304 #classmethod
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(iterable, r_vector, set_elt, cast_value)
237 def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
238 for i, v in enumerate(iterable):
--> 239 set_elt(r_vector, i, cast_value(v))
240
241
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _as_charsxp_cdata(x)
430 return x.__sexp__._cdata
431 else:
--> 432 return conversion._str_to_charsxp(x)
433
434
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_charsxp(val)
118 s = rlib.R_NaString
119 else:
--> 120 cchar = _str_to_cchar(val)
121 s = rlib.Rf_mkCharCE(cchar, _CE_UTF8)
122 return s
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_cchar(s, encoding)
97 def _str_to_cchar(s, encoding: str = 'utf-8'):
98 # TODO: use isStrinb and installTrChar
---> 99 b = s.encode(encoding)
100 return ffi.new('char[]', b)
101
AttributeError: 'float' object has no attribute 'encode'
So I find that it is not even possible to start an R magic cell when importing my pandas DataFrame object. However, I have tried creating R vectors inside the cell, and I can plot these using ggplot2 with no issues.
I am using Python 3.7.6, rpy2 3.1.0, and jupyter-notebook 6.0.3, on Ubuntu 18.04.2 LTS under Windows Subsystem for Linux.
The problem is most likely that one (or more) of the columns holds more than one type, which makes it impossible to transfer the data into an R vector (an R vector can hold only one data type). The traceback may be overwhelming, but here is the relevant part:
ValueError: Series can only be of one type, or None.
Which column is it? That is difficult to say without looking at the dataset you load, but my general approach is to check the types in the columns:
types = data.applymap(type).apply(set)
types[types.apply(len) > 1]
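As a toy illustration (hypothetical data, not the Titanic file): a string column containing NaN is exactly the kind of mixed column this check flags, and fillna('') resolves it.
import numpy as np
import pandas as pd
data = pd.DataFrame({'age': [22.0, 38.0, np.nan],
                     'name': ['Braund', np.nan, 'Allen']})
types = data.applymap(type).apply(set)
print(types[types.apply(len) > 1])        # flags 'name': {str, float}, since NaN is a float
data['name'] = data['name'].fillna('')    # now every value in 'name' is a str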
Anything returned by that check is a candidate culprit. There are many different ways of dealing with the problem, depending on the exact nature of the data. Workarounds that I frequently use include:
calling data = data.infer_objects(), which helps if pandas did not catch up with a dtype change and still stores the data as (suboptimal) Python objects
filling NaN with an empty string or a string constant if you have missing values in a string column (e.g. str_columns = str_columns.fillna(''))
dates.apply(pd.to_datetime, axis=1) if you have datetime objects but the dtype is object
using df.applymap(lambda x: datetime.combine(x, datetime.min.time()) if not isinstance(x, datetime) else x) if you have a mixture of date and datetime objects
In some very rare cases pandas stores the data differently than rpy2 expects (after certain manipulations); writing the dataframe to a csv file and reading it back from disk then helps. This is likely not what you are facing here, though, as you start from a freshly read dataframe.
I just noticed there might be an even simpler reason for the problem. For some reason, pandas2ri requires you to call pandas2ri.activate() after importing it. This solved the problem for me.
I'm trying to follow the example in this notebook.
As suggested in this GitHub thread:
I've upped the ulimit to 9999.
I've already converted the csv files to hdf5.
My code fails when trying to open a single hdf5 file into a dataframe:
df = vaex.open('data/chat_history_00.hdf5')
Here's the rest of the code:
import re
import glob
import vaex
import numpy as np
def tryint(s):
try:
return int(s)
except:
return s
def alphanum_key(s):
""" Turn a string into a list of string and number chunks.
"z23a" -> ["z", 23, "a"]
"""
return [ tryint(c) for c in re.split('([0-9]+)', s) ]
hdf5_list = glob.glob('data/*.hdf5')
hdf5_list.sort(key=alphanum_key)
hdf5_list = np.array(hdf5_list)
assert len(hdf5_list) == 11, "Incorrect number of files"
# Check how the single file looks like:
df = vaex.open('data/chat_history_10.hdf5')
df
Error generated:
ERROR:MainThread:vaex:error opening 'data/chat_history_00.hdf5'
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-...> in <module>
1 # Check how the single file looks like:
----> 2 df = vaex.open('data/chat_history_10.hdf5')
3 df
/usr/local/anaconda3/lib/python3.7/site-packages/vaex/__init__.py in open(path, convert, shuffle, copy_index, *args, **kwargs)
207 ds = from_csv(path, copy_index=copy_index, **kwargs)
208 else:
--> 209 ds = vaex.file.open(path, *args, **kwargs)
210 if convert and ds:
211 ds.export_hdf5(filename_hdf5, shuffle=shuffle)
/usr/local/anaconda3/lib/python3.7/site-packages/vaex/file/__init__.py in open(path, *args, **kwargs)
39 break
40 if dataset_class:
---> 41 dataset = dataset_class(path, *args, **kwargs)
42 return dataset
43
/usr/local/anaconda3/lib/python3.7/site-packages/vaex/hdf5/dataset.py in __init__(self, filename, write)
84 self.h5table_root_name = None
85 self._version = 1
---> 86 self._load()
87
88 def write_meta(self):
/usr/local/anaconda3/lib/python3.7/site-packages/vaex/hdf5/dataset.py in _load(self)
182 def _load(self):
183 if "data" in self.h5file:
--> 184 self._load_columns(self.h5file["/data"])
185 self.h5table_root_name = "/data"
186 if "table" in self.h5file:
/usr/local/anaconda3/lib/python3.7/site-packages/vaex/hdf5/dataset.py in _load_columns(self, h5data, first)
348 self.add_column(column_name, self._map_hdf5_array(data, column['mask']))
349 else:
--> 350 self.add_column(column_name, self._map_hdf5_array(data))
351 else:
352 transposed = shape1 < shape[0]
/usr/local/anaconda3/lib/python3.7/site-packages/vaex/dataframe.py in add_column(self, name, f_or_array, dtype)
2929 if len(self) == len(ar):
2930 raise ValueError("Array is of length %s, while the length of the DataFrame is %s due to the filtering, the (unfiltered) length is %s." % (len(ar), len(self), self.length_unfiltered()))
-> 2931 raise ValueError("array is of length %s, while the length of the DataFrame is %s" % (len(ar), self.length_original()))
2932 # assert self.length_unfiltered() == len(data), "columns should be of equal length, length should be %d, while it is %d" % (self.length_unfiltered(), len(data))
2933 valid_name = vaex.utils.find_valid_name(name)
ValueError: array is of length 2578961, while the length of the DataFrame is 6
What does this mean and how do I troubleshoot it? All the files have 6 columns.
EDIT:
Here's how I created the hdf5 file:
pd.read_csv(r'G:/path/to/file/data/chat_history-00.csv').to_hdf(r'data/chat_history_00.hdf5', key='data')
The question has been answered by Jovan of vaex on GitHub:
You should not use pandas .to_hdf if you want to read the data with
vaex in a memory-mapped way. Please see this link for more details.
I used this instead:
# df is the pandas DataFrame read from the csv (as in the EDIT above)
df = pd.read_csv(r'G:/path/to/file/data/chat_history-00.csv')
vdf = vaex.from_pandas(df, copy_index=False)
vdf.export_hdf5('chat_history_00.hdf5')
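A hedged sketch of the same fix applied to all of the files (the glob pattern and output naming are assumptions based on the file names above; as far as I know, vaex.open also accepts a wildcard for opening several hdf5 files as one DataFrame):
import glob
import pandas as pd
import vaex
for csv_path in sorted(glob.glob('data/chat_history-*.csv')):
    # e.g. 'data/chat_history-00.csv' -> 'data/chat_history_00.hdf5'
    hdf5_path = csv_path.replace('.csv', '.hdf5').replace('-', '_')
    vaex.from_pandas(pd.read_csv(csv_path), copy_index=False).export_hdf5(hdf5_path)
# open all exported files as a single memory-mapped DataFrame
df_all = vaex.open('data/chat_history_*.hdf5')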
I'm trying to take an integer column and map discrete values to another column. Basically, if a credit tier is marked 1, 2 or 3, another column maps those to 'no credit state', 'no hit' or 'thin file'; the remaining null values are then filled with 'valid'. However, I keep getting this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-129-926e6625f2b6> in <module>
1 #train.dtypes
----> 2 df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-129-926e6625f2b6> in <lambda>(row)
1 #train.dtypes
----> 2 df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
<ipython-input-126-462888d46184> in discrete_credit(row, variable)
6
7 """
----> 8 score = row[variable].map({1:'no_credit_state', 2:'thin_file', 3:"no_hit"})
9 score = row[score].fillna('valid')
10 score = pd.Categorical(row[score], ['valid', 'no_credit_state','thin_file', 'no_hit'])
AttributeError: ("'numpy.int64' object has no attribute 'map'", 'occurred at index 0')
Here is a code example that is throwing the same error:
import pandas as pd
credit = {'credit_52278':[1,2,3,500,550,600,650,700,750,800,900]
}
df = pd.DataFrame(credit)
def discrete_credit(row, variable):
"""
allows thin files, no hits and no credit scores to float which will then allow the rest of the credit score to be fit \
with a spline
"""
score = row[variable].map({1:'no_credit_state', 2:'thin_file', 3:"no_hit"})
score = row[score].fillna('valid')
score = pd.Categorical(row[score], ['valid', 'no_credit_state','thin_file', 'no_hit'])
return score
df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
map is a Series method, but here it is being called on a scalar value (a numpy.int64): df.apply(..., axis=1) passes your function one row at a time, so row['credit_52278'] is a single element of that row, not a Series.
You could simply do something like:
df['discrete_52278'] = (
df['credit_52278']
.map({
1: 'no_credit_state',
2: 'thin_file',
3: 'no_hit'
})
.fillna('valid')
.astype('category')
)
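If the explicit category list from the original function is still wanted (an optional refinement, not part of the answer above), pd.Categorical can be used instead of astype('category'):
df['discrete_52278'] = pd.Categorical(
    df['credit_52278']
        .map({1: 'no_credit_state', 2: 'thin_file', 3: 'no_hit'})
        .fillna('valid'),
    categories=['valid', 'no_credit_state', 'thin_file', 'no_hit']
)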
I would like to define my own namespace "http://example.org/" in rdflib, but apparently that doesn't work. I can't figure out the proper way to do it...
In [1]: import rdflib
INFO:rdflib:RDFLib Version: 4.2.2
In [2]: g = rdflib.Graph()
In [3]: from rdflib import Namespace
In [4]: n1 = Namespace("http://example.org/")
In [5]: u1 = n1['1']
In [6]: u1
Out[6]: rdflib.term.URIRef(u'http://example.org/1')
In [7]: g.bind('ex', n1)
In [8]: g.add((u1, u1, u1)
...: )
In [9]: g.serialize()
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-9-25a09aa9a7b5> in <module>()
----> 1 g.serialize()
/usr/local/lib/python2.7/site-packages/rdflib/graph.pyc in serialize(self, destination, format, base, encoding, **args)
937 if destination is None:
938 stream = BytesIO()
--> 939 serializer.serialize(stream, base=base, encoding=encoding, **args)
940 return stream.getvalue()
941 if hasattr(destination, "write"):
/usr/local/lib/python2.7/site-packages/rdflib/plugins/serializers/rdfxml.pyc in serialize(self, stream, base, encoding, **args)
64 # assert(
65 # namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
---> 66 bindings = list(self.__bindings())
67 bindings.sort()
68
/usr/local/lib/python2.7/site-packages/rdflib/plugins/serializers/rdfxml.pyc in __bindings(self)
31
32 for predicate in set(store.predicates()):
---> 33 prefix, namespace, name = nm.compute_qname(predicate)
34 bindings[prefix] = URIRef(namespace)
35
/usr/local/lib/python2.7/site-packages/rdflib/namespace.pyc in compute_qname(self, uri, generate)
328
329 if not uri in self.__cache:
--> 330 namespace, name = split_uri(uri)
331 namespace = URIRef(namespace)
332 prefix = self.store.prefix(namespace)
/usr/local/lib/python2.7/site-packages/rdflib/namespace.pyc in split_uri(uri)
500 return (ns, ln)
501 break
--> 502 raise Exception("Can't split '%s'" % uri)
Exception: Can't split 'http://example.org/1'
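One hedged reading of this traceback (the workarounds below are suggestions, not verified against every rdflib version): the default serialization format is RDF/XML, which has to write every predicate as a QName, and the predicate http://example.org/1 has the purely numeric local name '1', which cannot start an XML name, so split_uri() fails. The namespace binding itself is fine; serializing to a line-based format, or using a predicate whose local name is not purely numeric, avoids the split:
# 1) N-Triples writes predicates as full IRIs, so no QName is computed
print(g.serialize(format='nt'))
# 2) or keep RDF/XML but use a predicate with a non-numeric local name (hypothetical 'p1')
g2 = rdflib.Graph()
g2.bind('ex', n1)
g2.add((u1, n1['p1'], u1))
print(g2.serialize())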