I am trying to read a GML file using the following code:
import geopandas as gpd
G = gpd.read_file('extract/top10nl_gebouw.gml', driver='gml')
But the following error is raised:
---------------------------------------------------------------------------
CPLE_AppDefinedError Traceback (most recent call last)
Input In [4], in <cell line: 3>()
1 import geopandas as gpd
----> 3 G = gpd.read_file('extract/top10nl_gebouw.gml', driver='gml')
File ~/opt/anaconda3/lib/python3.9/site-packages/geopandas/io/file.py:253, in _read_file(filename, bbox, mask, rows, engine, **kwargs)
250 path_or_bytes = filename
252 if engine == "fiona":
--> 253 return _read_file_fiona(
254 path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
255 )
256 elif engine == "pyogrio":
257 return _read_file_pyogrio(
258 path_or_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
259 )
File ~/opt/anaconda3/lib/python3.9/site-packages/geopandas/io/file.py:301, in _read_file_fiona(path_or_bytes, from_bytes, bbox, mask, rows, **kwargs)
293 with fiona_env():
294 with reader(path_or_bytes, **kwargs) as features:
295
296 # In a future Fiona release the crs attribute of features will
297 # no longer be a dict, but will behave like a dict. So this should
298 # be forwards compatible
299 crs = (
300 features.crs["init"]
--> 301 if features.crs and "init" in features.crs
302 else features.crs_wkt
303 )
305 # handle loading the bounding box
306 if bbox is not None:
File ~/opt/anaconda3/lib/python3.9/site-packages/fiona/collection.py:215, in Collection.crs(self)
213 """Returns a Proj4 string."""
214 if self._crs is None and self.session:
--> 215 self._crs = self.session.get_crs()
216 return self._crs
File fiona/ogrext.pyx:742, in fiona.ogrext.Session.get_crs()
File fiona/_err.pyx:291, in fiona._err.exc_wrap_pointer()
CPLE_AppDefinedError: Cannot import 28992 due to ALLOW_FILE_ACCESS=NO
I have tried installing and uninstalling different versions of GeoPandas and Fiona, but nothing seems to fix the issue. My colleague who runs the exact same code (with Fiona 1.8.6) is not experiencing any issues.
Related
I'm running into an issue with the causalimpact package not recognizing the date index in my dataframe.
I get the error "20210626 not present in input data index." The full traceback is included below.
ValueError Traceback (most recent call last)
Input In [97], in <cell line: 3>()
1 pre_period = ['20210626','20210628']
2 post_period = ['20210629','20210702']
----> 3 ci = CausalImpact(data, pre_period, post_period)
File ~/homebrew/lib/python3.8/site-packages/causalimpact/main.py:228, in CausalImpact.__init__(self, data, pre_period, post_period, model, alpha, **kwargs)
227 def __init__(self, data, pre_period, post_period, model=None, alpha=0.05, **kwargs):
--> 228 checked_input = self._process_input_data(
229 data, pre_period, post_period, model, alpha, **kwargs
230 )
231 super(CausalImpact, self).__init__(**checked_input)
232 self.model_args = checked_input['model_args']
File ~/homebrew/lib/python3.8/site-packages/causalimpact/main.py:377, in CausalImpact._process_input_data(self, data, pre_period, post_period, model, alpha, **kwargs)
374 raise ValueError('{args} input cannot be empty'.format(
375 args=', '.join(none_args)))
376 processed_data = self._format_input_data(data)
--> 377 pre_data, post_data = self._process_pre_post_data(processed_data, pre_period,
378 post_period)
379 alpha = self._process_alpha(alpha)
380 model_args = self._process_model_args(**kwargs)
File ~/homebrew/lib/python3.8/site-packages/causalimpact/main.py:658, in CausalImpact._process_pre_post_data(self, data, pre_period, post_period)
637 def _process_pre_post_data(self, data, pre_period, post_period):
638 """
639 Checks `pre_period`, `post_period` and returns data sliced accordingly to each
640 period.
(...)
656 ValueError: if pre_period last value is bigger than post intervention period.
657 """
--> 658 checked_pre_period = self._process_period(pre_period, data)
659 checked_post_period = self._process_period(post_period, data)
661 if checked_pre_period[1] > checked_post_period[0]:
File ~/homebrew/lib/python3.8/site-packages/causalimpact/main.py:727, in CausalImpact._process_period(self, period, data)
725 if isinstance(point, pd.Timestamp):
726 point = point.strftime('%Y%m%d')
--> 727 raise ValueError("{point} not present in input data index.".format(
728 point=str(point)
729 )
730 )
731 if isinstance(period[0], str) or isinstance(period[0], pd.Timestamp):
732 period = self._convert_str_period_to_int(period, data)
ValueError: 20210626 not present in input data index.
The code and the sample data are given below. Can anyone help with this?
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
from causalimpact import CausalImpact
data = pd.read_csv('~/datasets/results_covariates.csv',encoding='utf-8')
data.set_index('DT', inplace=True, drop=False)
pre_period = ['20210626','20210628']
post_period = ['20210629','20210702']
ci = CausalImpact(data, pre_period, post_period)
DT Y X1 X2
6/26/21 1016.15 8616.033333 164
6/27/21 1174.983333 18156.85 444
6/28/21 56571.43333 417270.6 11664
6/29/21 64821.75 420466.3167 11322
6/30/21 178269.8 2331084.75 66434
7/1/21 62314.28333 391890.9 11221
7/2/21 141387.3833 1286635.85 35207
Managed to get this working.
causalimpact expects the index to be an int, str or pd.Timestamp, so the DT column had to be converted to datetime after reading it in from my CSV (a database connector in my actual code) and before it is set as the index:
data['DT'] = pd.to_datetime(data['DT'])
I want to use ggplot2 within Jupyter Notebook. However, when I try to make an R magic cell and introduce a variable, I get an error.
Here is the code (one paragraph indicates one cell):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rpy2
%matplotlib inline
from rpy2.robjects import pandas2ri
pandas2ri.activate()
%load_ext rpy2.ipython
%%R
library(ggplot2)
data = pd.read_csv('train_titanic.csv')
%%R -i data -w 900 -h 480 -u px
With this last cell, I get the following error (incl traceback):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
54 try:
---> 55 od[name] = conversion.py2rpy(values)
56 except Exception as e:
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasseries(obj)
125 if type(x) is not homogeneous_type:
--> 126 raise ValueError('Series can only be of one type, or None.')
127 # TODO: Could this be merged with obj.type.name == 'O' case above ?
ValueError: Series can only be of one type, or None.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
367 try:
--> 368 mv = memoryview(obj)
369 res = cls.from_memoryview(mv)
TypeError: memoryview: a bytes-like object is required, not 'Series'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-14-75e210679e4a> in <module>
----> 1 get_ipython().run_cell_magic('R', '-i data -w 900 -h 480 -u px', '\n\n')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2360 with self.builtin_trap:
2361 args = (magic_arg_s, cell)
-> 2362 result = fn(*args, **kwargs)
2363 return result
2364
</home/morgan/anaconda3/envs/catenv/lib/python3.7/site-packages/decorator.py:decorator-gen-130> in R(self, line, cell, local_ns)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
721 raise NameError("name '%s' is not defined" % input)
722 with localconverter(converter) as cv:
--> 723 ro.r.assign(input, val)
724
725 tmpd = self.setup_graphics(args)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
190 kwargs[r_k] = v
191 return (super(SignatureTranslatedFunction, self)
--> 192 .__call__(*args, **kwargs))
193
194
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in <listcomp>(.0)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
838 '1 positional argument')
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
842 funcname = getattr(func, '__name__', 'singledispatch function')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
59 'The error is: %s'
60 % (name, str(e)))
---> 61 od[name] = StrVector(values)
62
63 return DataFrame(od)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/vectors.py in __init__(self, obj)
382
383 def __init__(self, obj):
--> 384 super().__init__(obj)
385 self._add_rops()
386
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in __init__(self, obj)
286 super().__init__(obj)
287 elif isinstance(obj, collections.abc.Sized):
--> 288 super().__init__(type(self).from_object(obj).__sexp__)
289 else:
290 raise TypeError('The constructor must be called '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
370 except (TypeError, ValueError):
371 try:
--> 372 res = cls.from_iterable(obj)
373 except ValueError:
374 msg = ('The class methods from_memoryview() and '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
26 def _cdata_res_to_rinterface(function):
27 def _(*args, **kwargs):
---> 28 cdata = function(*args, **kwargs)
29 # TODO: test cdata is of the expected CType
30 return _cdata_to_rinterface(cdata)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_iterable(cls, iterable, populate_func)
317 if populate_func is None:
318 cls._populate_r_vector(iterable,
--> 319 r_vector)
320 else:
321 populate_func(iterable, r_vector)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(cls, iterable, r_vector)
300 r_vector,
301 cls._R_SET_VECTOR_ELT,
--> 302 cls._CAST_IN)
303
304 #classmethod
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(iterable, r_vector, set_elt, cast_value)
237 def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
238 for i, v in enumerate(iterable):
--> 239 set_elt(r_vector, i, cast_value(v))
240
241
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _as_charsxp_cdata(x)
430 return x.__sexp__._cdata
431 else:
--> 432 return conversion._str_to_charsxp(x)
433
434
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_charsxp(val)
118 s = rlib.R_NaString
119 else:
--> 120 cchar = _str_to_cchar(val)
121 s = rlib.Rf_mkCharCE(cchar, _CE_UTF8)
122 return s
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_cchar(s, encoding)
97 def _str_to_cchar(s, encoding: str = 'utf-8'):
98 # TODO: use isStrinb and installTrChar
---> 99 b = s.encode(encoding)
100 return ffi.new('char[]', b)
101
AttributeError: 'float' object has no attribute 'encode'
So I find that it is not possible to even start an R magic cell while importing my pandas dataframe object. However, I have tried creating R vectors inside the cell, and find I can plot these using ggplot2 with no issues.
I am using Python 3.7.6, rpy2 3.1.0 and jupyter-notebook 6.0.3, running on Ubuntu 18.04.2 LTS on Windows Subsystem for Linux.
The problem is most likely with one (or more) columns having more than one type - therefore it is impossible to transfer the data into an R vector (which can hold only one data type). The traceback may be overwhelming, but here is the relevant part:
ValueError: Series can only be of one type, or None.
Which column is it? It is difficult to say without looking at the dataset that you load, but my general solution is to check the types in the columns:
types = data.applymap(type).apply(set)
types[types.apply(len) > 1]
Anything returned by the snippet above would be a candidate culprit. There are many different ways of dealing with the problem, depending on the exact nature of the data. Workarounds that I frequently use include:
calling data = data.infer_objects() - helps if pandas did not catch up with a dtype change and still stores the data as (suboptimal) Python objects
filling NaN with an empty string or a string constant if you have missing values in a string column (e.g. str_columns = str_columns.fillna(''))
dates.apply(pd.to_datetime, axis=1) if you have datetime objects but the dtype is object
using df.applymap(lambda x: datetime.combine(x, datetime.min.time()) if not isinstance(x, datetime) else x) if you have a mixture of date and datetime objects
In some very rare cases pandas stores the data differently than rpy2 expects (following certain manipulations); then writing the dataframe to a csv file and reading it back from disk helps - but this is likely not what you are facing here, as you start from a newly read dataframe.
I just noticed there might be an even simpler reason for the problem. For some reason, pandas2ri requires you to call pandas2ri.activate() after importing it. This solved the problem for me.
I am using the asammdf package to load MDF files. The code below works for only some of my MDF files; many files give me the error below. I am able to open the same files in CANape with no issues.
Load files in a desktop app (this works)
from asammdf import MDF
mdf = MDF(r'\\Stnafddco123.us123.corpintra.net\veh_test\Vehicles_Data_and_Truck_Info\APTIV_logger_data\1FUJHLDR8KLXXXXXX_Trip-Detail_2019-07-16 15-18-25.mf4')
mdf
error Traceback (most recent call last)
<ipython-input-16-c9001bcab2a9> in <module>()
1 from asammdf import MDF
----> 2 mdf = MDF(r'\\Stnafddco123.us123.corpintra.net\veh_test\Vehicles_Data_and_Truck_Info\APTIV_logger_data\1FUJHLDR8KLXXXXXX_Trip-Detail_2019-07-16 15-18-25.mf4')
3 mdf
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\mdf.py in __init__(self, name, version, **kwargs)
124 self._mdf = MDF3(name, **kwargs)
125 elif version in MDF4_VERSIONS:
--> 126 self._mdf = MDF4(name, **kwargs)
127 elif version in MDF2_VERSIONS:
128 self._mdf = MDF2(name, **kwargs)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\mdf_v4.py in __init__(self, name, version, **kwargs)
247 self._file = mmap.mmap(x.fileno(), 0, access=mmap.ACCESS_READ)
248 self._from_filelike = False
--> 249 self._read(mapped=True)
250
251 self._file.close()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\mdf_v4.py in _read(self, mapped)
415 # Read channels by walking recursively in the channel group
416 # starting from the first channel
--> 417 self._read_channels(ch_addr, grp, stream, dg_cntr, ch_cntr, mapped=mapped)
418
419 cg_addr = channel_group.next_cg_addr
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\mdf_v4.py in _read_channels(self, ch_addr, grp, stream, dg_cntr, ch_cntr, channel_composition, mapped)
760 at_map=self._attachments_map,
761 use_display_names=self._use_display_names,
--> 762 mapped=mapped,
763 )
764
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\v4_blocks.py in __init__(self, **kwargs)
606 conv = ChannelConversion(
607 raw_bytes=raw_bytes, stream=stream, address=address,
--> 608 mapped=mapped,
609 )
610 cc_map[raw_bytes] = conv
~\AppData\Local\Continuum\anaconda3\lib\site-packages\asammdf\blocks\v4_blocks.py in __init__(self, **kwargs)
2010 self.min_phy_value,
2011 self.max_phy_value,
-> 2012 ) = v4c.CONVERSION_NONE_INIT_u(block)
2013
2014 elif conv == v4c.CONVERSION_TYPE_LIN:
error: unpack requires a buffer of 56 bytes
Do you see any errors when you open the file with MDFValidator?
I am following a tutorial here: https://towardsdatascience.com/multi-class-text-classification-model-comparison-and-selection-5eb066197568
I am at the part "Word2vec and Logistic Regression". I have downloaded the "GoogleNews-vectors-negative300.bin.gz" file and I am trying to apply it to my own text data. However, when I get to the following code:
%%time
from gensim.models import Word2Vec
wv = gensim.models.KeyedVectors.load_word2vec_format("/data/users/USERS/File_path/classifier/GoogleNews_Embedding/GoogleNews-vectors-negative300.bin.gz", binary=True)
wv.init_sims(replace=True)
I run into the following error:
/data/users/msmith/env/lib64/python3.6/site-packages/smart_open/smart_open_lib.py:398: UserWarning: This function is deprecated, use smart_open.open instead. See the migration notes for details: https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst#migrating-to-the-new-open-function
'See the migration notes for details: %s' % _MIGRATION_NOTES_URL
---------------------------------------------------------------------------
EOFError Traceback (most recent call last)
<timed exec> in <module>
~/env/lib64/python3.6/site-packages/gensim/models/keyedvectors.py in load_word2vec_format(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)
1492 return _load_word2vec_format(
1493 cls, fname, fvocab=fvocab, binary=binary, encoding=encoding, unicode_errors=unicode_errors,
-> 1494 limit=limit, datatype=datatype)
1495
1496 def get_keras_embedding(self, train_embeddings=False):
~/env/lib64/python3.6/site-packages/gensim/models/utils_any2vec.py in _load_word2vec_format(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)
383 with utils.ignore_deprecation_warning():
384 # TODO use frombuffer or something similar
--> 385 weights = fromstring(fin.read(binary_len), dtype=REAL).astype(datatype)
386 add_word(word, weights)
387 else:
/usr/lib64/python3.6/gzip.py in read(self, size)
274 import errno
275 raise OSError(errno.EBADF, "read() on write-only GzipFile object")
--> 276 return self._buffer.read(size)
277
278 def read1(self, size=-1):
/usr/lib64/python3.6/_compression.py in readinto(self, b)
66 def readinto(self, b):
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
70 return len(data)
/usr/lib64/python3.6/gzip.py in read(self, size)
480 break
481 if buf == b"":
--> 482 raise EOFError("Compressed file ended before the "
483 "end-of-stream marker was reached")
484
EOFError: Compressed file ended before the end-of-stream marker was reached
Any idea what's gone wrong / how to overcome this issue?
Thanks in advance!
I want to calculate the distance between two lat/lon points using geopandas series.distance and get the result in meters. I know I should define the CRS first, but every time I try to use to_crs() it raises the error b'no arguments in initialization list', as if the function never works. Can anyone help me with this problem?
def wgs84_to_CGCS2000(df,code):
result=df.to_crs(from_epsg(code))
return result
city=wgs84_to_CGCS2000(city,4549)
kfc=wgs84_to_CGCS2000(kfc,4549)
RuntimeError Traceback (most recent call last)
<ipython-input-42-c0d1c4e6af6a> in <module>
2 result=df.to_crs(from_epsg(code))
3 return result
----> 4 city=wgs84_to_CGCS2000(city,4549)
5 kfc=wgs84_to_CGCS2000(kfc,4549)
<ipython-input-42-c0d1c4e6af6a> in wgs84_to_CGCS2000(df, code)
1 def wgs84_to_CGCS2000(df,code):
----> 2 result=df.to_crs(from_epsg(code))
3 return result
4 city=wgs84_to_CGCS2000(city,4549)
5 kfc=wgs84_to_CGCS2000(kfc,4549)
C:\ProgramData\Anaconda3\lib\site-packages\geopandas\geodataframe.py in to_crs(self, crs, epsg, inplace)
441 else:
442 df = self.copy()
--> 443 geom = df.geometry.to_crs(crs=crs, epsg=epsg)
444 df.geometry = geom
445 df.crs = geom.crs
C:\ProgramData\Anaconda3\lib\site-packages\geopandas\geoseries.py in to_crs(self, crs, epsg)
302 except TypeError:
303 raise TypeError('Must set either crs or epsg for output.')
--> 304 proj_in = pyproj.Proj(self.crs, preserve_units=True)
305 proj_out = pyproj.Proj(crs, preserve_units=True)
306 project = partial(pyproj.transform, proj_in, proj_out)
C:\ProgramData\Anaconda3\lib\site-packages\pyproj\__init__.py in __new__(self, projparams, preserve_units, **kwargs)
360 # on case-insensitive filesystems).
361 projstring = projstring.replace('EPSG','epsg')
--> 362 return _proj.Proj.__new__(self, projstring)
363
364 def __call__(self, *args, **kw):
_proj.pyx in _proj.Proj.__cinit__()
RuntimeError: b'no arguments in initialization list'