GeoSeries.to_crs() fails when using geopandas to calculate distance - python

I want to calculate the distance between two lat/lon points using geopandas series.distance and get the result in meters. I know I should define the CRS first, but every time I try to use to_crs() it raises an error: b'no arguments in initialization list', so it seems like the function never works. Can anyone help me with this problem?
def wgs84_to_CGCS2000(df,code):
result=df.to_crs(from_epsg(code))
return result
city=wgs84_to_CGCS2000(city,4549)
kfc=wgs84_to_CGCS2000(kfc,4549)
RuntimeError Traceback (most recent call last)
<ipython-input-42-c0d1c4e6af6a> in <module>
2 result=df.to_crs(from_epsg(code))
3 return result
----> 4 city=wgs84_to_CGCS2000(city,4549)
5 kfc=wgs84_to_CGCS2000(kfc,4549)
<ipython-input-42-c0d1c4e6af6a> in wgs84_to_CGCS2000(df, code)
1 def wgs84_to_CGCS2000(df,code):
----> 2 result=df.to_crs(from_epsg(code))
3 return result
4 city=wgs84_to_CGCS2000(city,4549)
5 kfc=wgs84_to_CGCS2000(kfc,4549)
C:\ProgramData\Anaconda3\lib\site-packages\geopandas\geodataframe.py in to_crs(self, crs, epsg, inplace)
441 else:
442 df = self.copy()
--> 443 geom = df.geometry.to_crs(crs=crs, epsg=epsg)
444 df.geometry = geom
445 df.crs = geom.crs
C:\ProgramData\Anaconda3\lib\site-packages\geopandas\geoseries.py in to_crs(self, crs, epsg)
302 except TypeError:
303 raise TypeError('Must set either crs or epsg for output.')
--> 304 proj_in = pyproj.Proj(self.crs, preserve_units=True)
305 proj_out = pyproj.Proj(crs, preserve_units=True)
306 project = partial(pyproj.transform, proj_in, proj_out)
C:\ProgramData\Anaconda3\lib\site-packages\pyproj\__init__.py in __new__(self, projparams, preserve_units, **kwargs)
360 # on case-insensitive filesystems).
361 projstring = projstring.replace('EPSG','epsg')
--> 362 return _proj.Proj.__new__(self, projstring)
363
364 def __call__(self, *args, **kw):
_proj.pyx in _proj.Proj.__cinit__()
RuntimeError: b'no arguments in initialization list'

Related

Reading GML file with GeoPandas raises CPLE_AppDefinedError

I am trying to read a GML file using the following code:
import geopandas as gpd
G = gpd.read_file('extract/top10nl_gebouw.gml', driver='gml')
But the following error is raised:
---------------------------------------------------------------------------
CPLE_AppDefinedError Traceback (most recent call last)
Input In [4], in <cell line: 3>()
1 import geopandas as gpd
----> 3 G = gpd.read_file('extract/top10nl_gebouw.gml', driver='gml')
File ~/opt/anaconda3/lib/python3.9/site-packages/geopandas/io/file.py:253, in _read_file(filename, bbox, mask, rows, engine, **kwargs)
250 path_or_bytes = filename
252 if engine == "fiona":
--> 253 return _read_file_fiona(
254 path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
255 )
256 elif engine == "pyogrio":
257 return _read_file_pyogrio(
258 path_or_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
259 )
File ~/opt/anaconda3/lib/python3.9/site-packages/geopandas/io/file.py:301, in _read_file_fiona(path_or_bytes, from_bytes, bbox, mask, rows, **kwargs)
293 with fiona_env():
294 with reader(path_or_bytes, **kwargs) as features:
295
296 # In a future Fiona release the crs attribute of features will
297 # no longer be a dict, but will behave like a dict. So this should
298 # be forwards compatible
299 crs = (
300 features.crs["init"]
--> 301 if features.crs and "init" in features.crs
302 else features.crs_wkt
303 )
305 # handle loading the bounding box
306 if bbox is not None:
File ~/opt/anaconda3/lib/python3.9/site-packages/fiona/collection.py:215, in Collection.crs(self)
213 """Returns a Proj4 string."""
214 if self._crs is None and self.session:
--> 215 self._crs = self.session.get_crs()
216 return self._crs
File fiona/ogrext.pyx:742, in fiona.ogrext.Session.get_crs()
File fiona/_err.pyx:291, in fiona._err.exc_wrap_pointer()
CPLE_AppDefinedError: Cannot import 28992 due to ALLOW_FILE_ACCESS=NO
I have tried installing and uninstalling different versions of GeoPandas and Fiona, but nothing seems to fix the issue. My colleague who runs the exact same code (with Fiona 1.8.6) is not experiencing any issues.

How to plot SymPy's built-in functions using sympy.plotting

Hey, I am pretty new to SymPy. I am trying to use SymPy's built-in plotting to plot some functions. My code is as follows:
from sympy import Symbol, sin, Function, pi,exp
from sympy.plotting import plot
t = Symbol('t')
source = t**2
plot(source,(t,0,1))
This works perfectly. However, notice that I have also imported functions such as sin and exp above. I am finding that SymPy will not plot anything involving these functions. For example,
source1 = exp(t)
source2= sin(t)
plot(source1,(t,0,1))
plot(source2,(t,0,1))
Neither of these will work. The error I received is as follows, using the case with sin as an example:
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/sympy/plotting/experimental_lambdify.py in __call__(self, args, kwargs)
194 #The result can be sympy.Float. Hence wrap it with complex type.
--> 195 result = complex(self.lambda_func(args))
196 if abs(result.imag) > 1e-7 * abs(result):
TypeError: complex() first argument must be a string or a number, not 'FunctionClass'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-45-85c4f08c25e4> in <module>
1 f0 = 15
2 source = sin(t)
----> 3 plot(exp(t),(t,0,1))
/usr/local/lib/python3.6/dist-packages/sympy/plotting/plot.py in plot(*args, **kwargs)
1556 plots = Plot(*series, **kwargs)
1557 if show:
-> 1558 plots.show()
1559 return plots
1560
/usr/local/lib/python3.6/dist-packages/sympy/plotting/plot.py in show(self)
189 self._backend.close()
190 self._backend = self.backend(self)
--> 191 self._backend.show()
192
193 def save(self, path):
/usr/local/lib/python3.6/dist-packages/sympy/plotting/plot.py in show(self)
1246
1247 def show(self):
-> 1248 self.process_series()
1249 #TODO after fixing https://github.com/ipython/ipython/issues/1255
1250 # you can uncomment the next line and remove the pyplot.show() call
/usr/local/lib/python3.6/dist-packages/sympy/plotting/plot.py in process_series(self)
1243 if isinstance(self.parent, PlotGrid):
1244 parent = self.parent.args[i]
-> 1245 self._process_series(series, ax, parent)
1246
1247 def show(self):
/usr/local/lib/python3.6/dist-packages/sympy/plotting/plot.py in _process_series(self, series, ax, parent)
1072 # Create the collections
1073 if s.is_2Dline:
-> 1074 collection = self.LineCollection(s.get_segments())
1075 ax.add_collection(collection)
1076 elif s.is_contour:
/usr/local/lib/python3.6/dist-packages/sympy/plotting/plot.py in get_segments(self)
661 list_segments.append([p, q])
662
--> 663 f_start = f(self.start)
664 f_end = f(self.end)
665 sample(np.array([self.start, f_start]),
/usr/local/lib/python3.6/dist-packages/sympy/plotting/experimental_lambdify.py in __call__(self, args, kwargs)
234 ' problematic. We are trying a failback method'
235 ' that may still work. Please report this as a bug.')
--> 236 if abs(result.imag) > 1e-7 * abs(result):
237 return None
238 else:
AttributeError: type object 'exp' has no attribute 'imag'
This might be something trivial, but as a beginner I am having quite a hard time figuring these out. Really appreciate your help!!

Error when using pandas dataframe in R cell, in rpy2, Jupyter Notebook

I want to use ggplot2 within Jupyter Notebook. However, when I try to make an R magic cell and introduce a variable, I get an error.
Here is the code (one paragraph indicates one cell):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rpy2
%matplotlib inline
from rpy2.robjects import pandas2ri
pandas2ri.activate()
%load_ext rpy2.ipython
%%R
library(ggplot2)
data = pd.read_csv('train_titanic.csv')
%%R -i data -w 900 -h 480 -u px
With this last cell, I get the following error (incl traceback):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
54 try:
---> 55 od[name] = conversion.py2rpy(values)
56 except Exception as e:
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasseries(obj)
125 if type(x) is not homogeneous_type:
--> 126 raise ValueError('Series can only be of one type, or None.')
127 # TODO: Could this be merged with obj.type.name == 'O' case above ?
ValueError: Series can only be of one type, or None.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
367 try:
--> 368 mv = memoryview(obj)
369 res = cls.from_memoryview(mv)
TypeError: memoryview: a bytes-like object is required, not 'Series'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-14-75e210679e4a> in <module>
----> 1 get_ipython().run_cell_magic('R', '-i data -w 900 -h 480 -u px', '\n\n')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2360 with self.builtin_trap:
2361 args = (magic_arg_s, cell)
-> 2362 result = fn(*args, **kwargs)
2363 return result
2364
</home/morgan/anaconda3/envs/catenv/lib/python3.7/site-packages/decorator.py:decorator-gen-130> in R(self, line, cell, local_ns)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
721 raise NameError("name '%s' is not defined" % input)
722 with localconverter(converter) as cv:
--> 723 ro.r.assign(input, val)
724
725 tmpd = self.setup_graphics(args)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
190 kwargs[r_k] = v
191 return (super(SignatureTranslatedFunction, self)
--> 192 .__call__(*args, **kwargs))
193
194
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in <listcomp>(.0)
111
112 def __call__(self, *args, **kwargs):
--> 113 new_args = [conversion.py2rpy(a) for a in args]
114 new_kwargs = {}
115 for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
838 '1 positional argument')
839
--> 840 return dispatch(args[0].__class__)(*args, **kw)
841
842 funcname = getattr(func, '__name__', 'singledispatch function')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
59 'The error is: %s'
60 % (name, str(e)))
---> 61 od[name] = StrVector(values)
62
63 return DataFrame(od)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/vectors.py in __init__(self, obj)
382
383 def __init__(self, obj):
--> 384 super().__init__(obj)
385 self._add_rops()
386
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in __init__(self, obj)
286 super().__init__(obj)
287 elif isinstance(obj, collections.abc.Sized):
--> 288 super().__init__(type(self).from_object(obj).__sexp__)
289 else:
290 raise TypeError('The constructor must be called '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
370 except (TypeError, ValueError):
371 try:
--> 372 res = cls.from_iterable(obj)
373 except ValueError:
374 msg = ('The class methods from_memoryview() and '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
26 def _cdata_res_to_rinterface(function):
27 def _(*args, **kwargs):
---> 28 cdata = function(*args, **kwargs)
29 # TODO: test cdata is of the expected CType
30 return _cdata_to_rinterface(cdata)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_iterable(cls, iterable, populate_func)
317 if populate_func is None:
318 cls._populate_r_vector(iterable,
--> 319 r_vector)
320 else:
321 populate_func(iterable, r_vector)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(cls, iterable, r_vector)
300 r_vector,
301 cls._R_SET_VECTOR_ELT,
--> 302 cls._CAST_IN)
303
304 #classmethod
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(iterable, r_vector, set_elt, cast_value)
237 def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
238 for i, v in enumerate(iterable):
--> 239 set_elt(r_vector, i, cast_value(v))
240
241
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _as_charsxp_cdata(x)
430 return x.__sexp__._cdata
431 else:
--> 432 return conversion._str_to_charsxp(x)
433
434
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_charsxp(val)
118 s = rlib.R_NaString
119 else:
--> 120 cchar = _str_to_cchar(val)
121 s = rlib.Rf_mkCharCE(cchar, _CE_UTF8)
122 return s
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_cchar(s, encoding)
97 def _str_to_cchar(s, encoding: str = 'utf-8'):
98 # TODO: use isStrinb and installTrChar
---> 99 b = s.encode(encoding)
100 return ffi.new('char[]', b)
101
AttributeError: 'float' object has no attribute 'encode'
So I find that it is not possible to even start an R magic cell while importing my pandas dataframe object. However, I have tried creating R vectors inside the cell, and find I can plot these using ggplot2 with no issues.
I am using Python 3.7.6, rpy2 3.1.0, and jupyter-notebook 6.0.3, on Ubuntu 18.04.2 LTS running under Windows Subsystem for Linux.
The problem is most likely with one (or more) columns having more than one type - therefore it is impossible to transfer the data into an R vector (which can hold only one data type). The traceback may be overwhelming, but here is the relevant part:
ValueError: Series can only be of one type, or None.
Which column it is? Difficult to say without looking at the dataset that you load, but my general solution is to check the types in the columns:
types = data.applymap(type).apply(set)
types[types.apply(len) > 1]
Anything returned by the snippet above would be a candidate culprit. There are many different ways of dealing with the problem, depending on the exact nature of the data. Workarounds that I frequently use include:
calling data = data.infer_objects() - helps if pandas did not catch up with a dtype change and still stores the data as (suboptimal) Python objects
filling NaN with an empty string or a string constant if you have missing values in a string column (e.g. str_columns = str_columns.fillna(''))
dates.apply(pd.to_datetime, axis=1) if you have datetime objects but the dtype is object
using df.applymap(lambda x: datetime.combine(x, datetime.min.time()) if not isinstance(x, datetime) else x) if you have a mixture of date and datetime objects
In some very rare cases pandas stores the data differently than expected by rpy2 (following certain manipulations); then writing the dataframe down to a csv file and reading it from the disk again helps - but this is likely not what you are facing here, as you start from a newly read dataframe.
I just noticed there might be an even simpler reason for the problem. For some reason, pandas2ri requires you to call pandas2ri.activate() after importing it. This solved the problem for me.

PoissonZiGMLE : predict not implemented?

I successfully run the function for the zero inflated Poisson model:
(Successfully = it seems to converge, when I print the summary)
PoissonZiGMLE:
PZI = PoissonZiGMLE(df_zip['obs'],Xmat,offset=df_zip['offsetv'])
result = PZI.fit(maxiter = 1000)
print result.summary()
however when I try:
result.predict(df_zip, offset=offsetv)
I get this error:
--------------------------------------------------------------------------- NotImplementedError Traceback (most recent call
last)
in ()
----> 1 result.predict(df_zip, offset=offsetv)
/software/centos6/x86_64/canopy-1.7.4/Canopy_64bit/User/lib/python2.7/site-packages/statsmodels/base/model.py
in predict(self, exog, transform, *args, **kwargs)
747 exog = np.atleast_2d(exog) # needed in count model shape1
748
--> 749 return self.model.predict(self.params, exog, *args, **kwargs)
750
751
/software/centos6/x86_64/canopy-1.7.4/Canopy_64bit/User/lib/python2.7/site-packages/statsmodels/base/model.py
in predict(self, params, exog, *args, **kwargs)
175 This is a placeholder intended to be overwritten by individual models.
176 """
--> 177 raise NotImplementedError
178
179
NotImplementedError:
Before submitting an issue on GitHub, I was wondering if anyone has used PoissonZiGMLE and has any insight on how I can bypass the predict function if it is not implemented.

Error when trying to apply log method to pandas data frame column in Python

So, I am very new to Python and Pandas (and programming in general), but am having trouble with a seemingly simple function. So I created the following dataframe using data pulled with a SQL query (if you need to see the SQL query, let me know and I'll paste it)
spydata = pd.DataFrame(row,columns=['date','ticker','close', 'iv1m', 'iv3m'])
tickerlist = unique(spydata[spydata['date'] == '2013-05-31'])
After that, I have written a function to create some new columns in the dataframe using the data already held in it:
def demean(arr):
arr['retlog'] = log(arr['close']/arr['close'].shift(1))
arr['10dvol'] = sqrt(252)*sqrt(pd.rolling_std(arr['ret'] , 10 ))
arr['60dvol'] = sqrt(252)*sqrt(pd.rolling_std(arr['ret'] , 10 ))
arr['90dvol'] = sqrt(252)*sqrt(pd.rolling_std(arr['ret'] , 10 ))
arr['1060rat'] = arr['10dvol']/arr['60dvol']
arr['1090rat'] = arr['10dvol']/arr['90dvol']
arr['60dis'] = (arr['1060rat'] - arr['1060rat'].mean())/arr['1060rat'].std()
arr['90dis'] = (arr['1090rat'] - arr['1090rat'].mean())/arr['1090rat'].std()
return arr
The only part that I'm having a problem with is the first line of the function:
arr['retlog'] = log(arr['close']/arr['close'].shift(1))
Which, when I run, with this command, I get an error:
result = spydata.groupby(['ticker']).apply(demean)
Error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-196-4a66225e12ea> in <module>()
----> 1 result = spydata.groupby(['ticker']).apply(demean)
2 results2 = result[result.date == result.date.max()]
3
C:\Python27\lib\site-packages\pandas-0.11.0-py2.7-win32.egg\pandas\core\groupby.pyc in apply(self, func, *args, **kwargs)
323 func = _intercept_function(func)
324 f = lambda g: func(g, *args, **kwargs)
--> 325 return self._python_apply_general(f)
326
327 def _python_apply_general(self, f):
C:\Python27\lib\site-packages\pandas-0.11.0-py2.7-win32.egg\pandas\core\groupby.pyc in _python_apply_general(self, f)
326
327 def _python_apply_general(self, f):
--> 328 keys, values, mutated = self.grouper.apply(f, self.obj, self.axis)
329
330 return self._wrap_applied_output(keys, values,
C:\Python27\lib\site-packages\pandas-0.11.0-py2.7-win32.egg\pandas\core\groupby.pyc in apply(self, f, data, axis, keep_internal)
632 # group might be modified
633 group_axes = _get_axes(group)
--> 634 res = f(group)
635 if not _is_indexed_like(res, group_axes):
636 mutated = True
C:\Python27\lib\site-packages\pandas-0.11.0-py2.7-win32.egg\pandas\core\groupby.pyc in <lambda>(g)
322 """
323 func = _intercept_function(func)
--> 324 f = lambda g: func(g, *args, **kwargs)
325 return self._python_apply_general(f)
326
<ipython-input-195-47b6faa3f43c> in demean(arr)
1 def demean(arr):
----> 2 arr['retlog'] = log(arr['close']/arr['close'].shift(1))
3 arr['10dvol'] = sqrt(252)*sqrt(pd.rolling_std(arr['ret'] , 10 ))
4 arr['60dvol'] = sqrt(252)*sqrt(pd.rolling_std(arr['ret'] , 10 ))
5 arr['90dvol'] = sqrt(252)*sqrt(pd.rolling_std(arr['ret'] , 10 ))
AttributeError: log
I have tried changing the function to np.log as well as math.log, in which case I get the error
TypeError: only length-1 arrays can be converted to Python scalars
I've tried looking this up, but haven't found anything directly applicable. Any clues?
This happens when the datatype of the column is not numeric. Try
arr['retlog'] = log(arr['close'].astype('float64')/arr['close'].astype('float64').shift(1))
I suspect that the numbers are stored as generic 'object' types, which I know causes log to throw that error. Here is a simple illustration of the problem:
In [15]: np.log(Series([1,2,3,4], dtype='object'))
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-15-25deca6462b7> in <module>()
----> 1 np.log(Series([1,2,3,4], dtype='object'))
AttributeError: log
In [16]: np.log(Series([1,2,3,4], dtype='float64'))
Out[16]:
0 0.000000
1 0.693147
2 1.098612
3 1.386294
dtype: float64
Your attempt with math.log did not work because that function is designed for single numbers (scalars) only, not lists or arrays.
For what it's worth, I think this is a confusing error message; it once stumped me for awhile, anyway. I wonder if it can be improved.

Categories

Resources