Timeout during .getOrCreate() in spark session pyspark - python

I'm trying to create a PySpark program and I'm running into errors related to the .getOrCreate() call when creating the Spark session.
from pyspark.sql import SparkSession
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS
spark = SparkSession.builder.master('local[*]').config("spark.driver.memory", "15g").appName("Spark ML").getOrCreate()
When I execute this block (in an .ipynb notebook in VS Code), it times out in .getOrCreate() with the following error:
Cell In[5], line 5
2 from pyspark.ml.evaluation import RegressionEvaluator
3 from pyspark.ml.recommendation import ALS
----> 5 spark = SparkSession.builder.master('local[*]').config("spark.driver.memory", "15g").appName("Spark ML").getOrCreate()
File c:\Users\----\anaconda3\envs\bigdatas\lib\site-packages\pyspark\sql\session.py:269, in SparkSession.Builder.getOrCreate(self)
267 sparkConf.set(key, value)
268 # This SparkContext may be an existing one.
--> 269 sc = SparkContext.getOrCreate(sparkConf)
270 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
271 # by all sessions.
272 session = SparkSession(sc, options=self._options)
File c:\Users\----\anaconda3\envs\bigdatas\lib\site-packages\pyspark\context.py:483, in SparkContext.getOrCreate(cls, conf)
481 with SparkContext._lock:
482 if SparkContext._active_spark_context is None:
--> 483 SparkContext(conf=conf or SparkConf())
484 assert SparkContext._active_spark_context is not None
485 return SparkContext._active_spark_context
File c:\Users\----\anaconda3\envs\bigdatas\lib\site-packages\pyspark\context.py:195, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls, udf_profiler_cls)
189 if gateway is not None and gateway.gateway_parameters.auth_token is None:
190 raise ValueError(
...
--> 103 time.sleep(0.1)
105 if not os.path.isfile(conn_info_file):
106 raise RuntimeError("Java gateway process exited before sending its port number")
I'm also using a conda environment with Python 3.9.16.
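For what it's worth, the loop the traceback ends in is PySpark waiting for the Java gateway to write its connection info, so a common first check is whether the notebook's environment can actually launch a JVM (for example JAVA_HOME unset, or pointing at an incompatible Java inside the conda environment). A minimal diagnostic sketch, with placeholder paths:
import os
import shutil
import subprocess

# Check that a JVM is reachable from this kernel's environment.
print("JAVA_HOME =", os.environ.get("JAVA_HOME"))   # often unset inside conda environments
print("java on PATH:", shutil.which("java"))

# If java is found, print its version; recent PySpark releases generally
# expect Java 8, 11, or 17 depending on the Spark version.
if shutil.which("java"):
    subprocess.run(["java", "-version"])

# If JAVA_HOME is missing, setting it before calling getOrCreate() sometimes helps.
# The path below is a placeholder, not an actual install location:
# os.environ["JAVA_HOME"] = r"C:\Program Files\Java\jdk-11"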

Related

Accessing dockerized neo4j using neo4j vs py2neo

I have set up Neo4j to run in Docker and exposed the HTTP and Bolt ports (7474, 7687).
This is the setup I used:
docker run \
--name testneo4j \
-p7474:7474 -p7687:7687 \
-d \
-v `pwd`/neo4j/data:/data \
-v `pwd`/neo4j/logs:/logs \
-v `pwd`/import:/var/lib/neo4j/import \
-v `pwd`/neo4j/plugins:/plugins \
--env NEO4J_AUTH=neo4j/XXXXXXX \
I am now trying to connect to the graph database using Python.
Using the py2neo library works fine:
In [1]: from py2neo import Graph
In [2]: graph=Graph('bolt://localhost:7687',user="neo4j", password="XXXXXXX")
...: graph.run('MATCH(x) RETURN COUNT(x)')
COUNT(x)
----------
0
But when I use the neo4j module:
from neo4j import GraphDatabase, TRUST_ALL_CERTIFICATES
trust=TRUST_ALL_CERTIFICATES
neo4j_user="neo4j"
neo4j_passwd="XXXXXXX"
uri="bolt://localhost:7687"
driver = GraphDatabase.driver(uri,
auth=(neo4j_user, neo4j_passwd),
encrypted=False, trust=trust)
I get this error:
File ~/local/anaconda3/lib/python3.8/site-packages/neo4j/__init__.py:120, in GraphDatabase.driver(cls, uri, **config)
114 #classmethod
115 def driver(cls, uri, **config):
116 """ Create a :class:`.Driver` object. Calling this method provides
117 identical functionality to constructing a :class:`.Driver` or
118 :class:`.Driver` subclass instance directly.
119 """
--> 120 return Driver(uri, **config)
File ~/local/anaconda3/lib/python3.8/site-packages/neo4j/__init__.py:161, in Driver.__new__(cls, uri, **config)
159 for subclass in Driver.__subclasses__():
160 if parsed_scheme in subclass.uri_schemes:
--> 161 return subclass(uri, **config)
162 raise ValueError("URI scheme %r not supported" % parsed.scheme)
File ~/local/anaconda3/lib/python3.8/site-packages/neo4j/__init__.py:235, in DirectDriver.__new__(cls, uri, **config)
232 return connect(address, **dict(config, **kwargs))
234 pool = ConnectionPool(connector, instance.address, **config)
--> 235 pool.release(pool.acquire())
236 instance._pool = pool
237 instance._max_retry_time = config.get("max_retry_time", default_config["max_retry_time"])
File ~/local/anaconda3/lib/python3.8/site-packages/neobolt/direct.py:715, in ConnectionPool.acquire(self, access_mode)
714 def acquire(self, access_mode=None):
--> 715 return self.acquire_direct(self.address)
File ~/local/anaconda3/lib/python3.8/site-packages/neobolt/direct.py:608, in AbstractConnectionPool.acquire_direct(self, address)
606 if can_create_new_connection:
607 try:
--> 608 connection = self.connector(address, error_handler=self.connection_error_handler)
609 except ServiceUnavailable:
610 self.remove(address)
File ~/local/anaconda3/lib/python3.8/site-packages/neo4j/__init__.py:232, in DirectDriver.__new__.<locals>.connector(address, **kwargs)
231 def connector(address, **kwargs):
--> 232 return connect(address, **dict(config, **kwargs))
File ~/local/anaconda3/lib/python3.8/site-packages/neobolt/direct.py:972, in connect(address, **config)
970 raise ServiceUnavailable("Failed to resolve addresses for %s" % address)
971 else:
--> 972 raise last_error
File ~/local/anaconda3/lib/python3.8/site-packages/neobolt/direct.py:964, in connect(address, **config)
962 s = _connect(resolved_address, **config)
963 s, der_encoded_server_certificate = _secure(s, host, security_plan.ssl_context, **config)
--> 964 connection = _handshake(s, address, der_encoded_server_certificate, **config)
965 except Exception as error:
966 last_error = error
File ~/local/anaconda3/lib/python3.8/site-packages/neobolt/direct.py:920, in _handshake(s, resolved_address, der_encoded_server_certificate, **config)
918 if agreed_version == 0:
919 log_debug("[#%04X] C: <CLOSE>", local_port)
--> 920 s.shutdown(SHUT_RDWR)
921 s.close()
922 elif agreed_version in (1, 2):
OSError: [Errno 57] Socket is not connected
Does anyone know why the former works but the latter doesn't?
It turns out that the problem was that I was using an older version of the neo4j library (1.7.6). I ran pip install neo4j --upgrade, which brought it to version 5.5, and I am no longer getting any errors.
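For anyone hitting the same thing after upgrading, the connection code also looks slightly different with the 5.x driver: the TRUST_ALL_CERTIFICATES constant from 1.7 is gone, a plain bolt:// URI does not need the trust/encrypted arguments at all, and verify_connectivity() gives a quick sanity check. A minimal sketch against the same container (credentials are placeholders):
from neo4j import GraphDatabase

uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "XXXXXXX"))  # no trust/encrypted args needed for bolt://
driver.verify_connectivity()  # raises if the server cannot be reached

with driver.session() as session:
    result = session.run("MATCH (x) RETURN count(x) AS n")
    print(result.single()["n"])

driver.close()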

Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext

Anyone know why I keep getting this error in Jupyter Notebooks? I've been trying to load my TensorFlow model into Apache Spark via SparkFlow, but I can't seem to figure out how to get past this error. Any help would be much appreciated.
First Jupyter cell:
from sparkflow.graph_utils import build_graph
from sparkflow.tensorflow_async import SparkAsyncDL
import tensorflow as tf
from pyspark.ml.feature import VectorAssembler, OneHotEncoder
from pyspark.ml.pipeline import Pipeline
from pyspark.sql import SparkSession
from tensorflow.keras import layers
from tensorflow.keras import losses
Second Jupyter cell:
def lstm_model(X_train, y_train):
    # Input layer shaped to match the training data
    inputs = tf.keras.Input(shape=(X_train.shape[1], 1))
    # Training layers
    x_1 = layers.LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1))(inputs)
    x_1 = layers.Dropout(0.2)(x_1)
    x_1 = layers.LSTM(units=50, return_sequences=True)(x_1)
    x_1 = layers.Dropout(0.2)(x_1)
    x_1 = layers.LSTM(units=50, return_sequences=True)(x_1)
    x_1 = layers.Dropout(0.2)(x_1)
    x_1 = layers.LSTM(units=50, return_sequences=True)(x_1)
    x_1 = layers.Dropout(0.2)(x_1)
    x_1 = layers.Flatten()(x_1)
    # 1 output neuron for each column prediction
    output = layers.Dense(units=1)(x_1)
    return losses.MeanSquaredError()(y_train, output)
Third Jupyter Cell:
def dataframe_input(pandas_dataframe):
    train_data = pandas_dataframe[self.column_name].values
    # Reshaping to a 2D array
    train_data = train_data.reshape(-1, 1)
    print(train_data.dtype)
    print(type(train_data))
    print(train_data.shape)
    # Feature scaling
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_train_data = scaler.fit_transform(train_data)
    # Initializing the X_train and y_train datasets for each column
    X_train = []
    y_train = []
    # Appending scaled training data to each dataset
    for i in range(self.timesteps, len(train_data)):
        X_train.append(scaled_train_data[i - self.timesteps:i, 0])
        y_train.append(scaled_train_data[i, 0])
    # NumPy array creation; Keras requires NumPy arrays for inputs
    X_train, y_train = np.array(X_train, dtype=int), np.array(y_train)
    print(X_train.shape)
    print(X_train.dtype)
    # Reshaping to a 3D matrix (970, 30, 1)
    # X_train = np.reshape(X_train, (X_train[0], X_train[1], 1))
    print(X_train.shape)
    return X_train, y_train
Fourth Jupyter cell (where I'm getting the error):
# Spark session
# To use the SQL, Hive, and Streaming APIs there is no need to create separate contexts,
# because SparkSession includes all of them.
spark = SparkSession \
    .builder \
    .appName("Python Spark SQL basic example") \
    .getOrCreate()
# Read the CSV into a Spark DataFrame
df = spark.read.option("inferSchema", "true").csv('../csv_test_files/stats.csv')
# Convert the Spark DataFrame into a pandas DataFrame
pandas_dataframe = df.select("*").toPandas()
# Get the input and output data to pass to the model
X_train, y_train = dataframe_input(pandas_dataframe)
Error Output:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-25-5143cc437b69> in <module>
3 spark = SparkSession \
4 .builder \
----> 5 .appName("Python Spark SQL basic example") \
6 .getOrCreate()
7
~/anaconda3/lib/python3.7/site-packages/pyspark/sql/session.py in getOrCreate(self)
171 for key, value in self._options.items():
172 sparkConf.set(key, value)
--> 173 sc = SparkContext.getOrCreate(sparkConf)
174 # This SparkContext may be an existing one.
175 for key, value in self._options.items():
~/anaconda3/lib/python3.7/site-packages/pyspark/context.py in getOrCreate(cls, conf)
365 with SparkContext._lock:
366 if SparkContext._active_spark_context is None:
--> 367 SparkContext(conf=conf or SparkConf())
368 return SparkContext._active_spark_context
369
~/anaconda3/lib/python3.7/site-packages/pyspark/context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
134 try:
135 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
--> 136 conf, jsc, profiler_cls)
137 except:
138 # If an error occurs, clean up in order to allow future SparkContext creation:
~/anaconda3/lib/python3.7/site-packages/pyspark/context.py in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
196
197 # Create the Java SparkContext through Py4J
--> 198 self._jsc = jsc or self._initialize_context(self._conf._jconf)
199 # Reset the SparkConf to the one actually used by the SparkContext in JVM.
200 self._conf = SparkConf(_jconf=self._jsc.sc().conf())
~/anaconda3/lib/python3.7/site-packages/pyspark/context.py in _initialize_context(self, jconf)
304 Initialize SparkContext in function to allow subclass specific initialization
305 """
--> 306 return self._jvm.JavaSparkContext(jconf)
307
308 #classmethod
~/anaconda3/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
1523 answer = self._gateway_client.send_command(command)
1524 return_value = get_return_value(
-> 1525 answer, self._gateway_client, None, self._fqn)
1526
1527 for temp_arg in temp_args:
~/anaconda3/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.net.BindException: Can't assign requested address: Service 'sparkDriver' failed after 16 retries (on a random free port)! Consider explicitly setting the appropriate binding address for the service 'sparkDriver' (for example spark.driver.bindAddress for SparkDriver) to the correct binding address.
at java.base/sun.nio.ch.Net.bind0(Native Method)
at java.base/sun.nio.ch.Net.bind(Net.java:461)
at java.base/sun.nio.ch.Net.bind(Net.java:453)
at java.base/sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:227)
at io.netty.channel.socket.nio.NioServerSocketChannel.doBind(NioServerSocketChannel.java:128)
at io.netty.channel.AbstractChannel$AbstractUnsafe.bind(AbstractChannel.java:558)
at io.netty.channel.DefaultChannelPipeline$HeadContext.bind(DefaultChannelPipeline.java:1283)
at io.netty.channel.AbstractChannelHandlerContext.invokeBind(AbstractChannelHandlerContext.java:501)
at io.netty.channel.AbstractChannelHandlerContext.bind(AbstractChannelHandlerContext.java:486)
at io.netty.channel.DefaultChannelPipeline.bind(DefaultChannelPipeline.java:989)
at io.netty.channel.AbstractChannel.bind(AbstractChannel.java:254)
at io.netty.bootstrap.AbstractBootstrap$2.run(AbstractBootstrap.java:364)
at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:403)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:463)
at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
at java.base/java.lang.Thread.run(Thread.java:834)
It seems like you have too many running SparkSessions. In the default configuration you can only have 16, because by default Spark makes only 16 attempts to bind a free port for its services (here the 'sparkDriver' service).
This could be because you are working on a busy cluster with many users running jobs, or, for example, because you have a lot of Jupyter notebooks with running SparkSessions.
Depending on which resource manager you use, there are different ways to check how many SparkSessions are currently open.
To work around the problem you can also increase the number of retries Spark makes to find an unused port when creating the SparkSession. For this, set the config parameter spark.port.maxRetries to a larger value (see also https://spark.apache.org/docs/latest/configuration.html):
spark = SparkSession.builder.config('spark.port.maxRetries', 100).getOrCreate()
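Since the exception above also explicitly suggests setting spark.driver.bindAddress, another thing to try (if the real problem is the driver failing to bind to the machine's hostname rather than port exhaustion) is pinning the driver to localhost. This is only a sketch and may not apply to every environment:
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName("Python Spark SQL basic example")
         .config("spark.driver.bindAddress", "127.0.0.1")  # address the driver binds to
         .config("spark.driver.host", "127.0.0.1")         # address executors use to reach the driver
         .getOrCreate())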

pyspark Py4J error using Canopy: PythonAccumulatorV2([class java.lang.String, class java.lang.Integer, class java.lang.String]) does not exist

I installed the Canopy IDE on Windows, as well as Python and PySpark. When executing my program, there was a problem with the SparkContext:
import findspark
from pyspark import SparkConf, SparkContext

findspark.init()
conf = SparkConf().setMaster('local').setAppName('MonEssai')
sc = SparkContext.getOrCreate()
lines = sc.textFile("file:///PremiéreEssai/ file9.txt")
fun = lines.flatMap(listsGraph)
results = fun.collect()
for result1 in results:
    if result1:
        if (result1[0].strip().startswith("sub_")) | (result1[0].strip().startswith("start")):
            for k in range(0, len(result1)):
                if result1[k] not in Loc:
                    Loc.append(result1[k])
        else:
            for j in range(0, len(result1)):
                if result1[j] not in Ext:
                    Ext.append(result1[j])
result3 = sc.parallelize(Ext)
ExtSimilarity = result3.map(MatchExt).filter(lambda x: x is not None).collect()
# print(ExtSimilarity)
# print(Loc)
result3 = sc.parallelize(Loc)
result9 = result3.map(pos_debut)
result11 = result9.map(opcode)
VectOpcode = result11.flatMapValues(f).flatMap(lambda X: [(X[0], len(X[1]))]).groupByKey().mapValues(list)
VectOpcode2 = VectOpcode.collect()
And I got the following error:
Py4JError: An error occurred while calling
None.org.apache.spark.api.python.PythonAccumulatorV2. Trace:
py4j.Py4JException: Constructor
org.apache.spark.api.python.PythonAccumulatorV2([class
java.lang.String, class java.lang.Integer, class java.lang.String])
does not exist
Py4JErrorTraceback (most recent call last)
C:\PremiéreEssai\maman.py in <module>()
818 findspark.init()
819 conf = SparkConf().setMaster('local').setAppName('MonEssai')
--> 820 sc = SparkContext.getOrCreate();
821 lines = sc.textFile("file:///PremiéreEssai/ file9.txt")
822 fun = lines.flatMap(listsGraph)
C:\Users\hene\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\pyspark\context.pyc in getOrCreate(cls, conf)
347 with SparkContext._lock:
348 if SparkContext._active_spark_context is None:
--> 349 SparkContext(conf=conf or SparkConf())
350 return SparkContext._active_spark_context
351
C:\Users\hene\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\pyspark\context.pyc in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
116 try:
117 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
--> 118 conf, jsc, profiler_cls)
119 except:
120 # If an error occurs, clean up in order to allow future SparkContext creation:
C:\Users\hene\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\pyspark\context.pyc in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
187 self._accumulatorServer = accumulators._start_update_server(auth_token)
188 (host, port) = self._accumulatorServer.server_address
--> 189 self._javaAccumulator = self._jvm.PythonAccumulatorV2(host, port, auth_token)
190 self._jsc.sc().register(self._javaAccumulator)
191
C:\Users\hene\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\py4j\java_gateway.pyc in __call__(self, *args)
1523 answer = self._gateway_client.send_command(command)
1524 return_value = get_return_value(
-> 1525 answer, self._gateway_client, None, self._fqn)
1526
1527 for temp_arg in temp_args:
C:\Users\hene\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\py4j\protocol.pyc in get_return_value(answer, gateway_client, target_id, name)
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
--> 332 format(target_id, ".", name, value))
333 else:
334 raise Py4JError(
Py4JError: An error occurred while calling None.org.apache.spark.api.python.PythonAccumulatorV2. Trace:
py4j.Py4JException: Constructor org.apache.spark.api.python.PythonAccumulatorV2([class java.lang.String, class java.lang.Integer, class java.lang.String]) does not exist
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:179)
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:196)
at py4j.Gateway.invoke(Gateway.java:237)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
So I'm stuck on this; what should I do?
While my setup is different, I just had the exact same error an hour ago. The problem I had was that my pyspark version was different from my Spark version. You can run pip list to check your pyspark version.
The same error was coming up for me as well. I then installed Spark 2.4.4 to match my PySpark version 2.4.4, and the issue was resolved.
Setting an environment variable PYTHONPATH = {hadoop_path}/python would help.
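To check for the version mismatch the answers above describe, it is usually enough to compare the pip-installed pyspark with the Spark distribution on the machine. A quick sketch (SPARK_HOME and the install commands are placeholders for your own setup):
import os
import pyspark

print("pyspark (pip) version:", pyspark.__version__)
print("SPARK_HOME:", os.environ.get("SPARK_HOME"))
# Compare this with the version reported by the Spark distribution itself,
# e.g. by running `spark-submit --version` in a terminal. If they differ,
# either `pip install pyspark==<spark version>` or point PYTHONPATH/findspark
# at the matching installation.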

Got Py4JError when setting SparkContext

When I tried to set up SparkContext with SparkConf, I ran into Py4JError.
My code is as follows:
from pyspark import SparkContext, SparkConf
conf = SparkConf().setMaster("local").setAppName("Reference")
sc = SparkContext(conf = conf)
And the error is like this:
Py4JError Traceback (most recent call last)
<ipython-input-5-313e7d9fd8ee> in <module>()
1 from pyspark import SparkContext, SparkConf
2 conf = SparkConf().setMaster("local").setAppName("Reference")
--> 3 sc = SparkContext(conf = conf)
4 #ref = sc.textFile("reference.csv").map(lambda line: line.split(","))
5 #ref_res = ref.saveAsTextFile("ref.txt")
~/anaconda3/lib/python3.6/site-packages/pyspark/context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
116 try:
117 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
--> 118 conf, jsc, profiler_cls)
119 except:
120 # If an error occurs, clean up in order to allow future SparkContext creation:
~/anaconda3/lib/python3.6/site-packages/pyspark/context.py in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
193 # data via a socket.
194 # scala's mangled names w/ $ in them require special treatment.
--> 195 self._encryption_enabled = self._jvm.PythonUtils.getEncryptionEnabled(self._jsc)
196
197 self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python')
~/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py in __getattr__(self, name)
1514 else:
1515 raise Py4JError(
-> 1516 "{0}.{1} does not exist in the JVM".format(self._fqn, name))
1517
1518 def _get_args(self, args):
Py4JError: org.apache.spark.api.python.PythonUtils.getEncryptionEnabled does not exist in the JVM
Any help or advice would be appreciated.
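In my experience, getEncryptionEnabled "not existing in the JVM" is usually another symptom of the pip-installed pyspark not matching the Spark/Py4J jars it ends up talking to. If you have a separate Spark installation, pointing the session at it explicitly (for example with findspark) is worth trying; this is only a sketch with a placeholder SPARK_HOME, not a guaranteed fix:
import findspark
findspark.init("/path/to/spark")  # placeholder: your actual Spark installation directory

from pyspark import SparkContext, SparkConf

conf = SparkConf().setMaster("local").setAppName("Reference")
sc = SparkContext(conf=conf)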

ImportError, No module named rdbms_googleapi on GAE python SDK when using the remote shell

We're running a Django application on AppEngine (python).
When making a query using the remote shell, so that the SQL driver points to Cloud SQL, GAE throws an exception:
ImportError: No module named rdbms_googleapi.
This problem affects deployed apps when using the remote shell and attempting to run SQL queries.
We tested SDK versions 1.9.36 and 1.9.37 under Mac OS X 10.11.4.
You can reproduce this bug with the following steps:
Have a GAE application with Django.
Use the remote shell to connect to a running instance of the application.
The database engine in use is google.appengine.ext.django.backends.rdbms.
Query the database.
The expected output is the query result, however we get:
ImportError: No module named rdbms_googleapi
This is the full traceback of the exception:
In [2]: MyModel.objects.get(id=1)
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-2-d1e136dd4ed5> in <module>()
----> 1 MyModel.objects.get(id=1)
/usr/local/google_appengine/lib/django-1.5/django/db/models/manager.py in get(self, *args, **kwargs)
141
142 def get(self, *args, **kwargs):
--> 143 return self.get_query_set().get(*args, **kwargs)
144
145 def get_or_create(self, **kwargs):
/usr/local/google_appengine/lib/django-1.5/django/db/models/query.py in get(self, *args, **kwargs)
396 if self.query.can_filter():
397 clone = clone.order_by()
--> 398 num = len(clone)
399 if num == 1:
400 return clone._result_cache[0]
/usr/local/google_appengine/lib/django-1.5/django/db/models/query.py in __len__(self)
104 self._result_cache = list(self._iter)
105 else:
--> 106 self._result_cache = list(self.iterator())
107 elif self._iter:
108 self._result_cache.extend(self._iter)
/usr/local/google_appengine/lib/django-1.5/django/db/models/query.py in iterator(self)
315 klass_info = get_klass_info(model, max_depth=max_depth,
316 requested=requested, only_load=only_load)
--> 317 for row in compiler.results_iter():
318 if fill_cache:
319 obj, _ = get_cached_row(row, index_start, db, klass_info,
/usr/local/google_appengine/lib/django-1.5/django/db/models/sql/compiler.py in results_iter(self)
773 if self.query.select_for_update and transaction.is_managed(self.using):
774 transaction.set_dirty(self.using)
--> 775 for rows in self.execute_sql(MULTI):
776 for row in rows:
777 if has_aggregate_select:
/usr/local/google_appengine/lib/django-1.5/django/db/models/sql/compiler.py in execute_sql(self, result_type)
843 return
844
--> 845 cursor = self.connection.cursor()
846 cursor.execute(sql, params)
847
/usr/local/google_appengine/lib/django-1.5/django/db/backends/__init__.py in cursor(self)
324 cursor = self.make_debug_cursor(self._cursor())
325 else:
--> 326 cursor = util.CursorWrapper(self._cursor(), self)
327 return cursor
328
/usr/local/google_appengine/google/storage/speckle/python/django/backend/base.pyc in _cursor(self)
274 "You must specify a '%s' for database '%s'" %
275 (settings_key, self.alias))
--> 276 self.connection = Connect(**kwargs)
277 encoders = {safestring.SafeUnicode: self.connection.encoders[unicode],
278 safestring.SafeString: self.connection.encoders[str]}
/usr/local/google_appengine/google/storage/speckle/python/django/backend/base.pyc in Connect(driver_name, oauth2_refresh_token, **kwargs)
165 found in storage and no oauth2_refresh_token was given.
166 """
--> 167 driver = _GetDriver(driver_name)
168 server_software = os.getenv('SERVER_SOFTWARE', '').split('/')[0]
169 if (server_software in (DEV_SERVER_SOFTWARE, PROD_SERVER_SOFTWARE) and
/usr/local/google_appengine/google/storage/speckle/python/django/backend/base.pyc in _GetDriver(driver_name)
142 else:
143 driver_name = base_pkg_path + 'rdbms_googleapi'
--> 144 __import__(driver_name)
145 return sys.modules[driver_name]
146
ImportError: No module named rdbms_googleapi
Has anyone experienced something similar? We believe the SDK is not installing all the required files.
It's worth mentioning that previous versions of the AppEngine python SDK worked correctly.
After messing around with the SDK we noticed that there is a missing file compared to previous versions.
We copied the file at https://chromium.googlesource.com/external/googleappengine/python/+/master/google/storage/speckle/python/api/rdbms_googleapi.py
into our /usr/local/google_appengine/google/storage/speckle/python/api directory, and that resolved the issue.
This indicates that the GAE SDK installer is missing that file.
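If you want to confirm whether your SDK copy is affected before patching it, a quick import check against the module name the backend builds (assuming the same base package path as in the traceback) reproduces the failure without running a query. A diagnostic sketch only:
import importlib

# The Django backend ultimately calls __import__ on this dotted name,
# so importing it directly shows whether the file is present in the SDK.
try:
    importlib.import_module("google.storage.speckle.python.api.rdbms_googleapi")
    print("rdbms_googleapi is present")
except ImportError:
    print("rdbms_googleapi is missing; copy rdbms_googleapi.py into "
          "google/storage/speckle/python/api as described above")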
