Tornado is throwing a "Stream closed" error in Chalice (Python)

I am trying out AWS Neptune for the first time using Chalice.
This is the entire error:
Traceback (most recent call last):
File "/var/task/chalice/app.py", line 1104, in _get_view_function_response
response = view_function(**function_args)
File "/var/task/app.py", line 44, in getPosts
raise e
File "/var/task/app.py", line 37, in getPosts
result = g.V().has('name', 'test1').toList()
File "/var/task/gremlin_python/process/traversal.py", line 58, in toList
return list(iter(self))
File "/var/task/gremlin_python/process/traversal.py", line 48, in __next__
self.traversal_strategies.apply_strategies(self)
File "/var/task/gremlin_python/process/traversal.py", line 573, in apply_strategies
traversal_strategy.apply(traversal)
File "/var/task/gremlin_python/driver/remote_connection.py", line 149, in apply
remote_traversal = self.remote_connection.submit(traversal.bytecode)
File "/var/task/gremlin_python/driver/driver_remote_connection.py", line 55, in submit
result_set = self._client.submit(bytecode)
File "/var/task/gremlin_python/driver/client.py", line 111, in submit
return self.submitAsync(message, bindings=bindings).result()
File "/var/task/gremlin_python/driver/client.py", line 127, in submitAsync
return conn.write(message)
File "/var/task/gremlin_python/driver/connection.py", line 55, in write
self.connect()
File "/var/task/gremlin_python/driver/connection.py", line 45, in connect
self._transport.connect(self._url, self._headers)
File "/var/task/gremlin_python/driver/tornado/transport.py", line 36, in connect
lambda: websocket.websocket_connect(url))
File "/var/task/tornado/ioloop.py", line 576, in run_sync
return future_cell[0].result()
tornado.simple_httpclient.HTTPStreamClosedError: Stream closed
and here is my code:
import logging
from chalice import Chalice, BadRequestError, NotFoundError
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.process.traversal import T, P, Operator
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from datetime import datetime
app = Chalice(app_name='chalice-neptune')
app.debug = True
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)
def setup_graph():
    try:
        graph = Graph()
        connstring = 'ws://NEPTUNE-ENDPOINT-HERE:8182/gremlin'
        g = graph.traversal().withRemote(DriverRemoteConnection(connstring, 'g'))
        logging.info('Connected to Neptune')
    except Exception as e:
        logging.error(e, exc_info=True)
        raise BadRequestError("Could not connect to Neptune")
    return g

@app.route('/getPosts')
def getPosts():
    g = setup_graph()
    try:
        result = g.V().has('name', 'test1').toList()
        response = {
            'status_code': 200,
            'data': result
        }
    except Exception as e:
        raise e
    return response
Has anyone tried this?
I followed the example found in this bucket: gremlin-python-example
As far as I can tell I haven't missed anything from the example, but it is still throwing the stream closed error.

It turns out the only thing I needed to change was my connection string, and it is now working fine:
connstring = 'wss://NEPTUNE-ENDPOINT-HERE:8182/gremlin'
I changed it from ws to wss.
For the difference between the two, you can refer to this answer:
Difference between ws and wss?
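For reference, here is a minimal sketch of the corrected setup (the endpoint is the same placeholder as in the question, not a real host):

def setup_graph_wss():
    # identical to setup_graph above, except the scheme is wss so the
    # websocket handshake runs over TLS, which Neptune endpoints generally require
    graph = Graph()
    connstring = 'wss://NEPTUNE-ENDPOINT-HERE:8182/gremlin'
    return graph.traversal().withRemote(DriverRemoteConnection(connstring, 'g'))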

Related

dom.getElementsByTagNameNS("urn:oasis:names:tc:SAML:1.0:assertion", 'Assertion')[0].toxml() IndexError: list index out of range

We used the "Office365-REST-Python-Client 2.3.11" library to upload files to SharePoint.
Suddenly the code stopped working and started giving the issue below.
Code:
import json
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
site_url = "https://{your-tenant-prefix}.sharepoint.com"
ctx = ClientContext(site_url).with_credentials(UserCredential("{username}", "{password}"))
request = RequestOptions("{0}/_api/web/".format(site_url))
response = ctx.execute_request_direct(request)
json = json.loads(response.content)
web_title = json['d']['Title']
print("Web title: {0}".format(web_title))
Error:
Traceback (most recent call last):
File "test_upload.py", line 81, in <module>
response = ctx.execute_request_direct(request)
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/client_runtime_context.py", line 131, in execute_request_direct
return self.pending_request().execute_request_direct(self._normalize_request(request))
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/odata/odata_request.py", line 36, in execute_request_direct
return super(ODataRequest, self).execute_request_direct(request)
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/client_request.py", line 91, in execute_request_direct
self.context.authenticate_request(request)
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/sharepoint/client_context.py", line 230, in authenticate_request
self._auth_context.authenticate_request(request)
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/auth/authentication_context.py", line 89, in authenticate_request
self._provider.authenticate_request(request)
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/auth/providers/saml_token_provider.py", line 77, in authenticate_request
self.ensure_authentication_cookie()
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/auth/providers/saml_token_provider.py", line 84, in ensure_authentication_cookie
self._cached_auth_cookies = self.get_authentication_cookie()
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/auth/providers/saml_token_provider.py", line 97, in get_authentication_cookie
token = self._acquire_service_token_from_adfs(user_realm.STSAuthUrl)
File "/home/ubuntu/atlas/national_kpi_table/venv/lib/python3.8/site-packages/office365/runtime/auth/providers/saml_token_provider.py", line 140, in _acquire_service_token_from_adfs
assertion_node = dom.getElementsByTagNameNS("urn:oasis:names:tc:SAML:1.0:assertion", 'Assertion')[0].toxml()
IndexError: list index out of range
Any help appreciated.
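Not a confirmed fix, but since the failure happens while requesting a SAML assertion from ADFS, one commonly suggested workaround is app-only authentication, which skips that flow entirely. A sketch, assuming an Azure AD app registration is available (client_id and client_secret are placeholders):

from office365.runtime.auth.client_credential import ClientCredential
from office365.sharepoint.client_context import ClientContext

site_url = "https://{your-tenant-prefix}.sharepoint.com"
# authenticates as the app itself, so no SAML assertion is requested from ADFS
ctx = ClientContext(site_url).with_credentials(ClientCredential("{client_id}", "{client_secret}"))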

Multiprocessing Gremlin "OSError: [Errno 9] Bad file descriptor"

I'm trying to compute a feature for every vertex in my graph using gremlinpython. It's too slow to iterate sequentially over every single vertex. While batching could help provide a speedup, I thought I'd first try parallelizing the query.
Broadly: 1. get the full set of vertices, 2. split them over num_cores=x, 3. iterate over each sub-vertex set in parallel.
But I'm getting the error "OSError: [Errno 9] Bad file descriptor". The code below is my latest attempt at solving this.
import multiprocessing
# NOTE: numpy, os and sys are used below but were missing from the snippet
import os
import sys
import numpy as np
from gremlin_python.structure.graph import Graph
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.process.traversal import lt

def create_traversal_object():
    graph = Graph()
    g = graph.traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin', 'g'))
    return g

g = create_traversal_object()
num_cores = 1
vertex_lsts = np.array_split(g.V().limit(30).id().toList(), num_cores)

class FeatureClass():
    def __init__(self, g, vertex_list):
        self.g = g
        self.vertex_list = vertex_list
    def orchestrator(self):
        for vertex_id in self.vertex_list:
            self.compute_number_of_names(float(vertex_id))
    def compute_number_of_names(self, vertex_id):
        # reconstructed from the traceback below
        print(self.g.V(vertex_id).inE().values('benef_nm').dedup().count().next())
    def get_names(self, vertex_id):
        return self.g.V(vertex_id).inE().values('benef_nm').dedup().toList()

class Simulation(multiprocessing.Process):
    def __init__(self, id, worker, *args, **kwargs):
        # must call this before anything else
        multiprocessing.Process.__init__(self)
        self.id = id
        self.worker = worker
        self.args = args
        self.kwargs = kwargs
        sys.stdout.write('[%d] created\n' % (self.id))
    def run(self):
        sys.stdout.write('[%d] running ... process id: %s\n' % (self.id, os.getpid()))
        self.worker.orchestrator()
        sys.stdout.write('[%d] completed\n' % (self.id))

list_of_objects = [FeatureClass(create_traversal_object(), vertex_lst) for vertex_lst in vertex_lsts]
list_of_sim = [Simulation(id=k, worker=obj) for k, obj in enumerate(list_of_objects)]
for sim in list_of_sim:
    sim.start()
Here's the full stack trace. It looks like an issue with Tornado, which gremlinpython uses.
Process Simulation-1:
Traceback (most recent call last):
File "/Users/greatora/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "<ipython-input-4-b3177477fabe>", line 42, in run
self.worker.orchestrator()
File "<ipython-input-4-b3177477fabe>", line 23, in orchestrator
self.compute_number_of_names(float(vertex_id))
File "<ipython-input-4-b3177477fabe>", line 26, in compute_number_of_names
print(self.g.V(vertex_id).inE().values('benef_nm').dedup().count().next())
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/process/traversal.py", line 88, in next
return self.__next__()
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/process/traversal.py", line 47, in __next__
self.traversal_strategies.apply_strategies(self)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/process/traversal.py", line 512, in apply_strategies
traversal_strategy.apply(traversal)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/remote_connection.py", line 148, in apply
remote_traversal = self.remote_connection.submit(traversal.bytecode)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/driver_remote_connection.py", line 53, in submit
result_set = self._client.submit(bytecode)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/client.py", line 108, in submit
return self.submitAsync(message, bindings=bindings).result()
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/_base.py", line 432, in result
return self.__get_result()
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/connection.py", line 63, in cb
f.result()
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/_base.py", line 425, in result
return self.__get_result()
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/thread.py", line 56, in run
result = self.fn(*self.args, **self.kwargs)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/protocol.py", line 74, in write
self._transport.write(message)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/tornado/transport.py", line 37, in write
lambda: self._ws.write_message(message, binary=True))
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 453, in run_sync
self.start()
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 863, in start
event_pairs = self._impl.poll(poll_timeout)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/tornado/platform/kqueue.py", line 66, in poll
kevents = self._kqueue.control(None, 1000, timeout)
OSError: [Errno 9] Bad file descriptor
I'm using Python 3.7, gremlinpython==3.4.6, macOS.
I'm still not entirely sure what the issue was, but this works:
import multiprocessing
from multiprocessing import Pool
import itertools
# NOTE: these imports are used below but were missing from the snippet
import numpy as np
from gremlin_python.structure.graph import Graph
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection

def graph_function(vertex_id_list):
    # each worker process opens its own connection instead of inheriting one
    graph = Graph()
    g = graph.traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin', 'g'))
    res = []
    for vertex_id in vertex_id_list:
        res.append(g.V(str(vertex_id)).inE().values('benef_nm').dedup().toList())
    return res

num_cores = 4
vertex_lst = g.V().limit(30).id().toList()  # g: the parent-process traversal from above
vertex_lsts = np.array_split(vertex_lst, num_cores)

with Pool(processes=num_cores) as pool:
    results = pool.map(graph_function, vertex_lsts)
results = [*itertools.chain.from_iterable(results)]
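A plausible explanation for why this works (my reading, not confirmed in the thread): in the first version the DriverRemoteConnection was created in the parent process, so the forked workers inherited the parent's Tornado event loop and websocket file descriptor; once any process closed or reused that shared socket, the others were left polling a stale descriptor, hence Errno 9. Creating the connection inside graph_function gives each worker its own socket.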

Bulk index to Elasticsearch using Python

I have the following code, which indexes data to Elasticsearch using Python:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import requests
from requests.auth import AuthBase

requests.packages.urllib3.disable_warnings()

class TokenAuth(AuthBase):
    def __init__(self, token):
        self.token = token
    def __call__(self, r):
        r.headers['Authorization :Bearer'] = f'{self.token}'
        return r

es = Elasticsearch('https://localhost:9200/user/type', ca_certs=False, verify_certs=False, auth=TokenAuth(''))
#requests.get('https://httpbin.org/get', auth=TokenAuth('12345abcde-token'))
res = helpers.bulk(es, "ldif2.json", chunk_size=1, request_timeout=200)
It uses token-based authentication, but when I run this program I get the error message below. How do I solve this?
Traceback (most recent call last):
File "bulk_index.py", line 20, in <module>
res = helpers.bulk(es, "ldif2.json", chunk_size=1, request_timeout=200)
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\helpers\actions.py", line 300, in bulk
for ok, item in streaming_bulk(client, actions, *args, **kwargs):
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\helpers\actions.py", line 230, in streaming_bulk
**kwargs
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\helpers\actions.py", line 116, in _process_bulk_chunk
raise e
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\helpers\actions.py", line 112, in _process_bulk_chunk
resp = client.bulk("\n".join(bulk_actions) + "\n", *args, **kwargs)
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\client\utils.py", line 84, in _wrapped
return func(*args, params=params, **kwargs)
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\client\__init__.py", line 1498, in bulk
headers={"content-type": "application/x-ndjson"},
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\transport.py", line 353, in perform_request
timeout=timeout,
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\connection\http_urllib3.py", line 239, in perform_request
self._raise_error(response.status, raw_data)
File "C:\Users\mkumaru\AppData\Local\Programs\Python\Python37\lib\site-packages\elasticsearch\connection\base.py", line 168, in _raise_error
status_code, error_message, additional_info
elasticsearch.exceptions.AuthenticationException: AuthenticationException(401, 'Access denied')
I think es should be like this.
es = Elasticsearch("http://127.0.0.1:9200", http_auth=('user', 'passwd'))

python-instagram OAuthPermissionsException

So I'm trying to get Instagram photos that fit certain parameters and I'm getting the following stack:
Traceback (most recent call last):
File "instagram_find_shows.py", line 83, in <module>
if __name__ == "__main__": main()
File "instagram_find_shows.py", line 48, in main
get_instagram_posts(show_name, show_time, coordinates)
File "instagram_find_shows.py", line 73, in get_instagram_posts
str(coordinates[1]), min_time, max_time)
File "C:\Users\User Name\Anaconda3\lib\site-packages\instagram\bind.py", line 197, in _call
return method.execute()
File "C:\Users\User Name\Anaconda3\lib\site-packages\instagram\bind.py", line 189, in execute
content, next = self._do_api_request(url, method, body, headers)
File "C:\Users\User Name\Anaconda3\lib\site-packages\instagram\bind.py", line 163, in _do_api_request
raise InstagramAPIError(status_code, content_obj['meta']['error_type'], content_obj['meta']['error_message'])
instagram.bind.InstagramAPIError: (400) OAuthPermissionsException-This request requires scope=public_content, but this access token is not authorized with this scope. The user must re-authorize your application with scope=public_content to be granted this permissions.
The code is as follows:
def get_instagram_posts(name, time, coordinates):
    max_time_dt = time + timedelta(hours=3)
    min_time_dt = time - timedelta(hours=1)
    max_time = str(calendar.timegm(max_time_dt.timetuple()))
    min_time = str(calendar.timegm(min_time_dt.timetuple()))
    dist_rad_str = str(insta_dist_radius_m)
    count_str = str(insta_count)
    api = InstagramAPI(access_token=insta_access_token,
                       client_secret=insta_client_secret)
    r = api.media_search(name, count_str, str(coordinates[0]),
                         str(coordinates[1]), min_time, max_time)
    photos = []
    for media in r:
        photos.append('<img src="%s"/>' % media.images['thumbnail'].url)
    print(photos[0])
I can't figure out what to do... Literally I'm just trying to do a simple test, not trying to cripple their API. Is there any way to do this within Instagram's parameters? Thanks so much!
Fixed by going to the following URL in the browser:
https://www.instagram.com/oauth/authorize?client_id=[CLIENT_ID]&redirect_uri=[REDIRECT_URI]&response_type=code&scope=basic+public_content+follower_list+comments+relationships+likes
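If you'd rather complete that flow in code, here is a rough sketch with python-instagram (CLIENT_ID, CLIENT_SECRET, REDIRECT_URI, and code are placeholders you must supply):

from instagram.client import InstagramAPI

api = InstagramAPI(client_id=CLIENT_ID, client_secret=CLIENT_SECRET,
                   redirect_uri=REDIRECT_URI)
# step 1: send the user here; Instagram redirects back with ?code=...
print(api.get_authorize_url(scope=["basic", "public_content"]))
# step 2: trade the code for a token carrying the requested scopes;
# depending on the library version this returns the token string or a
# (token, user_info) tuple
result = api.exchange_code_for_access_token(code)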

Python - Cannot connect Python to pymongo

I am using Python 3.5.1. These are the errors:
Traceback (most recent call last):
File "C:\Users\KORKUSUZ\Desktop\twitter-realtime-heatmap-master\tstream.py", line 55, in <module>
streamer.filter(track = setTerms)
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\tweepy-3.6.0-py3.5.egg\tweepy\streaming.py", line 447, in filter
self._start(async)
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\tweepy-3.6.0-py3.5.egg\tweepy\streaming.py", line 361, in _start
self._run()
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\tweepy-3.6.0-py3.5.egg\tweepy\streaming.py", line 294, in _run
raise exception
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\tweepy-3.6.0-py3.5.egg\tweepy\streaming.py", line 263, in _run
self._read_loop(resp)
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\tweepy-3.6.0-py3.5.egg\tweepy\streaming.py", line 324, in _read_loop
self._data(next_status_obj)
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\tweepy-3.6.0-py3.5.egg\tweepy\streaming.py", line 297, in _data
if self.listener.on_data(data) is False:
File "C:\Users\KORKUSUZ\Desktop\twitter-realtime-heatmap-master\tstream.py", line 48, in on_data
col.insert(json.loads(data))
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\pymongo-3.2.2-py3.5-win-amd64.egg\pymongo\collection.py", line 2203, in insert
with self._socket_for_writes() as sock_info:
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\contextlib.py", line 59, in __enter__
return next(self.gen)
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\pymongo-3.2.2-py3.5-win-amd64.egg\pymongo\mongo_client.py", line 716, in _get_socket
server = self._get_topology().select_server(selector)
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\pymongo-3.2.2-py3.5-win-amd64.egg\pymongo\topology.py", line 142, in select_server
address))
File "C:\Users\KORKUSUZ\AppData\Local\Programs\Python\Python35\lib\site-packages\pymongo-3.2.2-py3.5-win-amd64.egg\pymongo\topology.py", line 118, in select_servers
self._error_message(selector))
pymongo.errors.ServerSelectionTimeoutError: localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it
This is my code:
import tweepy
import json
from tweepy.streaming import StreamListener
from tweepy import Stream
from bson import json_util
from tweepy.utils import import_simplejson

try:
    from pymongo.connection import Connection
except ImportError as e:
    from pymongo import MongoClient as Connection

json = import_simplejson()

mongocon = Connection()
db = mongocon.tstream
col = db.tweets_tail

consumer_key = "FfRhUzvXKlnS9sDWfGZqxECzQ"
consumer_secret = "uhuiApn3IyzXWw34kvl8ia1DzgAaPyk2xuxXG3HtZgEIbFrWSx"
access_token = "185166166-o4xUFWdjoL84K1MscTot4SfH9DZnkG5maYbhZZ6Z"
access_token_secret = "2vrydY3ogq5vU8Mkqp3CAqeMmlDuRStU6iSDgNbPxDkdS"

auth1 = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth1.set_access_token(access_token, access_token_secret)

class StreamListener(tweepy.StreamListener):
    mongocon = Connection()
    db = mongocon.tstream
    col = db.tweets
    json = import_simplejson()
    def on_status(self, tweet):
        print('Ran on_status')
    def on_error(self, status_code):
        return False
    def on_data(self, data):
        if data[0].isdigit():
            pass
        else:
            col.insert(json.loads(data))
            print(json.loads(data))

l = StreamListener()
streamer = tweepy.Stream(auth=auth1, listener=l)
setTerms = ["bigdata", "devops", "hadoop", "twitter"]
streamer.filter(track=setTerms)
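The ServerSelectionTimeoutError at the bottom of the trace means nothing accepted the connection on localhost:27017, i.e. MongoDB is most likely not running (or is listening elsewhere). A minimal check, assuming a default local install:

from pymongo import MongoClient
from pymongo.errors import ServerSelectionTimeoutError

client = MongoClient('localhost', 27017, serverSelectionTimeoutMS=2000)
try:
    client.admin.command('ping')  # raises if no server is listening
    print('MongoDB is reachable')
except ServerSelectionTimeoutError:
    print('MongoDB is not running on localhost:27017 - start mongod first')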
