OOP Python: Where to instantiate the Cassandra and Elasticsearch sessions?

I have an object that interacts a lot with Elasticsearch and Cassandra, but I don't know where to instantiate my Cassandra and Elasticsearch sessions. Should I create them in my "code" and pass the session as a parameter to my functions, like this:
cassandra_cluster = Cluster()
session = cassandra_cluster.connect()
es = Elasticsearch()

class Article:
    document_type = "cnn_article"

    def __init__(self):
        self.author = ""
        self.url = ""
        ...

    @classmethod
    def from_crawl(cls, url):
        obj = cls()
        # Launch a crawler, fill the fields and return the object

    @classmethod
    def from_elasticsearch(cls, elastic_search_document):
        obj = cls()
        # Read the response from elasticsearch and return the object

    def save_to_cassandra(self, session):
        # Save the object into cassandra
        session.execute(.....)

    def save_to_elasticsearch(self, index_name, es):
        # Save the object into elasticsearch
        es.index(index=index_name, ...)

    ...

article = Article.from_crawl("http://cnn.com/article/blabla")
article.save_to_cassandra(session)
article.save_to_elasticsearch("cnn", es)
Or should I instantiate my Cassandra and Elasticsearch sessions as class attributes, like this:
class Article:
    cassandra_cluster = Cluster()
    session = cassandra_cluster.connect()
    es = Elasticsearch()
    document_type = "cnn_article"

    def __init__(self):
        self.author = ""
        self.url = ""
        ...

    @classmethod
    def from_crawl(cls, url):
        obj = cls()
        # Launch a crawler, fill the fields and return the object

    @classmethod
    def from_elasticsearch(cls, elastic_search_document):
        obj = cls()
        # Read the response from elasticsearch and return the object

    def save_to_cassandra(self):
        # Save the object into cassandra
        self.session.execute(.....)

    def save_to_elasticsearch(self):
        # Save the object into elasticsearch
        self.es.index(....)

    ...

article = Article.from_crawl("http://cnn.com/article/blabla")
article.save_to_cassandra()
article.save_to_elasticsearch()

Based on the documentation and some of the examples here: http://www.datastax.com/dev/blog/datastax-python-driver-multiprocessing-example-for-improved-bulk-data-throughput
I would go with your second approach. They mention that the session is only used as a context manager for shutting down connections, and their query managers show it as a class attribute.
I think both would work, but if you want to multiprocess it, it may be marginally easier with the latter approach.
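For illustration, here is a condensed, runnable sketch of that second approach. It assumes the cassandra-driver and elasticsearch Python packages, an older elasticsearch-py client where body= is accepted, and made-up keyspace, table and index names:

from cassandra.cluster import Cluster
from elasticsearch import Elasticsearch

class Article:
    # Created once when the class body runs and shared by every instance.
    cluster = Cluster()                  # assumes Cassandra on localhost
    session = cluster.connect("news")    # the "news" keyspace is a placeholder
    es = Elasticsearch()                 # assumes Elasticsearch on localhost:9200
    document_type = "cnn_article"

    def __init__(self, author="", url=""):
        self.author = author
        self.url = url

    def save_to_cassandra(self):
        # The "articles" table and its columns are placeholders.
        self.session.execute(
            "INSERT INTO articles (url, author) VALUES (%s, %s)",
            (self.url, self.author),
        )

    def save_to_elasticsearch(self, index_name):
        self.es.index(index=index_name,
                      body={"author": self.author, "url": self.url})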

Related

How to delete a document in MongoDB

I am trying to create a delete method in order to delete a document that has the key "name" and the value "Rhonda". Whenever I execute my current code, I get an AttributeError saying "'AnimalShelter' object has no attribute 'delete'". How do I get the method to return the deleted document's JSON contents? Here is my code:
testing_script.ipynb
from animal_shelter import AnimalShelter
# now need to create the object from the class
shelter = AnimalShelter("aacuser","Superman")
data = {"age_upon_outcome":"2 years","animal_type":"Dog","breed":"Dachshund","color":"Black and tan","name":"Rhonda","outcome_subtype":"Partner","outcome_type":"Adopt","sex_upon_outcome":"Female"}
new_values = {"$set": {"age_upon_outcome":"3 years"}}
# if shelter.create(data):
#     print("Animal added")
# else:
#     print("Failed to add animal")
# Calls the read function
# shelter.read(data)
# Calls the update function
# shelter.update(data, new_values)
# Calls the delete function
shelter.delete(data)
output
AttributeError Traceback (most recent call last)
<ipython-input-5-60b1d887dfb8> in <module>
17
18 # Calls the delete function
---> 19 shelter.delete(data)
20
AttributeError: 'AnimalShelter' object has no attribute 'delete'
animal_shelter.py
from pymongo import MongoClient
from bson.objectid import ObjectId

class AnimalShelter(object):
    """ CRUD operations for Animal collection in MongoDB """

    def __init__(self, username, password):
        # Initializing the MongoClient. This helps to
        # access the MongoDB databases and collections.
        # init to connect to mongodb without authentication
        self.client = MongoClient('mongodb://localhost:55996')
        # init connect to mongodb with authentication
        # self.client = MongoClient('mongodb://%s:%s@localhost:55996/?authMechanism=DEFAULT&authSource=AAC' % (username, password))
        self.database = self.client['AAC']

# Complete this create method to implement the C in CRUD.
def create(self, data):
    if data is not None:
        self.database.animals.insert(data)  # data should be dictionary
        return True  # Tells whether the create function ran successfully
    else:
        raise Exception("Nothing to save ...")

# Create method to implement the R in CRUD.
def read(self, data):
    return self.database.animals.find_one(data)  # returns only one

# Update method to implement the U in CRUD.
def update(self, data, new_values):
    if self.database.animals.count(data):
        self.database.animals.update(data, new_values)
        return self.database.animals.find({"age_upon_outcome": "3 years"})
    else:
        raise Exception("Nothing to update ...")

# Delete method to implement the D in CRUD
def delete(self, data)
    result = self.database.animals.find_one_and_delete(data)
    # print the _id key only if the result is not None
    if("_id" in result):
        print("find_one_and_delete ID:", result["_id"])
    else:
        print("Nothing to delete")
The problem is that the functions you are defining sit outside the class; you have to indent them so that they are inside class AnimalShelter.
Also, as pointed out in a comment, you are missing a : in the delete definition.
Updated animal_shelter.py
from pymongo import MongoClient
from bson.objectid import ObjectId

class AnimalShelter(object):
    """ CRUD operations for Animal collection in MongoDB """

    def __init__(self, username, password):
        # Initializing the MongoClient. This helps to
        # access the MongoDB databases and collections.
        # init to connect to mongodb without authentication
        self.client = MongoClient('mongodb://localhost:55996')
        # init connect to mongodb with authentication
        # self.client = MongoClient('mongodb://%s:%s@localhost:55996/?authMechanism=DEFAULT&authSource=AAC' % (username, password))
        self.database = self.client['AAC']

    # Complete this create method to implement the C in CRUD.
    def create(self, data):
        if data is not None:
            self.database.animals.insert(data)  # data should be dictionary
            return True  # Tells whether the create function ran successfully
        else:
            raise Exception("Nothing to save ...")

    # Create method to implement the R in CRUD.
    def read(self, data):
        return self.database.animals.find_one(data)  # returns only one

    # Update method to implement the U in CRUD.
    def update(self, data, new_values):
        if self.database.animals.count(data):
            self.database.animals.update(data, new_values)
            return self.database.animals.find({"age_upon_outcome": "3 years"})
        else:
            raise Exception("Nothing to update ...")

    # Delete method to implement the D in CRUD
    def delete(self, data):
        result = self.database.animals.find_one_and_delete(data)
        # print the _id key only if a matching document was found
        if result is not None and "_id" in result:
            print("find_one_and_delete ID:", result["_id"])
        else:
            print("Nothing to delete")

Pytest: mock AWS Secrets Manager

My project has a file called config.py which contains, among other things, the following code:
from enum import Enum
from json import loads

from boto3.session import Session

class Secret(Enum):
    DATABASE_A = 'name_of_secret_database_A'
    DATABASE_B = 'name_of_secret_database_B'

    def secret(self):
        if self.value:
            return get_secret(self.value)
        return {}

def get_secret(secret_name):
    session = Session()
    client = session.client(
        service_name='secretsmanager',
        region_name='us-east-1',
    )
    secret_value = client.get_secret_value(SecretId=secret_name)
    return loads(secret_value.get('SecretString', "{}"))
I need to somehow mock get_secret() in my pytest tests for all enum calls, for example Secret.DATABASE_A.secret().
You can use monkeypatch to override the behaviour of get_secret(). I have made get_secret() a static method of the Secret class, but you can keep it in any module you want and import it instead; just make sure you change it in the monkeypatch.setattr() call as well.
import pytest

from enum import Enum
from json import loads

from boto3.session import Session

class Secret(Enum):
    DATABASE_A = 'name_of_secret_database_A'
    DATABASE_B = 'name_of_secret_database_B'

    def secret(self):
        if self.value:
            return Secret.get_secret(self.value)
        return {}

    @staticmethod
    def get_secret(secret_name):
        session = Session()
        client = session.client(
            service_name='secretsmanager',
            region_name='us-east-1',
        )
        secret_value = client.get_secret_value(SecretId=secret_name)
        return loads(secret_value.get('SecretString', "{}"))

def test_secret_method(monkeypatch):
    def get_secret(secret_name):
        return "supersecret"

    monkeypatch.setattr(Secret, "get_secret", get_secret)
    s = Secret.DATABASE_A
    assert s.secret() == "supersecret"
This results in one passing test.
What is happening here is that I created a get_secret() function inside test_secret_method and then overwrote Secret.get_secret() with that new function. Now you can use the Secret class in your test and be sure what get_secret() will return, without actually running the original code.
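If you would rather keep get_secret() as a module-level function in config.py, as in the question, the same idea works by patching the name where it is looked up. A minimal sketch, assuming the file is importable as config:

import config

def test_secret_method_module_level(monkeypatch):
    # Replace config.get_secret so no call to AWS Secrets Manager is made.
    monkeypatch.setattr(config, "get_secret", lambda secret_name: "supersecret")
    assert config.Secret.DATABASE_A.secret() == "supersecret"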

Inherited methods: how do I make super methods available?

I have a class (which works, or appears to) that runs an SQL query; the code is below. If I inherit from object and do not use super, it works fine.
I am learning about inheriting methods from super classes, so I thought I would make my class header look like this:
class database_connector(Connection)
and incorporate a super call in __init__ like this:
super().__init__()
However, I get
TypeError: function() argument 1 must be code, not str
I tried
super(database_connector, self).__init__()
after reading some other posts on Stack Overflow, but I still get
TypeError: function() argument 1 must be code, not str
I am anticipating that this change will allow me to call more methods from pymysql.Connection.
Here is my class:
from pymysql import Connection

# set up default values for database connector
class database_connector(Connection):
    def __init__(self, host=None, db=None, user=None, passwd=None):
        super(database_connector, self).__init__()
        if host is None:
            self.host = "mysql_host_ip"
        else:
            self.host = host
        if db is None:
            self.db = "fred_db"
        else:
            self.db = db
        if user is None:
            self.user = "fred"
        else:
            self.user = user
        if passwd is None:
            self.passwd = "fredspasswd"
        else:
            self.passwd = passwd
        self.this_database = Connection(host=self.host,
                                        user=self.user,
                                        passwd=self.passwd,
                                        db=self.db)
        self.cur = self.this_database.cursor()

    def my_sql_run_this_sql(self, sql_to_run=None):
        if sql_to_run is None:
            data = self.cur.execute("SELECT * FROM person")
        else:
            data = self.cur.execute(sql_to_run)
        data = []
        for row in self.cur.fetchall():
            data.append(row)
        self.this_database.close()
        return data

ContextDecorator does not restore mocked objects upon __exit__

On a project I am working on, we are using Stripe, but we need to mock the API calls in our tests.
I wrote a class inheriting from contextlib.ContextDecorator so I can use it both as a decorator and as a context manager. Here is the code:
class mock_stripe(ContextDecorator):
    """Mock Stripe API calls.

    This class will work either as a function decorator or as a
    context manager that mocks calls to the Stripe API, so there won't
    be any HTTP request nor possible flakiness if the Stripe test API is
    down.
    """

    def __init__(self, customer=None, source=None,
                 retrieved_charge=None):
        """Initialize the context with objects to return upon mocked calls.

        If none of the arguments are provided, they will receive default
        values.

        Args:
            customer: The stripe customer to return.
            source: The stripe source to return.
            charge: The stripe charge to return.
            retrieved_charge: The stripe retrieved charge to return.
        """
        self.customer = customer
        self.source = source
        self.retrieved_charge = retrieved_charge
        self.pending_charge = MagicMock()
        self.pending_charge.status = 'pending'
        self.charge_state = 0
        if self.customer is None:
            self._create_default_customer()
        if self.source is None:
            self._create_default_source()
        if self.retrieved_charge is None:
            self._create_default_retrieved_charge()

    def _create_default_customer(self):
        self.customer = MagicMock()
        self.customer.stripe_customer_token = "token"
        self.customer.get_full_name = lambda: "Georges Abitbol"
        self.customer.email = "georges.abitbol@example.com"
        self.customer.number = "0123456789"
        self.customer.customer_id = "1"
        self.customer.id = "1"
        self.customer.source.id = "2"

    def _create_default_source(self):
        self.source = MagicMock()
        self.source.id = "2"
        self.source.customer_id = "1"
        self.source.sepa_debit.mandate_reference = "3"
        self.source.sepa_debit.mandate_url = "http://foo.bar/baz"

    def _create_default_retrieved_charge(self):
        self.retrieved_charge = MagicMock()
        self.retrieved_charge.status = 'succeeded'

    def __enter__(self):
        self.exit_stack = ExitStack()
        customer_create = self.exit_stack.enter_context(
            patch('stripe.Customer.create'))
        customer_retrieve = self.exit_stack.enter_context(
            patch('stripe.Customer.retrieve'))
        sources_create = self.exit_stack.enter_context(
            patch('stripe.Source.create'))
        charge_create = self.exit_stack.enter_context(
            patch('stripe.Charge.create'))
        charge_retrieve = self.exit_stack.enter_context(
            patch('stripe.Charge.retrieve'))
        customer_create.return_value = self.customer
        customer_retrieve.return_value = self.customer
        sources_create.return_value = self.source
        charge_create.return_value = self.pending_charge
        charge_retrieve.return_value = self.retrieved_charge
        return self

    def __exit__(self, *exc):
        self.exit_stack.pop_all()
        return False
The functions I patch are mocked, but the code in the __exit__ method does not restore the patched functions, and they remain mocked during the other tests, which I don't want to happen.
What did I do wrong?

Google App Engine NDB post_create hook

I wanted to create proper post_create (and also post_get and post_put) hooks, similar to the ones I had in the DB version of my app.
Unfortunately I can't use has_complete_key.
The problem is well known: the lack of is_saved on a model.
Right now I have implemented it like this:
class NdbStuff(HooksInterface):
    def __init__(self, *args, **kwds):
        super(NdbStuff, self).__init__(*args, **kwds)
        self._is_saved = False

    def _put_async(self, post_hooks=True, **ctx_options):
        """ Implementation of pre/post create hooks. """
        if not self._is_saved:
            self._pre_create_hook()
        fut = super(NdbStuff, self)._put_async(**ctx_options)
        if not self._is_saved:
            fut._immediate_callbacks.insert(
                0,
                (
                    self._post_create_hook,
                    [fut],
                    {},
                )
            )
            self._is_saved = True
        if post_hooks is False:
            fut._immediate_callbacks = []
        return fut

    put_async = _put_async

    @classmethod
    def _post_get_hook(cls, key, future):
        obj = future.get_result()
        if obj is not None:
            obj._is_saved = True
        cls._post_get(key, future)

    def _post_put_hook(self, future):
        if future.state == future.FINISHING:
            self._is_saved = True
        else:
            self._is_saved = False
        self._post_put(future)
Everything except the post_create hook seems to work.
The post_create hook is triggered every time I use put_async without retrieving the object first.
I would really appreciate a clue on how to trigger the post_create_hook only once, after the object was created.
I am not sure why you are creating the NdbStuff class.
Anyway, if you are creating an instance of a class and you want to track _is_saved or something similar, use a factory to control creation and setting of the property; in this case it makes more sense to track _is_new, for example.
from google.appengine.ext import ndb

class MyModel(ndb.Model):
    some_prop = ndb.StringProperty()

    def _pre_put_hook(self):
        if getattr(self, '_is_new', None):
            self._pre_create_hook()
        # do something

    def _pre_create_hook(self):
        # do something on first save
        log.info("First put for this object")

    def _post_create_hook(self, future):
        # do something
        pass

    def _post_put_hook(self, future):
        if getattr(self, '_is_new', None):
            self._post_create_hook(future)
            # Get rid of the flag on successful put,
            # in case you make some changes and save again.
            delattr(self, '_is_new')

    @classmethod
    def factory(cls, *args, **kwargs):
        new_obj = cls(*args, **kwargs)
        setattr(new_obj, '_is_new', True)
        return new_obj
Then
myobj = MyModel.factory(someargs)
myobj.put()
myobj.some_prop = 'test'
myobj.put()
This will call the _pre_create_hook on the first put, and not on the second.
Always create the entities through the factory and the _pre_create_hook call will always be executed for new objects.
Does that make sense?
