How to delete a document in MongoDB - python

I am trying to create a delete method in order to delete a document that has the key "name" with the value "Rhonda". Whenever I execute my current code, I get an AttributeError saying: "'AnimalShelter' object has no attribute 'delete'". How do I get the method to return the deleted document's JSON contents? Here is my code:
testing_script.ipynb

from animal_shelter import AnimalShelter

# now need to create the object from the class
shelter = AnimalShelter("aacuser", "Superman")

data = {"age_upon_outcome": "2 years", "animal_type": "Dog", "breed": "Dachshund",
        "color": "Black and tan", "name": "Rhonda", "outcome_subtype": "Partner",
        "outcome_type": "Adopt", "sex_upon_outcome": "Female"}
new_values = {"$set": {"age_upon_outcome": "3 years"}}

# if shelter.create(data):
#     print("Animal added")
# else:
#     print("Failed to add animal")

# Calls the read function
# shelter.read(data)

# Calls the update function
# shelter.update(data, new_values)

# Calls the delete function
shelter.delete(data)
output

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-5-60b1d887dfb8> in <module>
     17
     18 # Calls the delete function
---> 19 shelter.delete(data)
     20

AttributeError: 'AnimalShelter' object has no attribute 'delete'
animal_shelter.py

from pymongo import MongoClient
from bson.objectid import ObjectId


class AnimalShelter(object):
    """ CRUD operations for Animal collection in MongoDB """

    def __init__(self, username, password):
        # Initializing the MongoClient. This helps to
        # access the MongoDB databases and collections.
        # init to connect to mongodb without authentication
        self.client = MongoClient('mongodb://localhost:55996')
        # init connect to mongodb with authentication
        # self.client = MongoClient('mongodb://%s:%s@localhost:55996/?authMechanism=DEFAULT&authSource=AAC' % (username, password))
        self.database = self.client['AAC']

# Complete this create method to implement the C in CRUD.
def create(self, data):
    if data is not None:
        self.database.animals.insert(data)  # data should be dictionary
        return True  # Tells whether the create function ran successfully
    else:
        raise Exception("Nothing to save ...")

# Create method to implement the R in CRUD.
def read(self, data):
    return self.database.animals.find_one(data)  # returns only one

# Update method to implement the U in CRUD.
def update(self, data, new_values):
    if self.database.animals.count(data):
        self.database.animals.update(data, new_values)
        return self.database.animals.find({"age_upon_outcome": "3 years"})
    else:
        raise Exception("Nothing to update ...")

# Delete method to implement the D in CRUD
def delete(self, data)
    result = self.database.animals.find_one_and_delete(data)
    # print the _id key only if the result is not None
    if ("_id" in result):
        print("find_one_and_delete ID:", result["_id"])
    else:
        print("Nothing to delete")

The problem is that the functions you are defining sit outside the class: you have to indent the methods so they belong to class AnimalShelter. Also, as pointed out in a comment, you are missing a : at the end of the delete definition.
Updated animal_shelter.py:
from pymongo import MongoClient
from bson.objectid import ObjectId


class AnimalShelter(object):
    """ CRUD operations for Animal collection in MongoDB """

    def __init__(self, username, password):
        # Initializing the MongoClient. This helps to
        # access the MongoDB databases and collections.
        # init to connect to mongodb without authentication
        self.client = MongoClient('mongodb://localhost:55996')
        # init connect to mongodb with authentication
        # self.client = MongoClient('mongodb://%s:%s@localhost:55996/?authMechanism=DEFAULT&authSource=AAC' % (username, password))
        self.database = self.client['AAC']

    # Complete this create method to implement the C in CRUD.
    def create(self, data):
        if data is not None:
            self.database.animals.insert(data)  # data should be dictionary
            return True  # Tells whether the create function ran successfully
        else:
            raise Exception("Nothing to save ...")

    # Create method to implement the R in CRUD.
    def read(self, data):
        return self.database.animals.find_one(data)  # returns only one

    # Update method to implement the U in CRUD.
    def update(self, data, new_values):
        if self.database.animals.count(data):
            self.database.animals.update(data, new_values)
            return self.database.animals.find({"age_upon_outcome": "3 years"})
        else:
            raise Exception("Nothing to update ...")

    # Delete method to implement the D in CRUD
    def delete(self, data):
        result = self.database.animals.find_one_and_delete(data)
        # print the _id key only if the result is not None
        if result is not None:
            print("find_one_and_delete ID:", result["_id"])
        else:
            print("Nothing to delete")
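Since the question also asks how to get the deleted document's contents back: find_one_and_delete already returns the removed document (or None if nothing matched), so delete can simply return it. A minimal sketch of that variant:

def delete(self, data):
    """Delete one matching document and return its contents (or None)."""
    result = self.database.animals.find_one_and_delete(data)
    if result is not None:
        print("find_one_and_delete ID:", result["_id"])
    else:
        print("Nothing to delete")
    return result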

Related

ZODB transactions for nested objects not working

I know that there is little development on ZODB these days, but this might be useful for someone still using ZODB in 2022, or there might be some obvious thing I'm missing: when trying to store changes to persistent objects inside a ZODB.DB.transaction with block, the changes are not stored and no error is raised, while doing the same between transaction.begin() and transaction.commit() calls does work. That is, the only way to currently use a with block is to change objects directly through conn.root(), which means every persistent object that wants to store changes on itself must know the full path from the root to itself, which is impractical.
There is also another weird behavior: after storing an object for the first time, retrieving it returns the same object, while the second call and onwards return a different object. This trips up tests that check whether something was stored successfully, since it only happens once.
The following code tries to store attributes in a two-level persistent hierarchy (simplified dev code):
import ZODB
import ZODB.FileStorage
from persistent.mapping import PersistentMapping
import transaction

store = ZODB.FileStorage.FileStorage("temp1.db")
db = ZODB.DB(store)

def get_init(name, obj):
    with db.transaction(f"creating root[{name}]") as conn:
        try:
            return conn.root()[name]
        except KeyError:
            conn.root()[name] = obj()
            return conn.root()[name]

class A:
    def __init__(self):
        self.cfg = PersistentMapping()

    def __setitem__(self, key, value) -> None:
        transaction.begin()
        self.cfg[key + ", inside block"] = value
        transaction.commit()
        with db.transaction():
            self.cfg[key + ", inside with"] = value  # does not work
        # these should be equivalent, no?

    def __iter__(self):
        return iter(self.cfg)

class Manager:
    def __init__(self):
        self.a1 = get_init("testing", PersistentMapping)  # set up the db, should only happen once

    def __setitem__(self, name, obj) -> None:
        """Registers in persistent storage"""
        with db.transaction(f"Adding testing:{name}") as conn:
            if name in conn.root()["testing"]:
                print(f"testing with same name {name} already exists in storage")
                return
            conn.root()["testing"][name] = obj

    def __getitem__(self, name: str):
        return db.open().root()["testing"][name]

dm = Manager()
initial = A()  # only relevant for first run
dm['a'] = initial  # only relevant for first run

fromdb1 = dm['a']
fromdb2 = dm['a']

with db.transaction() as conn:
    fromdb1.cfg['updated from outer txn, directly'] = 1  # does not work
    conn.root()['testing']['a'].cfg['updated from outer txn, through conn'] = 1
    # these should be equivalent but only the second one works

initial['new txn updated on initial'] = 1
fromdb1['new txn updated on retrieved 1'] = 1
fromdb2['new txn updated on retrieved 2'] = 1

print(f"initial obj - {initial.cfg}")
print(f"from db obj 1 - {fromdb1.cfg}")
print(f"from db obj 2 - {fromdb2.cfg}")
print(f"\nnew from db obj - {dm['a'].cfg}")
print(f"\nis the initial obj and the first obj from db the same: {initial is fromdb1}")
print(f"is the initial obj and the second obj from db the same: {initial is fromdb2}")
Unless I'm missing something, the expected result is for all of those methods to work.
Any advice from people using ZODB?
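One tentative explanation for the with-block behaviour, offered as an assumption rather than a verified answer: db.transaction() opens a fresh connection with its own transaction manager, so mutations made through objects loaded from a different connection (or managed by the thread-local manager that transaction.begin()/commit() uses) never join that block's transaction. A sketch of committing through an explicit manager bound to the connection, reusing the question's db:

import transaction

# Bind a connection to an explicit transaction manager, then mutate
# objects loaded through that same connection.
tm = transaction.TransactionManager()
conn = db.open(tm)
with tm:  # commits on normal exit, aborts on exception
    conn.root()["testing"]["a"].cfg["updated via explicit tm"] = 1
conn.close()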

How to increase write speed on inserts, pymongo?

I have the following code to insert documents into MongoDB. The problem is that it's quite slow, since I'm unable to parallelize it, and considering I have to check whether each inserted document already exists, I believe it's impossible to use bulk inserts. I'm wondering if there is a faster approach to this problem. After profiling the code below I found that check_record() and update_upstream() are the two most time-consuming functions, so optimising them would increase the overall speed. Any input on how to optimise the code below would be highly appreciated. Thank you!
import os
import multiprocessing  # needed for multiprocessing.cpu_count() below

import pymongo
from directory import Directory
from pymongo import ASCENDING
from pymongo import DESCENDING
from pymongo import MongoClient
from storage_config import StorageConfig
from tqdm import tqdm

dir = Directory()

def DB_collections(collection_type):
    types = {'p': 'player_stats',
             't': 'team_standings',
             'f': 'fixture_stats',
             'l': 'league_standings',
             'pf': 'fixture_players_stats'}
    return types.get(collection_type)

class DB():
    def __init__(self, league, season, func=None):
        self.db_user = os.environ.get('DB_user')
        self.db_pass = os.environ.get('DB_pass')
        self.MONGODB_URL = f'mongodb+srv://{self.db_user}:{self.db_pass}@cluster0-mbqxj.mongodb.net/<dbname>?retryWrites=true&w=majority'
        self.league = league
        self.season = str(season)
        self.client = MongoClient(self.MONGODB_URL)
        self.DATABASE = self.client[self.league + self.season]
        self.pool = multiprocessing.cpu_count()
        self.playerfile = f'{self.league}_{self.season}_playerstats.json'
        self.teamfile = f'{self.league}_{self.season}_team_standings.json'
        self.fixturefile = f'{self.league}_{self.season}_fixturestats.json'
        self.leaguefile = f'{self.league}_{self.season}_league_standings.json'
        self.player_fixture = f'{self.league}_{self.season}_player_fixture.json'
        self.func = func

    def execute(self):
        if self.func is not None:
            return self.func(self)

def import_json(file):
    """Imports a json file in read mode

    Args:
        file (str): Name of file
    """
    return dir.load_json(file, StorageConfig.DB_DIR)

def load_file(file):
    try:
        loaded_file = import_json(file)
        return loaded_file
    except FileNotFoundError:
        print("Please check that", file, "exists")

def check_record(collection, index_dict):
    """Check if record exists in collection

    Args:
        index_dict (dict): key, value
    """
    return collection.find_one(index_dict)

def collection_index(collection, index, *args):
    """Checks if an index exists for the collection,
    and creates a new index if not

    Args:
        collection (str): Name of collection in database
        index (str): Dict key to be used as an index
        args (str): Additional dict keys to create compound indexes
    """
    compound_index = tuple((arg, ASCENDING) for arg in args)
    if index not in collection.index_information():
        return collection.create_index([(index, DESCENDING), *compound_index], unique=True)

def push_upstream(collection, record):
    """Insert record into collection

    Args:
        collection (str): Name of collection in database
        record (dict): Data to be pushed into the collection
    """
    return collection.insert_one(record)

def update_upstream(collection, index_dict, record):
    """Update record in collection

    Args:
        collection (str): Name of collection in database
        index_dict (dict): key, value
        record (dict): Data to be updated in collection
    """
    return collection.update_one(index_dict, {"$set": record}, upsert=True)

def executePushPlayer(db):
    playerstats = load_file(db.playerfile)
    collection_name = DB_collections('p')
    collection = db.DATABASE[collection_name]
    collection_index(collection, 'p_id')
    for player in tqdm(playerstats):
        existingPost = check_record(collection, {'p_id': player['p_id']})
        if existingPost:
            update_upstream(collection, {'p_id': player['p_id']}, player)
        else:
            push_upstream(collection, player)

if __name__ == '__main__':
    db = DB('EN_PR', '2019')
    executePushPlayer(db)
You can combine the check/insert/update logic into a single update_one() command using upsert=True, then use the bulk operators with something like:

from pymongo import UpdateOne

updates = []
for player in tqdm(playerstats):
    updates.append(UpdateOne({'p_id': player['p_id']}, {'$set': player}, upsert=True))
collection.bulk_write(updates)

Finally, check that your index is being used with the following command in the MongoDB shell:

db.mycollection.aggregate([{ $indexStats: {} }])

And review the accesses.ops metric.
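Folding that into the question's executePushPlayer, a sketch under the assumption that the unique p_id index from collection_index is in place (the batch size of 1000 is arbitrary):

from pymongo import UpdateOne

def executePushPlayer(db):
    playerstats = load_file(db.playerfile)
    collection = db.DATABASE[DB_collections('p')]
    collection_index(collection, 'p_id')

    updates = []
    for player in tqdm(playerstats):
        updates.append(UpdateOne({'p_id': player['p_id']}, {'$set': player}, upsert=True))
        if len(updates) >= 1000:  # flush periodically instead of holding one huge batch
            collection.bulk_write(updates, ordered=False)
            updates = []
    if updates:  # flush the remainder
        collection.bulk_write(updates, ordered=False)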

How to pass an array to a python class in FLASK

I started working with Flask (Python) recently.
I am trying to send an array read from the database to a class that defines a form.
Here is my class:
# livraison Form Class
class livraisonForm(Form):
    list_assurances = ['-', u'Aucune assurance trouvée']
    type_assur = SelectField(u'Type d\'assurance', choices=list_assurances)

    # INIT function:
    def __init__(self, list_assurances, *args, **kwargs):
        super(Form)
        self.list_assurances = list_assurances
Here is how I am trying to pass the array to the init function
def add_livraison():
    form = livraisonForm(request.form, get_assurances())
The get_assurances() function returns an array, as mentioned below:
def get_assurances():
    # Create db cursor
    cur = mysql.get_db().cursor()

    # Get user by username
    result = cur.execute("SELECT ID_ASSURANCE, DESCRIPTION FROM type_assurance ")

    if result > 0:
        # Get assurances list
        data = cur.fetchone()
        # Close connection
        cur.close()
        return [(i[0]+'', i[1]+'') for i in data]

    # Close connection
    cur.close()
    return ['-', u'Aucun assur trouvée']
Unfortunately, I am getting this error from the form class:
TypeError: 'UnboundField' object is not callable
I tried removing the list_assurances variable from the form and calling the function directly, but then I got an error saying that the database object has no attribute cursor.
I would like to know the right way to send an array to a form class in Flask.
Thank you so much
form = livraisonForm(request.form, get_assurances())

Here you're actually passing request.form as the list_assurances argument, not get_assurances() as you intended.
Try it like this:

form = livraisonForm(get_assurances())
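A different route, sketched under the assumption that this is WTForms (which Form/SelectField suggest): keep the field unbound in the class and assign its choices per instance, which avoids overriding __init__ at all:

# livraison Form Class, with per-instance choices
class livraisonForm(Form):
    type_assur = SelectField(u'Type d\'assurance')

def add_livraison():
    form = livraisonForm(request.form)
    # choices is a list of (value, label) pairs, like the ones
    # get_assurances() builds on its main path
    form.type_assur.choices = get_assurances()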

OOP python: Where to instantiate Cassandra and elasticsearch cluster?

I have an object which interacts a lot with Elasticsearch and Cassandra, but I don't know where to instantiate my Cassandra and Elasticsearch sessions. Should I put them at module level in my code and pass the session as a parameter to my functions, like this:
cassandra_cluster = Cluster()
session = cassandra_cluster.connect()
es = Elasticsearch()

class Article:
    document_type = "cnn_article"

    def __init__(self):
        self.author = ""
        self.url = ""
        ...

    @classmethod
    def from_crawl(cls, url):
        obj = cls()
        # Launch a crawler and fill the fields and return the object

    @classmethod
    def from_elasticseacrh(cls, elastic_search_document):
        obj = cls()
        # Read the response from elasticsearch and return the object

    def save_to_cassandra(self):
        # Save an object into cassandra
        session.execute(.....)

    def save_to_elasticsearch(self, index_name, es):
        # Save an object into elasticsearch
        es.index(index=index_name, ...)

...
article = Article.from_crawl("http://cnn.com/article/blabla")
article.save_to_cassandra(session)
article.save_to_elasticsearch("cnn", es)
Or should I make the Cassandra and Elasticsearch sessions class-level attributes, like this:
class Article:
    cassandra_cluster = Cluster()
    session = cassandra_cluster.connect()
    es = Elasticsearch()
    document_type = "cnn_article"

    def __init__(self):
        self.author = ""
        self.url = ""
        ...

    @classmethod
    def from_crawl(cls, url):
        obj = cls()
        # Launch a crawler and fill the fields and return the object

    @classmethod
    def from_elasticseacrh(cls, elastic_search_document):
        obj = cls()
        # Read the response from elasticsearch and return the object

    def save_to_cassandra(self):
        # Save an object into cassandra
        session.execute(.....)

    def save_to_elasticsearch(self):
        # Save an object into elasticsearch
        es.index(....)

...
article = Article.from_crawl("http://cnn.com/article/blabla")
article.save_to_cassandra()
article.save_to_elasticsearch()
Based on their documentation and some of the examples here: http://www.datastax.com/dev/blog/datastax-python-driver-multiprocessing-example-for-improved-bulk-data-throughput
I would go with your second approach. They mention that the session is only a context manager for shutting down connections, and their query examples show the session as a class attribute.
I think both would work, but if you want to use multiprocessing, it may be marginally easier with the latter approach.
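If the multiprocessing route matters, the DataStax post linked above recreates the session per worker process rather than sharing one across a fork. A rough sketch of that pattern (the table and INSERT statement are placeholders, not from the question):

import multiprocessing

from cassandra.cluster import Cluster

session = None  # each worker process gets its own Session

def init_worker():
    # Driver connections and event loops must not be shared across a
    # fork, so the Cluster/Session is built after the fork.
    global session
    session = Cluster().connect()

def insert_url(url):
    # Placeholder statement: replace with the real keyspace/table/columns.
    session.execute("INSERT INTO articles (url) VALUES (%s)", (url,))

if __name__ == "__main__":
    urls = ["http://cnn.com/article/blabla"]
    with multiprocessing.Pool(2, initializer=init_worker) as pool:
        pool.map(insert_url, urls)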

How to use Flask-Cache with Flask-Restful

How do I use the Flask-Cache @cache.cached() decorator with Flask-Restful? For example, I have a class Foo inherited from Resource, and Foo has get, post, put, and delete methods.
How can I invalidate cached results after a POST?
@api.resource('/whatever')
class Foo(Resource):
    @cache.cached(timeout=10)
    def get(self):
        return expensive_db_operation()

    def post(self):
        update_db_here()
        ## How do I invalidate the value cached in get()?
        return something_useful()
As the Flask-Cache implementation doesn't give you access to the underlying cache object, you'll have to explicitly instantiate a Redis client and use its keys method (which lists all cache keys).
The cache_key function is used to override the default key generation in your cache.cached decorator.
The clear_cache method will clear only the portion of the cache corresponding to the current resource.
This solution was tested only with Redis, and the implementation will probably differ a little when using a different cache engine.
from app import cache  # The Flask-Cache object
from config import CACHE_REDIS_HOST, CACHE_REDIS_PORT  # The Flask-Cache config
from redis import Redis
from flask import request
import urllib.parse  # plain urllib.urlencode on Python 2
import logging

log = logging.getLogger(__name__)  # 'log' is used by clear_cache below
redis_client = Redis(CACHE_REDIS_HOST, CACHE_REDIS_PORT)

def cache_key():
    args = request.args
    key = request.path + '?' + urllib.parse.urlencode([
        (k, v) for k in sorted(args) for v in sorted(args.getlist(k))
    ])
    return key

@api.resource('/whatever')
class Foo(Resource):
    @cache.cached(timeout=10, key_prefix=cache_key)
    def get(self):
        return expensive_db_operation()

    def post(self):
        update_db_here()
        self.clear_cache()
        return something_useful()

    def clear_cache(self):
        # Note: we have to use the Redis client to delete keys by prefix,
        # so we can't use the 'cache' Flask extension for this one.
        key_prefix = request.path
        keys = [key for key in redis_client.keys() if key.startswith(key_prefix)]
        nkeys = len(keys)
        for key in keys:
            redis_client.delete(key)
        if nkeys > 0:
            log.info("Cleared %s cache keys" % nkeys)
            log.info(keys)
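One caveat about the snippet above: redis_client.keys() walks the entire keyspace and blocks Redis while doing so. On anything but a small instance, a gentler variant (a sketch using redis-py's scan_iter) would be:

def clear_cache(self):
    # SCAN iterates incrementally instead of blocking like KEYS does.
    key_prefix = request.path
    for key in redis_client.scan_iter(match=key_prefix + "*"):
        redis_client.delete(key)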
Yes, you can use it like that.
You may also want to read: flask-cache memoize URL query string parameters as well.

You can invalidate the cache using the cache.clear() method.
For more details see https://pythonhosted.org/Flask-Cache/#flask.ext.cache.Cache.clear and the Clearing Cache section in https://pythonhosted.org/Flask-Cache/
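A minimal sketch of that approach; note that cache.clear() empties the whole cache, not just this resource's entries:

@api.resource('/whatever')
class Foo(Resource):
    @cache.cached(timeout=10)
    def get(self):
        return expensive_db_operation()

    def post(self):
        update_db_here()
        cache.clear()  # blunt: drops every cached entry, not only get()'s
        return something_useful()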
# Create a decorator
from werkzeug.contrib.cache import SimpleCache
from flask import request  # needed for request.path below

CACHE_TIMEOUT = 300
cache = SimpleCache()

class cached(object):
    def __init__(self, timeout=None):
        self.timeout = timeout or CACHE_TIMEOUT

    def __call__(self, f):
        def decorator(*args, **kwargs):
            response = cache.get(request.path)
            if response is None:
                response = f(*args, **kwargs)
                cache.set(request.path, response, self.timeout)
            return response
        return decorator

# Add this decorator to your views like below
@app.route('/buildingTotal', endpoint='buildingTotal')
@cached()
def eventAlert():
    return 'something'

@app.route('/buildingTenants', endpoint='buildingTenants')
@cached()
def buildingTenants():
    return 'something'
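A note on this snippet: werkzeug.contrib.cache (and with it SimpleCache) was removed in Werkzeug 1.0; its maintained successor lives in the cachelib package, so on modern installs the import becomes the following, with everything else in the decorator unchanged:

from cachelib import SimpleCache  # pip install cachelib; replaces werkzeug.contrib.cache

cache = SimpleCache()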
The answer from @JahMyst didn't work for me.
Flask-Cache doesn't work with the Flask-Restful framework: @cache.cached and @cache.memoize can't handle mutable objects, per their documentation:
Using mutable objects (classes, etc) as part of the cache key can become tricky. It is suggested to not pass in an object instance into a memoized function. However, the memoize does perform a repr() on the passed in arguments so that if the object has a __repr__ function that returns a uniquely identifying string for that object, that will be used as part of the cache key.
I had to come up with my own implementation; leaving this code snippet in case someone else gets stuck with the same issue.
The cache_key function converts the user's request into a hash.
The cache_res_pickled function is used to pickle or unpickle the data.
|-flask-app
|-app.py
|-resource
|--some_resource.py
import datetime  # needed by json_serial below
import hashlib   # needed by cache_key below
import json
import logging
import pickle
import time
import urllib.parse

from flask import Response, abort, request
from flask_restful import Resource  # SomeResource below subclasses Resource
from redis import Redis

redis_client = Redis("127.0.0.1", 6379)
exp_setting_s = 1500
def json_serial(obj):
    """
    JSON serializer for objects not serializable by default json code

    Args:
        obj: JSON serialized object for dates
    Returns:
        serialized JSON data
    """
    if isinstance(obj, datetime.datetime):
        return obj.__str__()

def cache_key():
    """
    Returns: Hashed string of the request made by the user.
    """
    args = request.args
    key = (
        request.path
        + "?"
        + urllib.parse.urlencode(
            [(k, v) for k in sorted(args) for v in sorted(args.getlist(k))]
        )
    )
    key_hashed = hashlib.sha256(key.encode())
    return key_hashed.hexdigest()

def cache_res_pickled(data, encode):
    """
    Args:
        data (dict): Data in dict format
        encode (Boolean): Encode (true) or decode (false) the data
    Returns: Result after pickling
    """
    if encode:
        return pickle.dumps(data)
    else:
        data = pickle.loads(data)
        return data
class SomeResource(Resource):
    @auth.login_required
    def get(self):
        # Get the key for the request in hashed format (SHA256)
        key = cache_key()
        result = redis_client.get(key)

        def generate():
            """
            A lagging generator to stream JSON so we don't have to hold everything in memory.
            This is a little tricky, as we need to omit the last comma to make valid JSON,
            thus we use a lagging generator, similar to http://stackoverflow.com/questions/1630320/
            """
            releases = res.__iter__()
            try:
                prev_release = next(releases)  # get first result
                # We have some releases. First, yield the opening json
                yield '{"data": ['
                # Iterate over the releases
                for release in releases:
                    yield json.dumps(prev_release, default=json_serial) + ", "
                    prev_release = release
                logging.info(f"For {key} # records returned = {len(res)}")
                # Now yield the last iteration without comma but with the closing brackets
                yield json.dumps(prev_release, default=json_serial) + "]}"
            except StopIteration:
                # StopIteration here means the length was zero, so yield a valid releases doc and stop
                logging.info(f"For {key} # records returned = {len(res)}")
                yield '{"data": []}'

        if result is None:
            # Secure a key on the Redis server.
            redis_client.set(key, cache_res_pickled({}, True), ex=exp_setting_s)
            try:
                # Do the querying to the DB or math here to get res. It should be in dict format as shown below
                res = {"A": 1, "B": 2, "C": 2}
                # Update the key on the Redis server with the latest data
                redis_client.set(key, cache_res_pickled(res, True), ex=exp_setting_s)
                return Response(generate(), content_type="application/json")
            except Exception as e:
                logging.exception(e)
                abort(505, description="Resource not found. error - {}".format(e))
        else:
            res = cache_res_pickled(result, False)
            if res:
                logging.info(
                    f"The data already exists!😊 Loading the data from the Redis cache for key - {key}"
                )
                return Response(generate(), content_type="application/json")
            else:
                logging.info(
                    f"There is already a request for this key, but there is no data in it yet. Key: {key}."
                )
                s = time.time()
                counter = 0
                # loop until the data is available on Redis
                while not any(res):
                    result = redis_client.get(key)
                    res = cache_res_pickled(result, False)
                    counter += 1
                logging.info(
                    f"The data was available after {time.time() - s} seconds. Had to loop {counter} times.🤦"
                )
                return Response(generate(), content_type="application/json")
Inspired by durga's answer, I wrote a very basic decorator which uses Redis directly instead of any library.
from src.consts import config
from src.utils.external_services import redis_connector
import json
import jsons
import base64

class cached(object):
    def __init__(self, req, timeout=None):
        self.timeout = timeout or config.CACHE_DEFAULT_TIMEOUT
        self.request = req
        self.cache = redis_connector.get_redis_instance()

    def __call__(self, f):
        def decorator(*args, **kwargs):
            redis_healthy = True
            if self.cache is not None:
                try:
                    self.cache.ping()
                except Exception as ex:
                    redis_healthy = False
            else:
                redis_healthy = False
            if self.request is not None and self.request.values is not None and self.request.path is not None and redis_healthy:
                cache_key = "{}-{}".format(self.request.path, json.dumps(jsons.dump(self.request.values), sort_keys=True))
                cache_key_base_64 = base64.b64encode(cache_key.encode("ascii")).decode("ascii")
                response = self.cache.get(cache_key_base_64)
                if response is None:
                    response = f(*args, **kwargs)
                    self.cache.setex(cache_key_base_64, self.timeout, jsons.dumps(response))
                else:
                    response = json.loads(response)
            else:
                response = f(*args, **kwargs)
            return response
        return decorator
Now use this decorator on your API functions:
from flask import g, request
from flask_restful import Resource
from webargs.flaskparser import use_args

class GetProducts(Resource):
    @use_args(gen_args.argsGetProducts)
    @cached(request)
    def get(self, args):
        return "hello from products"
