I have a list with a JSON like so:
print(type(listed)) # <class 'list'>
print (listed)
[
{
"email": "x#gmail.com",
"fullname": "xg gf",
"points": 5,
"image_url": "https://imgur.com/random.pmg"
},
{
... similar json for the next user and so on
}
]
I'm trying to insert them into my postgres database that has a model like this:
class Users(db.Model):
    """Flask-SQLAlchemy model for the users table."""
    __tablename__ = 'users'
    # email is the natural primary key here.
    email = db.Column(db.String(), primary_key=True)
    # NOTE(review): the incoming JSON uses keys fullname/image_url, which do
    # not match displayName/image — a key mapping is needed before inserting.
    displayName = db.Column(db.String())
    image = db.Column(db.String())
    points = db.Column(db.Integer())
But I'm quite stuck, I've tried several approaches but none worked, anyone can guide me with an example on how to do it properly?
Here's a solution without pandas, using SQLAlchemy Core
# create engine
engine = sqlalchemy.create_engine('...')

# load the metadata using the engine as the bind parameter
# (fixed: the class is sqlalchemy.MetaData — the original had the
# misspelling `sqalchemy.Metadata`, which raises AttributeError)
metadata = sqlalchemy.MetaData(bind=engine)

# make a reference to the table
# NOTE(review): autoload=True is the legacy spelling; SQLAlchemy 1.4+
# prefers autoload_with=engine.
users_table = sqlalchemy.Table('users', metadata, autoload=True)

# you can then start your inserts
for user in json:
    # fixed: Insert.values() is *generative* — it returns a NEW Insert
    # object rather than mutating in place, so the result must be kept.
    # The original discarded it and executed a bare INSERT with no values.
    query = users_table.insert().values(**user)
    my_session = Session(engine)
    my_session.execute(query)
    my_session.close()
This creates a session for every user in json, but I thought you might like it anyway. It's very flexible and works for any table; you don't even need a model. Just make sure the JSON doesn't contain any columns that don't exist in the db — the JSON keys must exactly match the database column names (e.g. "image_url" must be both the JSON key and the column name).
Here is an example json list, like you provided.
# NOTE(review): the name `json` shadows the stdlib json module — rename it
# if this script also needs `import json`.
json = [
    {
        "email": "x#gmail.com",
        "fullname": "xg gf",
        "points": 5,
        "image_url": "https://imgur.com/random.pmg"
    },
    {
        "email": "onur#gmail.com",
        "fullname": "o g",
        "points": 7,
        "image_url": "https://imgur.com/random_x.pmg"
    }
]
Now create an empty dataframe all_df and run iterations inside your json list.
Each iteration creates a dataframe with the data from dictionary inside the list, transpose it and append to all_df.
import pandas as pd

# Build one single-row frame per user dict, then concatenate once at the
# end: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
# and appending inside a loop re-copies the frame each iteration
# (quadratic). pd.concat of a pre-built list is the supported idiom.
frames = [pd.DataFrame.from_dict(data=i, orient='index').T for i in json]
all_df = pd.concat(frames) if frames else pd.DataFrame()
Output:
Now you can go ahead create a session to your database and push all_df
# Push the accumulated frame through the session's connection; index=False
# keeps the synthetic DataFrame index from becoming a table column.
all_df.to_sql(con=your_session.bind, name='your_table_name', if_exists='your_preferred_method', index=False)
Using marshmallow-sqlalchemy
validate the incoming JSON
create general utilities for loading and dumping data
Define schemas
schema.py
from marshmallow import EXCLUDE
from marshmallow_sqlalchemy import ModelSchema
from app import db
# NOTE(review): `Users` (referenced below) is never imported in this
# listing, and `EXCLUDE` is imported but unused — both need attention
# before this module runs.


class UserSchema(ModelSchema):
    """marshmallow-sqlalchemy schema bound to the Users model/session."""

    class Meta(ModelSchema.Meta):
        model = Users
        sqla_session = db.session


# Schema instance restricted to exactly the four user columns.
user_schema_full = UserSchema(only=(
    'email',
    'displayName',
    'image',
    'points'
))
utils.py
Exact details below don't matter but create general utility for going from JSON to ORM objects and ORM objects to JSON. schema_partial used for auto generated primary keys.
def loadData(data, schema_partial, many=False,
             schema_full=None, instance=None):
    """Deserialize *data* through the appropriate marshmallow schema.

    When *instance* is given, load with *schema_full* into that existing
    object; otherwise load fresh objects with *schema_partial* (useful
    when primary keys are auto-generated).  Raises InvalidData (HTTP 400)
    if validation fails.
    """
    try:
        if instance is None:
            return schema_partial.load(data, many=many)
        return schema_full.load(data, instance=instance, many=many)
    except ValidationError as errors:
        raise InvalidData(errors, status_code=400)
def loadUser(data, instance=None, many=False):
    """Load user JSON into Users ORM object(s) via the full user schema."""
    return loadData(
        data=data,
        schema_partial=user_schema_full,
        schema_full=user_schema_full,
        many=many,
        instance=instance,
    )
def dumpData(load_object, schema, many=False):
    """Serialize ORM object(s) to plain data with the given schema.

    Raises InvalidDump (HTTP 400) if the schema reports a validation
    error during dumping.
    """
    try:
        result = schema.dump(load_object, many=many)
    except ValidationError as errors:
        raise InvalidDump(errors, status_code=400)
    else:
        return result
def dumpUser(load_object, many=False):
    """Dump Users ORM object(s) to JSON-ready data via the full schema."""
    return dumpData(load_object, many=many, schema=user_schema_full)
Use loadUser and dumpUser within api to produce clean flat code.
api.py
# fixed: the original listing had "#app.route", which Python treats as a
# comment — the view was never registered. The decorator must use "@".
@app.route('/users/', methods=['POST'])
def post_users():
    """Post many users.

    Expects a JSON list of user dicts; loads them into ORM objects,
    stages them on the session, and returns the serialized users, 201.
    """
    users_data = request.get_json()
    users = loadUser(users_data, many=True)
    for user in users:
        db.session.add(user)
    # NOTE(review): the dump happens before commit, so DB-generated values
    # (e.g. autoincrement keys) may be absent from the response — confirm
    # this ordering is intentional.
    object_dump = dumpUser(users, many=True)
    db.session.commit()
    return jsonify(object_dump), 201
Related
I have a question you guys might be able to answer.
I have a json file that looks something like this:
[
{
"address": "some address",
"full_time_school": false,
"name": "some name",
"official_id": "722154",
"school_type": "Grundschule",
"school_type_entity": "Grundschule",
"state": "BW"
},
{
"address": "some other address",
"name": "some other name",
"official_id": "722190",
"state": "BW"
}
]
The point is that not every entry has all keys.
I have a flask-sqlalchemy model that looks like this:
class School(db.Model):
    """Flask-SQLAlchemy model for one school record."""
    __tablename__ = "school" # pragma: no cover
    # NOTE(review): no column is marked primary_key=True; SQLAlchemy
    # requires a primary key on mapped classes. official_id looks like the
    # natural candidate — confirm before changing the schema.
    address = db.Column(db.String)
    full_time_school = db.Column(db.Boolean)
    name = db.Column(db.String)
    official_id = db.Column(db.Integer)
    school_type = db.Column(db.String)
    school_type_entity = db.Column(db.String)
    state = db.Column(db.String)

    def __repr__(self):
        # fixed: the original f-string was missing the closing ">"
        return f"<name {self.name}>"
And I have a python script to add the json entries into my postgresql database that looks like this:
from my_project import db
from my_project.models import School
import json
import os
# insert data
for filename in os.listdir("datamining"):
    if filename.endswith(".json"):
        file = open(os.path.join("datamining", filename))
        # NOTE(review): str.strip removes a *set of characters*, not a
        # suffix — "json" chars are stripped from both ends; removesuffix
        # is what's intended here.
        print(f"Add schools from {filename.strip('.json')}")
        data = json.load(file)
        # De-duplicate by official_id: later entries win.
        cleaned_data = {school["official_id"]: school for school in data}.values()
        # NOTE(review): prints len(data), but only len(cleaned_data) rows
        # are actually inserted after de-duplication.
        print(f"Adding {len(data)} schools to the database.")
        for school in cleaned_data:
            entry = School(
                id=school["official_id"]
            )
            for key, value in school.items():
                # NOTE(review): this is the bug the poster is asking
                # about — it assigns every value to the *literal*
                # attribute name "key"; setattr(entry, key, value) was
                # the intent.
                entry.key = value
            # NOTE(review): committing per row is slow; one commit per
            # file (or per run) would suffice.
            db.session.add(entry)
            db.session.commit()
        file.close()
print("Added all schools!!!")
I don't know why but somehow every cell is NULL except the official_id field. How so and how can I fix that? I'm at the end of my wits right now. Every pointer or help is much appreciated.
EDIT:
What I found out so far is, that entry.key is not interpreted as entry.state for example, but actually creates a reference entry.key = "BW" for example. Why is that?
Your problem is
entry.key = value
You are just writing your values over and over into the attribute 'key' within your School model. I'm actually surprised SQLAlchemy doesn't raise some kind of error here...
Just pass all your values into the constructor and you should be fine:
school["id"] = school.pop("official_id")
entry = School(**school)
EDIT: It's "BW" because this happens to be the last value that is written into the attribute.
You can do this much easier and faster all in one go by executing this native parameterized query passing the text contents of the JSON file as parameter jsontext:
-- Bulk insert: jsonb_populate_recordset expands the JSON array into rows
-- matching the school table's row type (missing keys become NULL).
-- :jsontext is the bound parameter holding the file's text, cast to jsonb.
insert into school
select * from jsonb_populate_recordset(null::school, :jsontext::jsonb);
I am trying to access a new document from a mongo database collection named games by the _id. But for example if I access localhost:5000/solutie/5ae71f3e8e442b090e4c313b it is giving me the error: ValueError: View function did not return a response, so it doesn't go through the if, and I think I should convert the value of the _id to another type but I don't know how.
This is my flask code:
# NOTE(review): "@" often gets mangled to "#" when pasted — this must be
# @app.route for the route to register.
#app.route('/solutie/<id>')
def solu(id):
    """Echo the id back if a game document with that _id exists."""
    games = mongo.db.games
    # NOTE(review): `id` arrives as a str, but the stored _id is an
    # ObjectId ({"$oid": ...}), so this lookup presumably never matches —
    # that is the conversion the poster is asking about.
    game_user = games.find_one({'_id' : id})
    if game_user:
        return id
    # NOTE(review): no return statement on the miss path -> Flask raises
    # "View function did not return a response".
This is my mongo database collection named games:
{
"_id": {
"$oid": "5ae71f3e8e442b090e4c313b"
},
"sursa1": "nothingfornow",
"sursa2": "nothing4now",
"corectat": 0,
"player2": "test",
"player1": "test2",
"trimis1": 1,
"trimis2": 1
}
There's an object type converter you can use for URL routing:
# fixed: the decorator needs "@" (the pasted "#" made it a comment), and
# Flask-PyMongo registers its BSONObjectIdConverter under the URL
# converter name "ObjectId" (not "ObjectID").
@app.route('/solutie/<ObjectId:game_id>')
def solu(game_id):
    """Look up a game by its ObjectId, 404 if absent."""
    games = mongo.db.games
    game_user = games.find_one_or_404({'_id' : game_id})
    # NOTE(review): returning a raw Mongo document relies on Flask >= 1.1
    # dict-to-JSON conversion — confirm, or wrap in jsonify().
    return game_user
See:
https://flask-pymongo.readthedocs.io/en/latest/#flask_pymongo.BSONObjectIdConverter
Also, don't override id() because this is an in-built Python function.
The second parameter of the find() method is an object describing which fields to include in the result.
This parameter is optional and if omitted, all fields are included in the result.
# #app.route('/solutie/<int:id>') # or
@app.route('/solutie/<string:id>')
def solu(id):
    """Return the id if a matching game document exists, else the index page."""
    myclient = pymongo.MongoClient("mongodb://localhost:27017/")
    mydb = myclient["mydatabase"]
    games = mydb["games"]
    # fixed: find() returns a Cursor, which is never None, so the original
    # `if game_user is not None` always succeeded; find_one() returns the
    # document or None. Also, find()'s second argument is a *projection*,
    # not a filter — {"_id": id} belongs in the filter position.
    # NOTE(review): `id` is a str; if _id is stored as an ObjectId, it must
    # be converted (bson.ObjectId(id)) before the lookup matches.
    game_user = games.find_one({"_id": id})
    if game_user is not None:
        return id
    else:
        return render_template("index.html")
Also you should use "else" condition.
I am building a GraphQL API using the python packages Flask, SQLAlchemy, Graphene and Graphene-SQLAlchemy. I have followed the SQLAlchemy + Flask Tutorial. I am able to execute queries and mutations to create records. Now I would like to know what is the best way to update an existing record.
Here is my current script schema.py:
from graphene_sqlalchemy import SQLAlchemyObjectType
from database.batch import BatchOwner as BatchOwnerModel
import api_utils # Custom methods to create records in database
import graphene
class BatchOwner(SQLAlchemyObjectType):
    """Batch owners."""

    class Meta:
        model = BatchOwnerModel
        # Relay Node interface supplies the globally-unique, base64 id.
        interfaces = (graphene.relay.Node,)
class CreateBatchOwner(graphene.Mutation):
    """Create batch owner."""

    class Arguments:
        name = graphene.String()

    # Class attributes
    ok = graphene.Boolean()
    batch_owner = graphene.Field(lambda: BatchOwner)

    def mutate(self, info, name):
        """Persist a new BatchOwner row and return the mutation payload."""
        # Custom method to create records in database.
        api_utils.create('BatchOwner', {'name': name})
        return CreateBatchOwner(batch_owner=BatchOwner(name=name), ok=True)
class Query(graphene.ObjectType):
    """Query endpoint for GraphQL API."""
    node = graphene.relay.Node.Field()
    batch_owner = graphene.relay.Node.Field(BatchOwner)
    # NOTE(review): SQLAlchemyConnectionField is not imported in this
    # listing — it must be added to the graphene_sqlalchemy import above
    # for this line to run.
    batch_owners = SQLAlchemyConnectionField(BatchOwner)
class Mutation(graphene.ObjectType):
    """Mutation endpoint for GraphQL API."""
    create_batch_owner = CreateBatchOwner.Field()


# Wire the query and mutation roots into the executable schema.
schema = graphene.Schema(query=Query, mutation=Mutation)
Remarks:
My object BatchOwner has only 2 attributes (Id, name)
To be able to update the BatchOwner name, I assume I need to provide the database Id (not the relay global Id) as an input argument of some update method
But when I query for a BatchOwner from my client, Graphene only returns me the global Id which is base64 encoded (example: QmF0Y2hPd25lcjox, which correspond to BatchOwner:1)
Example of response:
{
"data": {
"batchOwners": {
"edges": [
{
"node": {
"id": "QmF0Y2hPd25lcjox",
"name": "Alexis"
}
}
]
}
}
}
The solution I am thinking of at the moment would be:
Create an update mutation which takes the global Id as an argument
Decode the global Id (how?)
Use the database Id retrieved from the decoded global Id to query on the database and update the corresponding record
Is there a better way to do this?
I have found a solution using the method from_global_id (documented here)
from graphql_relay.node.node import from_global_id
I added the following class to schema.py:
class UpdateBatchOwner(graphene.Mutation):
    """Update batch owner."""

    class Arguments:
        id = graphene.String()
        name = graphene.String()

    # Class attributes
    ok = graphene.Boolean()
    batch_owner = graphene.Field(lambda: BatchOwner)

    def mutate(self, info, id, name):
        """Decode the relay global id, update the row, return the payload."""
        # from_global_id returns a (type_name, database_id) tuple.
        # fixed: the original rebound `id` to that tuple and then passed the
        # whole tuple into BatchOwner(id=id, ...), so the returned object
        # carried ('BatchOwner', '1') instead of the database id.
        node_type, db_id = from_global_id(id)
        record = {'id': db_id, 'name': name}
        api_utils.update('BatchOwner', record)
        batch_owner = BatchOwner(id=db_id, name=name)
        ok = True
        return UpdateBatchOwner(batch_owner=batch_owner, ok=ok)
And I updated the Mutation class:
class Mutation(graphene.ObjectType):
    """Mutation endpoint for GraphQL API."""
    create_batch_owner = CreateBatchOwner.Field()
    # Register the update mutation alongside create.
    update_batch_owner = UpdateBatchOwner.Field()
I'm wondering if there is a more straight forward way to do this?
I am having trouble retrieving data from the embedded document in mongoengine.
models.py
from mongoengine import Document, EmbeddedDocument, fields
class ProductFields(EmbeddedDocument):
    """Embedded key/value pair attached to a Product."""
    key_name = fields.StringField(required=True)
    # DynamicField accepts any serializable value type.
    value = fields.DynamicField(required=True)
class Product(Document):
    """Product document holding a list of dynamic embedded fields."""
    name = fields.StringField(required=True)
    description = fields.StringField(required=True, null=True)
    # NOTE(review): this attribute shadows the imported `fields` module in
    # the class namespace; it works because the right-hand side is
    # evaluated first, but a different attribute name would be clearer.
    fields = fields.ListField(fields.EmbeddedDocumentField(ProductFields))
views.py
class ProductListView(APIView):
    """List products, including their embedded key/value fields."""

    def get(self, request):
        """Return every product as a flat dict plus its fields list."""
        result = []
        for product in Product.objects:
            data = {
                "name": product.name,
                "description": product.description,
                # product.fields is a LIST of ProductFields embedded
                # documents, so the values must be read per element —
                # accessing .key_name on the list itself fails. This
                # produces the [{key_name: value}, ...] shape asked for.
                "fields": [{f.key_name: f.value} for f in product.fields],
            }
            result.append(data)
        return Response(
            {"products": result, "message": "list of products.", "requestStatus": 1},
            status=status.HTTP_200_OK,
        )
Output:
{
"description": "test description",
"name": "product1"
"fields":[
{ "key_name" : value},
{ "key_name" : value},
]
}
How do I get the above-desired output? Print function doesn't work because mongoengine returns object and not the value.
Here I see you're using APIView from Django Rest FrameWork. Have a look at django-rest-framework-mongoengine. If you're already familiar with DRF, you can use this extension to create your API endpoints with MongoDB easily.
You must have found some workaround by now even though you can mark this answer as correct so that if anyone else runs into the same problem in future, they can get the solution.
I am trying to integrate my django project with the api from mailchimp, to add users to a list I need to generate some json in the following format:
{
"email_address": "EMAIL",
"status": "subscribed",
"merge_fields": {
"FNAME": "FIRST_NAME",
"LNAME": "SURNAME"
},
Sadly I am having some struggles with the nested merge_field. I expected the following to work:
class MergeSerializer(serializers.Serializer):
    """Builds MailChimp's merge_fields payload from flat model attributes."""

    FNAME = serializers.SerializerMethodField('get_first_name')
    LNAME = serializers.SerializerMethodField('get_surname')

    def get_first_name(self, obj):
        return obj.first_name

    def get_surname(self, obj):
        return obj.surname
class CreateContactSerializer(serializers.Serializer):
    """Attempted serializer for MailChimp's subscribe payload."""

    email_address = serializers.EmailField()
    status = serializers.SerializerMethodField('get_alternative_status')
    # NOTE(review): without source='*', a nested serializer looks for a
    # `merge_fields` attribute on the object being serialized; the model
    # has no such attribute, which is presumably why the key never appears
    # in the output.
    merge_fields = MergeSerializer(read_only=True)

    def get_alternative_status(self, obj):
        return "subscribed"
This only generates some json with the email_address and the status, and completely ignores the merge_fields. After hours I have absolutely no clue what to try next. Does anybody know how to solve this problem?
Since I thought that the documentation for the marshmallow framework was a bit clearer, I also tried it with their package; this however returned exactly the same result (so it also ignores my merge_fields):
class MergeFieldsSchema(Schema):
    """marshmallow equivalent of the merge_fields payload."""

    FNAME = fields.String(attribute="first_name")
    LNAME = fields.String(attribute="surname")


class CreateContactSerializer(Schema):
    """marshmallow attempt at the MailChimp subscribe payload."""

    # NOTE(review): Nested looks for a `merge_fields` attribute on the
    # object being dumped; it doesn't exist on the model, which is
    # presumably why the key is omitted — same failure mode as the DRF
    # version above.
    merge_fields = fields.Nested(MergeFieldsSchema)
    email_address = fields.String()
    status = fields.Constant("subscribed")
You don't say this, but I am assuming that surname and first_name are also part of the same object as email_address on your model, which is why the nested serializer does not work (as nested serializers are for foreign keys). If this is not the case, please add the model to the OP.
Because you just want to customize the output, you can use a Serializer Method on your main CreateContactSerializer:
class CreateContactSerializer(serializers.Serializer):
    """Serializer producing MailChimp's nested subscribe payload."""

    email_address = serializers.EmailField()
    status = serializers.SerializerMethodField('get_alternative_status')
    merge_fields = serializers.SerializerMethodField('get_merge_fields')

    def get_alternative_status(self, obj):
        """Every contact is created in the subscribed state."""
        return "subscribed"

    def get_merge_fields(self, obj):
        """Assemble the nested merge_fields dict from flat model fields."""
        return {"FNAME": obj.first_name, "LNAME": obj.surname}
If you want, you could even reuse the serializer that you already used and do
def get_merge_fields(self, obj):
    """Reuse MergeSerializer to build the nested merge_fields dict."""
    serializer = MergeSerializer(obj)
    # fixed: dropped the stray trailing semicolon (un-Pythonic)
    return serializer.data
Don't forget to add merge_fields to your fields