Query SQL Server JSON columns using SQLAlchemy - python

I'm looking for a way to replicate the functionality of SQL Server's JSON_VALUE function using a SQLAlchemy query. I'm using metadata.reflect to define my existing db tables in SQLAlchemy.
SQL:
SELECT Id,
JSON_VALUE(BankDataJSON,'$.AccountName')
FROM BankData
SQLAlchemy Model:
db = SQLAlchemy()
db.Model.metadata.reflect(db.engine)

class BankData(db.Model):
    __table__ = db.Model.metadata.tables['BankData']
Endpoint / Query:
@cust_accts_bp.route('/api/CustomerAccts')
def get_cust_accts():
    custId = request.args.get('custId')
    db = SQLAlchemy(app)
    BankData = models.bank_data.BankData
    BankAccounts = models.bank_accounts.BankAccounts
    qry = db.session.query(BankAccounts.Id, BankAccounts.AccountNumber, BankAccounts.BankName,
                           BankData.AppId, BankData.CustomerId, BankAccounts.Filename, BankData.BankDataJSON) \
        .filter(
            and_(BankData.Id == BankAccounts.BankDataId, BankData.CustomerId == custId)
        )
    engine = app.config['SQLALCHEMY_DATABASE_URI']
    df = pd.read_sql(qry.statement, engine)
    df['BankDataJSON'] = df['BankDataJSON'].apply(json.loads)  # convert string representation of JSON
    df['BankDataJSON'] = df['BankDataJSON'].map(lambda x: [x[i] for i in x if i == 'AccountName'][0])
    df = df.rename(columns={'BankDataJSON': 'BusinessName'})
    response = json.loads(df.to_json(orient="records"))
    return json.dumps(response)
Using this method, I have to manually deserialize the JSON object (BankDataJSON) into a Python dict and parse it to get the value I want ('AccountName'). If I could use SQL Server's JSON_VALUE function, this would all be done for me.
JSON response:
[
{
"Id": 3003,
"AccountNumber": "111111111",
"BankName": "Wells Fargo",
"AppId": 111111,
"CustomerId": "555555",
"Filename": "some filename.pdf",
"BusinessName": "Some BusinessName"
},
{
"Id": 3004,
"AccountNumber": "22222222",
"BankName": "Wells Fargo",
"AppId": 111111,
"CustomerId": "555555",
"Filename": "Some filename",
"BusinessName": "Some Businessname"
}
]
How can I go about doing this? I also want to be able to replicate SQL Server's CROSS APPLY OPENJSON functionality for working with arrays of JSON objects in the future. Do I need to define the BankDataJSON column as a JSON type in my model? When I do that, I get an error about pyodbc's inability to deserialize JSON in the MSSQL dialect.

Maybe you can try to call the server's function in your query, something like this:
from sqlalchemy.sql import func
db = SQLAlchemy(app)
BankData = models.bank_data.BankData
qry = db.session.query(BankData.Id,
                       func.JSON_VALUE(BankData.BankDataJSON, '$.AccountName'))
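Giving the expression a label keeps the column name clean when it comes back through pd.read_sql, and a textual query is one way to cover the CROSS APPLY OPENJSON case. A minimal sketch, assuming the reflected models above (the 'BusinessName' label and the '$.Accounts' path are only illustrative):
from sqlalchemy import text
from sqlalchemy.sql import func

# JSON_VALUE with an explicit label, so the result column is already named 'BusinessName'
qry = db.session.query(
    BankData.Id,
    func.JSON_VALUE(BankData.BankDataJSON, '$.AccountName').label('BusinessName')
).filter(BankData.CustomerId == custId)

# For CROSS APPLY OPENJSON over a JSON array, one fallback is a textual statement
# run through the same session; OPENJSON returns key/value/type columns per element.
openjson_sql = text("""
    SELECT b.Id, j.[value] AS AccountEntry
    FROM BankData b
    CROSS APPLY OPENJSON(b.BankDataJSON, '$.Accounts') AS j
    WHERE b.CustomerId = :custId
""")
rows = db.session.execute(openjson_sql, {"custId": custId}).fetchall()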

Related

Mock: What is the right way to mock a SQLAlchemy database?

This is what I've done for testing the database:
@patch.object(query_functions, 'connection')
def test_service_reg_return_correct_data(self, mock_sqlalchmey):
    data = {
        "user_name": "testuser53",
        "password": "123456",
        "email_address": "testuser53@example.com",
        "dob": "2022-06-07 00:00:00",
        "address": "Ahmedabad",
    }
    # json_data = json.dumps(data)
    # print("json", json_data)
    m1 = mock.MagicMock()
    m1.get_json.return_value = data
    print("m", m1)
    with mock.patch("backened.routes.register.request", m1):
        response = user_register()
    self.assertTrue(mock_sqlalchmey.called)
    self.assertEqual(response.json, {'Message': 'New user Created'}, 201)
I've tested this function:
def user_register():
    request_data = request.get_json()
    insert = service_register(request_data)
    print(type(insert))
    return insert
Inside service_register(), I call run_insert() from query_functions.py to execute the queries.
query_functions.py
def connection():
    engine = create_engine('mysql://root:admin@172.17.0.2:3306/flask', poolclass=NullPool)
    conn = engine.connect()
    return conn

def run_insert(data):
    conn = connection()
    sql = text(CONST_INSERT)
    print(sql)
    conn.execute(sql, data)
    conn.close()
    return ("executed")
I want to know whether this is right or wrong, since I'm trying to mock my database and don't want to insert data into the DB. I'm not sure how the database mock actually happens here.
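For what it's worth, since @patch.object(query_functions, 'connection') swaps connection() for a MagicMock, run_insert() ends up calling execute() and close() on a fake connection, so nothing is written to MySQL. A minimal sketch of an assertion along those lines, assuming the same imports and test class as above:
@patch.object(query_functions, 'connection')
def test_run_insert_does_not_hit_db(self, mock_connection):
    # run_insert() calls connection(), which now returns a MagicMock,
    # so the INSERT is recorded on the mock instead of reaching the database
    query_functions.run_insert({"user_name": "testuser53"})

    mock_conn = mock_connection.return_value       # the fake connection object
    self.assertTrue(mock_conn.execute.called)      # the INSERT statement was attempted
    mock_conn.close.assert_called_once()           # and the connection was closed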

Inserting data using PyMongo based on a defined data model

I have a dataset consisting of 250 rows that looks like the following:
In MongoDB Compass, I inserted the first row as follows:
db.employees.insertOne([{"employee_id": 412153,
    "first_name": "Carrol",
    "last_name": "Dhin",
    "email": "carrol.dhin@company.com",
    "managing": [{"manager_id": 412153, "employee_id": 174543}],
    "department": [{"department_name": "Accounting", "department_budget": 500000}],
    "laptop": [{"serial_number": "CSS49745",
        "manufacturer": "Lenovo",
        "model": "X1 Gen 10",
        "date_assigned": {$date: 01-15-2022},
        "installed_software": ["MS Office", "Adobe Acrobat", "Slack"]}]})
If I wanted to insert all 250 rows into the database using PyMongo in Python, how would I ensure that every row is entered following the format that I used when I inserted it manually in the Mongo shell?
from pymongo import MongoClient
import pandas as pd

client = MongoClient('localhost', 27017)
db = client.MD
collection = db.gammaCorp

df = pd.read_csv(' ')  # insert CSV name here

for i in df.index:
    data = {}  # build a fresh document per row, so insert_one doesn't reuse the previous _id
    data['employee_id'] = df['employee_id'][i]
    data['first_name'] = df['first_name'][i]
    data['last_name'] = df['last_name'][i]
    data['email'] = df['email'][i]
    # one dict per list entry, matching the format of the manual insert above
    data['managing'] = [{'manager_id': df['employee_id'][i], 'employee_id': df['managing'][i]}]
    data['department'] = [{'department_name': df['department'][i], 'department_budget': df['department_budget'][i]}]
    data['laptop'] = [{'serial_number': df['serial_number'][i],
                       'manufacturer': df['manufacturer'][i],
                       'model': df['model'][i],
                       'date_assigned': df['date_assigned'][i],
                       'installed_software': df['installed_software'][i]}]
    collection.insert_one(data)
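If all 250 rows can be built up front, insert_many() is another option and saves a round trip per document. A rough sketch under the same assumptions as the loop above (note that numpy integer types from pandas usually need converting to plain Python ints before BSON encoding):
records = []
for i in df.index:
    records.append({
        'employee_id': int(df['employee_id'][i]),
        'first_name': df['first_name'][i],
        'last_name': df['last_name'][i],
        'email': df['email'][i],
        'managing': [{'manager_id': int(df['employee_id'][i]),
                      'employee_id': int(df['managing'][i])}],
        # ...remaining fields built exactly as in the loop above...
    })

collection.insert_many(records)  # one bulk call instead of 250 insert_one calls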

How to append JSON fields in sqlalchemy statements

How do I update a JSON field in SQLAlchemy, appending another JSON value?
stmt = pg_insert(Users).values(
    userid=user.id,
    pricesjson=[{
        "product1": user.product1,
        "product2": user.product2,
        "product3": user.product3
    }],
    tsins=datetime.now()
)
stmtUpsert = stmt.on_conflict_do_update(
    index_elements=[Users.userid],
    set_={'pricesjson': cast({"product1": user.product1,
                              "product2": user.product2,
                              "product3": user.product3
                              } +
                             cast(stmt.excluded.pricesjson, JSONB),
                             JSON),
          'tsvar': datetime.now()})
This way I don't receive errors, but it overwrites the JSON field instead of appending.
Thank you ;)
Solved: after altering the field on the table from json to jsonb, this is the working code:
stmtUpsert = stmt.on_conflict_do_update(
    index_elements=[Users.userid],
    set_={'pricesjson': cast([{"product1": user.product1,
                               "product2": user.product2,
                               "product3": user.product3
                               }], JSONB) + Users.pricesjson,
          'tsvar': datetime.now()})
This is the corresponding sample query:
insert into users (userid, pricesjson) values ('1', '{"product1": "test1", "product2": "test2"}')
on conflict (userid)
do update set pricesjson = cast('[{"productX": "testX"}]' as jsonb) || users.pricesjson
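Pulled together, a minimal self-contained sketch of the jsonb-append upsert as I understand the working version (the session variable and the tsins/tsvar columns are taken from the snippets above):
from datetime import datetime
from sqlalchemy import cast
from sqlalchemy.dialects.postgresql import insert as pg_insert, JSONB

new_prices = [{"product1": user.product1,
               "product2": user.product2,
               "product3": user.product3}]

stmt = pg_insert(Users).values(userid=user.id, pricesjson=new_prices, tsins=datetime.now())
stmtUpsert = stmt.on_conflict_do_update(
    index_elements=[Users.userid],
    # per the working code above, casting the new list to JSONB and adding the
    # existing column concatenates the arrays instead of overwriting the field
    set_={'pricesjson': cast(new_prices, JSONB) + Users.pricesjson,
          'tsvar': datetime.now()})

session.execute(stmtUpsert)
session.commit()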

How to load Mysql to Elasticsearch using python

I have a table named employees.
I need to push the employees to an Elasticsearch index using Python.
import MySQLdb
import json
from elasticsearch import Elasticsearch
db = MySQLdb.connect("localhost", "admin", "password", "dbname")
cursor = db.cursor()
Here is my quick idea 😎
from sqlalchemy import create_engine
import pymysql
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Replace me
CONSTR = 'mysql+pymysql://root:@127.0.0.1'
sqlEngine = create_engine(CONSTR, pool_recycle=3600)
dbConnection = sqlEngine.connect()

df = pd.read_sql("select * from employees", dbConnection)
rows = df.to_dict(orient='records')  # list of dicts, one per row

es = Elasticsearch()
actions = []
for item in rows:
    action = {
        # replace me if need to
        "_id": "employee_%s" % item['id'],
        "_source": item
    }
    actions.append(action)
response = helpers.bulk(es, actions, index="employees", doc_type='_doc')
dbConnection.close()
Alternatively, dump out a CSV file (SELECT ... INTO OUTFILE) from MySQL, then load that into Elasticsearch.
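A quick sketch of that CSV route, assuming a hypothetical employees.csv written by the OUTFILE dump and an id column to use as the document id:
import pandas as pd
from elasticsearch import Elasticsearch, helpers

df = pd.read_csv('employees.csv')  # hypothetical dump from SELECT ... INTO OUTFILE
es = Elasticsearch()

actions = (
    {"_index": "employees", "_id": row["id"], "_source": row}
    for row in df.to_dict(orient="records")
)
helpers.bulk(es, actions)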

What are the possible ways for JSON data processing using SQL, elastic search or preprocessing using python

I have a case study where I need to take data from a REST API, do some analysis on the data using aggregate functions, joins, etc., and use the response data in JSON format to plot some retail graphs.
Approaches followed so far:
Read the data from JSON, store it in Python variables, and use INSERT to hit the SQL query. This is obviously a costly operation because it inserts into the database for every JSON line read. For 33k rows it takes more than 20 minutes, which is inefficient.
This could be handled in Elasticsearch for faster processing, but complex operations like joins are not available in Elasticsearch.
If anybody can suggest the best approach (like preprocessing or post-processing in Python) for handling such scenarios, it would be helpful.
Thanks in advance
SQL script:
def store_data(AccountNo):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO cstore (AccountNo) VALUES (%s)"
    cursor.execute(insert_query, (AccountNo,))
    db.commit()
    cursor.close()
    db.close()
    return
def on_data(file_path):
    # This is the meat of the script...it connects to your mongoDB and stores the tweet
    try:
        # Decode the JSON from Twitter
        testFile = open(file_path)
        datajson = json.load(testFile)
        # print (len(datajson))
        # grab the wanted data from the Tweet
        for i in range(len(datajson)):
            for cosponsor in datajson[i]:
                AccountNo = cosponsor['AccountNo']
                store_data(AccountNo)
Edit 1: JSON added
{
"StartDate": "1/1/18",
"EndDate": "3/30/18",
"Transactions": [
{
"CSPAccountNo": "41469300",
"ZIP": "60098",
"ReportDate": "2018-03-08T00:00:00",
"POSCode": "00980030003",
"POSCodeModifier": "0",
"Description": "TIC TAC GUM WATERMEL",
"ActualSalesPrice": 1.59,
"TotalCount": 1,
"Totalsales": 1.59,
"DiscountAmount": 0,
"DiscountCount": 0,
"PromotionAmount": 0,
"PromotionCount": 0,
"RefundAmount": 0,
"RefundCount": 0
},
{
"CSPAccountNo": "41469378",
"ZIP": "60098",
"ReportDate": "2018-03-08T00:00:00",
"POSCode": "01070080727",
"POSCodeModifier": "0",
"Description": "PAYDAY KS",
"ActualSalesPrice": 2.09,
"TotalCount": 1,
"Totalsales": 2.09,
"DiscountAmount": 0,
"DiscountCount": 0,
"PromotionAmount": 0,
"PromotionCount": 0,
"RefundAmount": 0,
"RefundCount": 0
}
]
}
I do not have your JSON file, so I don't know if this is runnable, but I would try something like the code below: read just your account info into a list and then write it to the DB at once with executemany. I expect it to have a better (shorter) execution time than 20 minutes.
def store_data(AccountNo):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    # MySQLdb uses pyformat placeholders; each dict in the list supplies one row
    insert_query = "INSERT INTO cstore (AccountNo, ZIP, ReportDate) VALUES (%(AccountNo)s, %(ZIP)s, %(ReportDate)s)"
    cursor.executemany(insert_query, AccountNo)
    db.commit()
    cursor.close()
    db.close()
    return

def on_data(file_path):
    # This is the meat of the script...it connects to your mongoDB and stores the tweet
    try:
        # declare an empty list for all the AccountNo's
        accountno_list = list()
        # Decode the JSON from Twitter
        testFile = open(file_path)
        datajson = json.load(testFile)
        # print (len(datajson))
        # grab the wanted data from the Tweet
        # the posted JSON is a single object, so index 'Transactions' directly
        for row in datajson['Transactions']:
            values = dict()
            values['AccountNo'] = row['CSPAccountNo']
            values['ZIP'] = row['ZIP']
            values['ReportDate'] = row['ReportDate']
            # from here on you can populate the attributes you need in a similar way..
            accountno_list.append(values)
    except:
        pass
    store_data(accountno_list)
