Building JSON data from SQL database cursor - Python

Without knowing the structure of the JSON, how can I return a JSON object from the database query? All of the information is there, I just can't figure out how to build the object.
import MySQLdb
import json
db = MySQLdb.connect( host, user, password, db)
cursor = db.cursor()
cursor.execute( query )
rows = cursor.fetchall()
field_names = [i[0] for i in cursor.description]
json_string = json.dumps( dict(rows) )
print field_names[0]
print field_names[1]
print json_string
db.close()
count
severity
{"321": "7.2", "1": "5.0", "5": "4.3", "7": "6.8", "1447": "9.3", "176": "10.0"}
The JSON object I want would look like:
{"data":[{"count":"321","severity":"7.2"},{"count":"1","severity":"5.0"},{"count":"5","severity":"4.3"},{"count":"7","severity":"6.8"},{"count":"1447","severity":"9.3"},{"count":"176","severity":"10.0"}]}

The problem you are encountering happens because you turn the fetched rows into a dict without their descriptions.
dict in Python expects either another dict, or an iterable of two-item tuples, where for each tuple the first item becomes the key and the second the value.
Since you only fetch two columns, you get the first one (count) as the key and the second (severity) as the value for each fetched row.
What you want to do is also combine the descriptions, like so:
json_string = json.dumps([
    {description: value for description, value in zip(field_names, row)}
    for row in rows
])
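If you want the exact shape shown in the question, with the list nested under a "data" key, the same idea only needs one more wrapper:

json_string = json.dumps({
    "data": [dict(zip(field_names, row)) for row in rows]
})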

1- You can use pymysql's DictCursor:
import pymysql
connection = pymysql.connect(db="test")
cursor = connection.cursor(pymysql.cursors.DictCursor)
cursor.execute("SELECT ...")
row = cursor.fetchone()
print row["key"]
2- MySQLdb also includes a DictCursor that you can use. You need to pass cursorclass=MySQLdb.cursors.DictCursor when making the connection.
import MySQLdb
import MySQLdb.cursors
connection = MySQLdb.connect(db="test",cursorclass=MySQLdb.cursors.DictCursor)
cursor = connection.cursor()
cursor.execute("SELECT ...")
row = cursor.fetchone()
print row["key"]
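With either DictCursor, each fetched row is already a dict keyed by column name, so producing the JSON shape from the question is a one-liner. A minimal sketch, assuming the same count/severity query as above:

import json

cursor.execute(query)
rows = cursor.fetchall()  # each row is a dict, e.g. {"count": 321, "severity": "7.2"}
json_string = json.dumps({"data": list(rows)})
print json_string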

I got this to work using the collections module, although the code is confusing:
import MySQLdb
import json
import collections
db = MySQLdb.connect(host, user, passwd, db)
cursor = db.cursor()
cursor.execute( query )
rows = cursor.fetchall()
field_names = [i[0] for i in cursor.description]
objects_list = []
for row in rows:
    d = collections.OrderedDict()
    d[field_names[0]] = row[0]
    d[field_names[1]] = row[1]
    objects_list.append(d)
json_string = json.dumps( objects_list )
print json_string
db.close()
[{"count": 176, "severity": "10.0"}, {"count": 1447, "severity": "9.3"}, {"count": 321, "severity": "7.2"}, {"count": 7, "severity": "6.8"}, {"count": 1, "severity": "5.8"}, {"count": 1, "severity": "5.0"}, {"count": 5, "severity": "4.3"}]

Related

JSON Response to pyodbc: Better way

This works, but I have to do this for 30 of the 100 fields in the response. Is there a better way?
for record in data:
    record["lastName"] = record["lastName"].replace("'", "''")
    cursor.execute("Insert Into emp_temp (employeeId, firstName, lastName) values ('" + record["employeeId"] + "','" + record["firstName"] + "','" + record["lastName"] + "')")
cursor.commit()
cursor.close()
conn.close()
Assuming that json.loads() is giving you a simple list of dict objects, that is precisely the format that can be directly consumed by SQLAlchemy:
# https://stackoverflow.com/q/67129218/2144390
import json
import sqlalchemy as sa
response_text = '''\
[{"employeeId": 1, "firstName": "Gord", "lastName": "Thompson"},
{"employeeId": 2, "firstName": "Bob", "lastName": "Loblaw"}]'''
data = json.loads(response_text)
print(type(data)) # <class 'list'>
print(type(data[0])) # <class 'dict'>
engine = sa.create_engine("mssql+pyodbc://@mssqlLocal64")
emp_temp = sa.Table("emp_temp", sa.MetaData(), autoload_with=engine)
with engine.begin() as conn:
    conn.execute(emp_temp.insert(), data)
# check results
with engine.begin() as conn:
    results = conn.execute(sa.text("SELECT * FROM emp_temp")).fetchall()
    print(results)
    # [(1, 'Gord', 'Thompson'), (2, 'Bob', 'Loblaw')]
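If you would rather stay with plain pyodbc, a parameterized executemany avoids the per-field quote escaping entirely, since the driver handles quoting for every field. A minimal sketch, assuming the same emp_temp table and data list:

# pyodbc uses qmark (?) placeholders; values are passed separately,
# so embedded quotes in names no longer break the statement
params = [(r["employeeId"], r["firstName"], r["lastName"]) for r in data]
cursor.executemany(
    "INSERT INTO emp_temp (employeeId, firstName, lastName) VALUES (?, ?, ?)",
    params,
)
cursor.commit()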

duplicate key error collection: mydatabase.customers index: _id_ dup key

I want to check if the user exists. If the user does exist, do not insert it.
This is my code:
#!/usr/bin/env python
# Python code to illustrate
# inserting data in MongoDB
from pymongo import MongoClient
try:
    myclient = MongoClient('10.1.3.18', 27017)
    print("Connected successfully!!!")
except:
    print("Could not connect to MongoDB")
# database
emp_rec1 = {
    "name": "Mr.Geek1",
    "eid": 24,
    "location": "delhi"
}
emp_rec2 = {
    "name": "Mr.Shaurya",
    "eid": 14,
    "location": "delhi"
}
emp_rec3 = {
    "name": "Mr.Shaurya111",
    "eid": 141111,
    "location": "delhi111111"
}
a = [emp_rec1, emp_rec2, emp_rec3]
mydb = myclient["mydatabase"]
#result = mydb.profiles.create_index([('user_id'],unique=True)
mycol = mydb["customers"]
#x = mycol.insert_one(a[2])
cursor = mycol.find()
for record in cursor:
    print(record)
    mydb.servers.getIndexes()
    if record['name'] != "Mr.Shaurya":
        x = mycol.insert_one(a[0])
    print(record)
Is this code correct? Or is there another solution?
And if I run my code twice I get this error:
pymongo.errors.DuplicateKeyError: E11000 duplicate key error collection:
mydatabase.customers index: _id_ dup key: { :ObjectId('5d7b9a6bc9a8569a44a6da2c') }
How to prevent duplicated key in MongoDB?
How can I use index?
Assuming that eid is the key causing the duplicate key error, the code below should fix your problem.
The idea is to first collect the eids that already exist in the collection, then loop through your array a and insert only the records whose eid is not already present.
Code:
#!/usr/bin/env python
# Python code to illustrate
# inserting data in MongoDB
from pymongo import MongoClient

try:
    myclient = MongoClient('10.1.3.18', 27017)
    print("Connected successfully!!!")
except:
    print("Could not connect to MongoDB")

# database
emp_rec1 = {
    "name": "Mr.Geek1",
    "eid": 24,
    "location": "delhi"
}
emp_rec2 = {
    "name": "Mr.Shaurya",
    "eid": 14,
    "location": "delhi"
}
emp_rec3 = {
    "name": "Mr.Shaurya111",
    "eid": 141111,
    "location": "delhi111111"
}
a = [emp_rec1, emp_rec2, emp_rec3]
mydb = myclient["mydatabase"]
mycol = mydb["customers"]

# collect the eids that already exist in the collection
existing_eids = {record['eid'] for record in mycol.find() if 'eid' in record}

# insert only the records whose eid is not already present
for i in a:
    if i['eid'] not in existing_eids:
        x = mycol.insert_one(i)
        print(i)
If you want to strictly check only the name of the user in the customers collection, you can use the following variant.
Code:
#!/usr/bin/env python
# Python code to illustrate
# inserting data in MongoDB
from pymongo import MongoClient

try:
    myclient = MongoClient('10.1.3.18', 27017)
    print("Connected successfully!!!")
except:
    print("Could not connect to MongoDB")

# database
emp_rec1 = {
    "name": "Mr.Geek1",
    "eid": 24,
    "location": "delhi"
}
emp_rec2 = {
    "name": "Mr.Shaurya",
    "eid": 14,
    "location": "delhi"
}
emp_rec3 = {
    "name": "Mr.Shaurya111",
    "eid": 141111,
    "location": "delhi111111"
}
a = [emp_rec1, emp_rec2, emp_rec3]
mydb = myclient["mydatabase"]
mycol = mydb["customers"]

# collect the names that already exist in the collection
existing_names = {record['name'] for record in mycol.find() if 'name' in record}

# insert only the records whose name is not already present
for i in a:
    if i['name'] not in existing_names:
        x = mycol.insert_one(i)
        print(i)
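To answer the index part of the question: you can also let MongoDB enforce uniqueness for you instead of checking in application code. A minimal sketch, assuming eid is the field that must be unique (create_index and update_one with upsert=True are standard pymongo API):

import pymongo

# make eid unique; inserting a duplicate eid now raises DuplicateKeyError
mycol.create_index([("eid", pymongo.ASCENDING)], unique=True)

# upsert: insert the record if its eid is new, otherwise update it in place,
# so running the script twice no longer raises a duplicate key error
for i in a:
    mycol.update_one({"eid": i["eid"]}, {"$set": i}, upsert=True)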

What are the possible ways for JSON data processing using SQL, Elasticsearch, or preprocessing in Python

I have a case study where I need to take data from a REST API, do some analysis on it using aggregate functions, joins, etc., and use the response data in JSON format to plot some retail graphs.
Approaches followed till now:
Read the data from JSON, store it in Python variables and use INSERT to hit the SQL query. Obviously this is a costly operation, because it inserts into the database once for every JSON line read. For 33k rows it takes more than 20 minutes, which is inefficient.
This could be handled in Elasticsearch for faster processing, but complex operations like joins are not available in Elasticsearch.
If anybody can suggest the best approach (like preprocessing or post-processing in Python) for handling such scenarios, it would be helpful.
Thanks in advance
SQL script
def store_data(AccountNo):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO cstore (AccountNo) VALUES (%s)"
    cursor.execute(insert_query, (AccountNo,))
    db.commit()
    cursor.close()
    db.close()
    return

def on_data(file_path):
    #This is the meat of the script...it connects to your mongoDB and stores the tweet
    try:
        # Decode the JSON from Twitter
        testFile = open(file_path)
        datajson = json.load(testFile)
        #print (len(datajson))
        #grab the wanted data from the Tweet
        for i in range(len(datajson)):
            for cosponsor in datajson[i]:
                AccountNo = cosponsor['AccountNo']
                store_data(AccountNo)
    except:
        pass
Edit 1: JSON added
{
    "StartDate": "1/1/18",
    "EndDate": "3/30/18",
    "Transactions": [
        {
            "CSPAccountNo": "41469300",
            "ZIP": "60098",
            "ReportDate": "2018-03-08T00:00:00",
            "POSCode": "00980030003",
            "POSCodeModifier": "0",
            "Description": "TIC TAC GUM WATERMEL",
            "ActualSalesPrice": 1.59,
            "TotalCount": 1,
            "Totalsales": 1.59,
            "DiscountAmount": 0,
            "DiscountCount": 0,
            "PromotionAmount": 0,
            "PromotionCount": 0,
            "RefundAmount": 0,
            "RefundCount": 0
        },
        {
            "CSPAccountNo": "41469378",
            "ZIP": "60098",
            "ReportDate": "2018-03-08T00:00:00",
            "POSCode": "01070080727",
            "POSCodeModifier": "0",
            "Description": "PAYDAY KS",
            "ActualSalesPrice": 2.09,
            "TotalCount": 1,
            "Totalsales": 2.09,
            "DiscountAmount": 0,
            "DiscountCount": 0,
            "PromotionAmount": 0,
            "PromotionCount": 0,
            "RefundAmount": 0,
            "RefundCount": 0
        }
    ]
}
I do not have your JSON file, so I do not know if this is runnable as-is, but I would try something like the code below: read all the account infos into a list first, then write them to the database in one shot with executemany. I would expect a much better (shorter) execution time than 20 minutes.
import json

import MySQLdb


def store_data(AccountNo):
    # AccountNo is a list of dicts, one per transaction row
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    # MySQLdb uses pyformat placeholders (%(name)s) for dict parameters
    insert_query = "INSERT INTO cstore (AccountNo, ZIP, ReportDate) VALUES (%(AccountNo)s, %(ZIP)s, %(ReportDate)s)"
    cursor.executemany(insert_query, AccountNo)
    db.commit()
    cursor.close()
    db.close()
    return


def on_data(file_path):
    # This is the meat of the script...it connects to your mongoDB and stores the tweet
    try:
        # declare an empty list for all the accountno's
        accountno_list = list()
        # Decode the JSON from Twitter
        testFile = open(file_path)
        datajson = json.load(testFile)
        # print (len(datajson))
        # grab the wanted data from the Tweet
        # the JSON shown above is a single object, so index into its
        # "Transactions" list directly
        for row in datajson['Transactions']:
            values = dict()
            values['AccountNo'] = row['CSPAccountNo']
            values['ZIP'] = row['ZIP']
            values['ReportDate'] = row['ReportDate']
            # from here on you can populate the attributes you need in a similar way..
            accountno_list.append(values)
    except:
        pass
    store_data(accountno_list)
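Hypothetical usage, assuming the JSON from the edit is saved to a local file (the file name here is made up):

on_data("transactions.json")  # batches both transactions into a single executemany call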

How can I convert array of arrays to array of objects in Flask?

So I have this code:
#app.route("/")
def users():
try:
c, conn = connection()
c.execute(''' SELECT * FROM users ''')
rv = c.fetchall()
return jsonify(rv)
except Exception as e:
return(str(e))
and the output looks like this:
[
    [
        1,
        "Robert Soriano",
        "sorianorobertc@gmail.com",
        24
    ],
    [
        2,
        "Charmaine Villar",
        "charmandervillar@hotmail.com\r\n",
        23
    ]
]
But since I am building a RESTful API, I want it to be objects, like:
[
    {
        1,
        "Robert Soriano",
        "sorianorobertc@gmail.com",
        24
    },
    {
        2,
        "Charmaine Villar",
        "charmandervillar@hotmail.com\r\n",
        23
    }
]
I tried something like:
try:
    c, conn = connection()
    c.execute('''SELECT * FROM users''')
    rv = list(c.fetchall())  # converted to list
    return jsonify(rv)
except Exception as e:
    return str(e)
But I still get the same output.
Am I missing something here? Thanks in advance
I don't know which DB driver you use; I tested with pymysql. The result of cursor.fetchall() and the elements in it are all tuples. You can use collections.namedtuple to assign a field name to each element of each record.
import collections

Record = collections.namedtuple("Record", ["id", "name", "email", "age"])
result = cursor.fetchall()
records = [Record(*record)._asdict() for record in result]
There is an excellent tool, Records: SQL for Humans™, that does this for you.
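Putting that together with the route from the question, a minimal sketch (assuming the same connection() helper from the question and a four-column users table):

import collections

from flask import Flask, jsonify

app = Flask(__name__)
Record = collections.namedtuple("Record", ["id", "name", "email", "age"])

@app.route("/")
def users():
    try:
        c, conn = connection()
        c.execute('''SELECT * FROM users''')
        # one dict per row, keyed by field name, so jsonify emits objects
        rows = [Record(*row)._asdict() for row in c.fetchall()]
        return jsonify(rows)
    except Exception as e:
        return str(e)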

Iterating rows with Pyodbc

I am using pyodbc to return a number of rows, which are dumped into JSON and sent to a server. I would like to iterate over my SQL table and return all records. I am using cursor.fetchall() now, and the program returns one record, as shown below. When I use fetchone an error is returned, AttributeError: 'unicode' object has no attribute 'SRNUMBER', and fetchmany returns one record as well. How do I successfully return all records? I am using Python 2.6.7.
Code:
import pyodbc
import json
import collections
import requests

connstr = 'DRIVER={SQL Server};SERVER=server;DATABASE=ServiceRequest; UID=SA;PWD=pwd'
conn = pyodbc.connect(connstr)
cursor = conn.cursor()
cursor.execute("""
    SELECT SRNUMBER, FirstName, LastName, ParentNumber
    FROM MYLA311 """)
rows = cursor.fetchone()

objects_list = []
for row in rows:
    d = collections.OrderedDict()
    d['SRNUMBER'] = row.SRNUMBER
    d['FirstName'] = row.FirstName
    d['LastName'] = row.LastName
    d['ParentNumber'] = row.ParentNumber
objects_list.append(d)

output = {"MetaData": {},
          "SRData": d}

print output
j = json.dumps(output)
print json.dumps(output, sort_keys=True, indent=4)
Output for fetchall and fetchmany:
{
    "MetaData": {},
    "SRData": {
        "FirstName": "MyLAG",
        "LastName": "ThreeEleven",
        "ParentNumber": "021720151654176723",
        "SRNUMBER": "1-3580171"
    }
}
Use code from my answer here to build a list of dictionaries for the value of output['SRData'], then JSON encode the output dict as normal.
import pyodbc
import json

connstr = 'DRIVER={SQL Server};SERVER=server;DATABASE=ServiceRequest; UID=SA;PWD=pwd'
conn = pyodbc.connect(connstr)
cursor = conn.cursor()
cursor.execute("""SELECT SRNUMBER, FirstName, LastName, ParentNumber FROM MYLA311""")

# build list of column names to use as dictionary keys from sql results
columns = [column[0] for column in cursor.description]

results = []
for row in cursor.fetchall():
    results.append(dict(zip(columns, row)))

output = {"MetaData": {}, "SRData": results}
print(json.dumps(output, sort_keys=True, indent=4))
For starters, the line
objects_list.append(d)
needs to be inside the for loop, not outside.
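With that fix applied, and with SRData pointing at the whole list instead of the last dict, every fetched row makes it into the JSON. A corrected sketch of the loop from the question:

objects_list = []
for row in cursor.fetchall():
    d = collections.OrderedDict()
    d['SRNUMBER'] = row.SRNUMBER
    d['FirstName'] = row.FirstName
    d['LastName'] = row.LastName
    d['ParentNumber'] = row.ParentNumber
    objects_list.append(d)  # now appends one dict per row

output = {"MetaData": {}, "SRData": objects_list}
print json.dumps(output, sort_keys=True, indent=4)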
