How to update records only storing changes? - python

My backend, built with Python and Flask, splits the JSON data across various endpoints so my Swift client app can retrieve only what it needs instead of downloading the full data set client-side.
JSON file:
{
    "pilots": [
        {
            "cid": 1234567,
            "name": "John Smith",
            "callsign": "TIA1",
            "server": "USA-WEST",
            "pilot_rating": 3,
            "latitude": 18.42663,
            "longitude": 116.15007,
            "altitude": 41038,
            "groundspeed": 435,
            "transponder": "2200",
            "heading": 154,
            "qnh_i_hg": 29.96,
            "qnh_mb": 1015,
            "flight_plan": {
                "flight_rules": "I",
                "aircraft": "B737/M-VGDW/C",
                "aircraft_faa": "B737/L",
                "aircraft_short": "B737",
                "departure": "LTBA",
                "arrival": "WAMM",
                "alternate": "",
                "cruise_tas": "437",
                "altitude": "33000",
                "deptime": "1230",
                "enroute_time": "1415",
                "fuel_time": "1542",
                "remarks": "PBN/B1D1O1S1 DOF/221107 REG/VPCTY EET/LTAA0020 UDDD0137 UBBA0151 UTAK0222 UTAA0247 UTAV0309 UTSD0322 UTTR0345 UAII0352 UTTR0354 UCFO0412 UCFM0434 ZWUQ0451 ZLHW0606 ZPKM0741 ZGZU0856 VHHK0946 RPHI1020 WAAF1251 SEL/EJKS CODE/ADF5D2 OPR/TEXAS AIR LLC ORGN/KCHIUALE PER/C RMK/CALLSIGN \"TEXAS\" /V/",
                "route": "ASMAP UL333 SIV UA4 ERZ UB374 INDUR N449 DUKAN A480 KRS B701 TUGTA A909 BABUM A477 POGON L143 TISIB L141 KAMUD W186 SADAN Y1 OMBON B330 AVPAM A599 POU B330 CH A583 ZAM A461 BONDA",
                "revision_id": 4,
                "assigned_transponder": "0363"
            },
            "logon_time": "2022-11-06T07:07:42.1130925Z",
            "last_updated": "2022-11-07T22:36:19.1087668Z"
        }
    ]
}
I import the JSON into various SQLite tables. The JSON data is updated every 60 seconds, so I need to update my copy accordingly. My current solution is to delete everything in the database and then reinsert, but that's most likely not the right way. I'm not sure how to diff the records in the database against the latest JSON; I could retrieve all records and compare old and new line by line, but that could be even less efficient. What's a robust way of doing this in Python?
Code to insert pilots and associated flight plans:
def _store_pilots(pilots):
    """Removes all records from the db, then stores pilots and associated flight plans, checking for duplicate CIDs."""
    pilots_list = []
    cid_list = []
    fp_list = []
    for pilot in pilots:
        cid = int(pilot['cid'])
        if cid in cid_list:
            continue
        cid_list.append(cid)
        pilot_tuple = (
            pilot['cid'], pilot['name'],
            pilot['callsign'], pilot['server'],
            pilot['pilot_rating'],
            pilot['latitude'], pilot['longitude'],
            pilot['altitude'], pilot['groundspeed'],
            pilot['transponder'], pilot['heading'],
            pilot['qnh_i_hg'], pilot['qnh_mb'],
            pilot['logon_time'], pilot['last_updated']
        )
        pilots_list.append(pilot_tuple)
        if pilot['flight_plan']:
            fp = pilot['flight_plan']
            fp_tuple = (
                pilot['cid'], fp['flight_rules'],
                fp['aircraft'], fp['aircraft_faa'], fp['aircraft_short'],
                fp['departure'], fp['arrival'], fp['alternate'],
                fp['cruise_tas'], fp['altitude'],
                fp['deptime'], fp['enroute_time'], fp['fuel_time'],
                fp['remarks'], fp['route'], fp['revision_id']
            )
            fp_list.append(fp_tuple)
    with sqlite3.connect(DATABASE_PATH) as connection:
        connection.execute("PRAGMA foreign_keys = 1")
        c = connection.cursor()
        c.execute("DELETE FROM pilots")
        c.executemany("""
            INSERT INTO pilots(cid, name, callsign, server, pilot_rating, latitude, longitude, altitude, groundspeed, transponder, heading, qnh_i_hg, qnh_mb, logon_time, last_updated)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, pilots_list)
        c.executemany("""
            INSERT INTO flight_plans VALUES (?, ?, ?, ?, ?, ?, ?, ?,
                                             ?, ?, ?, ?, ?, ?, ?, ?)
        """, fp_list)

You can create a table with columns cid, raw_json_value and json_hash, and check the hash value before each update / insert. Here is an example:
import hashlib
import json

data = [{'cid': 1, ...}, {'cid': 2, ...}, {'cid': 3, ...}]
for item in data:  # type: dict
    cid = item['cid']
    # Hash the individual item (not the whole list); a hashlib digest is stable
    # across processes, unlike the built-in hash(), so it can be stored in the db.
    json_hash = hashlib.md5(json.dumps(item, sort_keys=True).encode()).hexdigest()
    # Record - let's say a model from the db
    record = get_record_by_cid(cid)
    if not record:
        save_record(cid, item, json_hash)
        continue
    if record.json_hash != json_hash:
        update_record(cid, item, json_hash)
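If you keep cid as the primary key of the pilots table (or give it a UNIQUE constraint), another option is to let SQLite do the diffing with an upsert (INSERT ... ON CONFLICT ... DO UPDATE, available since SQLite 3.24), so existing rows are updated in place and nothing has to be deleted first. A minimal sketch, assuming the pilots table and DATABASE_PATH from the question:
import sqlite3

def _upsert_pilots(pilots_list):
    """Insert new pilots and update existing ones in a single pass."""
    with sqlite3.connect(DATABASE_PATH) as connection:
        connection.execute("PRAGMA foreign_keys = 1")
        connection.executemany("""
            INSERT INTO pilots(cid, name, callsign, server, pilot_rating,
                               latitude, longitude, altitude, groundspeed,
                               transponder, heading, qnh_i_hg, qnh_mb,
                               logon_time, last_updated)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(cid) DO UPDATE SET
                name = excluded.name,
                callsign = excluded.callsign,
                server = excluded.server,
                pilot_rating = excluded.pilot_rating,
                latitude = excluded.latitude,
                longitude = excluded.longitude,
                altitude = excluded.altitude,
                groundspeed = excluded.groundspeed,
                transponder = excluded.transponder,
                heading = excluded.heading,
                qnh_i_hg = excluded.qnh_i_hg,
                qnh_mb = excluded.qnh_mb,
                logon_time = excluded.logon_time,
                last_updated = excluded.last_updated
            WHERE excluded.last_updated > pilots.last_updated
        """, pilots_list)
The WHERE clause skips rows whose last_updated has not moved forward. Pilots that disappeared from the feed still need a separate DELETE, for example by cid NOT IN the latest batch or by a staleness cutoff on last_updated.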

Related

mapping excel columns to table column in database with python

JSON file:
"mappingdef": [
    {
        "src": "A",
        "dest": "id"
    },
    {
        "src": "B",
        "dest": "expense_type"
    },
    {
        "src": "C",
        "dest": "balance"
    },
    {
        "src": "D",
        "dest": "debit"
    },
    {
        "src": "E",
        "dest": "credit"
    },
    {
        "src": "F",
        "dest": "total_balance"
    }
]
My Python script:
# changing excel column names
df.columns = ["A", "B", "C", "D", "E", "F"]

# fetching data from dataframe
for row in range(df.shape[0]):
    col_A = str(df.at[row, "A"]),
    col_B = str(df.at[row, "B"]),
    col_C = float(df.at[row, "C"]),
    col_D = float(df.at[row, "D"]),
    col_E = float(df.at[row, "E"]),
    col_F = float(df.at[row, "F"])

# query to insert data in database
query2 = """
    INSERT INTO ocean_street_apartments(
        id,
        expense_type,
        balance,
        debit,
        credit,
        total_balance)
    values (%s, %s, %s, %s, %s, %s)
"""
I have this table definition info in JSON, which gives src as the Excel column and dest as the database table column name. I want to read an Excel file through pandas and map each Excel column (src) to the corresponding database table column (dest). I am working in Python.
Assuming it's a JSON file, i.e. an API GET response.
Things I am assuming you know how to do:
1) Fetch the GET response; what is returned is an array of object descriptions for every file.
2) Write a script to download this and move it into a DataFrame.
Now you have a list of direct links to your CSV files, and you can read these URLs directly using pandas.read_csv(url).
If the data is problematic, transform it first.
Then it's time to load the DataFrame directly into a SQL DB using pandas.DataFrame.to_sql.
The code below shows how to connect to a SQLite db.
import os
import sqlite3

import pandas as pd
from tqdm import tqdm

def upload_to_sql(filenames, db_name, debug=False):
    """Given a list of paths, upload to a database."""
    conn = sqlite3.connect(f"{db_name}.db")
    if debug:
        print("Uploading into database")
    for i, file_path in tqdm(list(enumerate(filenames))):
        dat = pd.read_csv(file_path)
        # rename labels (factor_dataframe is defined elsewhere)
        filename = os.path.basename(file_path).split('.')[0]
        dat = factor_dataframe(dat, filename)
        # write records to sql database
        if i == 0:  # if first entry, and table name already exists, replace
            dat.to_sql(db_name, con=conn, index=False, if_exists='replace')
        else:  # otherwise append to current table given db_name
            dat.to_sql(db_name, con=conn, index=False, if_exists='append')

# upload into sql database
upload_to_sql(download_urls, 'example', debug=True)
import psycopg2
import ijson
import pandas as pd

conn = psycopg2.connect(
    host="localhost",
    database="sea",
    user="postgres",
    password="hemant888")
cursor = conn.cursor()

chunk_size = 10
skiprows = 5
file_name = "Ocean Street Apartments Trial Balance 03-22.xlsx"

cursor.execute("""
    SELECT COUNT(*)
    FROM information_schema.tables
    WHERE table_name = 'ocean_street_apartments'
""")
if cursor.fetchone()[0] != 1:
    columns = list()
    datatype = list()
    row = list()
    with open("tabledef.json", "r") as f:
        for record in ijson.items(f, "item"):
            for i in record["def"]["tabledef"]["columns"]:
                col = i["name"]
                columns.append(col)
                dt = i["datatype"]
                datatype.append(dt)
    for i in range(len(columns)):
        row.append("{col} {dt}".format(col=columns[i], dt=datatype[i]))
    query1 = "create table ocean_street_apartments(" + \
        ",".join(map(str, row)) + ")"
    cursor.execute(query1)
    conn.commit()

while True:
    df_chunk = pd.read_excel(file_name, skiprows=skiprows, nrows=chunk_size)
    skiprows += chunk_size
    # When there is no data, we know we can break out of the loop.
    if not df_chunk.shape[0]:
        break
    else:
        columns = list()
        columns_table = list()
        with open("tabledef.json", "r") as f:
            for record in ijson.items(f, "item"):
                for i in record["def"]["tabledef"]["mappingdef"]:
                    col = i["src"]
                    columns.append(col)
                    col_table = i["dest"]
                    columns_table.append(col_table)
        query2 = "INSERT INTO ocean_street_apartments(" + ",".join(
            map(str, columns_table)) + ") values (%s, %s, %s, %s, %s, %s)"
        df_chunk.columns = columns
        values_list = list()
        for row in range(df_chunk.shape[0]):
            for col in df_chunk.columns:
                val = str(df_chunk.at[row, col])
                values_list.append(val)
            values = tuple(values_list)
            cursor.execute(query2, values)
            values_list.clear()
        conn.commit()

conn.close()
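If the target table already exists, a shorter route for the src → dest mapping is to rename the DataFrame columns from the JSON and let pandas write the rows itself. A sketch, not a drop-in replacement: it assumes the mappingdef array can be reached with a plain json.load (adjust the lookup to however tabledef.json actually nests it, e.g. record["def"]["tabledef"]["mappingdef"] as in the code above) and that SQLAlchemy is installed for the same PostgreSQL database:
import json

import pandas as pd
from sqlalchemy import create_engine

with open("tabledef.json") as f:
    mapping_def = json.load(f)["mappingdef"]           # [{"src": "A", "dest": "id"}, ...]
mapping = {m["src"]: m["dest"] for m in mapping_def}    # {"A": "id", "B": "expense_type", ...}

# Read the sheet, label the columns A..F, then rename them to the table's column names
df = pd.read_excel("Ocean Street Apartments Trial Balance 03-22.xlsx", skiprows=5)
df.columns = list(mapping.keys())
df = df.rename(columns=mapping)

# Placeholder connection string - use your own credentials
engine = create_engine("postgresql+psycopg2://postgres:password@localhost/sea")
df.to_sql("ocean_street_apartments", engine, index=False, if_exists="append")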

mariadb-connector executemany throws DataError

I'm trying to implement a few simple SQL insert statements with python-mariadb-connector but cannot figure out what I'm doing wrong.
The database looks like this:
SET FOREIGN_KEY_CHECKS = false;

CREATE OR REPLACE TABLE `forums` (
  `id` int(10) unsigned NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE OR REPLACE TABLE `accounts` (
  `id` int(10) unsigned NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE OR REPLACE TABLE `posts` (
  `id` int(10) unsigned NOT NULL,
  `forum_id` int(10) unsigned NOT NULL,
  `account_id` int(10) unsigned NOT NULL,
  PRIMARY KEY (`id`),
  KEY `posts_forum_fk` (`forum_id`),
  KEY `posts_account_fk` (`account_id`),
  CONSTRAINT `posts_account_fk` FOREIGN KEY (`account_id`) REFERENCES `accounts` (`id`),
  CONSTRAINT `posts_forum_fk` FOREIGN KEY (`forum_id`) REFERENCES `forums` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE OR REPLACE TABLE `comments` (
  `id` int(10) unsigned NOT NULL,
  `post_id` int(10) unsigned NOT NULL,
  `account_id` int(10) unsigned NOT NULL,
  `parent_id` int(10) unsigned DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `comments_post_fk` (`post_id`),
  KEY `comments_account_fk` (`account_id`),
  -- KEY `comments_comments_fk` (`parent_id`),
  -- CONSTRAINT `comments_comments_fk` FOREIGN KEY (`parent_id`) REFERENCES `comments` (`id`),
  CONSTRAINT `comments_account_fk` FOREIGN KEY (`account_id`) REFERENCES `accounts` (`id`),
  CONSTRAINT `comments_post_fk` FOREIGN KEY (`post_id`) REFERENCES `posts` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

SET FOREIGN_KEY_CHECKS = true;
The code for inserting data looks like this:
import mariadb

config = {
    "user": "db_user_name",
    "password": "db_passwd",
    "host": "db_host",
    "port": 3306,
    "database": "db_name"
}

if __name__ == '__main__':
    with mariadb.connect(**config) as conn:
        cur = conn.cursor()
        cur.executemany(
            "INSERT INTO `forums` (`id`) VALUES (?)",
            [(1,), (2,), (3,)]
        )
        cur.executemany(
            "INSERT INTO `accounts` (`id`) VALUES (?)",
            [(1,), (2,), (3,), (4,)]
        )
        cur.executemany(
            "INSERT INTO `posts` (`id`, `forum_id`, `account_id`) VALUES (?, ?, ?)",
            [(6, 3, 1)]
        )
        cur.executemany(
            "INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`) VALUES (?, ?, ?, ?)",
            [(1, 6, 1, None), (2, 6, 2, 1)]
        )  # exception happens here
When executing this, I get the following error:
Traceback (most recent call last):
  File ".../db_test.py", line 28, in <module>
    cur.executemany(
mariadb.DatabaseError.DataError: Invalid parameter type at row 2, column 4
I'm not sure how executemany is implemented, but I think it should do something similar to the following SQL queries:
INSERT INTO `forums` (`id`) VALUES (1), (2), (3);
INSERT INTO `accounts` (`id`) VALUES (1), (2), (3), (4);
INSERT INTO `posts` (`id`, `forum_id`, `account_id`) VALUES (6, 3, 1);
INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`)
VALUES (1, 6, 1, NULL), (2, 6, 2, 1);
which works just fine for me...
Is it a bug or am I doing something wrong here?
It took me a while to figure this out.
cur.executemany(
    "INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`) VALUES (?, ?, ?, ?)",
    [(1, 6, 1, None), (2, 6, 2, 1)]
)
But in your comments table you have this constraint:
CONSTRAINT `comments_comments_fk` FOREIGN KEY (`parent_id`)
    REFERENCES `comments` (`id`),
Before you can insert (2, 6, 2, 1), the row (1, 6, 1, None) already has to be committed in the database. In a bulk insert the commit happens only after all rows have been sent, so the first tuple is not there yet at that point.
So if you do it like this, both rows will appear in the database (I also committed the other tables after their bulk inserts):
MariaDB
mycursor.execute(
    "INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`) VALUES (?, ?, ?, ?)",
    (1, 6, 1, None))
mydb.commit()

mycursor.executemany(
    "INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`) VALUES (?, ?, ?, ?)",
    [(2, 6, 2, 1)])
mydb.commit()
MySQL
sql = """INSERT INTO forums (id) VALUES (%s)"""
val = [("1",), ("2",), ("3",)]
mycursor.executemany(sql, val)
mydb.commit()

mycursor.executemany(
    "INSERT INTO accounts (id) VALUES (%s)",
    [(1,), (2,), (3,), (4,)]
)
mydb.commit()

mycursor.executemany(
    "INSERT INTO posts (id, forum_id, account_id) VALUES (%s, %s, %s)",
    [(6, 3, 1)]
)
mydb.commit()

mycursor.execute(
    "INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`) VALUES (%s, %s, %s, %s)",
    (1, 6, 1, None))
mydb.commit()

mycursor.executemany(
    "INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`) VALUES (%s, %s, %s, %s)",
    [(2, 6, 2, 1)])
mydb.commit()
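Building on that answer, you can also split the batch yourself so that root comments (parent_id is NULL) go in one executemany and the replies go in a second one. A sketch reusing mydb / mycursor from above; it assumes replies only reference roots from the first batch, not other replies:
comments = [(1, 6, 1, None), (2, 6, 2, 1)]

# Roots first, then the replies that reference them
roots = [row for row in comments if row[3] is None]
replies = [row for row in comments if row[3] is not None]

sql = "INSERT INTO `comments` (`id`, `post_id`, `account_id`, `parent_id`) VALUES (?, ?, ?, ?)"
mycursor.executemany(sql, roots)
mydb.commit()   # parents are committed before any reply points at them
mycursor.executemany(sql, replies)
mydb.commit()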

What are the possible ways of processing JSON data using SQL, Elasticsearch, or preprocessing in Python?

I have a case study where I need to take data from a REST API, do some analysis on it using aggregate functions, joins, etc., and use the response data in JSON format to plot some retail graphs.
Approaches followed so far:
Read the data from the JSON, store it in Python variables and insert it with SQL queries. This is obviously a costly operation because it inserts into the database for every JSON line read. For 33k rows it takes more than 20 minutes, which is inefficient.
This could be handled in Elasticsearch for faster processing, but complex operations like joins are not available in Elasticsearch.
If anybody can suggest the best approach (like preprocessing or post-processing in Python) for handling such scenarios, it would be helpful.
Thanks in advance
SQL script:
def store_data(AccountNo):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO cstore (AccountNo) VALUES (%s)"
    cursor.execute(insert_query, (AccountNo,))
    db.commit()
    cursor.close()
    db.close()
    return

def on_data(file_path):
    # This is the meat of the script...it connects to your mongoDB and stores the tweet
    try:
        # Decode the JSON from Twitter
        testFile = open(file_path)
        datajson = json.load(testFile)
        # print(len(datajson))
        # grab the wanted data from the Tweet
        for i in range(len(datajson)):
            for cosponsor in datajson[i]:
                AccountNo = cosponsor['AccountNo']
                store_data(AccountNo)
Edit 1: JSON added
{
    "StartDate": "1/1/18",
    "EndDate": "3/30/18",
    "Transactions": [
        {
            "CSPAccountNo": "41469300",
            "ZIP": "60098",
            "ReportDate": "2018-03-08T00:00:00",
            "POSCode": "00980030003",
            "POSCodeModifier": "0",
            "Description": "TIC TAC GUM WATERMEL",
            "ActualSalesPrice": 1.59,
            "TotalCount": 1,
            "Totalsales": 1.59,
            "DiscountAmount": 0,
            "DiscountCount": 0,
            "PromotionAmount": 0,
            "PromotionCount": 0,
            "RefundAmount": 0,
            "RefundCount": 0
        },
        {
            "CSPAccountNo": "41469378",
            "ZIP": "60098",
            "ReportDate": "2018-03-08T00:00:00",
            "POSCode": "01070080727",
            "POSCodeModifier": "0",
            "Description": "PAYDAY KS",
            "ActualSalesPrice": 2.09,
            "TotalCount": 1,
            "Totalsales": 2.09,
            "DiscountAmount": 0,
            "DiscountCount": 0,
            "PromotionAmount": 0,
            "PromotionCount": 0,
            "RefundAmount": 0,
            "RefundCount": 0
        }
    ]
}
I do not have your JSON file, so I don't know whether this runs as-is, but I would try something like the code below: read just the account info into a list, then write it to the db in one go with executemany. I would expect a much better (lower) execution time than 20 minutes.
def store_data(AccountNo):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    # MySQLdb uses the %(name)s (pyformat) placeholder style for dict parameters
    insert_query = "INSERT INTO cstore (AccountNo, ZIP, ReportDate) VALUES (%(AccountNo)s, %(ZIP)s, %(ReportDate)s)"
    cursor.executemany(insert_query, AccountNo)
    db.commit()
    cursor.close()
    db.close()
    return

def on_data(file_path):
    # This is the meat of the script...it connects to your mongoDB and stores the tweet
    try:
        # declare an empty list for all the account numbers
        accountno_list = list()
        # Decode the JSON from Twitter
        testFile = open(file_path)
        datajson = json.load(testFile)
        # print(len(datajson))
        # grab the wanted data from the Tweet
        for row in datajson[0]['Transactions']:
            values = dict()
            values['AccountNo'] = row['CSPAccountNo']
            values['ZIP'] = row['ZIP']
            values['ReportDate'] = row['ReportDate']
            # from here on you can populate the attributes you need in a similar way..
            accountno_list.append(values)
    except:
        pass
    store_data(accountno_list)
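If you would rather avoid the manual loop entirely, another option (a sketch, untested against your schema, and it assumes SQLAlchemy plus a MySQL driver are installed) is to flatten the Transactions array with pandas and bulk-load it in one call:
import json

import pandas as pd
from sqlalchemy import create_engine

with open(file_path) as f:
    data = json.load(f)

# One row per transaction; index into the list first (data[0]) if your file wraps the object in an array
df = pd.json_normalize(data["Transactions"])
df = df[["CSPAccountNo", "ZIP", "ReportDate"]].rename(columns={"CSPAccountNo": "AccountNo"})

# Placeholder connection string - fill in your own credentials
engine = create_engine("mysql+mysqldb://USER:PASSWD@HOST/DATABASE?charset=utf8")
df.to_sql("cstore", engine, index=False, if_exists="append")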

building json data from sql database cursor

Without knowing the structure of the JSON, how can I return a JSON object from the database query? All of the information is there, I just can't figure out how to build the object.
import MySQLdb
import json
db = MySQLdb.connect( host, user, password, db)
cursor = db.cursor()
cursor.execute( query )
rows = cursor.fetchall()
field_names = [i[0] for i in cursor.description]
json_string = json.dumps( dict(rows) )
print field_names[0]
print field_names[1]
print json_string
db.close()
count
severity
{"321": "7.2", "1": "5.0", "5": "4.3", "7": "6.8", "1447": "9.3", "176": "10.0"}
The json object would look like:
{"data":[{"count":"321","severity":"7.2"},{"count":"1","severity":"5.0"},{"count":"5","severity":"4.3"},{"count":"7","severity":"6.8"},{"count":"1447","severity":"9.3"},{"count":"176","severity":"10.0"}]}
The problem you are encountering happens because you only turn the fetched rows into a dict, without their descriptions.
dict in Python expects either another dict, or an iterable of two-item tuples, where for each tuple the first item becomes the key and the second the value.
Since you only fetch two columns, you get the first one (count) as key, and the second (severity) as value for each fetched row.
What you want to do is also combine the descriptions, like so:
json_string = json.dumps([
    {description: value for description, value in zip(field_names, row)}
    for row in rows])
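To get exactly the shape shown in the question, wrap that list under a "data" key:
json_string = json.dumps({
    "data": [dict(zip(field_names, row)) for row in rows]
})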
1- You can use the pymysql DictCursor:
import pymysql
connection = pymysql.connect(db="test")
cursor = connection.cursor(pymysql.cursors.DictCursor)
cursor.execute("SELECT ...")
row = cursor.fetchone()
print row["key"]
2- MySQLdb also includes a DictCursor that you can use. You need to pass cursorclass=MySQLdb.cursors.DictCursor when making the connection.
import MySQLdb
import MySQLdb.cursors
connection = MySQLdb.connect(db="test",cursorclass=MySQLdb.cursors.DictCursor)
cursor = connection.cursor()
cursor.execute("SELECT ...")
row = cursor.fetchone()
print row["key"]
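With a DictCursor, fetchall() already returns the rows as dicts, so building the desired structure is a one-liner (default=str is a safety net for values such as Decimal or datetime that json cannot serialize directly):
import json

cursor.execute(query)
json_string = json.dumps({"data": cursor.fetchall()}, default=str)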
I got this to work using the collections module, although the code is a bit convoluted:
import MySQLdb
import json
import collections

db = MySQLdb.connect(host, user, passwd, db)
cursor = db.cursor()
cursor.execute(query)
rows = cursor.fetchall()

field_names = [i[0] for i in cursor.description]

objects_list = []
for row in rows:
    d = collections.OrderedDict()
    d[field_names[0]] = row[0]
    d[field_names[1]] = row[1]
    objects_list.append(d)

json_string = json.dumps(objects_list)
print json_string
db.close()
[{"count": 176, "severity": "10.0"}, {"count": 1447, "severity": "9.3"}, {"count": 321, "severity": "7.2"}, {"count": 7, "severity": "6.8"}, {"count": 1, "severity": "5.8"}, {"count": 1, "severity": "5.0"}, {"count": 5, "severity": "4.3"}]

Insert dictionary keys values into a MySQL database

I have executed this code to insert a dictionary into my table in the database:
d = {'err': '0', 'tst': '0', 'Type o': 'FTP', 'recip': 'ADMIN', 'id': '101', 'origin': 'REPORT', 'Type recip': 'SMTP', 'date': '2010-01-10 18:47:52'}
db = MySQLdb.connect("localhost","admin","password","database")
cursor = db.cursor()
cursor.execute("""INSERT INTO mytable(ID, ERR, TST, DATE, ORIGIN, TYPE_O, RECIP, TYPE_RECIP) VALUES (%(id)s, %(err)s, %(tst)s, %(date)s, %(origin)s, %(Type o)s, %(recip)s, %(Type recip)s)""", d)
db.commit()
db.close()
Create statement of my table:
CREATE TABLE mytable (
    `ID` tinyint unsigned NOT NULL,
    `ERR` tinyint NOT NULL,
    `TST` tinyint unsigned NOT NULL,
    `DATE` datetime NOT NULL,
    `ORIGIN` varchar(30) NOT NULL,
    `TYPE_O` varchar(10) NOT NULL,
    `RECIP` varchar(30) NOT NULL,
    `TYPE_RECIP` varchar(10) NOT NULL,
    PRIMARY KEY (`ID`,`DATE`)
) ENGINE = InnoDB;
But I get an error; it says:
1064, "you have an error in your SQL syntax; check the manual that
corresponds to you MySQL server version... )
Be aware of SQL injections and use the second argument to execute for inserting your query parameters:
cursor.execute("""
    INSERT INTO
        table
        (name, age, origin, date)
    VALUES
        (%(name)s, %(age)s, %(origin)s, %(date)s)
""", d)

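If the parameter names with spaces ('Type o', 'Type recip') turn out to be the problem, one defensive variant (a sketch, not a confirmed fix) is to remap the dictionary to space-free keys before executing:
# Remap the original dict d to keys without spaces
params = {
    "id": d["id"], "err": d["err"], "tst": d["tst"], "date": d["date"],
    "origin": d["origin"], "type_o": d["Type o"],
    "recip": d["recip"], "type_recip": d["Type recip"],
}
cursor.execute("""
    INSERT INTO mytable (ID, ERR, TST, DATE, ORIGIN, TYPE_O, RECIP, TYPE_RECIP)
    VALUES (%(id)s, %(err)s, %(tst)s, %(date)s,
            %(origin)s, %(type_o)s, %(recip)s, %(type_recip)s)
""", params)
db.commit()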