Parse SQL to extract column and table names using python

Parse SQL to extract column and table names using python - python

I want to write a code that will extract table and column names from a query that does not have JOIN keyword. Instead, the cartesian join (,) is used as below:
SELECT suppliers.supplier_name, subquery1.total_amt
FROM suppliers
,
(SELECT supplier_id, SUM(orders.amount) AS total_amt
FROM orders
GROUP BY supplier_id) subquery1
WHERE subquery1.supplier_id = suppliers.supplier_id;"""
I tried using the below code but its not working in python 2.7 as i'm getting the error : Bool object not callable at line 21:
import itertools
import sqlparse
from sqlparse.sql import IdentifierList, Identifier
from sqlparse.tokens import Keyword, DML
def is_subselect(parsed):
if not parsed.is_group():
return False
for item in parsed.tokens:
if item.ttype is DML and item.value.upper() == 'SELECT':
return True
return False
def extract_from_part(parsed):
from_seen = False
print 'hi'
for item in parsed.tokens:
if item.is_group():
print 'group'
for x in extract_from_part(item):
yield x
if from_seen:
print 'from'
if is_subselect(item):
for x in extract_from_part(item):
yield x
elif item.ttype is Keyword and item.value.upper() in ['ORDER', 'GROUP', 'BY', 'HAVING']:
from_seen = False
StopIteration
else:
yield item
if item.ttype is Keyword and item.value.upper() == 'FROM':
from_seen = True
def extract_table_identifiers(token_stream):
for item in token_stream:
if isinstance(item, IdentifierList):
for identifier in item.get_identifiers():
value = identifier.value.replace('"', '').lower()
yield value
elif isinstance(item, Identifier):
value = item.value.replace('"', '').lower()
yield value
def extract_tables(sql):
# let's handle multiple statements in one sql string
extracted_tables = []
statements = (sqlparse.parse(sql))
for statement in statements:
# print statement.get_type()
if statement.get_type() != 'UNKNOWN':
stream = extract_from_part(statement)
print stream
extracted_tables.append(set(list(extract_table_identifiers(stream))))
return list(itertools.chain(*extracted_tables))
# strsql = """
# SELECT p.product_name, inventory.quantity
# FROM products p join inventory
# ON p.product_id = inventory.product_id;
# """
strsql = """SELECT suppliers.supplier_name, subquery1.total_amt
FROM suppliers
,
(SELECT supplier_id, SUM(orders.amount) AS total_amt
FROM orders
GROUP BY supplier_id) subquery1
WHERE subquery1.supplier_id = suppliers.supplier_id;"""
extract_tables(strsql)
Error : this is the traceback:
Traceback (most recent call last):
File "4.py", line 77, in <module>
extract_tables(strsql)
File "4.py", line 60, in extract_tables
extracted_tables.append(set(list(extract_table_identifiers(stream))))
File "4.py", line 40, in extract_table_identifiers
for item in token_stream:
File "4.py", line 21, in extract_from_part
if item.is_group():
TypeError: 'bool' object is not callable

Thanks to #Gphilo for the answer:
From the traceback it seems is_group is actually not a function, but a simple bool attribute. Try replacing item.is_group() with item.is_group and see if things improve

Related

SQLalchemy attribute error when dynamically creating both table and columns names when using string variables

I'm having an issue where I need to create a new tables and a specific columns because I need to do SELECT INTO from a regular data table into a mapping table. The table names and two columns names are dynamic due to the nature of the data that I'm working with in the database.
I tried using an example in Use variable column headings in SQLAlchemy, but I'm still getting errors being raised. Here's the basic code:
RecIndex = # some string key value that changes
tableData = "Data-" + str(i)
tableName = f'Mapping{idx}'
colName = f'Voltage{i}-V{idx}'
col_list = ['Reading', 'Date', colName]
t_list = {TableData: [RecIndex, colName], tableName: [RecIndex, colName]}
table_list = []
for t_name, col_name in t_list.items():
t = Table(
t_name, metadata,
Column('Reading', Integer),
Column('Date', Date),
*[Column(name, Integer) for name in col_name]
)
table_list.append(t)
t1 = table_list[0] # Mapping table
t2 = table_list[1] # Data table
sel = t1.insert().from_select(col_list, t2.select().where(t2.c.colName > 0)) # FAILS HERE
However, when I try to build the sel variable, it fails and I get this error message:
Traceback (most recent call last):
File "C:\Python\Python\lib\site-packages\sqlalchemy\sql\base.py", line 1201, in getattr
return self._index[key]
KeyError: 'colName'
sel = t1.insert().from_select(col_list, t2.select().where(t2.c.colName > 0))
File "C:\Python\Python\lib\site-packages\sqlalchemy\sql\base.py", line 1203, in getattr
util.raise_(AttributeError(key), replace_context=err)
File "C:\Python\Python\lib\site-packages\sqlalchemy\util\compat.py", line 207, in raise_
raise exception
AttributeError: colName
Anyone have any idea on why it isn't working? I would appreciate any help

In your example code you are referencing the column by the name of the variable, colName instead of the contents of the variable, ie. "Voltage0-V2". Try something like this:
sel = t1.insert().from_select(col_list, t2.select().where(getattr(t2.c, colName) > 0))
This is just the built-in getattr:
getattr
I think dict style lookups are also supported ie. t2.c[colName], so this might work too:
sel = t1.insert().from_select(col_list, t2.select().where(t2.c[colName] > 0))

How to resolve AttributeError when trying to set None as default method argument

I know there are loads of answers to this question but I'm still not getting it...
Following is sa_reporting.py
class saReport():
def __init__(self, body, to_table, normalise=False, date_col=None):
global directory
self.body = body
self.to_table = to_table
self.normalise = normalise
self.date_col = date_col if date_col is not None else []
directory = os.path.join('/Users','python', self.to_table)
if not os.path.exists(directory):
os.mkdir(directory)
def download_files(self, ...):
...
def download_reports(self, ...):
...
def get_files(self):
...
def read_file(self, file):
....
def load_to_db(self, sort_by=None): # THIS IS WHAT I THINK IS CAUSING THE ERROR
sort_by = sort_by if sort_by is not None else [] # THIS IS WHAT I TRIED TO FIX IT
def normalise_data(self, data):
dim_data = []
for row in data:
if row not in dim_data:
dim_data.append(row)
return dim_data
def convert_dates(self, data):
if self.date_col:
for row in data:
for index in self.date_col:
if len(row[index]) > 10:
row[index] = row[index][:-5].replace('T',' ')
row[index] = datetime.datetime.strptime(row[index], "%Y-%m-%d %H:%M:%S")
else:
row[index] = datetime.datetime.strptime(row[index], "%Y-%m-%d").date()
return data
print(f'\nWriting data to {self.to_table} table...', end='')
files = self.get_files()
for file in files:
print('Processing ' + file.split("sa360/",1)[1] + '...', end='')
csv_file = self.read_file(file)
csv_headers = ', '.join(csv_file[0])
csv_data = csv_file[1:]
if self.normalise:
csv_data = self.normalise_data(csv_data)
csv_data = self.convert_dates(csv_data)
if sort_by:
csv_data = sorted(csv_data, key=itemgetter(sort_by))
#...some other code that inserts into a database...
Executing the following script (sa_main.py):
import sa_reporting
from sa_body import *
dim_campaign_test = sa_reporting.saReport(
body=dim_campaign_body,
to_table='dimsa360CampaignTest',
normalise=True,
date_col=[4,5]
)
dim_campaign_test_download = dim_campaign_test.download_reports()
dim_campaign_test_download.load_to_db(sort_by=0) # THIS IS WHERE THE ERROR OCCURS
Output and error message:
Downloading reports...
The report is still generating...restarting
The report is ready
Processing...
Downloading fragment 0 for report AAAnOdc9I_GnxAB0
Files successfully downloaded
Traceback (most recent call last):
File "sa_main.py", line 43, in <module>
dim_campaign_test_download.load_to_db(sort_by=0)
AttributeError: 'NoneType' object has no attribute 'load_to_db'
Why am I getting this error? And how can I fix it?
I just want to make None be the default argument and if a user specifies the sort_by parameter then None will be replaced with whatever the user specifies (which should be an integer index)

This code would seem to suggest that dim_campaign_test_download is being set to None. As in the below line, you set it to the result of dim_campaign_test.download_reports(), it is likely that no reports are being found.
dim_campaign_test_download = dim_campaign_test.download_reports()
You might want to instead do the following, as dim_campaign_test is the saReport Object on which you probably want to operate:
dim_campaign_test.load_to_db(sort_by=0)

sqlalchemy - How to select count from a union query

My goal is that I have two tables, each describes a relationship between a user and either a "team" or "company"
If there are any rows that says "admin" in the tables for the userid, then I want to know that, otherwise if no rows then i need to know that as well.
What I have so far is as follows (UserTeamLink and UserCompanyLink) are Orm tabels.
obj = UserTeamLink
q1 = session.query(func.count(obj.type).label("cnt")).filter(obj.type == 'admin').filter(obj.user_id == id) ; str(q1) ; q1.all()
obj = UserCompanyLink
q2 = session.query(func.count(obj.type).label("cnt")).filter(obj.type == 'admin').filter(obj.user_id == id) ; str(q2) ; q2.all()
my_union = q1.union_all(q2)
query = select([func.sum(my_union.c.cnt).label("total_cnt")], from_obj=my_union)
query.all()
however, the line "query = select([func.sum(my_union.c.cnt).label("total_cnt")], from_obj=my_union)" breaks with:
AttributeError: 'BaseQuery' object has no attribute 'c'
The entire output is as follows:
>>> obj = UserTeamLink
>>> q1 = session.query(func.count(obj.type).label("cnt")).filter(obj.type == 'admin').filter(obj.user_id == id) ; str(q1) ; q1.all()
'SELECT count(user_team.type) AS cnt \nFROM user_team \nWHERE user_team.type = :type_1 AND user_team.user_id = :user_id_1'
[(0L,)]
>>> obj = UserCompanyLink
>>> q2 = session.query(func.count(obj.type).label("cnt")).filter(obj.type == 'admin').filter(obj.user_id == id) ; str(q2) ; q2.all()
'SELECT count(user_company.type) AS cnt \nFROM user_company \nWHERE user_company.type = :type_1 AND user_company.user_id = :user_id_1'
[(0L,)]
>>>
>>> my_union = q1.union_all(q2)
>>> query = select([func.sum(my_union.c.cnt).label("total_cnt")], from_obj=my_union)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'BaseQuery' object has no attribute 'c'
Is there a way to fix this?
In summary what I'm attempting to do is select counts from both tables, then union into another table and then add them.
Thank you.

You need to use my_union as subquery
my_union = q1.union_all(q2).subquery()
query = select([func.sum(my_union.c.cnt).label("total_cnt")], from_obj=my_union)

Here's my own problem solved, I used a slightly different approach than #r-m-n's however I think both should work:
q_union = q1.union_all(q2)
q_union_cnt = db.session.query(func.sum(q_union.subquery().columns.cnt).label("total_cnt"))
admin_points = q_union_cnt.scalar()

Why am I getting TypeError: an integer is required?

This is my code snippet for where the traceback call shows an error:
def categorize(title):
with conn:
cur= conn.cursor()
title_str= str(title)
title_words= re.split('; |, |\*|\n',title_str)
key_list= list(dictionary.keys())
flag2= 1
for word in title_words:
title_letters= list(word)
flag1= 1
for key in key_list:
if key==title_letters[0]:
flag1= 0
break
if flag1== 0:
start=dictionary.get(title_letters[0])
end= next_val(title_letters[0])
for i in xrange (start,end):
if word==transfer_bag_of_words[i]:
flag2= 0
break
if flag2== 0:
cur.execute("select Id from articles where title=title")
row_id= cur.fetchone()
value= (row_id,'1')
s= str(value)
f.write(s)
f.write("\n")
break
return
def next_val(text):
for i,v in enumerate(keyList):
if text=='t':
return len(transfer_bag_of_words)
elif v==text:
return dictionary[keyList[i+1]]
This is the traceback call:
Traceback (most recent call last):
File "categorize_words.py", line 93, in <module>
query_database()
File "categorize_words.py", line 45, in query_database
categorize(row)
File "categorize_words.py", line 67, in categorize
for i in xrange (start,end):
TypeError: an integer is required
I have not given the whole code here. But I will explain what I am trying to do. I am trying to import a particular field from a sqlite database and checking if a single word of the field matches with a particular bag of words I already have in my program. I have sorted the bag of words aphabetically and assigned every starting of a new letter to it's index using python dictionary. This I have done so that everytime I check a word of the field being present in the bag of words, I do not have to loop through the entire bag of words. Rather I can just start looping from the index of the first letter of the word.
I have checked that the return type of get() in dictionary is int and the function nextVal also should return an int since both len() and dictionary[keylist[i+1]] are int types.
Please help.
EDIT
This is my entire code:
import sqlite3 as sql
import re
conn= sql.connect('football_corpus/corpus2.db')
transfer_bag_of_words=['transfer','Transfer','transfers','Transfers','deal','signs','contract','rejects','bid','rumours','swap','moves',
'negotiation','negotiations','fee','subject','signings','agreement','personal','terms','pens','agent','in','for',
'joins','sell','buy','confirms','confirm','confirmed','signing','renew','joined','hunt','excited','move','sign',
'loan','loaned','loans','switch','complete','offer','offered','interest','price','tag','miss','signed','sniffing',
'remain','plug','pull','race','targeting','targets','target','eye','sale','clause','rejected',
'interested']
dictionary={}
dictionary['a']=0;
keyList=[]
f= open('/home/surya/Twitter/corpus-builder/transfer.txt','w')
def map_letter_to_pos():
pos=0
transfer_bag_of_words.sort()
for word in transfer_bag_of_words:
flag=1
letters= list(word)
key_list= list(dictionary.keys())
for key in key_list:
if key==letters[0]:
flag=0
break
if flag==1:
dictionary[letters[0]]=pos
pos+=1
else:
pos+=1
keyList= sorted(dictionary.keys())
def query_database():
with conn:
cur= conn.cursor()
cur.execute("select title from articles")
row_titles= cur.fetchall()
for row in row_titles:
categorize(row)
def categorize(title):
with conn:
cur= conn.cursor()
title_str= str(title)
title_words= re.split('; |, |\*|\n',title_str)
key_list= list(dictionary.keys())
flag2= 1
for word in title_words:
title_letters= list(word)
flag1= 1
for key in key_list:
if key==title_letters[0]:
flag1= 0
break
if flag1== 0:
start=dictionary.get(title_letters[0])
end= next_val(title_letters[0])
for i in xrange (start,end):
if word==transfer_bag_of_words[i]:
flag2= 0
break
if flag2== 0:
cur.execute("select Id from articles where title=title")
row_id= cur.fetchone()
value= (row_id,'1')
s= str(value)
f.write(s)
f.write("\n")
break
return
def next_val(text):
for i,v in enumerate(keyList):
if text=='t':
return len(transfer_bag_of_words)
elif v==text:
return dictionary[keyList[i+1]]
if __name__=='__main__':
map_letter_to_pos()
query_database()
And this is the downloadable link to the database file http://wikisend.com/download/702374/corpus2.db

map_letter_to_pos attempts to modify the global variable keyList without specifying it as a global variable, therefore it only modifies a local copy of keyList and then discards it. This causes next_val to have nothing to iterate, so it never reaches the if elif, and returns None.
end = None
range(start,end) # None is not an int

IndexError: list index out of range [python irc bot]

For my IRC bot when I try to use the `db addcol command I will get that Index error but I got no idea what is wrong with it.
#hook.command(adminonly=True, autohelp=False)
def db(inp,db=None):
split = inp.split(' ')
action = split[0]
if "init" in action:
result = db.execute("create table if not exists users(nick primary key, host, location, greeting, lastfm, fines, battlestation, desktop, horoscope, version)")
db.commit()
return result
elif "addcol" in action:
table = split[1]
col = split[2]
if table is not None and col is not None:
db.execute("ALTER TABLE {} ADD COLUMN {}".format(table,col))
db.commit
return "Added Column"
That is the command I am trying to execute and here is the error:
Unhandled exeption in thread started by <function run at 0xb70d6844
Traceback (most recent call last):
File "core/main.py", line 68, in run
out = func(input.inp, **kw)
File "plugins/core_admin_global.py", :ine 308, in db
col = split[2]
IndexError: list index out of range
You will find the whole code at my GIT repository.
Edit: This bot is just a little thing I have been playing around with while learning python so don't expect me to be too knowledgeable about this.
Yet another edit:
The command I am trying to add, just replace desktop with mom.
#hook.command(autohelp=False)
def desktop(inp, nick=None, conn=None, chan=None,db=None, notice=None):
"desktop http://url.to/desktop | # nick -- Shows a users Desktop."
if inp:
if "http" in inp:
database.set(db,'users','desktop',inp.strip(),'nick',nick)
notice("Saved your desktop.")
return
elif 'del' in inp:
database.set(db,'users','desktop','','nick',nick)
notice("Deleted your desktop.")
return
else:
if '#' in inp: nick = inp.split('#')[1].strip()
else: nick = inp.strip()
result = database.get(db,'users','desktop','nick',nick)
if result:
return '{}: {}'.format(nick,result)
else:
if not '#' in inp: notice(desktop.__doc__)
return 'No desktop saved for {}.'.format(nick)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Parse SQL to extract column and table names using python - python

Thanks to #Gphilo for the answer: From the traceback it seems is_group is actually not a function, but a simple bool attribute. Try replacing item.is_group() with item.is_group and see if things improve

Related

SQLalchemy attribute error when dynamically creating both table and columns names when using string variables

How to resolve AttributeError when trying to set None as default method argument

sqlalchemy - How to select count from a union query

Why am I getting TypeError: an integer is required?

IndexError: list index out of range [python irc bot]

Categories

Resources