Checking if ElementTree node is empty failure - python

I keep getting the error: AttributeError: 'NodeList' object has no attribute 'data', but I am just trying to check if that node is empty, and if so, just pass a -1 instead of the value. My understanding is temp_pub.getElementsByTagName("pages").data should return None. How do I fix this?
(p.s.- I've tried != None and is None)
# Parse the cleaned XML with minidom and read the fields of each <pub> element by index.
xmldoc = minidom.parse('pubsClean.xml')
#loop through <pub> tags to find number of pubs to grab
root = xmldoc.getElementsByTagName("root")[0]
# pubs is only used for its length (num_pubs) below.
pubs = [a.firstChild.data for a in root.getElementsByTagName("pub")]
num_pubs = len(pubs)
count = 0
while(count < num_pubs):
temp_pages = 0
#get data from each <pub> tag
temp_pub = root.getElementsByTagName("pub")[count]
temp_ID = temp_pub.getElementsByTagName("ID")[0].firstChild.data
temp_title = temp_pub.getElementsByTagName("title")[0].firstChild.data
temp_year = temp_pub.getElementsByTagName("year")[0].firstChild.data
temp_booktitle = temp_pub.getElementsByTagName("booktitle")[0].firstChild.data
#handling no value
# NOTE: getElementsByTagName() returns a NodeList, so the `.data` access on the
# next line is what raises "'NodeList' object has no attribute 'data'".
if temp_pub.getElementsByTagName("pages").data != None:
temp_pages = temp_pub.getElementsByTagName("pages")[0].firstChild.data
else:
# -1 is the placeholder stored when no page information is available.
temp_pages = -1
temp_authors = temp_pub.getElementsByTagName("authors")[0]
temp_author_array = [a.firstChild.data for a in temp_authors.getElementsByTagName("author")]
num_authors = len(temp_author_array)
count = count + 1
XML being processed
<pub>
<ID>5010</ID>
<title>Model-Checking for L<sub>2</sub</title>
<year>1997</year>
<booktitle>Universität Trier, Mathematik/Informatik, Forschungsbericht</booktitle>
<pages></pages>
<authors>
<author>Helmut Seidl</author>
</authors>
</pub>
<pub>
<ID>5011</ID>
<title>Locating Matches of Tree Patterns in Forest</title>
<year>1998</year>
<booktitle>Universität Trier, Mathematik/Informatik, Forschungsbericht</booktitle>
<pages></pages>
<authors>
<author>Andreas Neumann</author>
<author>Helmut Seidl</author>
</authors>
</pub>
Full code from edit (with to ElementTree)
#for execute command to work
import sqlite3
import xml.etree.ElementTree as ET
con = sqlite3.connect("publications.db")
cur = con.cursor()
from xml.dom import minidom
#use this to clean the foreign characters
import re
def anglicise(matchobj):
    """Return the replacement text for one ``&name;`` entity match.

    Intended as the ``repl`` callback of ``re.sub('&[a-zA-Z]+;', ...)``.
    The five entities predefined by XML (``&amp;``, ``&lt;``, ``&gt;``,
    ``&quot;``, ``&apos;``) are returned unchanged so the output stays
    well-formed and keeps its meaning; every other entity is reduced to
    the first letter of its name, e.g. ``&auml;`` becomes ``a``.
    """
    entity = matchobj.group(0)
    # A bare '&' is still passed through for callers whose pattern can match it.
    if entity == '&' or entity in ('&amp;', '&lt;', '&gt;', '&quot;', '&apos;'):
        return entity
    # entity[0] is '&', so entity[1] is the base letter of the entity name.
    return entity[1]
# Copy test.xml to pubsClean.xml wrapped in a single <root> element, dropping
# <sub> markup and replacing named character entities via anglicise().
outputFilename = 'pubsClean.xml'
with open('test.xml') as inXML, open(outputFilename, 'w') as outXML:
    outXML.write('<root>\n')
    for line in inXML:
        # str.find() returns -1 (truthy) on a miss, so it cannot be used as a
        # presence test; str.replace() is already a no-op when the tag is
        # absent, so no test is needed at all.
        newLine = line.replace("<sub>", "").replace("</sub>", "")
        outXML.write(re.sub('&[a-zA-Z]+;', anglicise, newLine))
    outXML.write('\n</root>')
# Parse the cleaned file once with ElementTree and insert every <pub> into
# sqlite. Iterating the <pub> elements directly replaces the separate minidom
# pass that was only used to count them.
tree = ET.parse('pubsClean.xml')
root = tree.getroot()
count = 0
for pub in root.iter("pub"):
    # get data from each <pub> tag -- lookups are relative to `pub`, not to the
    # document root, so each iteration reads its own publication instead of
    # the first one in the file
    temp_ID = pub.find(".//ID").text
    temp_title = pub.find(".//title").text
    temp_year = pub.find(".//year").text
    temp_booktitle = pub.find(".//booktitle").text
    # handling no value: findtext() gives '' for <pages></pages> and None when
    # the tag is missing; both fall back to -1
    pages_text = pub.findtext(".//pages")
    temp_pages = pages_text if pages_text else -1
    temp_authors = pub.find(".//authors")
    temp_author_array = [a.text for a in temp_authors.findall(".//author")]
    count = count + 1
    # process results into sqlite
    pub_params = (temp_ID, temp_title)
    cur.execute("INSERT OR IGNORE INTO publication (id, ptitle) VALUES (?, ?)", pub_params)
    cur.execute("INSERT OR IGNORE INTO journal (jtitle, pages, year, pub_id, pub_title) VALUES (?, ?, ?, ?, ?)", (temp_booktitle, temp_pages, temp_year, temp_ID, temp_title))
    for author_name in temp_author_array:
        cur.execute("INSERT OR IGNORE INTO authors (name, pub_id, pub_title) VALUES (?, ?, ?)", (author_name, temp_ID, temp_title))
        cur.execute("INSERT OR IGNORE INTO wrote (name, jtitle) VALUES (?, ?)", (author_name, temp_booktitle))
# Commit once after all rows are queued, then release the database.
con.commit()
con.close()
print("\nNumber of entries processed: ", count)

You can use the attributes method to get a dictionary-like object (Doc) and then query the dictionary:
if temp_pub.getElementsByTagName("pages").attributes.get('data'):

As the error message suggests, getElementsByTagName() returns neither a single node nor None, but a `NodeList`. So you should check the length to see whether the returned list contains any item:
pages_nodes = temp_pub.getElementsByTagName("pages")
# An empty element such as <pages></pages> is found but has no children, so
# firstChild is None and must be checked before reading .data.
if len(pages_nodes) > 0 and pages_nodes[0].firstChild is not None:
    temp_pages = pages_nodes[0].firstChild.data
or you can pass the list directly to if since empty list is falsy :
pages_nodes = temp_pub.getElementsByTagName("pages")
# The NodeList is falsy when empty; firstChild is None for an element with no
# content (e.g. <pages></pages>), so guard that too before reading .data.
if pages_nodes and pages_nodes[0].firstChild is not None:
    temp_pages = pages_nodes[0].firstChild.data
Side note, despite the title and the tag of this question, your code suggests that you're using minidom instead of ElementTree. Your code could be simpler using ElementTree, for example :
# minidom
temp_ID = temp_pub.getElementsByTagName("ID")[0].firstChild.data
# finding a single element can be done using ElementTree's `find()`
temp_ID = temp_pub.find(".//ID").text
....
# minidom
temp_author_array = [a.firstChild.data for a in temp_authors.getElementsByTagName("author")]
# finding multiple elements uses ElementTree's `findall()` (there is no `find_all()`)
temp_author_array = [a.text for a in temp_authors.findall(".//author")]

Related

PYTHON SQLITE selecting multiple where conditions that may or may not exist

Working on some code that uses pysimplegui as the UI and SQlite for the data sorting. I'm using SQLite's execute function to select data based on input from the user in the UI through variables. For example user wants to search for part name they input all or part of the name into the box, hit the search button which then runs my "parts_search" method, which will then only filter the result based on part name. OR the user enters information in multiple boxes which then filters based on the boxes that have information.
This here is runnable code provided you add a file base1.db in the same folder location as the script itself
import PySimpleGUI as sg
import os.path
import sqlite3
# sql var
c = None
conn = None
setup = None
# list var
parts = []
def sql():
    """Create the ``parts`` table if it is missing, commit, and close the connection.

    Calls ``conn_sql()`` first and then works through the module-level
    cursor ``c`` and connection ``conn``.
    """
    conn_sql()
    create_parts_table = """CREATE TABLE IF NOT EXISTS parts (part_name TEXT, part_number TEXT, part_series TEXT,
part_size INTEGER, job_type TEXT)"""
    c.execute(create_parts_table)
    conn.commit()
    if conn:
        conn.close()
def conn_sql():
    """Open ``base1.db`` and store the connection and cursor in ``conn`` and ``c``.

    When ``./base1.db`` is not an existing file nothing is opened and both
    globals keep whatever value they had before the call.
    """
    global c, conn
    # SQL connection var
    if not os.path.isfile('./base1.db'):
        return
    conn = sqlite3.connect('base1.db')
    c = conn.cursor()
def main_gui_parts():
    """Show the parts-search window and run its event loop until it is closed.

    A click on Search forwards the current field values to ``part_search``;
    Back, 'Close' or closing the window ends the loop.
    """
    global parts
    name_series_row = [sg.Text('Part Name: '), sg.Input(size=(20, 1), key='-PName-'),
                       sg.Text('Part Series:'), sg.Input(size=(10, 1), key='-PSeries-')]
    number_size_row = [sg.Text('Part Number:'), sg.Input(size=(20, 1), key='-PNumber-'),
                       sg.Text('Part Size:'), sg.Input(size=(10, 1), key='-PSize-')]
    job_type_row = [sg.Checkbox('Fit', key='-PFit-'), sg.Checkbox('Weld', key='-PWeld-'),
                    sg.Checkbox('Assemble', key='-PAssemble-'),
                    sg.Button('Search', key='-PSearch-')]
    layout = [name_series_row, number_size_row, job_type_row,
              [sg.Listbox(parts, size=(58, 10), key='-PParts-')],
              [sg.Button('Back', key='-PBack-')]]
    window = sg.Window('parts list', layout, grab_anywhere=True)
    sql()
    # Keys are listed in the positional order part_search() expects.
    search_keys = ('-PName-', '-PSeries-', '-PNumber-', '-PSize-',
                   '-PFit-', '-PWeld-', '-PAssemble-')
    while True:
        event, values = window.read()
        if event in ('Close', sg.WIN_CLOSED):
            break
        # PART WINDOW
        search_args = [values[key] for key in search_keys]
        if event == '-PSearch-':
            print('search parts')
            part_search(*search_args)
        if event == '-PBack-':
            break
    window.close()
def part_search(part_name, part_series, part_number, part_size, fit, weld, assemble):
    """Build and print the ``SELECT`` statement for the supplied search fields.

    Every truthy argument contributes one condition; each condition after
    the first is prefixed with ``AND``. The statement is only printed --
    the execution step is still commented out at the end.
    """
    global parts
    conn_sql()
    filter_original = """SELECT * FROM parts WHERE """
    # (value, debug label, convert value with str() before printing, SQL condition)
    criteria = (
        (part_name, 'part name: ', False, """part_name LIKE ? """),
        (part_series, 'part series: ', False, """part_series=(?) """),
        (part_number, 'part number: ', False, """part_number LIKE ? """),  ### DONT USE LIKE???
        (part_size, 'part size: ', False, """part_size=(?) """),
        (fit, 'job type: ', True, """job_type = fit """),
        (weld, 'job type: ', True, """job_type = weld """),
        (assemble, 'job type: ', True, """job_type = assemble"""),
    )
    filter = filter_original
    for value, label, stringify, condition in criteria:
        if not value:
            continue
        print(label + (str(value) if stringify else value))
        # Anything already appended means this is not the first condition.
        if filter != filter_original:
            filter += """AND """
        filter += condition
    print(filter)
    #if filter != filter_original:
    #c.execute(filter, ())
    #else:
    #c.execute("""SELECT * FROM parts""")
main_gui_parts()
THE PROBLEM: The commented code at the bottom is where I'm having trouble figuring out (in the "part_search" method). I don't use all of the variables all the time. Only filter with the variables provided by the user. which means the tuple should only have the variables which was input by the user.
If all the variables were used this is what it would look like. c.execute(filter, (part_name, part_series, part_number, part_size, fit, weld, assemble)) but more often than not only some of those variable will have been used and may need to look like this instead. c.execute(filter, (part_name, part_series, weld)) Somehow I need the variables here to be removeable(for lack of better word)
I've been learning a lot about SQLite but I could be seeing tunnel vision and can't think of a different way to go about this.
Probably the easiest way to deal with this is to put all the filter conditions and values into lists, and then only add a WHERE clause if the length of the filters list is non-zero. For example:
# Collect one condition and one bound value per search field that was filled in.
filters = []
values = []
if part_name:
    filters.append("""part_name LIKE ?""")
    values.append(part_name)
...
# Only add a WHERE clause when at least one condition was collected.
query = """SELECT * FROM parts"""
if filters:
    query += ' WHERE ' + ' AND '.join(filters)
c.execute(query, tuple(values))
Note: should your filters ever include OR conditions, you need to parenthesise them when building the query to ensure correct operation i.e.
query += ' WHERE (' + ') AND ('.join(filters) + ')'

Get all data API when inputs are empty

I created my first API where I can get data from my tables in Bigquery.
I can get all the data I need based on the 2 inputs below, but I am also trying to get the whole table when the inputs are empty, which I cannot do.
Thanks for your help
@app.route("/tracking", methods=['GET'])
def tracking_data():
    """Return the monthly vehicle stats for one haulier and month as JSON.

    Reads ``haulier_id_tracking`` and ``month_tracking`` from the query
    string, filters ``mart.monthly_vehicle_stats`` on both, and returns the
    decoded JSON of the first result row (``None`` when there is no row).
    """
    haulier_id_tracking = request.args.get('haulier_id_tracking')
    month_tracking = request.args.get('month_tracking')
    # NOTE(review): both values come straight from the request and are
    # interpolated into the SQL text; prefer BigQuery query parameters.
    query_job = bq_client.query("""
WITH t AS (
SELECT *
FROM mart.monthly_vehicle_stats
WHERE dt_fr_month = '{month_tracking}-01' AND (haulier_id_tracking = '{haulier_id_tracking}'))
SELECT TO_JSON_STRING(STRUCT(ARRAY_AGG(STRUCT(dt_fr_month, haulier_id_tracking, vehicle_id , nb_days_tracked,
data_access, date_first_camp, invoiced)) AS data)) json
FROM t
""".format(month_tracking = month_tracking, haulier_id_tracking = haulier_id_tracking))
    # The query aggregates everything into a single row holding one JSON string.
    for row in query_job:
        return json.loads(row["json"])
You can remove the where clause when the input is empty like this:
@app.route("/tracking", methods=['GET'])
def tracking_data():
    """Return monthly vehicle stats as JSON, optionally filtered.

    When both ``haulier_id_tracking`` and ``month_tracking`` are supplied in
    the query string the result is restricted to that haulier and month;
    otherwise the whole table is returned. The return value is the decoded
    JSON of the first result row (``None`` when there is no row).
    """
    haulier_id_tracking = request.args.get('haulier_id_tracking')
    month_tracking = request.args.get('month_tracking')
    where_clause = ''
    # request.args.get() yields None (not '') for an absent parameter, so test
    # truthiness to treat both "missing" and "empty" as "no filter".
    if haulier_id_tracking and month_tracking:
        # NOTE(review): values are interpolated into the SQL text; prefer
        # BigQuery query parameters for request-supplied input.
        where_clause = f"WHERE dt_fr_month = '{month_tracking}-01' AND (haulier_id_tracking = '{haulier_id_tracking}')"
    query_job = bq_client.query(f"""
WITH t AS (
SELECT * FROM mart.monthly_vehicle_stats {where_clause})
SELECT TO_JSON_STRING(STRUCT(ARRAY_AGG(STRUCT(dt_fr_month, haulier_id_tracking, vehicle_id , nb_days_tracked,
data_access, date_first_camp, invoiced)) AS data)) json
FROM t
""")
    # The query aggregates everything into a single row holding one JSON string.
    for row in query_job:
        return json.loads(row["json"])

How to pass value of variable into another text file?

I have text files that contain SQL queries. After running one file, "tb_exec_ns_call_pln.txt", I get two dates, e.g. 2018-12-29 and 2019-03-29.
I want to pass these dates into another text file (tb_exec_ns_call_actvty.txt) using Python. That text file contains the query below:
SELECT a.nm as cycle_nm,
a.start_dt as cycle_start_dt,
a.end_dt as cycle_end_dt,
a.terr as territory,sales_drctn,
x_rating1,
c.jnj_id as jnj_id,
c.prsn_first_nm,
c.prsn_last_nm,
plnnd_calls as rep_goal
FROM eureka.cycle_plan a, eureka.cycle_plan_trgt b, eureka.acct c
WHERE
a.id = b.cycle_plan
and b.acct = c.id
and b.del_flg = 'N'
***and start_dt >= '2018-12-29'***
***and end_dt <= '2019-03-29'***
and substring(a.terr,1,6) in ('106-KS','106-PI','106-VO')
and a.status = 'In_Progress_vod'
and a.del_flg = 'N'
and c.del_flg = 'N' and plnnd_calls > 0
i have written python script also.. Please guide me how to pass the value.
path = "D:/Users/SPate233/Downloads/NS dashboard/tb_exec_ns_call_pln.txt"
# Read the whole query text; the with-block closes the file once it is read.
with open(path, 'r') as sql_query_file:
    sql_query1 = sql_query_file.read()
cur.execute(sql_query1)
res = cur.fetchall()
print(res)
print(type(res))
# Print the second and third column of every returned row.
for val in res:
    print(val[1])
    print(val[2])
One approach is to have a string variable hardcoded in to tb_exec_ns_call_actvty.txt and then use str.replace to fill in the required info.
Ex:
SELECT a.nm as cycle_nm,
a.start_dt as cycle_start_dt,
a.end_dt as cycle_end_dt,
a.terr as territory,sales_drctn,
x_rating1,
c.jnj_id as jnj_id,
c.prsn_first_nm,
c.prsn_last_nm,
plnnd_calls as rep_goal
FROM eureka.cycle_plan a, eureka.cycle_plan_trgt b, eureka.acct c
WHERE
a.id = b.cycle_plan
and b.acct = c.id
and b.del_flg = 'N'
and start_dt >= 'START_DT'
and end_dt <= 'END_DT'
and substring(a.terr,1,6) in ('106-KS','106-PI','106-VO')
and a.status = 'In_Progress_vod'
and a.del_flg = 'N'
and c.del_flg = 'N' and plnnd_calls > 0
InCode:
# The START_DT / END_DT placeholders live in the activity query file, so that
# is the file to load and fill in.
path = "D:/Users/SPate233/Downloads/NS dashboard/tb_exec_ns_call_actvty.txt"
with open(path) as sql_query_file:
    sql_query1 = sql_query_file.read()
# Swap each placeholder token for the concrete date before executing.
sql_query1 = sql_query1.replace("START_DT", '2018-12-29').replace("END_DT", '2019-03-29')
cur.execute(sql_query1)
res = cur.fetchall()

SQL insert into.. where (python)

I have the following code:
def create_table():
    """Create the ``TEST`` odds table through the module-level cursor if it is missing."""
    # Every column needs its own comma-separated definition: without the comma
    # after DRAW_2, SQLite reads "TEXT DATE_ODDS TEXT" as DRAW_2's type name and
    # no DATE_ODDS column is created, which breaks the later INSERT.
    c.execute('CREATE TABLE IF NOT EXISTS TEST(SITE TEXT, SPORT TEXT, TOURNAMENT TEXT, TEAM_1 TEXT, TEAM_2 TEXT, DOUBLE_CHANCE_1X TEXT, DOUBLE_CHANCE_X2 TEXT, DOUBLE_CHANCE_12 TEXT, DRAW_1 TEXT, DRAW_2 TEXT, DATE_ODDS TEXT, TIME_ODDS TEXT)')
create_table()
def data_entry():
    """Insert one row into ``TEST`` from the module-level scraped values and commit."""
    insert_sql = "INSERT INTO TEST(SITE, SPORT, TOURNAMENT, TEAM_1, TEAM_2, DOUBLE_CHANCE_1X, DOUBLE_CHANCE_X2, DOUBLE_CHANCE_12, DATE_ODDS, TIME_ODDS) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    # Values are listed in the same order as the column list above.
    row_values = (
        Site,
        sport.strip(),
        tournament.strip(),
        team_1.strip(),
        team_2.strip(),
        x_odd.strip(),
        y_odd.strip(),
        z_odd.strip(),
        Date_odds,
        Time_odds,
    )
    c.execute(insert_sql, row_values)
    conn.commit()
def double_chance():
# Updates the three DOUBLE_CHANCE_* columns of TEST and commits.
# NOTE: x_odd, y_odd, z_odd, tournament, ... appear inside the SQL text itself,
# so SQLite resolves them as column names rather than Python variables; this is
# the statement behind "sqlite3.OperationalError: no such column: x_odd".
c.execute("UPDATE TEST SET DOUBLE_CHANCE_1X = x_odd, DOUBLE_CHANCE_X2 = y_odd, DOUBLE_CHANCE_12 = z_odd WHERE TOURNAMENT = tournament and TEAM_1 = team_1 and TEAM_2 = team_2 and DATE_ODDS = Date_odds and TIME_ODDS = Time_odds")
conn.commit()
driver.get(link)
Date_odds = time.strftime('%Y-%m-%d')
Time_odds = time.strftime('%H:%M')
sport = (driver.find_element_by_xpath(".//*[#id='breadcrumb']/li[2]/a")).text #example Footbal
tournament = (driver.find_element_by_xpath(".//*[#id='breadcrumb']/li[4]/a")).text #example Premier League
try:
div = (driver.find_element_by_xpath(".//*[#id='breadcrumb']/li[5]/a")).text #to find any division if exists
except NoSuchElementException:
div = ""
market = driver.find_element_by_xpath(".//*[contains(#id,'ip_market_name_')]")
market_name = market.text
market_num = market.get_attribute('id')[-9:]
print market_num
team_1 = (driver.find_element_by_xpath(".//*[#id='ip_marketBody" + market_num + "']/tr/td[1]//*[contains(#id,'name')]")).text
team_2 = (driver.find_element_by_xpath(".//*[#id='ip_marketBody" + market_num + "']/tr/td[3]//*[contains(#id,'name')]")).text
print sport, tournament, market_name, team_1, team_2
data_entry() #first SQL call
for ip in driver.find_elements_by_xpath(".//*[contains(#id,'ip_market3')]"):
num = ip.get_attribute('id')[-9:]
type = (driver.find_element_by_xpath(".//*[contains(#id,'ip_market_name_" + num + "')]")).text
if type == 'Double Chance':
print type
print num
x_odd = (driver.find_element_by_xpath(".//*[#id='ip_market" + num + "']/table/tbody/tr/td[1]//*[contains(#id,'price')]")).text
y_odd = (driver.find_element_by_xpath(".//*[#id='ip_market" + num + "']/table/tbody/tr/td[2]//*[contains(#id,'price')]")).text
z_odd = (driver.find_element_by_xpath(".//*[#id='ip_market" + num + "']/table/tbody/tr/td[3]//*[contains(#id,'price')]")).text
print x_odd, y_odd, z_odd
double_chance() #second SQL call
c.close()
conn.close()
Update:
Based on the answer below I updated the code, but I can't make it work.
When I run it, I get the following error:
sqlite3.OperationalError: no such column: x_odd
What should I do?
Update 2:
I found the solution:
I created an unique ID in order to be able to select exactly the row I want when I run the second SQL query. In this case it doesn't modify any other rows:
def double_chance():
    """Store the three double-chance odds on the row whose ID equals ``ID_unique`` and commit."""
    update_sql = "UPDATE TEST SET DOUBLE_CHANCE_1X = (?), DOUBLE_CHANCE_X2 = (?), DOUBLE_CHANCE_12 = (?) WHERE ID = (?)"
    # Bound in placeholder order: the three odds first, then the row id.
    bound_values = (x_odd, y_odd, z_odd, ID_unique)
    c.execute(update_sql, bound_values)
    conn.commit()
Now it works perfectly.
Use the UPDATE statement to update columns in an existing row.
UPDATE TEST SET DRAW_1=value1,DRAW_2=value2 WHERE column3=value3;
If data_entry(1) is always called first, then change the statement in data_entry_2() to UPDATE. If not you will need to check if the row exists in both cases and INSERT or UPDATE accordingly.

Insert tree kind of data taken from a database into a python dictionary

I have a database table as follows. The data is in the form of a tree, with each row's PARENT column holding the COMPONENT_ID of its parent.
CREATE TABLE IF NOT EXISTS DOMAIN_HIERARCHY (
COMPONENT_ID INT NOT NULL ,
LEVEL INT NOT NULL ,
COMPONENT_NAME VARCHAR(127) NOT NULL ,
PARENT INT NOT NULL ,
PRIMARY KEY ( COMPONENT_ID )
);
The following data is in the table
(1,1,'A',0)
(2,2,'AA',1)
(3,2,'AB',1)
(4,3,'AAA',2)
(5,3,'AAB',2)
(6,3,'ABA',3)
(7,3,'ABB',3)
I have to retrieve the data and store in a python dictionary
I wrote the below code
# Load every row of DOMAIN_HIERARCHY and try to nest the component names by level.
conx = sqlite3.connect( 'nameofdatabase.db' )
curs = conx.cursor()
curs.execute( 'SELECT COMPONENT_ID, LEVEL, COMPONENT_NAME, PARENT FROM DOMAIN_HIERARCHY' )
rows = curs.fetchall()
# cmap: COMPONENT_ID -> COMPONENT_NAME
cmap = {}
for row in rows:
cmap[row[0]] = row[2]
hrcy={}
# NOTE(review): maxl is not defined in this snippet -- presumably the deepest LEVEL; confirm.
for level in range( 1, maxl + 1 ):
for row in rows:
if row[1] == level:
# The first matching row seen while hrcy is still empty becomes a top-level key.
if hrcy == {}:
hrcy[row[2]] = []
continue
parent = cmap[row[3]]
# The lookup below is by key of hrcy itself, but children are stored as
# {name: []} dicts inside a parent's list and never become keys of hrcy.
hrcy[parent].append( { row[2]: [] } )
The problem I'm facing is that nodes deeper than the 2nd level are getting added to the root instead of to their parent. Where should I change the code?
The problem is that you can't directly see the nodes for the second level after you insert them. Try this:
def build_hierarchy(rows):
    """Nest ``(COMPONENT_ID, LEVEL, COMPONENT_NAME, PARENT)`` rows into a dict tree.

    Returns ``{root_name: {child_name: {grandchild_name: {...}}}}`` where the
    root is the row with LEVEL 1 (the last one, if several exist). Rows whose
    PARENT does not match any COMPONENT_ID are left unattached.

    Raises:
        ValueError: if no row has LEVEL 1.
    """
    # cmap: COMPONENT_ID -> (name, dict of that component's children)
    cmap = {}
    hrcy = None
    for comp_id, level, name, _parent_id in rows:
        entry = (name, {})
        cmap[comp_id] = entry
        if level == 1:
            hrcy = {entry[0]: entry[1]}
    if hrcy is None:
        raise ValueError("no LEVEL 1 (root) row found")
    # Second pass: every component is registered in cmap by now, so each row
    # can be hung under its parent regardless of the order rows arrived in.
    for comp_id, _level, name, parent_id in rows:
        parent = cmap.get(parent_id)
        if parent is not None:
            parent[1][name] = cmap[comp_id][1]
    return hrcy


if __name__ == "__main__":
    conx = sqlite3.connect( 'nameofdatabase.db' )
    curs = conx.cursor()
    curs.execute( 'SELECT COMPONENT_ID, LEVEL, COMPONENT_NAME, PARENT ' +
                  'FROM DOMAIN_HIERARCHY' )
    rows = curs.fetchall()
    hrcy = build_hierarchy(rows)
    print(hrcy)
By keeping each component's map of subcomponents in cmap, I can always reach each parent's map to add the next component to it. I tried it with the following test data:
rows = [(1,1,'A',0),
(2,2,'AA',1),
(3,2,'AB',1),
(4,3,'AAA',2),
(5,3,'AAB',2),
(6,3,'ABA',3),
(7,3,'ABB',3)]
The output was this:
{'A': {'AA': {'AAA': {}, 'AAB': {}}, 'AB': {'ABA': {}, 'ABB': {}}}}

Categories

Resources