I am executing PostgreSQL13 queries coding them in python 3.9 using the psycopg2 library. I also am working with PostGIS extension over PostgreSQL.
Kindly look for the comment which points out the line which causes the syntax error. I am having trouble both understanding what is the syntax error and how to debug it since I need to execute PostgreSQL queries using python so any tips will be greatly appreciated.
def corefunc(rf, openConnection):
pcur = openConnection.cursor(name="pcur" + rf)
rcur = openConnection.cursor(name="rcur" + rf)
acur = openConnection.cursor()
rcur.execute("SELECT geom FROM " + rf)
for number in range (1, 5):
acur.execute("DROP TABLE IF EXISTS " + "pf"+ rf)
acur.execute("CREATE TABLE " + "pf" + rf + " (index integer, sums integer)")
pcur.execute("SELECT geom FROM " + "pf" + str(number))
row = 1
for each in rcur.fetchall():
if number == 1: acur.execute("INSERT INTO " + "pf" + rf + " (index, sums) VALUES (" + str(row) + ",0)")
for eachone in pcur.fetchall():
#-------------------------------------------- the statement below gives the syntax error
acur.execute("UPDATE TABLE " + "pf" + rf + " SET sums = sums + "\
+"ST_Contains(" + " ' " + each[0] + " ' " + ", " + " ' " + eachone[0] + " ' " + ")::int WHERE index = " + str(row))
row = row + 1
def parallelJoin (pointsTable, rectsTable, outputTable, outputPath, openConnection):
#Implement ParallelJoin Here.
cursor = openConnection.cursor()
cursor.execute("SELECT COUNT(*) FROM " + pointsTable)
size_data = (cursor.fetchall())[0][0]
for number in range(1, 5):
cursor.execute("DROP TABLE IF EXISTS pf" + str(number))
cursor.execute("CREATE TABLE pf" + str(number) + " AS SELECT * FROM "
+ pointsTable + " LIMIT " + str(size_data/4)
+ " OFFSET " + str(((number-1)*size_data)/4))
cursor.execute("SELECT COUNT(*) FROM " + rectsTable)
size_rects = (cursor.fetchall())[0][0]
for number in range(1, 5):
cursor.execute("DROP TABLE IF EXISTS rf" + str(number))
cursor.execute("CREATE TABLE rf" + str(number) + " AS SELECT * FROM "
+ pointsTable + " LIMIT " + str(size_rects/4)
+ " OFFSET " + str(((number - 1) * size_rects)/4))
threads = dict()
for number in range(0, 4):
threads[number] = threading.Thread(target=corefunc, args=("rf" + str(number + 1), openConnection))
threads[number].start()
break
while threads[0].is_alive() or threads[1].is_alive()\
or threads[2].is_alive() or threads[3].is_alive(): pass
# more shit to do
Nevermind, I just thought checking out the syntax from somewhere and I found the problem. To update table, the query begins with "UPDATE...." not "UPDATE TABLE.....".
Related
When I run this query by manually it execute correctly without any issue and I can get the store number and item number but when I use it in my framework and connect my scenario step to the Db2 it gives me an error. This is the query which one is I execute:::
cursor.execute("select * from qs36f.DSTHSTP join qs36f.calendar on date_ccyymmd = dhindt where date_iso between(current date - 10 day) and current date and DHCUS# in (" + open_stores + ") and dhqtss>=1 and DHCLSS = " + class_nbr + " and dhsbcl = " + sub_class_nbr + " and ((dhqtss*dhrt5s)*DHPACK) <" + end_range + "")
I don't know what is the issue here. This is error:::
cursor.execute("select * from qs36f.DSTHSTP join qs36f.calendar on date_ccyymmd = dhindt where date_iso between(current date - 10 day) and current date and DHCUS# in (" + open_stores + ") and dhqtss>=1 and DHCLSS = " + class_nbr + " and dhsbcl = " + sub_class_nbr + " and ((dhqtss*dhrt5s)*DHPACK) <" + end_range + "")
pyodbc.ProgrammingError: ('42000', '[42000] [IBM][System i Access ODBC Driver][DB2 for i5/OS]SQL0104 - Token , was not valid. Valid tokens: FOR USE SKIP WAIT WITH FETCH LIMIT ORDER UNION EXCEPT OFFSET. (-104) (SQLExecDirectW)')
My expectations is I have to retrieve from database store number and item number.
It seems you are facing a syntax error. Having that your statement is:
select *
from qs36f.DSTHSTP
join qs36f.calendar
on date_ccyymmd = dhindt
where date_iso between (current date - 10 day) and current date
and DHCUS# in (" + open_stores + ")
and dhqtss>=1
and DHCLSS = " + class_nbr + "
and dhsbcl = " + sub_class_nbr + "
and ((dhqtss*dhrt5s)*DHPACK) <" + end_range + "
It's possible that you are not building it correctly. In such cases, try to remove one line from the WHERE clause and execute the query in order to find the one that is not correct.
def complete_stage_purge_process(self, target_cnxn, stage_table, process_cd):
self.logger.debug(datetime.now())
self.logger.debug('complete_stage_purge_process')
delete_dt = datetime.today() - timedelta(days=30)
delete_dt = str(delete_dt)
run_pk_sql = "select run_pk from " + schemaName.PROCESS.value + "." + tableName.RUN_LOG.value + " where " + ProcessRunlog.ETL_MODIFIED_DTM.value + " <= '" + delete_dt + "' and " + \
ProcessRunlog.PROCESS_PK.value + " = (select " + ProcessRunlog.PROCESS_PK.value + " from " + schemaName.PROCESS.value + "." + \
tableName.PROCESS.value + " where " + \
Process.PROCESS_CODE.value + " = '" + process_cd + "') "
delete_sql = "delete from " + schemaName.STAGE.value + "." + stage_table + " where run_pk in (" + run_pk_sql + ")"
print(delete_sql)
print(target_cnxn)
try:
trgt_cursor = target_cnxn.cursor()
trgt_cursor.execute(delete_sql)
self.logger.debug("deletes processed successfully ")
except:
self.logger.exception('Error in processing deletes')
raise
But when added commit after trgt_cursor.execute(delete_sql) then below error is thrown. Could someone please help on how to handle this
AttributeError: 'psycopg2.extensions.cursor' object has no attribute 'commit'
I'm trying to analyze a sqlite3 file and printing the results to a text file. If i test the code with print it all works fine. When i write it to a file it cuts out at the same point every time.
import sqlite3
import datetime
import time
conn = sqlite3.connect("History.sqlite")
curs = conn.cursor()
results = curs.execute("SELECT visits.id, visits.visit_time, urls.url, urls.visit_count \
FROM visits INNER JOIN urls ON urls.id = visits.url \
ORDER BY visits.id;")
exportfile = open('chrome_report.txt', 'w')
for row in results:
timestamp = row[1]
epoch_start = datetime.datetime(1601,1,1)
delta = datetime.timedelta(microseconds=int(timestamp))
fulltime = epoch_start + delta
string = str(fulltime)
timeprint = string[:19]
exportfile.write("ID: " + str(row[0]) + "\t")
exportfile.write("visit time: " + str(timeprint) + "\t")
exportfile.write("Url: " + str(row[2]) + "\t")
exportfile.write("Visit count: " + str(row[3]))
exportfile.write("\n")
print "ID: " + str(row[0]) + "\t"
print "visit time: " + str(timeprint) + "\t"
print "Url: " + str(row[2]) + "\t"
print "Visit count: " + str(row[3])
print "\n"
conn.close()
So the print results give the proper result but the export to the file stops in the middle of a url.
OK, I would start by replacing the for loop with the one below
with open('chrome_report.txt', 'w') as exportfile:
for row in results:
try:
timestamp = row[1]
epoch_start = datetime.datetime(1601,1,1)
delta = datetime.timedelta(microseconds=int(timestamp))
fulltime = epoch_start + delta
string = str(fulltime)
timeprint = string[:19]
exportfile.write("ID: " + str(row[0]) + "\t")
exportfile.write("visit time: " + str(timeprint) + "\t")
exportfile.write("Url: " + str(row[2]) + "\t")
exportfile.write("Visit count: " + str(row[3]))
exportfile.write("\n")
print "ID: " + str(row[0]) + "\t"
print "visit time: " + str(timeprint) + "\t"
print "Url: " + str(row[2]) + "\t"
print "Visit count: " + str(row[3])
print "\n"
except Exception as err:
print(err)
By using the "with" statement (context manager) we eliminate the need to close the file. By using the try/except we capture the error and print it. This will show you where your code is failing and why.
This script was created by an ex-lab member that was quite a bit more adapt at Python scripting than I am.
I am attempting to find Cooccupancy between annotated peaks in "exon" regions of the entire human h19 genome. However, after trying to get this to run for about an hour I am looking for help.
Here is the script:
#!/usr/bin/python
import math
import sys
import re
import csv
import MySQLdb
import itertools
import argparse
# format for execution: ./findCooccupancy.py <loci file> <comma separated list of marks to check> <window size> <outputfile>
# example: ./findCooccupancy.py AllGenes.txt PolII-ChIP,KAP1-ChIP,Hexim 150 output.txt
# format of loci file:
# chr2 12345678 12345900 GENEA 1 +
# chr4 987654321 98765000 GENEB 1 -
# etc...
locifile = sys.argv[1]
marks = sys.argv[2]
window = int(sys.argv[3])
outputfile = sys.argv[4]
loci = list(csv.reader(open(locifile, 'rb'),delimiter='\t'))
#loci = list(itertools.chain.from_iterable(loci))
db = MySQLdb.connect(host="localhost",user="snrnp",passwd="snrnp",db="snrnp")
cur = db.cursor()
cntdict = {}
for mark in marks.split(","):
cntdict[mark] = []
counter = 1
for locus in loci:
print "Working on line# " + str(counter)
counter += 1
if str(locus[5]) == "+":
exon = locus[1]
else:
exon = locus[2]
for mark in marks.split(","):
# this is incredibly dirty. sorry. I don't have time to do this better
if mark == 'PolII-ChIP':
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")")
#print "select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")"
else:
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and ((chr_start < " + str(exon) + " and chr_end > " + str(exon) + ") or (abs(chr_start - " + str(exon) + ") < " + str(window) + ") or (abs(chr_end - " + str(exon) + ") < " + str(window) + "))")
#print "select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and ((chr_start < " + str(exon) + " and chr_end > " + str(exon) + ") or (abs(chr_start - " + str(exon) + ") < " + str(window) + ") or (abs(chr_end - " + str(exon) + ") < " + str(window) + "))"
cnt = cur.fetchone()[0]
if cnt > 0:
cntdict[mark].append(",".join(locus))
convertedlist = []
for key in cntdict.keys():
convertedlist.append(cntdict[key])
intersectlist = set(convertedlist[0]).intersection(*convertedlist[1:])
for key in cntdict.keys():
print str(key) + " hits: " + str(len(cntdict[key]))
print "\nTotal Intersection Count: " + str(len(intersectlist))
with open(outputfile, 'w') as outputwriter:
for line in intersectlist:
outputwriter.write(line + "\n")
This is the command line that I have been using:
./findCooccupancy.py ~/code/snRNP/analysis/from\ sequencing/KEC_Project/Pol-IIAnnotatedPeaksGenome.txt PolII-ChIP 150 KECExonOccupancy.txt
This is the latest error message I have received:
Working on line# 1
Traceback (most recent call last):
File "./findCooccupancy.py", line 41, in <module>
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")")
File "/Library/Python/2.7/site-packages/MySQLdb/cursors.py", line 205, in execute
self.errorhandler(self, exc, value)
File "/Library/Python/2.7/site-packages/MySQLdb/connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.OperationalError: (1054, "Unknown column 'Start' in 'where clause'")
I am exploring a data structure which get expands to sub-elements and resolves to a final element. But I only want to store top two levels.
Example: Lets say I start with New York which breaks into Bronx, Kings, New York, Queens, and Richmond as counties but then finally somehow they resolve to USA.
I am not sure if this is a good example but just to make it clear here is more clear explanation of the problem.
A (expands to) B,C,D -> B (expands to) K,L,M -> K resolves to Z
I initially wrote it in series of for loops and then use the recursion but in recursion I am loosing some of the elements that get expand and due to that I don't drill down each of the expanded element. I have put the both recursive version and non-recursive. I am looking for some advise on building this data structure, and what is the best way to do.
I call a data base query for every element in the expanded version which returns a list of items. Go until it resolves to single element. With out recursion I don't loose drilling all the way till the final element that others resolve to. But with recursion its not the same. I am also new to python so hopefully this is not a bad question to ask in a site like this.
returnCategoryQuery is a method that returns list of items by calling the database query.
With out recursion
#Dictionary to save initial category with the rest of cl_to
baseCategoryTree = {};
#categoryResults = [];
# query get all the categories a category is linked to
categoryQuery = "select cl_to from categorylinks cl left join page p on cl.cl_from = p.page_id where p.page_namespace=14 and p.page_title ='";
cursor = db.cursor(cursors.SSDictCursor);
for key, value in idTitleDictionary.iteritems():
for startCategory in value[0]:
#print startCategory + "End of Query";
categoryResults = [];
try:
categoryRow = "";
baseCategoryTree[startCategory] = [];
print categoryQuery + startCategory + "'";
cursor.execute(categoryQuery + startCategory + "'");
done = False;
while not done:
categoryRow = cursor.fetchone();
if not categoryRow:
done = True;
continue;
categoryResults.append(categoryRow['cl_to']);
for subCategoryResult in categoryResults:
print startCategory.encode('ascii') + " - " + subCategoryResult;
for item in returnCategoryQuery(categoryQuery + subCategoryResult + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item;
for subItem in returnCategoryQuery(categoryQuery + item + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem;
for subOfSubItem in returnCategoryQuery(categoryQuery + subItem + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem + " - " + subOfSubItem;
for sub_1_subOfSubItem in returnCategoryQuery(categoryQuery + subOfSubItem + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem + " - " + subOfSubItem + " - " + sub_1_subOfSubItem;
for sub_2_subOfSubItem in returnCategoryQuery(categoryQuery + sub_1_subOfSubItem + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem + " - " + subOfSubItem + " - " + sub_1_subOfSubItem + " - " + sub_2_subOfSubItem;
except Exception, e:
traceback.print_exc();
With Recursion
def crawlSubCategory(subCategoryList):
level = 1;
expandedList = [];
for eachCategory in subCategoryList:
level = level + 1
print "Level " + str(level) + " " + eachCategory;
#crawlSubCategory(returnCategoryQuery(categoryQuery + eachCategory + "'"));
for subOfEachCategory in returnCategoryQuery(categoryQuery + eachCategory + "'"):
level = level + 1
print "Level " + str(level) + " " + subOfEachCategory;
expandedList.append(crawlSubCategory(returnCategoryQuery(categoryQuery + subOfEachCategory + "'")));
return expandedList;
#Dictionary to save initial category with the rest of cl_to
baseCategoryTree = {};
#categoryResults = [];
# query get all the categories a category is linked to
categoryQuery = "select cl_to from categorylinks cl left join page p on cl.cl_from = p.page_id where p.page_namespace=14 and p.page_title ='";
cursor = db.cursor(cursors.SSDictCursor);
for key, value in idTitleDictionary.iteritems():
for startCategory in value[0]:
#print startCategory + "End of Query";
categoryResults = [];
try:
categoryRow = "";
baseCategoryTree[startCategory] = [];
print categoryQuery + startCategory + "'";
cursor.execute(categoryQuery + startCategory + "'");
done = False;
while not done:
categoryRow = cursor.fetchone();
if not categoryRow:
done = True;
continue;
categoryResults.append(categoryRow['cl_to']);
#crawlSubCategory(categoryResults);
except Exception, e:
traceback.print_exc();
#baseCategoryTree[startCategory].append(categoryResults);
baseCategoryTree[startCategory].append(crawlSubCategory(categoryResults));
Are you trying to lookup "Queens" and learn that it is in the USA? Have you tried encoding your tree in XML, and using lxml.etree to find an element and then use getpath to return the path in XPath format?
This would meaning adding a fourth top level to your tree, namely World, and then you would search for Queens and learn that the path to Queens is World/USA/NewYork/Queens. The answer to your question would always be the second item in the XPath.
Of course you could always just build a tree from the XML and use a tree search algorithm.