I have created a webcrawler in python 2.7 and i am using mysqldb to insert data into database.
I have executed each function as a different script for different webpages, but after i put them into a single file as functions, the program shows error;
(After entering seed page and depth)
Traceback (most recent call last):
File "C:\Users\Chetan\Desktop\webCrawler.py", line 207, in
mainFunc(depth,url)
File "C:\Users\Chetan\Desktop\webCrawler.py", line 194, in mainFunc
lst=perPage(url)
File "C:\Users\Chetan\Desktop\webCrawler.py", line 186, in perPage
filterContent(url,page)
File "C:\Users\Chetan\Desktop\webCrawler.py", line 149, in filterContent
cursor.execute(sql)
File "C:\Python27\lib\site-packages\MySQLdb\cursors.py", line 202, in execute
self.errorhandler(self, exc, value)
File "C:\Python27\lib\site-packages\MySQLdb\connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
ProgrammingError: (1064, 'You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'s and specials." />\n
I can't seem to find any problem. Here is the code;
def metaContent(page,url):#EXTRACTS META TAG CONTENT
lst=[]
while page.find("<meta")!=-1:
start_link=page.find("<meta")
page=page[start_link:]
start_link=page.find("content=")
start_quote=page.find('"',start_link)
end_quote=page.find('"',start_quote+1)
metaTag=page[start_quote+1:end_quote]
page=page[end_quote:]
lst.append(metaTag)
#ENTER DATA INTO DB
i,j=0,0
while i<len(lst):
sql = "INSERT INTO META(URL, \
KEYWORD) \
VALUES ('%s','%s')" % \
(url,lst[i])
cursor.execute(sql)
db.commit()
def filterContent(page,url):#FILTERS THE CONTENT OF THE REMAINING PORTION
phrase = ['to','a','an','the',"i'm",\
'for','from','that','their',\
'i','my','your','you','mine',\
'we','okay','yes','no','as',\
'if','but','why','can','now',\
'are','is','also']
#CALLS FUNC TO REMOVE HTML TAGS
page = strip_tags(page)
#CONVERT TO LOWERCASE
page = page.lower()
#REMOVES WHITESPACES
page = page.split()
page = " ".join(page)
#REMOVES IDENTICAL WORDS AND COMMON WORDS
page = set(page.split())
page.difference_update(phrase)
#CONVERTS FROM SET TO LIST
lst = list(page)
#ENTER DATA INTO DB
i,j=0,0
while i<len(lst):
sql = "INSERT INTO WORDS(URL, \
KEYWORD) \
VALUES ('%s','%s')" % \
(url,lst[i])
cursor.execute(sql)
db.commit()
#<6>
def perPage(url):#CALLS ALL THE FUNCTIONS
page=pageContent(url)
#REMOVES CONTENT BETWEEN SCRIPT TAGS
flg=0
while page.find("<script",flg)!=-1:
start=page.find("<script",flg)
end=page.find("</script>",flg)
end=end+9
i,k=0,end-start
page=list(page)
while i<k:
page.pop(start)
i=i+1
page=''.join(page)
flg=start
#REMOVES CONTENT BETWEEN STYLE TAGS
flg=0
while page.find("<script",flg)!=-1:
start=page.find("<style",flg)
end=page.find("</style>",flg)
end=end+9
i,k=0,end-start
page=list(page)
while i<k:
page.pop(start)
i=i+1
page=''.join(page)
flg=start
metaContent(url,page)
lst=linksExt(url,page)
filterContent(url,page)
return lst#CHECK WEATHER NEEDED OR NOT
#<7>
crawled=[]
def mainFunc(depth,url):#FOR THE DEPTH MANIPULATION
if (depth):
lst=perPage(url)
crawled.append(url)
i=0
if (depth-1):
while i<len(lst):
if url[i] not in crawled:
mainFunc(depth-1,url[i])
i+=1
#CALLING MAIN FUNCTION
mainFunc(depth,url)
Please mention any error, especially in depth manipulation function( mainFunc()). Anything regarding improving the crawler would be helpful.
It is definitely sql error, your quotes are not being escaped.
Instead of this
sql = "INSERT INTO META(URL, \
KEYWORD) \
VALUES ('%s','%s')" % \
(url,lst[i])
cursor.execute(sql)
and this
sql = "INSERT INTO WORDS(URL, \
KEYWORD) \
VALUES ('%s','%s')" % \
(url,lst[i])
cursor.execute(sql)
Try this
sql = "INSERT INTO WORDS(URL, \
KEYWORD) \
VALUES (%s, %s)"
cursor.execute(sql, (url, lst[i]))
and this
sql = "INSERT INTO META(URL, \
KEYWORD) \
VALUES (%s, %s)"
cursor.execute(sql, (url, lst[i]))
Also you are using while but not incrementing i, instead you can use this
for keyword in lst:
sql = "INSERT INTO META(URL, \
KEYWORD) \
VALUES (%s, %s)"
cursor.execute(sql, (url, keyword))
In the recursive call of mainFunc you are calling main function,
main(depth-1,url[i])
There is no main function in your code.
change it to,
mainFunc(depth-1,url[i])
Related
I want to change the bot database from SQLite to MySQL (pymysql) and ran into a problem.
I have a function for generating sql queries, but for some reason it stopped working:
def update_format_with_args(sql, parameters: dict):
values = ", ".join([
f"{item} = ?" for item in parameters
])
sql = sql.replace("XXX", values)
return sql, tuple(parameters.values())
def update_settingsx(**kwargs):
with connection.cursor() as db:
sql = f"UPDATE main_settings SET XXX "
sql, parameters = update_format_with_args(sql, kwargs)
db.execute(sql, parameters)
db.commit()
Error:
update_settingsx(profit_buy=now_unix)
File "/root/rent/bot_id/utils/db_api/sqlite.py", line 154, in update_settingsx
db.execute(sql, parameters)
File "/usr/local/lib/python3.7/dist-packages/pymysql/cursors.py", line 168, in execute
query = self.mogrify(query, args)
File "/usr/local/lib/python3.7/dist-packages/pymysql/cursors.py", line 147, in mogrify
query = query % self._escape_args(args, conn)
TypeError: not all arguments converted during string formatting
Not every database connector supports the ? substitution scheme. Pymysql uses %s instead.
Another alternative would be to use named substition:
def update_format_with_args(sql, parameters: dict):
values = ", ".join([
f"{item} = %({item})s" for item in parameters
])
sql = sql.replace("XXX", values)
return sql, parameters
I am new to coding and databases, I can not get the query to work if I write it long hand but I have a lot to carry out and want it in a function but cannot get it to work, it returns a parameters error
import mysql.connector
def connection_check_1(query, value):
mydb = mysql.connector.connect(
host="******",
user="*****",
passwd="*****",
database="****"
)
mycursor = mydb.cursor()
mycursor.execute(query, (value))
myresult = mycursor.fetchall()
mydb.close()
return myresult
value = "sheep"
query = 'select inlicence from licence where animal = %s'
myresult = connection_check_1(query, value)
print(myresult)
Here is the SQL table I have
create table licence
(
animal varchar (20) primary key,
inlicence int (1)
);
This is the error I get
Traceback (most recent call last):
File "*******************", line 20, in
myresult = connection_check_1(query, value)
File "********************", line 13, in connection_check_1
mycursor.execute(query, (value))
File "********************************************88", line 246, in execute
prepared = self._cnx.prepare_for_mysql(params)
File "/home/kev/PycharmProjects/test bed/venv/lib/python3.5/site-packages/mysql/connector/connection_cext.py", line 535, in prepare_for_mysql
raise ValueError("Could not process parameters")
ValueError: Could not process parameters
I have tried changing the way the query is written, changing it to fetchall().
Wrapping a value with () doesn't turn it in to a tuple. You probably meant to add a comma there:
mycursor.execute(query, (value,))
# Creates a one-element tuple-^
I've printed the output of my "payload" which I want to save to the MySQL database:
('B01MTOV8IP', '40462', '23.95', 'n/a', 'Usually ships in 24 hours',
'https://www.amazon.com/reviews/iframe?akid=AKIAIDCPAFSAQICDTFNQ&alinkCode=xm2&asin=B01MTOV8IP&atag=reakenture-20&exp=2017-08-25T17%3A27%3A37Z&v=2&sig=3zbBXVo4cQAJueFeVeo%252F%252FejvaUOmvuwAtfB4EfMyDiU%253D', 'CHG-GSTWL')
There seems to be something wrong with the way I am formatting it before I pass it to connect.
try:
selling_price = product.price_and_currency
selling_price_v = selling_price[0]#type
print selling_price_v
except Exception as e:
selling_price = "n/a"
conn = MySQLdb.connect(host="clabadmin.cfcudy1fdz8o.us-east-1.rds.amazonaws.com", user="", passwd="", db="")
payload =[
asin,
bsr,
str(selling_price_v),
str(listing_price_v),
# availability_type,
availability,
reviews,
sku]
print payload
# conn = sqlite3.connect('skubsr.db')
c = conn.cursor()
c.execute("""UPDATE webservice_bsr
SET
AISN = %s,
Best_Sellers_Rank = %s,
selling_price = %s,
price = %s,
# availability_type = %s,
availability = %s,
reviews = %s
WHERE ItemSKU = %s""", payload)
conn.commit()
I get the following error:
Traceback (most recent call last):
File "/home/trackstarz/clabReportScraper/bsrimport.py", line 907, in <module>
WHERE ItemSKU = %s""", payload)
File "/usr/local/lib/python2.7/dist-packages/MySQLdb/cursors.py", line 187, in execute
query = query % tuple([db.literal(item) for item in args])
TypeError: not enough arguments for format string
[Finished in 3.1s with exit code 1]
# is only used to indicate a comment when used inside Python code. In your query, it is inside the query string, and so is not parsed as a comment identifier, but as part of the query.
If you delete it, you are left with 8 %s and only 7 items inside payload.
I believe the problem is you have multiple %s string indicators in your execute string but are only giving it a single item (in this case a list) which it doesn't know it should break down into multiple values.
Try using some of the suggestions in this post to get your desired effect.
Using Python String Formatting with Lists
New to SO and fairly new to coding, so doing my best to follow the appropriate protocols.
In my python script, I'm creating a new table and populating column names from a list, named 'dups'.
dups = ['Id', 'Name', 'Price', 'Rating']
I'm inputting this list as columns for the new table, called "SuperTable", via a for loop. See code below:
with new_db:
cur = new_db.cursor()
cur.execute("DROP TABLE IF EXISTS SuperTable")
for i in dups:
if i == dups[0]:
new_col = i.replace("'","")
cur.execute("CREATE TABLE SuperTable(%s)" % (new_col))
else:
cur.execute("ALTER TABLE SuperTable ADD COLUMN %s" % i)
I've looked around a lot and can't seem to identify what I'm doing wrong. This approach worked with Sqlite but I keep getting this same error for MySQLdb:
Traceback (most recent call last):
File "MySQL_SuperTable.py", line 125, in <module>
cur.execute("CREATE TABLE Super(%s)" % (new_col))
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/MySQLdb/cursors.py", line 174, in execute
self.errorhandler(self, exc, value)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/MySQLdb/connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ')' at line 1")
Thanks to eggyal! He pointed out that MySQL columns require a datatype. This is what the code looks like now (I created a list of tuples to input the datatypes + column names via a for loop):
with new_db:
cur = new_db.cursor()
cur.execute("DROP TABLE IF EXISTS SuperTable")
for i in col_namestypes:
if i == col_namestypes[0]:
cur.execute("CREATE TABLE SuperTable(%s %s)" % (i))
else:
cur.execute("ALTER TABLE SuperTable ADD COLUMN %s %s" % i)
for i in new_table:
count = len(i)
question_marks = []
while a < count:
question_marks.append('%s')
a += 1
quests = ','.join(question_marks)
cur.executemany("INSERT INTO SuperTable VALUES(%s)" % quests, new_table)
I am wondering why I am receiving this error:
cmd = "INSERT INTO resulttest (category, value, timestamp) VALUES (" + key + ", " + str(value) + ", " + str(timestamp) + ")"
c.execute(cmd)
db.commit()
INSERT INTO resulttest (category, value, timestamp) VALUES (composed, 2, 1343186948.8)
Traceback (most recent call last):
File "C:/Behavioral Technology/Google Drive/twitterthingv5test.py", line 94, in <module>
moodParser()
File "C:/Behavioral Technology/Google Drive/twitterthingv5test.py", line 92, in moodParser
query()
File "C:/Behavioral Technology/Google Drive/twitterthingv5test.py", line 37, in query
main(columns)
File "C:/Behavioral Technology/Google Drive/twitterthingv5test.py", line 81, in main
c.execute(cmd)
File "C:\Python27\lib\site-packages\MySQLdb\cursors.py", line 174, in execute
self.errorhandler(self, exc, value)
File "C:\Python27\lib\site-packages\MySQLdb\connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
I believe it has something to do with how I am passing my values to the SQL command.
Your code to create the query isn't attempting to quote the string values:
cmd = "INSERT INTO resulttest (category, value, timestamp) VALUES (" + key + ", " + str(value) + ", " + str(timestamp) + ")"
Look at the SQL statement you printed:
INSERT INTO resulttest (category, value, timestamp) VALUES (composed, 2, 1343186948.8)
Shouldn't "category" be quoted?
You shouldn't be composing a SQL statement with string operations in the first place. This is how SQL injection vulnerabilities happen. Instead you should use placeholders and let the MySQL library deal with them:
c.execute(
"INSERT INTO resulttest (category, value, timestamp) VALUES (?, ?, ?)",
(key, value, timestamp)
)
In case Ned Batchelder's suggestion doesn't work, here's an alternative:
sql = "INSERT into resulttest (category, value, timestamp) VALUES (%s, %s, %s)"
c.execute(sql % (key, value, timestamp))
I faced a problem where my SQL command wasn't being executed, and following this kind of a syntax made it work.
Although you must be careful, because using % instead of , opens up the possibility for SQL injections, but this was the only syntax that worked for me. Might be a Python-MySQL version and compatibility problem.
A better idea would be to install compatible versions and follow the correct syntax.