Running chatbot on system - python

import re
import sqlite3
from collections import Counter
from string import punctuation
from math import sqrt
# Open (or create) the on-disk database that persists the bot's knowledge.
connection = sqlite3.connect('chatbot.sqlite')
cursor = connection.cursor()

# Create the tables needed by the program.  IF NOT EXISTS makes the setup
# idempotent across runs, so no exception handler is needed; the previous
# bare ``except: pass`` also hid genuine failures (locked or corrupt file,
# malformed SQL, KeyboardInterrupt).
create_table_request_list = [
    'CREATE TABLE IF NOT EXISTS words(word TEXT UNIQUE)',
    'CREATE TABLE IF NOT EXISTS sentences(sentence TEXT UNIQUE, used INT NOT NULL DEFAULT 0)',
    'CREATE TABLE IF NOT EXISTS associations (word_id INT NOT NULL, sentence_id INT NOT NULL, weight REAL NOT NULL)',
]
for create_table_request in create_table_request_list:
    cursor.execute(create_table_request)
def get_id(entityName, text):
    """Return the rowid of ``text`` in the table for ``entityName``.

    The table name is ``entityName + 's'`` and the column name is
    ``entityName`` itself (the callers in this script pass 'sentence' or
    'word').  When no row holds ``text`` yet, one is inserted and the rowid
    of the new row is returned.
    """
    table = entityName + 's'
    column = entityName
    select_sql = 'SELECT rowid FROM ' + table + ' WHERE ' + column + ' = ?'
    cursor.execute(select_sql, (text,))
    found = cursor.fetchone()
    if not found:
        # Unknown text: store it and hand back the freshly assigned rowid.
        insert_sql = 'INSERT INTO ' + table + ' (' + column + ') VALUES (?)'
        cursor.execute(insert_sql, (text,))
        return cursor.lastrowid
    return found[0]
# A token is either a run of word characters or a run of punctuation.
# Compiled once at import time (raw strings, so ``\w`` is not treated as a
# string escape) instead of on every call.
_WORDS_REGEXP = re.compile(r'(?:\w+|[' + re.escape(punctuation) + r']+)')


def get_words(text):
    """Retrieve the words present in a given string of text.

    The text is lowercased first.  The return value is a list of
    ``(word, count)`` tuples, where ``count`` is the number of times the word
    occurs in the text.  A real list is returned (not a dict view) so the
    result matches this description on Python 3 and can be indexed and
    iterated repeatedly.
    """
    return list(Counter(_WORDS_REGEXP.findall(text.lower())).items())
# Python 2 calls the line reader ``raw_input``; Python 3 renamed it ``input``.
# Resolve it once so the loop below runs on either interpreter.
try:
    read_line = raw_input
except NameError:
    read_line = input

B = 'Hello!'
while True:
    # output bot's message
    print('B: ' + B)
    # ask for user input; if blank line, exit the loop
    H = read_line('H: ').strip()
    if H == '':
        break
    # store the association between the bot's message words and the user's response
    words = get_words(B)
    words_length = sum([n * len(word) for word, n in words])
    sentence_id = get_id('sentence', H)
    for word, n in words:
        word_id = get_id('word', word)
        weight = sqrt(n / float(words_length))
        cursor.execute('INSERT INTO associations VALUES (?, ?, ?)', (word_id, sentence_id, weight))
    connection.commit()
    # retrieve the most likely answer from the database
    cursor.execute('CREATE TEMPORARY TABLE results(sentence_id INT, sentence TEXT, weight REAL)')
    words = get_words(H)
    words_length = sum([n * len(word) for word, n in words])
    for word, n in words:
        weight = sqrt(n / float(words_length))
        cursor.execute('INSERT INTO results SELECT associations.sentence_id, sentences.sentence, ?*associations.weight/(4+sentences.used) FROM words INNER JOIN associations ON associations.word_id=words.rowid INNER JOIN sentences ON sentences.rowid=associations.sentence_id WHERE words.word=?', (weight, word,))
    # if matches were found, give the best one
    cursor.execute('SELECT sentence_id, sentence, SUM(weight) AS sum_weight FROM results GROUP BY sentence_id ORDER BY sum_weight DESC LIMIT 1')
    row = cursor.fetchone()
    cursor.execute('DROP TABLE results')
    # otherwise, just randomly pick one of the least used sentences
    # (the sentences table is never empty here: H was stored above)
    if row is None:
        cursor.execute('SELECT rowid, sentence FROM sentences WHERE used = (SELECT MIN(used) FROM sentences) ORDER BY RANDOM() LIMIT 1')
        row = cursor.fetchone()
    # tell the database the sentence has been used once more, and prepare the sentence
    B = row[1]
    cursor.execute('UPDATE sentences SET used=used+1 WHERE rowid=?', (row[0],))

# The in-loop commit runs before the usage counter is bumped, so the last
# UPDATE would be lost on exit without this final commit.
connection.commit()
connection.close()
This code was written to create a chatbot. When I try running it from cmd with the command python chatbot.py, it returns an error saying invalid syntax.
Is there any way I can remove this error and run this code on my system?
It gives the error: File "chatbot.py", line 1 syntax: invalid syntax

What version of Python are you running and in what environment? I ran this code on my Python 3.70b4 under Windows and it worked fine except for line 52:
H = raw_input('H: ').strip()
Which you have to change to:
H = input('H: ').strip()
This is probably unrelated directly to your issue, but the code you posted did run fine for me in my environment, after I made that one change (and of course installed any libraries or modules needed).

Related

How to insert a value in the same line sqlite3 using python

I have two functions (in Python). The first function defines a new variable which I have to insert into a SQL table (first column). The second one does the same thing, but I want to insert its variable (the second one) next to the first variable: in the second column, but in the same row. How can I do this with SQL?
# Open (or create) the database file that stores the pending requests.
connloc = sqlite3.connect("request.db")
# IF NOT EXISTS keeps the setup idempotent: a plain CREATE TABLE raises
# sqlite3.OperationalError ("table requests already exists") on every run
# after the first one.
sqlloc = "create table if not exists requests (" \
         " chat_id INTEGER NOT NULL PRIMARY KEY,"\
         " locpar varchar(20)," \
         " stoppar varchar(20)," \
         " locdes varchar(20) ," \
         " stopdes varchar(20) );"
connloc.execute(sqlloc)
def name_loc(chat, message):
    """Insert a ``requests`` row with ``locpar`` set to the matched value.

    Every entry of the module-level ``result`` that equals ``message.text``
    produces one new row (committed immediately).  ``chat`` is not used.
    """
    for candidate in result:
        if message.text != candidate:
            continue
        db_cursor = connloc.cursor()
        db_cursor.execute("INSERT INTO requests(locpar) VALUES (?);", [candidate])
        connloc.commit()
def name_stop(chat, message):
    """Insert a ``requests`` row with ``stoppar`` set to the matched value.

    ``result`` is iterated by key and each ``result[key]`` is scanned; every
    element equal to ``message.text`` produces one new row (committed
    immediately).  ``chat`` is not used.
    """
    for key in result:
        for stop in result[key]:
            if message.text != stop:
                continue
            db_cursor = connloc.cursor()
            db_cursor.execute("INSERT INTO requests(stoppar) VALUES (?);", [stop])
            connloc.commit()
I would break it up into a two step process by defining two methods, one for table generation and then another second method for populating the new table like this:
def create_table(ptbl):
    """ Assemble DDL (Data Definition Language) Table Create statement and build
    sqlite3 db table

    Args:
        ptbl (str): new db table name.  Only 'TBL_EXAMPLE' has a definition;
            for any other name no statement is executed.
    Returns:
        Status string: 'SUCCESS' when no database error occurred, 'FAIL'
        when sqlite3 raised an error (the error is printed).
    """
    # NOTE(review): the indentation of the original was lost; this assumes
    # only c.execute() was guarded by ``if sqlCmd != ''`` -- confirm.
    retval = ''
    sqlCmd = ''
    conn = None
    try:
        conn = sqlite3.connect(sqlite_file)
        c = conn.cursor()
        if ptbl == 'TBL_EXAMPLE':
            sqlCmd = 'CREATE TABLE IF NOT EXISTS ' + ptbl + ' (FIELD1 TEXT, FIELD2 INTEGER, FIELD3 TEXT, ' \
                     'FIELD4 TEXT, FIELD5 TEXT)'
        if sqlCmd != '':
            c.execute(sqlCmd)
        conn.commit()
        retval = 'SUCCESS'
    except sqlite3.Error as e:
        # Qualified name: a bare ``Error`` only resolves if the file also
        # does ``from sqlite3 import Error``.
        retval = 'FAIL'
        print(e)
    finally:
        # Close on the failure path too, so the connection is never leaked.
        if conn is not None:
            conn.close()
    return retval
and then populate it as you like with the values (inserting your new row with those two specific values you mentioned).
Now, I'm populating from a csv file here, but I think it'll give you a really good, solid start on this task.
def populate_tbl_file_marker_linenums(p_fml_tbl, p_fml_datafile):
    """ Read csv and load data into TBL_FILE_MARKER_LINENUMS table ...

    Args:
        p_fml_tbl (TEXT) target table name
        p_fml_datafile (TEXT) name of csv file to load into table
    Returns:
        retval (TEXT) - 'SUCCESS' when every row was inserted and committed;
        '' when sqlite3 raised an error (the error is printed).
    """
    retval = ''
    mode = 'r'
    conn = None
    try:
        conn = sqlite3.connect(sqlite_file)
        c = conn.cursor()
        # ``with`` closes the csv file even if the insert fails; the original
        # never closed it.
        with open(p_fml_datafile, mode) as csv_dataset:
            csv_reader = csv.reader(csv_dataset)
            # Each csv row must supply exactly five values, one per FIELD.
            c.executemany('INSERT INTO ' + p_fml_tbl + ' (FIELD1, FIELD2, FIELD3, FIELD4, FIELD5) VALUES (?, ?, ?, ?, ?)', csv_reader)
        conn.commit()
        retval = 'SUCCESS'
    except sqlite3.Error as e:
        # Qualified name: a bare ``Error`` only resolves if the file also
        # does ``from sqlite3 import Error``.
        print(e)
    finally:
        # Close on the failure path too, so the connection is never leaked.
        if conn is not None:
            conn.close()
    return retval

Inserting large amounts of data in sqlite

I am making an inverted index lookup table for my database in sqlite3. The database I have consists of certain bloggers and their posts.
I have a table post which has the columns id, text, blogger_id. This table consists of ~680 000 posts. And I want to make a table Blogger_Post_Word with the columns blogger_id, post_id, word_position, word_id.
I am using Python for this and I have tried a 2 ways before but both have their problems.
I saw online that the best way to insert large amounts of data is with a bulk insert. This means that I have to fetch all the posts and, for each word in a post, store it locally so I can do a bulk insert later. This requires way too much memory, which I don't have.
I have also tried inserting each word one by one, but this just takes way too long.
Is there an efficient way to solve this problem or an sql statement that does this in one go?
Edit:
This is the code I'm using now:
#lru_cache()
# NOTE(review): the line above looks like a disabled/garbled ``@lru_cache()``
# decorator -- confirm before re-enabling it.
def get_word_id(_word: str) -> int:
    """Return the first column of the ``Word`` row for ``_word``.

    The row is looked up by its ``word`` column; when it does not exist yet
    it is inserted and looked up again.

    The body previously read the module-level loop variable ``word`` instead
    of its own ``_word`` parameter, so it only worked when called from that
    loop with the same value; it now uses the parameter.
    """
    word_w_id = db.get_one('Word', ['word'], (_word,))
    if word_w_id is None:
        db.insert_one('Word', ['word'], (_word,))
        word_w_id = db.get_one('Word', ['word'], (_word,))
    return word_w_id[0]
# Index every post: one Blogger_Post_Word row per space-separated word,
# recording the word's position in the post.
index_columns = ['blogger_id', 'post_id', 'word_position', 'word_id']
for post_id, text, creation_date, blogger_id in db.get_all('Post'):
    for word_position, word in enumerate(text.split(' ')):
        db.insert_one(
            'Blogger_Post_Word',
            index_columns,
            (blogger_id, post_id, word_position, get_word_id(word)),
        )
The db is a class I wrote to handle the database, these are the functions in that class I use:
def get(self, table: str, where_cols: list = None, where_vals: tuple = None):
    """Execute ``SELECT * FROM <table>`` on ``self.c`` and return the result.

    When both ``where_cols`` and ``where_vals`` are given, the rows are
    filtered with ``col=?`` conditions joined by ``and`` and ``where_vals``
    is bound to the placeholders.  If either one is ``None`` the whole table
    is selected.
    """
    query = 'SELECT * FROM ' + table
    if where_cols is None or where_vals is None:
        return self.c.execute(query)
    conditions = ' and '.join([column + '=?' for column in where_cols])
    return self.c.execute(query + ' WHERE ' + conditions, where_vals)
def get_one(self, table: str, where_cols: list = None, where_vals: tuple = None):
    """Run ``self.get`` with the same arguments and return the first row.

    The row comes from ``self.c.fetchone()``.
    """
    self.get(table, where_cols, where_vals)
    first_row = self.c.fetchone()
    return first_row
def insert_one(self, table: str, columns: list, values: tuple):
    """Insert one row into ``table`` and commit right away.

    The statement is built by ``self.to_insert_query`` and ``values`` is
    bound to its placeholders.
    """
    self.c.execute(self.to_insert_query(table, columns), values)
    self.conn.commit()
def to_insert_query(self, table: str, columns: list):
    """Build a parameterised INSERT statement for ``table``.

    The statement lists ``columns`` comma-separated and has one ``?``
    placeholder per column.
    """
    column_list = ','.join(columns)
    placeholders = ','.join('?' for _ in columns)
    return 'INSERT INTO {}({}) VALUES ({})'.format(table, column_list, placeholders)
Okay I hope this helps anyone.
The problem was indeed that inserting rows one at a time is too slow, and I didn't have enough memory to store the whole list locally.
Instead I used a hybrid of the two and inserted the rows into the database incrementally.
I displayed the size of my list to determine the bottleneck. It seemed that 150 000 posts of the 680 000 was about my bottleneck. The total size of the list was about 4.5 GB.
from pympler.asizeof import asizeof
print(asizeof(indexed_data))
>>> 4590991936
I decide on an increment of 50 000 posts to keep everything running smooth.
This is now my code:
# Fetch every post up front; only the derived index rows are built and
# written in batches of ``increment`` posts.
c.execute('SELECT * FROM Post')
all_posts = c.fetchall()
increment = 50000
start, end = 0, increment
while start < len(all_posts):
    print(start, ' -> ', end)
    indexed_data = []
    for post_id, text, creation_date, blogger_id in all_posts[start:end]:
        # one (blogger id, post id, word position, word) tuple per word
        for word_position, word in enumerate(text.split(' ')):
            indexed_data.append((blogger_id, post_id, word_position, word))
    print('saving...')
    c.executemany('''
INSERT INTO Inverted_index (blogger_id, post_id, word_position, word)
VALUES (?, ?, ?, ?)
''', indexed_data)
    start += increment
    # advance the window, clamping its end to the number of posts
    end = min(end + increment, len(all_posts))

Inputting Python List into SQLite

I'm trying to add the list that is made after it parses through each line. As I go through each code I get different errors
(C:\Users\myname\Desktop\pythonCourse>dblesson2
Enter file name: mbox.txt
['uct.ac.za']
Traceback (most recent call last):
File "C:\Users\myname\Desktop\pythonCourse\dblesson2.py", line 25, in
<module>
#VALUES ( ?, 1 )''', ( email, ) )
sqlite3.OperationalError: near "#VALUES": syntax error)
and I know that it is because I am not passing the correct data to the database but I can't figure this out on my own.
# Question's original, non-working program (Python 2: raw_input and the print
# statement).  It is meant to keep one Counts row per value extracted from
# each 'From: ' line of the input file.  The defects flagged below are the
# ones the answer that follows enumerates; they are left unfixed on purpose.
# NOTE(review): module is spelled ``sqlite`` here but ``sqlite3`` is used below.
import sqlite
import re
conn = sqlite3.connect('emaildb.sqlite')
cur = conn.cursor()
# Start from an empty Counts table on every run.
cur.execute('''
DROP TABLE IF EXISTS Counts''')
cur.execute('''
CREATE TABLE Counts (email TEXT, count INTEGER)''')
fname = raw_input('Enter file name: ')
# An empty answer falls back to the short sample file.
if ( len(fname) < 1 ) : fname = 'mbox-short.txt'
fh = open(fname)
for line in fh:
if not line.startswith('From: ') : continue
line = line.rstrip()
# NOTE(review): re.findall returns a list, yet ``email`` is used as a single
# value below.
email = re.findall('#(\S+[a-zA-Z]+)', line)
print email
# NOTE(review): ``(email)`` is just ``email`` in parentheses, not a 1-tuple.
cur.execute('SELECT count FROM Counts WHERE email = ? ', (email))
row = cur.fetchone()
# NOTE(review): with the INSERT commented out, this ``if`` has no body.
if row is None:
#cur.execute('''INSERT INTO Counts (email, count)
#VALUES ( ?, 1 )''', ( email, ) )
else :
cur.execute('UPDATE Counts SET count=count+1 WHERE email = ?',
(email, ))
# This statement commits outstanding changes to disk each
# time through the loop - the program can be made faster
# by moving the commit so it runs only after the loop completes
conn.commit()
# https://www.sqlite.org/lang_select.html
# Report the ten highest counts.
sqlstr = 'SELECT email, count FROM Counts ORDER BY count DESC LIMIT 10'
print
print "Counts:"
for row in cur.execute(sqlstr) :
print str(row[0]), row[1]
cur.close()`
You have a number of small errors in your program. Let me try to list them:
re.findall returns a list, but you seem to treat it as a single string. Try email = email[0] to only consider the first element of the list.
Your first SELECT statement has (email). Putting a single item inside parentheses does not make it a tuple. Try (email,) or [email] instead.
The if after the for loop is meant to occur for each iteration of the for loop, so it must be indented by one stop.
The body of the if cannot be empty. Either uncomment that operation, or change it to pass.
The body of your final for loop needs to be indented one stop.
As a courtesy to Stack Overflow readers, please copy-paste entire stand-alone programs, not merely snippets.
Here is your program after I fixed the problems:
import sqlite3
import re

# Python 2 calls the line reader ``raw_input``; Python 3 renamed it ``input``.
try:
    read_line = raw_input
except NameError:
    read_line = input

conn = sqlite3.connect(':memory:')
cur = conn.cursor()

# Start from an empty Counts table on every run.
cur.execute('''
DROP TABLE IF EXISTS Counts''')
cur.execute('''
CREATE TABLE Counts (email TEXT, count INTEGER)''')

fname = read_line('Enter file name: ')
if len(fname) < 1:
    fname = 'mbox-short.txt'

# ``with`` closes the input file once the loop is done.
with open(fname) as fh:
    for line in fh:
        if not line.startswith('From: '):
            continue
        line = line.rstrip()
        # Capture what follows the '@' of the sender address.  (The page
        # showed '#' here, which never occurs in an address.)
        matches = re.findall(r'@(\S+[a-zA-Z]+)', line)
        if not matches:
            # No address on this line: skip it instead of raising IndexError.
            continue
        email = matches[0]
        cur.execute('SELECT count FROM Counts WHERE email = ? ', (email,))
        row = cur.fetchone()
        if row is None:
            cur.execute('''INSERT INTO Counts (email, count)
            VALUES ( ?, 1 )''', (email,))
        else:
            cur.execute('UPDATE Counts SET count=count+1 WHERE email = ?',
                        (email,))
        # This statement commits outstanding changes to disk each
        # time through the loop - the program can be made faster
        # by moving the commit so it runs only after the loop completes
        conn.commit()

# https://www.sqlite.org/lang_select.html
sqlstr = 'SELECT email, count FROM Counts ORDER BY count DESC LIMIT 10'

# Single-argument print() calls behave the same on Python 2 and Python 3.
print('')
print("Counts:")
for row in cur.execute(sqlstr):
    print('%s %s' % (row[0], row[1]))

cur.close()
import sqlite3

# Python 2 calls the line reader ``raw_input``; Python 3 renamed it ``input``.
try:
    read_line = raw_input
except NameError:
    read_line = input

conn = sqlite3.connect('emaildb.sqlite')
cur = conn.cursor()

# Start from an empty counts table on every run.
cur.execute('''DROP TABLE IF EXISTS counts''')
cur.execute('''CREATE TABLE counts (org TEXT, count INTEGER)''')

f_name = read_line('Enter file name: ')
if len(f_name) < 1:
    f_name = 'mbox.txt'

# ``with`` closes the input file once the loop is done.
with open(f_name) as fn:
    for line in fn:
        if not line.startswith('From: '):
            continue
        words = line.split()
        email = words[1]
        # The organisation is the part after the '@'.  (The page showed '#'
        # here, which made domain[1] raise IndexError on real addresses.)
        domain = email.split('@')
        if len(domain) < 2:
            # Not an address: skip the line instead of raising IndexError.
            continue
        organiz = domain[1]
        print(organiz)
        cur.execute('SELECT count FROM counts WHERE org=?', (organiz,))
        row = cur.fetchone()
        if row is None:
            cur.execute('''INSERT INTO counts (org, count) VALUES (?,1)''',
                        (organiz,))
        else:
            cur.execute('''UPDATE counts SET count=count+1 WHERE org=?''',
                        (organiz,))

# NOTE(review): the original indentation was lost; a single commit after the
# loop is assumed here -- the stored result is the same either way.
conn.commit()

Counting Organizations by using Python and Sqlite

This application will read the mailbox data (mbox.txt) count up the number email messages per organization (i.e. domain name of the email address) using a database with the following schema to maintain the counts.
CREATE TABLE Counts (org TEXT, count INTEGER)
When you have run the program on mbox.txt upload the resulting database file above for grading.
If you run the program multiple times in testing or with different files, make sure to empty out the data before each run.
You can use this code as a starting point for your application: http://www.pythonlearn.com/code/emaildb.py. The data file for this application is the same as in previous assignments: http://www.pythonlearn.com/code/mbox.txt.
First time to learn Sqlite. I am very confused about this assignment although it seems to be easy. I don't know how can I connect Python codes to Sqlite. It seems that they don't need the code as assignment. All the need is database file. How should I solve this problem. Don't know how to start it. Much appreciated it!
The starting code you've been given is a really good template for what you want to do. The difference is that, in that example, you're counting occurrences of email addresses, and in this problem you're counting domains.
The first thing to do is think about how to get domain names from email addresses. Building from the code given (which sets email = pieces[1]):
domain = email.split('@')[1]
This will split the email on the @ character and return the second item (the part after the '@'), which is the domain - the thing you want to count.
After this, go through the SQL statements in the code and replace 'email' with 'domain', so that you're counting the right thing.
One last thing - the template code checks 'mbox-short.txt' - you'll need to edit that as well for the file you want.
import sqlite3

conn = sqlite3.connect('emaildb2.sqlite')
cur = conn.cursor()

# Start from an empty Counts table on every run.
cur.execute('''
DROP TABLE IF EXISTS Counts''')
cur.execute('''
CREATE TABLE Counts (org TEXT, count INTEGER)''')

fname = input('Enter file name: ')
if len(fname) < 1:
    fname = 'mbox.txt'

# ``with`` closes the input file once the loop is done.
with open(fname) as fh:
    for line in fh:
        if not line.startswith('From: '):
            continue
        pieces = line.split()
        email = pieces[1]
        # The organisation is everything after the '@'.  (The page showed
        # '#' here; find() then returned -1 and the whole address was
        # counted as the organisation.)
        dom = email.find('@')
        org = email[dom + 1:]
        cur.execute('SELECT count FROM Counts WHERE org = ? ', (org,))
        row = cur.fetchone()
        if row is None:
            cur.execute('''INSERT INTO Counts (org, count)
            VALUES (?, 1)''', (org,))
        else:
            cur.execute('UPDATE Counts SET count = count + 1 WHERE org = ?',
                        (org,))

conn.commit()

# https://www.sqlite.org/lang_select.html
sqlstr = 'SELECT org, count FROM Counts ORDER BY count DESC LIMIT 10'
for row in cur.execute(sqlstr):
    print(str(row[0]), row[1])

cur.close()
I am still new here, but I want to thank Stidgeon for pointing me in the right direction. I suspect other Using Databases with Python students will end up here too.
There are two things you need to do with the source code.
domain = email.split('@')[1] http://www.pythonlearn.com/code/emaildb.py
Change from email TEXT to org TEXT when the database is generated.
That should get you on your way.
import sqlite3

conn = sqlite3.connect('emaildb.sqlite')
cur = conn.cursor()

# Start from an empty Counts table on every run.
cur.execute('DROP TABLE IF EXISTS Counts')
cur.execute('''
CREATE TABLE Counts (org TEXT, count INTEGER)''')

fname = input('Enter file name: ')
if len(fname) < 1:
    fname = 'mbox-short.txt'

# ``with`` closes the input file once the loop is done.
with open(fname) as fh:
    for line in fh:
        if not line.startswith('From: '):
            continue
        pieces = line.split()
        # Split the sender address on '@'; element 1 is the organisation.
        # (The page showed '#' here, which made org[1] raise IndexError.)
        org = pieces[1].split('@')
        if len(org) < 2:
            # Not an address: skip the line instead of raising IndexError.
            continue
        cur.execute('SELECT count FROM Counts WHERE org = ? ', (org[1],))
        row = cur.fetchone()
        if row is None:
            cur.execute('''INSERT INTO Counts (org, count)
            VALUES (?, 1)''', (org[1],))
        else:
            cur.execute('UPDATE Counts SET count = count + 1 WHERE org = ?',
                        (org[1],))

conn.commit()

# https://www.sqlite.org/lang_select.html
sqlstr = 'SELECT org, count FROM Counts ORDER BY count DESC LIMIT 10'
for row in cur.execute(sqlstr):
    print(str(row[0]), row[1])

cur.close()
print('-----------------done----------------')

Python cursor is returning number of rows instead of rows

Writing a script to clean up some data. It is super unoptimized, but this cursor is
returning the number of results of the LIKE query rather than the rows. What am I doing wrong?
#!/usr/bin/python
import re
import MySQLdb
import collections
# Connection settings: host (usually localhost), username, password and
# database name.
db = MySQLdb.connect(
    host="localhost",
    user="admin",
    passwd="",
    db="test",
)

# A cursor object is required to run queries.
cur = db.cursor()
cur.execute("SELECT * FROM vendor")

# Gather every space-separated word found in the second column of each row.
seen = []
for row in cur.fetchall():
    seen.extend(row[1].split(' '))
# Raw string so ``\d`` reaches the regex engine as written; in a plain
# string literal it is an invalid escape sequence that newer Python
# versions warn about.
_digits = re.compile(r'\d')


def contains_digits(d):
    """Return True if the string ``d`` contains at least one digit."""
    return bool(_digits.search(d))
# Words seen more than once, longer than one character and without digits.
count_word = collections.Counter(seen)
found_multi = [
    token
    for token in count_word
    if count_word[token] > 1 and not contains_digits(token) and len(token) > 1
]
unique_multiples = list(found_multi)
groups = {}
for word in unique_multiples:
    like_str = '%{}%'.format(word)
    # ``res`` receives whatever cur.execute() returns -- the rows themselves
    # are not fetched here.
    res = cur.execute("""SELECT * FROM vendor where name like %s""", like_str)
You are storing the result of cur.execute(), which is the number of rows. You are never actually fetching any of the results.
Use .fetchall() to get all result rows or iterate over the cursor after executing:
# For every repeated word, run the LIKE query and print each matching row.
for word in unique_multiples:
    like_str = '%' + word + '%'
    # Pass the parameter as a 1-tuple: execute() expects a sequence (or
    # mapping) of arguments, and a bare string is itself a sequence of
    # characters.
    cur.execute("""SELECT * FROM vendor where name like %s""", (like_str,))
    # Iterating the cursor after execute() yields the result rows.
    for row in cur:
        # print(row) works on both Python 2 and Python 3.
        print(row)

Categories

Resources