So I am creating a chat bot inspired off the tutorials from sentdex yet I ran into an error I can not figure out.
I am using the latest version python
Code for the chat bot:
<i>
import sqlite3
import json
from datetime import datetime
timeframe = '2007-02'
sql_transaction = []
connection = sqlite3.connect('{}.db' .format(timeframe))
c = connection.cursor()
def create_table():
c.execute("""CREATE TABLE IF NOT EXISTS parent_reply
(parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT,
comment TEXT, subreddit TEXT, unix INT, score INT)""")
def format_data(date):
data = data.replace("\n"," newlinechar ").replace("\r"," newlinechar
").replace('"',"'")
return data
def find_parent(pid):
try:
sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}'
LIMIT 1".format(pid)
c.execture(sql)
result = c.fetchone()
if result != None:
return result [0]
else: return False
except Exception as e:
#print ("find_parent", e)
return False
if __name__ == "__main__":
create_table()
row_counter = 0
paired_rows = 0
with open("/home/anonymouz/Desktop/redditdata/{}/RC_{}".format(timeframe.split('-')[0], timeframe ), buffering=1000) as f:
for row in f:
print(row)
row_counter += 1
row = json.loads(row)
parent_id = row['parent_id']
body = format_data(row['body'])
created_utc = row['created_utc']
score = row['score']
subreddit = row['subreddit']
parent_data = find_parent(parent_id)<i>
And the error I am getting:
Traceback (most recent call last):
File "/home/anonymouz/Desktop/redditdata/reddit.py", line 44, in <module>
body = format_data(row['body'])
File "/home/anonymouz/Desktop/redditdata/reddit.py", line 17, in format_data
data = data.replace("\n"," newlinechar ").replace("\r"," newlinechar ").replace('"',"'")
UnboundLocalError: local variable 'data' referenced before assignment
>>>
Thank you for anyone who is able to help and isn't rude about it :)
More clean version of code with correct indents:
https://pastebin.com/2ifpEQy9
def format_data(date):
Your parameter is 'date' but your local is 'data'.
Change your parameter name to 'data'
def format_data(data):
data = data.replace("\n"," newlinechar ").replace("\r"," newlinechar
").replace('"',"'")
return data
Related
I'm having a problem with an error: UnboundLocalError: local variable 'TweetID' referenced before assignment.
Now please, don't crap on the layout and functions of this code. I posted the entire code just in case since I don't know where the mistake is. I'm still new and I know it makes no sense, but 99% works, except this part. I really can't find the reason of the error, I know what it means but I don't see the problem.
A little background: I have to pull a certain tweet from a database and accept or reject it. When Accepted, it tweets to twitter. When rejected, you leave an explanation why. Kind of like a moderator. Moderator ID and StationID (where the tweet was posted) are also sent to the database.
If you need more info I'm happy to supply. Other constructive feedback is also appreciated, but I'm a real newbie so I can't deal with anything too complicated :)
Traceback (most recent call last):
File "D:\Users\vince\Desktop\Python Code Project Steam\Twitterzuil\MOD.py", line 74, in
Process()
File "D:\Users\vince\Desktop\Python Code Project Steam\Twitterzuil\MOD.py", line 68, in Process
Accept()
File "D:\Users\vince\Desktop\Python Code Project Steam\Twitterzuil\MOD.py", line 32, in Accept
TweetID = TweetID()
UnboundLocalError: local variable 'TweetID' referenced before assignment
import psycopg2
import time
conn = psycopg2.connect(
host="localhost",
database="postgres",
user="postgres",
password="")
cur = conn.cursor()
cur.execute("SELECT (Tweet, naam, inhoud, datum, tijd) from Bericht2 WHERE acceptrejectposted is NULL ORDER BY tijd ASC LIMIT 1")
result = cur.fetchall();
#login
mod_id = int(input("Geef uw moderator ID: "))
#VIEW TWEET
print(result)
def TweetID():
cur.execute("SELECT (Tweet) from Bericht2 WHERE acceptrejectposted is NULL ORDER BY tijd ASC LIMIT 1")
tweet_id = cur.fetchall()
return tweet_id
def Accept():
tweet_id = TweetID()
cur.execute(f"UPDATE Bericht2 SET acceptrejectposted = 1 WHERE tweet = {tweet_id}")
cur.execute(f"UPDATE Bericht2 SET moderator2mod_id = {mod_id} WHERE tweet = {tweet_id}")
print('TWEET ACCEPTED')
conn.commit()
PostTweet()
def Reject():
tweet_id = TweetID()
cur.execute(f"UPDATE Bericht2 SET acceptrejectposted = 0 WHERE tweet = {tweet_id}")
cur.execute(f"UPDATE Bericht2 SET moderator2mod_id = {mod_id} WHERE tweet = {tweet_id}")
Opmerking = input("Opmerking?")
cur.execute(f"UPDATE Bericht2 SET opmerking = '{Opmerking}' WHERE tweet = {tweet_id}")
print('TWEET REJECTED')
conn.commit()
def PostTweet():
tweet_id = TweetID()
cur.execute("SELECT (inhoud) from Bericht2 WHERE acceptrejectposted = 1 ORDER BY tijd ASC LIMIT 1")
Tweet = cur.fetchall()
cur.execute(f"UPDATE Bericht2 SET acceptrejectposted = 2 WHERE tweet = {tweet_id}")
conn.commit()
print("Posting to twitter...")
time.sleep(1)
print("Posted!")
def Process():
AccRej = input("Accept or Reject? ")
if AccRej == "Accept":
Accept()
elif AccRej == "Reject":
Reject()
else:
print("Invalid")
Process()
As Sylvester Kruin correctly noticed, you should not mixing variable and function names. Consider snake_case for variable names:
def Accept():
tweet_id = TweetID()
cur.execute(f"UPDATE Bericht2 SET acceptrejectposted = 1 WHERE tweet = {tweet_id}")
cur.execute(f"UPDATE Bericht2 SET moderator2mod_id = {mod_id} WHERE tweet = {tweet_id}")
print('TWEET ACCEPTED')
conn.commit()
PostTweet()
Im trying to put together a script that will run on a scheduled basis, updating a database located on the cloud. im using SSHTunnelForwarding to establish a connection with my database. its all packed within a function so that all i have to do is pass the query string within the function and it runs. this function is looping over about 1000 rows of data within a CSV. each row is being altered to the proper format and a sql expression is being created.
with open (CSV, 'r') as f:
reader = csv.reader(f)
data = next(reader)
#load new data
for newrow in reader:
row = []
for val in newrow:
try:
val = float(val)
except:
val = "'"+val.strip()+"'"
row.append(val)
id = row[0]
phase_order = row[1]
Ajera_Project_Key = row[2]
project_id = row[3]
Project_Description = row[4]
Ajera_Client_Key = row[5]
Client = row[6]
Ajera_PM_Key = row[7]
Project_Manager = row[8]
Ajera_PIC_Key = row[9]
Principal_In_Charge = row[10]
title = row[11]
Ajera_Dept_Key = row[12]
Project_Status = row[13]
Phase_Status = row[14]
Department = row[15]
Project_Type = row[16]
start = row[17]
end = row[18]
hours_budgeted = row[19]
Hours_Worked = row[20]
Hours_Remaining = row[21]
Total_Contract_Amount = row[22]
Billed = row[23]
Billed_Labor = row[24]
Billed_Hours = row[25]
WIP = row[26]
Spent = row[27]
Spent_Labor = row[28]
FTEs = row[29]
q = 'INSERT INTO project_phases_test VALUES ({0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13},{14},{15},{16},{17},{18},{19},{20},{21},{22},{23},{24},{25},{26},{27},{28},{29});'.format(id,phase_order,Ajera_Project_Key,project_id,Project_Description,Ajera_Client_Key,Client,Ajera_PM_Key,Project_Manager,Ajera_PIC_Key,Principal_In_Charge,title,Ajera_Dept_Key,Project_Status,Phase_Status,Department,Project_Type,start,end,hours_budgeted,Hours_Worked,Hours_Remaining,Total_Contract_Amount,Billed,Billed_Labor,Billed_Hours,WIP,Spent,Spent_Labor,FTEs)
print (q)
query(q)
the issue im having is when i try to enter a query string other than a simple select. every insert, update, delete statement i pass to it, i get an error that says 'NoneType' object is not iterable. however, as i mentioned before, if i pass a select statement, i get the result im looking for and i dont get any errors.
here is the function
def query(q):
try:
with SSHTunnelForwarder(
(host, 22),
ssh_username=ssh_username,
ssh_password=ssh_password,
ssh_private_key=ssh_private_key,
remote_bind_address=(localhost, 3306)
) as server:
conn = db.connect(host=localhost,
port=server.local_bind_port,
user=user,
passwd=password,
db=database)
pd.read_sql_query(q, conn)
conn.close()
return
except Exception as e:
print e
pass
the error is coming from the print e statement.
full error
Traceback (most recent call last):
File "C:\MAMP\htdocs\WIGHTcloud\dataLoader\load\loader.py", line 183, in <module>
query(drop) # drop phases table
File "C:\MAMP\htdocs\WIGHTcloud\dataLoader\load\loader.py", line 140, in query
pd.read_sql_query(q, conn)
File "C:\Python27\ArcGIS10.6\lib\site-packages\pandas\io\sql.py", line 431, in read_sql_query
parse_dates=parse_dates, chunksize=chunksize)
File "C:\Python27\ArcGIS10.6\lib\site-packages\pandas\io\sql.py", line 1600, in read_query
columns = [col_desc[0] for col_desc in cursor.description]
TypeError: 'NoneType' object is not iterable
using what nilleb said, i plugged it into the sshTunnelForwarder and got it working.
with sshtunnel.SSHTunnelForwarder(ssh_address_or_host = ssh_host,
ssh_username = ssh_username,
ssh_password = ssh_password,
ssh_pkey = ssh_pkey,
remote_bind_address=('localhost', 3306),
) as tunnel:
mydb = mysql.connector.connect(
host=localhost,
user=sql_username,
passwd=sql_password,
database=db_name,
port=tunnel.local_bind_port
)
mycursor = mydb.cursor()
sql = "INSERT INTO customers (name, address) VALUES (%s, %s)"
val = ("John", "Highway 21")
mycursor.execute(sql, val)
mydb.commit()
print(mycursor.rowcount, "record inserted.")
I would say that this is expected. An instruction like SELECT will produce an output, including the table headers.
While other DDL statements do not produce an output and thus raise an exception in the pd.read_sql_query function.
You should rather consider retrieving a cursor, and executing the DDL statement on it, like
mycursor = conn.cursor()
sql = "INSERT INTO customers (name, address) VALUES (%s, %s)"
val = ("John", "Highway 21")
mycursor.execute(sql, val)
More info here: https://www.w3schools.com/python/python_mysql_insert.asp
import json
import urllib
import sqlite3
import temp
def loading():
url = 'https://jobs.github.com/positions.json?page=1' # URL for API 1-5json_obj = urllib.urlopen(url)
response = urllib.urlopen(url)
data = json.load(response) # loads the url and set it into data variable
for item in data[0].keys():
print(item)
return data # Get the keys
# def loading():
# print " LOADING API(s)"
# urllib.urlopen('https://jobs.github.com/positions.json?page=1')
# temp = json.dumps(data[1])
# print (json.dumps(data[1]))
# print (" ")
def createDB(data):
conn = sqlite3.connect('comp.db')
c = conn.cursor()
# Create table
c.execute('''CREATE TABLE IF NOT EXISTS comp
(description text, title text, url text, company_logo text, company text, id integer primary key, company_url text, how_to_apply text,
location text, type text, created_at timestamp)''')
temp_values = list(tuple())
for item in data:
print (item)
list_of_values = [v for k, v in item.items()]
tuple_of_values = tuple(list_of_values)
temp_values.append(tuple_of_values)
c.executemany('INSERT INTO comp VALUES (?,?,?,?,?,?,?,?,?,?,?)', temp_values)
#TO DO
# Make sub sets for each category call
# put in category for each ? in table
def main():
data = loading()
createDB(data)
main()
This code takes a URL from GitHub Jobs and loads the data into a SQL data table into categories (title, location ect..) It runs in Python 3 terminal but cannot run in Pycharm which has me stumped.
Error messages:
File "/Users/John/PycharmProjects/N/Sprint_2/Database.py", line 44, in main createDB(data) File "/Users/John/PycharmProjects/N/Sprint_2/Database.py", line 36, in createDB c.executemany('INSERT INTO comp VALUES (?,?,?,?,?,?,?,?,?,?,?)', temp_values) sqlite3.IntegrityError: datatype mismatch
I keep getting the following error when trying to access a variable from one function inside another function.
NameError: global name 'savemovieurl' is not defined
how can i access the "savemovieurl" from the function "tmdb_posters" inside "dynamic_data_entry" to save it to the database?
i've tried adding global to the variable name, and had no success.
import requests
import urllib
import sqlite3
import time
import datetime
import random
movie = raw_input('Enter your movie: ')
print('You searched for: ', movie)
def imdb_id_from_title(title):
""" return IMDb movie id for search string
Args::
title (str): the movie title search string
Returns:
str. IMDB id, e.g., 'tt0095016'
None. If no match was found
"""
pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
url = pattern.format(movie_title=urllib.quote(title))
r = requests.get(url)
res = r.json()
# sections in descending order or preference
for section in ['popular','exact','substring']:
key = 'title_' + section
if key in res:
return res[key][0]['id']
if __name__=="__main__":
title = movie
imdb_info_returned = ("{1}".format(title, imdb_id_from_title(title)))
print imdb_info_returned
import os
import requests
CONFIG_PATTERN = 'http://api.themoviedb.org/3/configuration?api_key={key}'
IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
KEY = '47db65094c31430c5a2b65112088d70e'
imdb_id_input = imdb_info_returned
print('You searched for: ', imdb_id_input)
def _get_json(url):
r = requests.get(url)
return r.json()
def _download_images(urls, path='.'):
"""download all images in list 'urls' to 'path' """
for nr, url in enumerate(urls):
r = requests.get(url)
filetype = r.headers['content-type'].split('/')[-1]
filename = 'poster_{0}.{1}'.format(nr+1,filetype)
filepath = os.path.join(path, filename)
with open(filepath,'wb') as w:
w.write(r.content)
def get_poster_urls(imdbid):
""" return image urls of posters for IMDB id
returns all poster images from 'themoviedb.org'. Uses the
maximum available size.
Args:
imdbid (str): IMDB id of the movie
Returns:
list: list of urls to the images
"""
config = _get_json(CONFIG_PATTERN.format(key=KEY))
base_url = config['images']['base_url']
sizes = config['images']['poster_sizes']
"""
'sizes' should be sorted in ascending order, so
max_size = sizes[-1]
should get the largest size as well.
"""
def size_str_to_int(x):
return float("inf") if x == 'original' else int(x[1:])
max_size = max(sizes, key=size_str_to_int)
posters = _get_json(IMG_PATTERN.format(key=KEY,imdbid=imdbid))['posters']
poster_urls = []
rel_path = posters[0]['file_path']
url = "{0}{1}{2}".format(base_url, max_size, rel_path)
poster_urls.append(url)
return poster_urls
def tmdb_posters(imdbid, count=None, outpath='.'):
urls = get_poster_urls(imdbid)
if count is not None:
urls = urls[:count]
_download_images(urls, outpath)
savemovieurl = urls
print savemovieurl
conn = sqlite3.connect('tutorial.db')
c = conn.cursor()
def create_table():
c.execute("CREATE TABLE IF NOT EXISTS movies(unix REAL, datestamp TEXT, keyword TEXT, value REAL, moviename TEXT, movieimage TEXT, movieurl TEXT)")
def data_entry():
c.execute("INSERT INTO movies VALUES(1452549219,'2016-01-11 13:53:39','Python',6,'movienamehere1', 'savemovieurl', 'movieurlhere1')")
conn.commit()
c.close()
conn.close()
def dynamic_data_entry(argument) :
unix = time.time()
date = str(datetime.datetime.fromtimestamp(unix).strftime('%Y-%m-%d %H: %M: %S'))
keyword = 'keyword_string'
movieurl = 'bing.com'
value = random.randrange(0,10)
savemovieurl2 = 'testimageurl.com'
print argument
c.execute("INSERT INTO movies (unix, datestamp, keyword, value, moviename, movieimage, movieurl) VALUES (?, ?, ?, ?, ?, ?, ?)", (unix, date, keyword, value, movie, savemovieurl2, movieurl))
conn.commit()
create_table()
#data_entry()
for i in range(10) :
dynamic_data_entry(savemovieurl)
time.sleep(1)
c.close()
conn.close()
if __name__=="__main__":
tmdb_posters(imdb_id_input)
I think this has already been answered here: How do I use a variable so that it is inside and outside of a function
I know I should comment this however for some reason I can't so I just thought I'd write it as an answer instead. I hope this helps.
The program that I created will be used to parse xml files and put it the parse datas in the database. Though my code is okay now and running, my instructor has a comment in my code, BTW this is my code:
import os
import time
import MySQLdb
import ConfigParser
import elementtree.ElementTree as ET
def update_database(article_code, date_received, s100rsd, remark_text, db):
cur = db.cursor()
try:
cur_query = cur.execute("""INSERT INTO tblS100CurrentListing """
"""(article_Code, dateReceived, s100RSD, remarks) VALUES (%s, %s, %s, %s) """
"""ON DUPLICATE KEY UPDATE revisedRSD = %s, remarks = %s """,
(article_code, date_received, s100rsd, remark_text, s100rsd, remark_text))
db.commit()
except MySQLdb.Error, e:
print "An error has been passed %s" %e
db.rollback
rows_affected = cur.rowcount
if rows_affected > 0:
print "Changes made in the database"
else:
print "Nothing is change in the database"
def parse_xml(source_path, xml_file):
# Alvin: !!! globals?
global article_code
global date_received
global s100rsd
global remark_text
article_code = xml_file.split('.')[0]
tree = ET.parse(xml_file)
root = tree.getroot()
order = root.find('order')
order_time = order.find('time')
year = order_time.attrib['yr']
month = order_time.attrib['month']
day = order_time.attrib['day']
hour = order_time.attrib['hr']
min = order_time.attrib['min']
sec = order_time.attrib['sec']
date_received = year + month + day + hour + min + sec
due_date = order.find('due-date')
due_date_time = due_date.find('time')
yr = due_date_time.attrib['yr']
month = due_date_time.attrib['month']
day = due_date_time.attrib['day']
s100rsd = "%s-%s-%s" %(yr, month, day)
item_info = order.find('item-info')
item_remarks = item_info.find('item-remarks')
item_remark_list = item_remarks.findall('item-remark')
item_remark_len = len(item_remark_list) - 1
item_remark = item_remark_list[item_remark_len]
remark = item_remark.find('remark')
remark_text = remark.text
def main():
config = ConfigParser.ConfigParser()
config.readfp(open('part4b.ini'))
server = config.get('main', 'Server')
port = config.get('main', 'Port')
port = int(port)
schema = config.get('main', 'Schema')
table = config.get('main', 'Table')
user = config.get('main', 'User')
password = config.get('main', 'Password')
source_path = config.get('main', 'filepath')
db = MySQLdb.connect(server, user, password, schema, port)
xml_list = os.listdir(source_path)
for xml_file in xml_list:
if xml_file.endswith('.xml'):
parse_xml(source_path, xml_file)
update_database(article_code, date_received, s100rsd, remark_text, db)
db.close()
print "This will close after 2 seconds . ."
time.sleep(2)
if __name__ == '__main__':
main()
In parse_xml function he don't want me to use global variables in it. How can I use those variables in my main without declaring it as globals?
Thanks for all your help.
return them from the function
eg
return article_code, date_received, s100rsd, remark_text
This is really returning a single tuple containing 4 items
you can extract them at the other end like this
article_code, date_received, s100rsd, remark_text = parse_xml(...)
What you'd normally do is return a "data object", i.e. an object containing the relevant data:
class dto(object):
def __init__(self, **kw):
self.__dict__.update(kw)
def parse_xml(source_path, xml_file):
data = dto(article_code = '1234',
date_received = 'abc',
s100rsd = '%s-%s-%s' % ('ab', 'cd', 'efgh'),
remark_text = 'eh5jhe5')
return data
data = parse_xml('../', 'abc.xml')
Then just use data.data_received as you'd expect.
Also note that your lines
os.listdir(source_path)
...
if xml_file.endswith('.xml'):
is real nice to replace with
import glob
xml_list = glob.glob(os.path.join(source_path, '*.xml'))