Declaring global variables in a function in Python

The program I created parses XML files and inserts the parsed data into a database. My code runs fine, but my instructor left a comment in it. This is my code:
import os
import time
import MySQLdb
import ConfigParser
import elementtree.ElementTree as ET

def update_database(article_code, date_received, s100rsd, remark_text, db):
    cur = db.cursor()
    try:
        cur_query = cur.execute("""INSERT INTO tblS100CurrentListing """
            """(article_Code, dateReceived, s100RSD, remarks) VALUES (%s, %s, %s, %s) """
            """ON DUPLICATE KEY UPDATE revisedRSD = %s, remarks = %s """,
            (article_code, date_received, s100rsd, remark_text, s100rsd, remark_text))
        db.commit()
    except MySQLdb.Error, e:
        print "An error has been passed %s" % e
        db.rollback()
    rows_affected = cur.rowcount
    if rows_affected > 0:
        print "Changes made in the database"
    else:
        print "Nothing is changed in the database"

def parse_xml(source_path, xml_file):
    # Alvin: !!! globals?
    global article_code
    global date_received
    global s100rsd
    global remark_text
    article_code = xml_file.split('.')[0]
    tree = ET.parse(xml_file)
    root = tree.getroot()
    order = root.find('order')
    order_time = order.find('time')
    year = order_time.attrib['yr']
    month = order_time.attrib['month']
    day = order_time.attrib['day']
    hour = order_time.attrib['hr']
    min = order_time.attrib['min']
    sec = order_time.attrib['sec']
    date_received = year + month + day + hour + min + sec
    due_date = order.find('due-date')
    due_date_time = due_date.find('time')
    yr = due_date_time.attrib['yr']
    month = due_date_time.attrib['month']
    day = due_date_time.attrib['day']
    s100rsd = "%s-%s-%s" % (yr, month, day)
    item_info = order.find('item-info')
    item_remarks = item_info.find('item-remarks')
    item_remark_list = item_remarks.findall('item-remark')
    item_remark_len = len(item_remark_list) - 1
    item_remark = item_remark_list[item_remark_len]
    remark = item_remark.find('remark')
    remark_text = remark.text

def main():
    config = ConfigParser.ConfigParser()
    config.readfp(open('part4b.ini'))
    server = config.get('main', 'Server')
    port = config.get('main', 'Port')
    port = int(port)
    schema = config.get('main', 'Schema')
    table = config.get('main', 'Table')
    user = config.get('main', 'User')
    password = config.get('main', 'Password')
    source_path = config.get('main', 'filepath')
    db = MySQLdb.connect(server, user, password, schema, port)
    xml_list = os.listdir(source_path)
    for xml_file in xml_list:
        if xml_file.endswith('.xml'):
            parse_xml(source_path, xml_file)
            update_database(article_code, date_received, s100rsd, remark_text, db)
    db.close()
    print "This will close after 2 seconds . ."
    time.sleep(2)

if __name__ == '__main__':
    main()
In the parse_xml function he doesn't want me to use global variables. How can I use those variables in main() without declaring them as globals?
Thanks for all your help.

Return them from the function, e.g.

return article_code, date_received, s100rsd, remark_text

This really returns a single tuple containing the 4 items; you can unpack them at the other end like this:

article_code, date_received, s100rsd, remark_text = parse_xml(...)
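A minimal sketch of how that fits the code in the question (the body of parse_xml stays the same apart from dropping the global statements and adding the return; only the relevant parts are shown):

def parse_xml(source_path, xml_file):
    article_code = xml_file.split('.')[0]
    # ... parse the XML exactly as before, filling date_received, s100rsd, remark_text ...
    return article_code, date_received, s100rsd, remark_text

def main():
    # ... config and MySQLdb.connect() setup as before ...
    for xml_file in os.listdir(source_path):
        if xml_file.endswith('.xml'):
            article_code, date_received, s100rsd, remark_text = parse_xml(source_path, xml_file)
            update_database(article_code, date_received, s100rsd, remark_text, db)
    db.close()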

What you'd normally do is return a "data object", i.e. an object containing the relevant data:

class dto(object):
    def __init__(self, **kw):
        self.__dict__.update(kw)

def parse_xml(source_path, xml_file):
    data = dto(article_code='1234',
               date_received='abc',
               s100rsd='%s-%s-%s' % ('ab', 'cd', 'efgh'),
               remark_text='eh5jhe5')
    return data

data = parse_xml('../', 'abc.xml')

Then just use data.date_received as you'd expect.
Also note that your lines

os.listdir(source_path)
...
if xml_file.endswith('.xml'):

are nicely replaced with

import glob
xml_list = glob.glob(os.path.join(source_path, '*.xml'))
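Putting the two suggestions together, the loop in main() could look roughly like this (a sketch; note that glob returns paths that include the directory, so inside parse_xml the article code should be taken from os.path.basename(xml_path) rather than the raw argument):

import glob
import os

def main():
    # ... config and MySQLdb.connect() setup as in the question ...
    for xml_path in glob.glob(os.path.join(source_path, '*.xml')):
        data = parse_xml(source_path, xml_path)
        update_database(data.article_code, data.date_received,
                        data.s100rsd, data.remark_text, db)
    db.close()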

Related

Error while creating chatbot using python (Row data)

So I am creating a chatbot inspired by the tutorials from sentdex, yet I ran into an error I cannot figure out.
I am using the latest version of Python.
Code for the chatbot:
import sqlite3
import json
from datetime import datetime

timeframe = '2007-02'
sql_transaction = []

connection = sqlite3.connect('{}.db'.format(timeframe))
c = connection.cursor()

def create_table():
    c.execute("""CREATE TABLE IF NOT EXISTS parent_reply
        (parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT,
        comment TEXT, subreddit TEXT, unix INT, score INT)""")

def format_data(date):
    data = data.replace("\n", " newlinechar ").replace("\r", " newlinechar ").replace('"', "'")
    return data

def find_parent(pid):
    try:
        sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
        c.execture(sql)
        result = c.fetchone()
        if result != None:
            return result[0]
        else:
            return False
    except Exception as e:
        # print("find_parent", e)
        return False

if __name__ == "__main__":
    create_table()
    row_counter = 0
    paired_rows = 0
    with open("/home/anonymouz/Desktop/redditdata/{}/RC_{}".format(timeframe.split('-')[0], timeframe), buffering=1000) as f:
        for row in f:
            print(row)
            row_counter += 1
            row = json.loads(row)
            parent_id = row['parent_id']
            body = format_data(row['body'])
            created_utc = row['created_utc']
            score = row['score']
            subreddit = row['subreddit']
            parent_data = find_parent(parent_id)
And the error I am getting:
Traceback (most recent call last):
  File "/home/anonymouz/Desktop/redditdata/reddit.py", line 44, in <module>
    body = format_data(row['body'])
  File "/home/anonymouz/Desktop/redditdata/reddit.py", line 17, in format_data
    data = data.replace("\n"," newlinechar ").replace("\r"," newlinechar ").replace('"',"'")
UnboundLocalError: local variable 'data' referenced before assignment
Thanks to anyone who is able to help and isn't rude about it :)
A cleaner version of the code with correct indents:
https://pastebin.com/2ifpEQy9
def format_data(date):

Your parameter is 'date' but your local is 'data'. Change your parameter name to 'data':

def format_data(data):
    data = data.replace("\n", " newlinechar ").replace("\r", " newlinechar ").replace('"', "'")
    return data
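The UnboundLocalError happens because data is assigned inside the function, so Python treats it as a local name, and the right-hand side reads it before that assignment has ever run. A quick check of the fixed function with a made-up input:

>>> format_data('line one\nline "two"')
"line one newlinechar line 'two'"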

Python code not creating tables in the database but able to query the results (Postgres)

My use case is to create a temp table in the Postgres database, fetch records from it, and insert them into a different table.
The code I used is:
import psycopg2
import sys
import pprint
from __future__ import print_function
from os.path import join, dirname, abspath
import xlrd
import os.path

newlist = []
itemidlist = []

def main():
    conn_string = "host='prod-dump.cvv9i14mrv4k.us-east-1.rds.amazonaws.com' dbname='ebdb' user='ebroot' password='*********'"
    # print the connection string we will use to connect
    # print "Connecting to database" % (conn_string)
    # get a connection, if a connect cannot be made an exception will be raised here
    conn = psycopg2.connect(conn_string)
    # conn.cursor will return a cursor object, you can use this cursor to perform queries
    cursor = conn.cursor()
    dealer_id = input("Please enter dealer_id: ")
    group_id = input("Please enter group_id: ")
    scriptpath = os.path.dirname('__file__')
    filename = os.path.join(scriptpath, 'Winco - Gusti.xlsx')
    xl_workbook = xlrd.open_workbook(filename, "rb")
    xl_sheet = xl_workbook.sheet_by_index(0)
    print('Sheet Name: %s' % xl_sheet.name)
    row = xl_sheet.row(0)
    from xlrd.sheet import ctype_text
    print('(Column #) type:value')
    for idx, cell_obj in enumerate(row):
        cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
        # print('(%s) %s %s' % (idx, cell_type_str, cell_obj.value))
    num_cols = xl_sheet.ncols
    for row_idx in range(0, xl_sheet.nrows):    # Iterate through rows
        num_cols = xl_sheet.ncols
        id_obj = xl_sheet.cell(row_idx, 1)      # Get cell object by row, col
        itemid = id_obj.value
        # if itemid not in itemidlist:
        itemidlist.append(itemid)
    # execute our Query
    '''
    cursor.execute("""
        if not exists(SELECT 1 FROM model_enable AS c WHERE c.name = %s);
        BEGIN;
            INSERT INTO model_enable (name) VALUES (%s)
        END;
        """ % (itemid, itemid))
    '''
    cursor.execute("drop table temp_mbp1")
    try:
        cursor.execute("SELECT p.model_no, pc.id as PCid, g.id AS GROUPid into public.temp_mbp1 FROM products p, \
            model_enable me, products_clients pc, groups g WHERE p.model_no = me.name \
            and p.id = pc.product_id and pc.client_id = %s and pc.client_id = g.client_id and g.id = %s" \
            % (dealer_id, group_id))
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    cursor.execute("select count(*) from public.temp_mbp1")
    # retrieve the records from the database
    records = cursor.fetchall()
    # print out the records using pretty print
    # note that the NAMES of the columns are not shown, instead just indexes.
    # for most people this isn't very useful so we'll show you how to return
    # columns as a dictionary (hash) in the next example.
    pprint.pprint(records)

if __name__ == "__main__":
    main()
The try/except block in the middle of the program is not throwing any error, but the table is not getting created in the Postgres database, as I can see in the data admin.
The output shown is:
Please enter dealer_id: 90
Please enter group_id: 13
Sheet Name: Winco Full 8_15_17
(Column #) type:value
[(3263,)]
Thanks,
Santosh
You didn't commit the changes, so they aren't saved in the database. Add to the bottom, just below the pprint statement:
conn.commit()
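As a side note, psycopg2 connections can also be used as context managers: leaving a with conn: block commits the transaction, or rolls it back if an exception escaped the block. A sketch of how that could wrap the database work (the context manager commits, but it does not close the connection):

conn = psycopg2.connect(conn_string)
with conn:
    with conn.cursor() as cursor:
        cursor.execute("select count(*) from public.temp_mbp1")
        records = cursor.fetchall()
# the with-block has committed at this point; close explicitly when done
conn.close()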

Python3 + beatbox: not able to queryMore

I have logged into my SFDC org using the instructions provided here: http://tomhayden3.com/2013/08/04/salesforce-python/. However, I am not able to implement the queryMore part of it. It just does nothing. When I print(query_locator) it prints out an ID with a -500 suffix. Can someone please look into this code and highlight what I am doing wrong?
#!/usr/bin/env python3
import beatbox

# Connecting to SFDC
sf = beatbox._tPartnerNS
service = beatbox.Client()
service.serverUrl = 'https://test.salesforce.com/services/Soap/u/38.0'
service.login('my-username', 'my-password')

query_result = service.query("SELECT id, Name, Department FROM User")
records = query_result['records']  # dictionary of results!
total_records = query_result['size']  # full size of results
query_locator = query_result['queryLocator']  # get the mystical queryLocator

# loop through, pulling the next 500 and appending it to your records dict
while query_result['done'] is False and len(records) < total_records:
    query_result = self._service.queryMore(query_locator)
    query_locator = query_result['queryLocator']  # get the updated queryLocator
    records = records + query_result['records']  # append to records dictionary

print(records['id'])  # This should print all IDs??? But it is not.
The examples here resolved the issue for me.
https://github.com/superfell/Beatbox/blob/master/examples/export.py
#!/usr/bin/env python3
import beatbox
import sqlalchemy

engine_str = 'mysql+mysqlconnector://db-username:db-pass@localhost/db-name'
engine = sqlalchemy.create_engine(engine_str, echo=False, encoding='utf-8')
connection = engine.connect()

sf = beatbox._tPartnerNS
service = beatbox.Client()
service.serverUrl = 'https://test.salesforce.com/services/Soap/u/38.0'  # hard-coded since I was testing against the sandbox only

def export(objectSOQL):
    service.login('sfdc-username', 'sfdc-pass')
    query_result = service.query(objectSOQL)
    while True:
        for row in query_result[sf.records:]:
            SQL_query = 'INSERT INTO user(' \
                        'id, ' \
                        'name, ' \
                        'department) ' \
                        'VALUES(' \
                        '\"{}\",\"{}\",\"{}\")' \
                        .format(
                            row[2],
                            row[3],
                            row[4]
                        )
            try:
                connection.execute(SQL_query)
            except Exception as e:
                print(e)
        # This is the key part: it keeps pulling records beyond the first 500
        # until sf.done becomes true, meaning the query has been completed.
        if str(query_result[sf.done]) == 'true':
            break
        query_result = service.queryMore(str(query_result[sf.queryLocator]))

SOQL = 'SELECT id, Name, Department FROM User'
export(SOQL)
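As a design note, building the INSERT with str.format leaves you open to quoting problems (and SQL injection if the data isn't trusted). SQLAlchemy supports bound parameters instead; a sketch reusing the connection and row from the code above, where :id, :name and :department are SQLAlchemy's named-parameter syntax:

from sqlalchemy import text

insert_stmt = text('INSERT INTO user (id, name, department) VALUES (:id, :name, :department)')
connection.execute(insert_stmt, {'id': row[2], 'name': row[3], 'department': row[4]})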

Cannot Access Variable Outside of Function

I keep getting the following error when trying to access a variable from one function inside another function:
NameError: global name 'savemovieurl' is not defined
How can I access savemovieurl from the function tmdb_posters inside dynamic_data_entry to save it to the database?
I've tried adding global to the variable name, with no success.
import requests
import urllib
import sqlite3
import time
import datetime
import random

movie = raw_input('Enter your movie: ')
print('You searched for: ', movie)

def imdb_id_from_title(title):
    """ return IMDb movie id for search string

    Args::
        title (str): the movie title search string
    Returns:
        str. IMDB id, e.g., 'tt0095016'
        None. If no match was found
    """
    pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
    url = pattern.format(movie_title=urllib.quote(title))
    r = requests.get(url)
    res = r.json()
    # sections in descending order or preference
    for section in ['popular', 'exact', 'substring']:
        key = 'title_' + section
        if key in res:
            return res[key][0]['id']

if __name__ == "__main__":
    title = movie
    imdb_info_returned = ("{1}".format(title, imdb_id_from_title(title)))
    print imdb_info_returned

import os
import requests

CONFIG_PATTERN = 'http://api.themoviedb.org/3/configuration?api_key={key}'
IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
KEY = '47db65094c31430c5a2b65112088d70e'

imdb_id_input = imdb_info_returned
print('You searched for: ', imdb_id_input)

def _get_json(url):
    r = requests.get(url)
    return r.json()

def _download_images(urls, path='.'):
    """download all images in list 'urls' to 'path' """
    for nr, url in enumerate(urls):
        r = requests.get(url)
        filetype = r.headers['content-type'].split('/')[-1]
        filename = 'poster_{0}.{1}'.format(nr+1, filetype)
        filepath = os.path.join(path, filename)
        with open(filepath, 'wb') as w:
            w.write(r.content)

def get_poster_urls(imdbid):
    """ return image urls of posters for IMDB id

    returns all poster images from 'themoviedb.org'. Uses the
    maximum available size.
    Args:
        imdbid (str): IMDB id of the movie
    Returns:
        list: list of urls to the images
    """
    config = _get_json(CONFIG_PATTERN.format(key=KEY))
    base_url = config['images']['base_url']
    sizes = config['images']['poster_sizes']
    """
    'sizes' should be sorted in ascending order, so
        max_size = sizes[-1]
    should get the largest size as well.
    """
    def size_str_to_int(x):
        return float("inf") if x == 'original' else int(x[1:])
    max_size = max(sizes, key=size_str_to_int)

    posters = _get_json(IMG_PATTERN.format(key=KEY, imdbid=imdbid))['posters']
    poster_urls = []
    rel_path = posters[0]['file_path']
    url = "{0}{1}{2}".format(base_url, max_size, rel_path)
    poster_urls.append(url)
    return poster_urls

def tmdb_posters(imdbid, count=None, outpath='.'):
    urls = get_poster_urls(imdbid)
    if count is not None:
        urls = urls[:count]
    _download_images(urls, outpath)
    savemovieurl = urls
    print savemovieurl

conn = sqlite3.connect('tutorial.db')
c = conn.cursor()

def create_table():
    c.execute("CREATE TABLE IF NOT EXISTS movies(unix REAL, datestamp TEXT, keyword TEXT, value REAL, moviename TEXT, movieimage TEXT, movieurl TEXT)")

def data_entry():
    c.execute("INSERT INTO movies VALUES(1452549219,'2016-01-11 13:53:39','Python',6,'movienamehere1', 'savemovieurl', 'movieurlhere1')")
    conn.commit()
    c.close()
    conn.close()

def dynamic_data_entry(argument):
    unix = time.time()
    date = str(datetime.datetime.fromtimestamp(unix).strftime('%Y-%m-%d %H: %M: %S'))
    keyword = 'keyword_string'
    movieurl = 'bing.com'
    value = random.randrange(0, 10)
    savemovieurl2 = 'testimageurl.com'
    print argument
    c.execute("INSERT INTO movies (unix, datestamp, keyword, value, moviename, movieimage, movieurl) VALUES (?, ?, ?, ?, ?, ?, ?)", (unix, date, keyword, value, movie, savemovieurl2, movieurl))
    conn.commit()

create_table()
#data_entry()

for i in range(10):
    dynamic_data_entry(savemovieurl)
    time.sleep(1)

c.close()
conn.close()

if __name__ == "__main__":
    tmdb_posters(imdb_id_input)
I think this has already been answered here: How do I use a variable so that it is inside and outside of a function
I know I should post this as a comment, but for some reason I can't, so I'm writing it as an answer instead. I hope this helps.
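In short, the linked answer boils down to returning the value from tmdb_posters and passing it in, rather than reaching for a global. A sketch against the question's code (only the changed pieces shown; savemovieurl is a list of poster URLs, so it is stringified before being stored):

def tmdb_posters(imdbid, count=None, outpath='.'):
    urls = get_poster_urls(imdbid)
    if count is not None:
        urls = urls[:count]
    _download_images(urls, outpath)
    return urls  # hand the list back to the caller instead of printing it

def dynamic_data_entry(savemovieurl):
    unix = time.time()
    date = str(datetime.datetime.fromtimestamp(unix).strftime('%Y-%m-%d %H:%M:%S'))
    keyword = 'keyword_string'
    movieurl = 'bing.com'
    value = random.randrange(0, 10)
    c.execute("INSERT INTO movies (unix, datestamp, keyword, value, moviename, movieimage, movieurl) VALUES (?, ?, ?, ?, ?, ?, ?)",
              (unix, date, keyword, value, movie, str(savemovieurl), movieurl))
    conn.commit()

if __name__ == "__main__":
    savemovieurl = tmdb_posters(imdb_id_input)  # the returned list of poster urls
    create_table()
    dynamic_data_entry(savemovieurl)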

Searching for occurrences in an Access database

I am trying to search an Access database for some occurrences, but I found that my code misses some of them when it searches: it misses the second occurrence once it has found the first one.
Example: if I am looking for T300 and I have this structure:
T200
T300
T300
it will catch the first T300 and skip the second T300.
import csv
import pyodbc
from xml.dom import minidom

# *************************************
def DBAccess(Term):
    MDB = 'c:/test/mydb.mdb'
    DRV = '{Microsoft Access Driver (*.mdb)}'
    PWD = ''
    conn = pyodbc.connect('DRIVER=%s;DBQ=%s;PWD=%s' % (DRV, MDB, PWD))
    curs = conn.cursor()
    curs.execute("select * from gdo_segment")
    rows = curs.fetchall()
    for row in rows:
        T = 'T' + str(row.troncon) + '_' + row.noeud1 + '-' + row.noeud2
    if (T == Term):
        print T
    curs.close()
    conn.close()

# *************************************
def findTerminal():
    xmldoc = minidom.parse('c:\\test\mydoc.xml')
    # printing the number of blocs in my xml file
    itemlist = xmldoc.getElementsByTagName('ACLineSegment')
    for item in itemlist:
        found = False
        for child in item.childNodes:
            if child.nodeName == 'Terminal':
                found = True
        if not found:
            Term = item.getAttribute('Name')
            DBAccess(Term)

# ***********************************
findTerminal()
I assume it is only finding the last item, and this would be because of your code indentation. Correct indentation is essential in Python (see the docs).
Currently, your if statement only runs after all the looping has completed, so it will only check the last value of T.
def DBAccess(Term):
    MDB = 'c:/test/gdomt.mdb'
    DRV = '{Microsoft Access Driver (*.mdb)}'
    PWD = ''
    conn = pyodbc.connect('DRIVER=%s;DBQ=%s;PWD=%s' % (DRV, MDB, PWD))
    curs = conn.cursor()
    curs.execute("select * from gdo_segment")
    rows = curs.fetchall()
    for row in rows:
        T = 'T' + str(row.troncon) + '_' + row.noeud1 + '-' + row.noeud2
        if (T == Term):
            print T
    curs.close()
    conn.close()
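If every occurrence matters (including duplicates such as the two T300 rows in the example), it can also help to count the matches as you go. A sketch along the same lines, assuming the same gdo_segment columns as above:

def DBAccess(Term):
    MDB = 'c:/test/mydb.mdb'
    DRV = '{Microsoft Access Driver (*.mdb)}'
    PWD = ''
    conn = pyodbc.connect('DRIVER=%s;DBQ=%s;PWD=%s' % (DRV, MDB, PWD))
    curs = conn.cursor()
    curs.execute("select troncon, noeud1, noeud2 from gdo_segment")
    matches = 0
    for row in curs.fetchall():
        T = 'T' + str(row.troncon) + '_' + row.noeud1 + '-' + row.noeud2
        if T == Term:
            matches += 1
            print T
    print '%s found %d time(s)' % (Term, matches)
    curs.close()
    conn.close()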
