Python, MySQL regression, SQL bug or faulty conditionals?

I have a bucket folder that contains CSV files named in the form yy-mm-dd.csv, with several header rows I can ignore apart from the date at the end of the second row, followed by 151 rows of timestamp;power (kW) pairs. Here's a snippet:
sep=;
Version CSV|Tool SunnyBeam11|Linebreaks CR/LF|Delimiter semicolon|Decimalpoint point|Precision 3|Language en-UK|TZO=0|DST|2012.06.21
;SN: removed
;SB removed
;2120138796
Time;Power
HH:mm;kW
00:10;0.000
00:20;0.000
00:30;0.000
00:40;0.000
00:50;0.000
01:00;0.000
01:10;0.000
01:20;0.000
01:30;0.000
01:40;0.000
01:50;0.000
02:00;0.000
02:10;0.000
02:20;0.000
02:30;0.000
02:40;0.000
02:50;0.000
03:00;0.000
03:10;0.000
03:20;0.000
03:30;0.000
03:40;0.000
03:50;0.000
04:00;0.000
04:10;0.000
04:20;0.000
04:30;0.000
04:40;0.000
04:50;0.006
05:00;0.024
05:10;0.006
05:20;0.000
05:30;0.030
05:40;0.036
05:50;0.042
06:00;0.042
06:10;0.042
06:20;0.048
06:30;0.060
06:40;0.114
06:50;0.132
07:00;0.150
I parse the bucket folder for files matching this filename format, since there are other files I don't want to parse, and I grab the date from row two of each file and store it. I connect to the database and then work down the remaining lines, concatenating the stored date with the timestamp on each line after row 9 (or thereabouts). I also grab the second value on each line (power, in kW). The intention is to insert the concatenated date-time value and the associated power value into the connected MySQL database. When the last line has been read, the file is moved to a subfolder called 'parsed'. All of this proceeds as expected, but every row read goes through the except branch of the try/except block (Line 107) that prints 'DB append failed!'. I've checked that the stored database credentials work by logging in to MySQL (actually MariaDB on OpenSuse LEAP 4.2), and I've printed the connection variable; both lead me to believe that I am connected properly for each file. I would snip out parts of my Python script to make it shorter, but I'm not a particularly advanced Python coder and I don't want to risk missing the key part:
#!/usr/bin/python
from os import listdir
from datetime import datetime
import MySQLdb
import shutil
import syslog
#from sys import argv

def is_dated_csv(filename):
    """
    Return True if filename matches format YY-MM-DD.csv, otherwise False.
    """
    date_format = '%y-%m-%d.csv'
    try:
        date = datetime.strptime(filename, date_format)
        return True
    except ValueError:
        # filename did not match pattern
        syslog.syslog('SunnyData file ' + filename + ' did NOT match')
        #print filename + ' did NOT match'
        pass
    #'return' terminates a function
    return False

def parse_for_date(filename):
    """
    Read file for the date - from line 2 field 10
    """
    currentFile = open(filename, 'r')
    l1 = currentFile.readline()  # ignore first line read
    date_line = currentFile.readline()  # read second line
    dateLineArray = date_line.split("|")
    day_in_question = dateLineArray[-1]  # save the last element (date)
    currentFile.close()
    return day_in_question

def normalise_date_to_UTF(day_in_question):
    """
    Rather weirdly, some days use YYYY.MM.DD format & others use DD/MM/YYYY
    This function normalises either to UTC with a blank time (midnight)
    """
    if '.' in day_in_question:  # it's YYYY.MM.DD
        dateArray = day_in_question.split(".")
        dt = (dateArray[0] + dateArray[1] + dateArray[2].rstrip() + '000000')
    elif '/' in day_in_question:  # it's DD/MM/YYYY
        dateArray = day_in_question.split("/")
        dt = (dateArray[2].rstrip() + dateArray[1] + dateArray[0] + '000000')
    theDate = datetime.strptime(dt, '%Y%m%d%H%M%S')
    return theDate  # A datetime object

def parse_power_values(filename, theDate):
    currentFile = open(filename, 'r')
    for i, line in enumerate(currentFile):
        if i <= 7:
            doingSomething = True
            print 'header' + str(i) + '/ ' + line.rstrip()
        elif ((i > 7) and (i <= 151)):
            lineParts = line.split(';')
            theTime = lineParts[0].split(':')
            theHour = theTime[0]
            theMin = theTime[1]
            timestamp = theDate.replace(hour=int(theHour), minute=int(theMin))
            power = lineParts[1].rstrip()
            if power == '-.---':
                power = 0.000
            if (float(power) > 0):
                print str(i) + '/ ' + str(timestamp) + ' power = ' + power + 'kWh'
                append_to_database(timestamp, power)
            else:
                print str(i) + '/ '
        elif i > 151:
            print str(timestamp) + ' DONE!'
            print '----------------------'
            break
    currentFile.close()

def append_to_database(timestampval, powerval):
    host="localhost",   # host
    user="removed",     # username
    #passwd="******"
    passwd="removed"
    database_name = 'SunnyData'
    table_name = 'DTP'
    timestamp_column = 'DT'
    power_column = 'PWR'
    #sqlInsert = ("INSERT INTO %s (%s,%s) VALUES('%s','%s')" % (table_name, timestamp_column, power_column, timestampval.strftime('%Y-%m-%d %H:%M:%S'), powerval) )
    #sqlCheck = ("SELECT TOP 1 %s.%s FROM %s WHERE %s.%s = %s;" % (table_name, timestamp_column, table_name, table_name, timestamp_column, timestampval.strftime('%Y-%m-%d %H:%M:%S')) )
    sqlInsert = ("INSERT INTO %s (%s,%s) VALUES('%s','%s')", (table_name, timestamp_column, power_column, timestampval.strftime('%Y-%m-%d %H:%M:%S'), powerval) )
    sqlCheck = ("SELECT TOP 1 %s.%s FROM %s WHERE %s.%s = %s;", (table_name, timestamp_column, table_name, table_name, timestamp_column, timestampval.strftime('%Y-%m-%d %H:%M:%S')) )
    cur = SD.cursor()
    try:
        #cur.execute(sqlCheck)
        # Aim here is to see if the datetime for the file has an existing entry in the database_name
        # If it does, do nothing, otherwise add the values to the database
        cur.execute(sqlCheck)
        if cur.fetchone() == "None":
            cur.execute(sqlInsert)
            print ""
        SD.commit()
    except:
        print 'DB append failed!'
        syslog.syslog('SunnyData DB append failed')
        SD.rollback()

# Main start of program
path = '/home/greg/currentGenerated/SBEAM/'
destination = path + '/parsed'
syslog.syslog('parsing SunnyData CSVs started')
for filename in listdir(path):
    print filename
    if is_dated_csv(filename):
        # connect and disconnect once per CSV file - wasteful to reconnect for every line in def append_to_database(...)
        SD = MySQLdb.connect(host="localhost", user="root", passwd="removed", db='SunnyData')
        print SD
        print filename + ' matched'
        day_in_question = parse_for_date(filename)
        print 'the date is ' + day_in_question
        theDate = normalise_date_to_UTF(day_in_question)
        parse_power_values(filename, theDate)
        SD.close()
        shutil.move(path + '/' + filename, destination)
        syslog.syslog('SunnyData file' + path + '/' + filename + 'parsed & moved to ' + destination)
It used to work, but it's been a long time and many updates since I last checked it, and I worry that a regression may have changed something underneath my code. I'm just not sure how to work it all out.
Apologies that this isn't a very crisp and specific question, but if you can help me sort it out, it may still serve as a good example for others.
Thanks
Greg

There is no SELECT TOP ... syntax in MySQL/MariaDB, so your script must be failing upon trying to execute sqlCheck.
It should be SELECT %s.%s FROM %s WHERE %s.%s = %s LIMIT 1 instead.
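Beyond that, two more details in append_to_database will bite (observations on the posted code): sqlInsert and sqlCheck are built as (format string, values) tuples rather than formatted strings, so cur.execute() never receives valid SQL, and cur.fetchone() returns the object None, not the string "None". A minimal corrected sketch, keeping the poster's SunnyData/DTP schema and using parameterized queries so the driver handles the quoting:
def append_to_database(timestampval, powerval):
    ts = timestampval.strftime('%Y-%m-%d %H:%M:%S')
    cur = SD.cursor()
    try:
        # LIMIT 1 replaces the invalid SELECT TOP 1; the values are passed
        # separately so MySQLdb escapes them safely.
        cur.execute("SELECT DT FROM DTP WHERE DT = %s LIMIT 1", (ts,))
        if cur.fetchone() is None:  # the None object, not the string "None"
            cur.execute("INSERT INTO DTP (DT, PWR) VALUES (%s, %s)", (ts, powerval))
            SD.commit()
    except MySQLdb.Error:
        syslog.syslog('SunnyData DB append failed')
        SD.rollback()
        raise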

Related

Python / Oracle Insert Statement

Being fairly new to Python, I'm looking for some help on why this INSERT statement is not working:
v_history_seq = curs1.callfunc('GET_NEXT_SEQUENCE', int, ['upload_history_seq'])
v_upload_date = now.strftime("%Y-%m-%d %H:%M:%S")
v_insert = ("""\
INSERT INTO upload_history (sequence, upload_date, editor, broker, filename, orig_filename, file_type,
    account_type_id, upload_type, quarter, quarter_year, user_notes_id)
VALUES (:sequence, TO_DATE(:upload_date,'YYYY-MM-DD HH24:MI:SS'), :editor, :broker, :filename, :orig_filename, :file_type,
    :account_type_id, :upload_type, :quarter, :quarter_year, :user_notes_id)""")
try:
    curs1.execute(v_insert,
                  sequence = v_history_seq,
                  upload_date = v_upload_date,
                  editor = p_editor,
                  broker = p_broker,
                  filename = file_list[x][0],
                  orig_filename = file_list[x][1],
                  file_type = p_upload_type,
                  account_type_id = p_account_type,
                  upload_type = p_upload_type,
                  quarter = p_quarter,
                  quarter_year = p_year,
                  user_notes_seq = p_user_notes_seq)
except cx.Oracle.Error as err:
    error, = err.args
    logging.info(" Error inserting into Upload History: " + error.code)
    logging.info(" Error inserting into Upload History: " + error.message)
Any help would be great, and thanks in advance.
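One mismatch stands out in the posted code (an observation, not a tested fix): the statement declares a bind variable :user_notes_id, but the execute() call passes a keyword named user_notes_seq, so that bind variable never receives a value; the except clause also names cx.Oracle where the module is cx_Oracle. A sketch of the corrected call:
try:
    curs1.execute(v_insert,
                  sequence=v_history_seq,
                  upload_date=v_upload_date,
                  editor=p_editor,
                  broker=p_broker,
                  filename=file_list[x][0],
                  orig_filename=file_list[x][1],
                  file_type=p_upload_type,
                  account_type_id=p_account_type,
                  upload_type=p_upload_type,
                  quarter=p_quarter,
                  quarter_year=p_year,
                  user_notes_id=p_user_notes_seq)  # keyword now matches :user_notes_id
except cx_Oracle.Error as err:
    error, = err.args
    # error.code is an int, so convert it before concatenating
    logging.info(" Error inserting into Upload History: " + str(error.code))
    logging.info(" Error inserting into Upload History: " + error.message)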

Confused by a python type error

I've been using Python for a little while and have made some improvements, but this is a new error to me. I'm trying to learn social media analysis for my career, and that's why I am trying out this set of code here.
I've debugged one error, but this one, which appears at line 81, has got me stumped, as I can't see why the function get_user_objects(follower_ids) returns None, or what I'd need to change in accordance with previous advice on other questions here.
Here's the script up to that point, for simplicity. All help appreciated.
The error, to repeat, is TypeError: object of type 'NoneType' has no len()
from tweepy import OAuthHandler
from tweepy import API
from collections import Counter
from datetime import datetime, date, time, timedelta
import sys
import json
import os
import io
import re
import time

# Helper functions to load and save intermediate steps
def save_json(variable, filename):
    with io.open(filename, "w", encoding="utf-8") as f:
        f.write(str(json.dumps(variable, indent=4, ensure_ascii=False)))

def load_json(filename):
    ret = None
    if os.path.exists(filename):
        try:
            with io.open(filename, "r", encoding="utf-8") as f:
                ret = json.load(f)
        except:
            pass
    return ret

def try_load_or_process(filename, processor_fn, function_arg):
    load_fn = None
    save_fn = None
    if filename.endswith("json"):
        load_fn = load_json
        save_fn = save_json
    else:
        load_fn = load_bin
        save_fn = save_bin
    if os.path.exists(filename):
        print("Loading " + filename)
        return load_fn(filename)
    else:
        ret = processor_fn(function_arg)
        print("Saving " + filename)
        save_fn(ret, filename)
        return ret

# Some helper functions to convert between different time formats and
# perform date calculations
def twitter_time_to_object(time_string):
    twitter_format = "%a %b %d %H:%M:%S %Y"
    match_expression = "^(.+)\s(\+[0-9][0-9][0-9][0-9])\s([0-9][0-9][0-9][0-9])$"
    match = re.search(match_expression, time_string)
    if match is not None:
        first_bit = match.group(1)
        second_bit = match.group(2)
        last_bit = match.group(3)
        new_string = first_bit + " " + last_bit
        date_object = datetime.strptime(new_string, twitter_format)
        return date_object

def time_object_to_unix(time_object):
    return int(time_object.strftime("%s"))

def twitter_time_to_unix(time_string):
    return time_object_to_unix(twitter_time_to_object(time_string))

def seconds_since_twitter_time(time_string):
    input_time_unix = int(twitter_time_to_unix(time_string))
    current_time_unix = int(get_utc_unix_time())
    return current_time_unix - input_time_unix

def get_utc_unix_time():
    dts = datetime.utcnow()
    return time.mktime(dts.timetuple())

# Get a list of follower ids for the target account
def get_follower_ids(target):
    return auth_api.followers_ids(target)

# Twitter API allows us to batch query 100 accounts at a time
# So we'll create batches of 100 follower ids and gather Twitter User
# objects for each batch
def get_user_objects(follower_ids):
    batch_len = 100
    num_batches = len(follower_ids)/100
    batches = (follower_ids[i:i+batch_len] for i in range(0, len(follower_ids), batch_len))
    all_data = []
    for batch_count, batch in enumerate(batches):
        sys.stdout.write("\r")
        sys.stdout.flush()
        sys.stdout.write("Fetching batch: " + str(batch_count) + "/" + str(num_batches))
        sys.stdout.flush()
        users_list = auth_api.lookup_users(user_ids=batch)
        users_json = (map(lambda t: t._json, users_list))
        all_data += users_json
    return all_data

# Creates one week length ranges and finds items that fit into those range boundaries
def make_ranges(user_data, num_ranges=20):
    range_max = 604800 * num_ranges
    range_step = range_max/num_ranges
    # We create ranges and labels first and then iterate these when going
    # through the whole list of user data, to speed things up
    ranges = {}
    labels = {}
    for x in range(num_ranges):
        start_range = x * range_step
        end_range = x * range_step + range_step
        label = "%02d" % x + " - " + "%02d" % (x+1) + " weeks"
        labels[label] = []
        ranges[label] = {}
        ranges[label]["start"] = start_range
        ranges[label]["end"] = end_range
    for user in user_data:
        if "created_at" in user:
            account_age = seconds_since_twitter_time(user["created_at"])
            for label, timestamps in ranges.iteritems():
                if account_age > timestamps["start"] and account_age < timestamps["end"]:
                    entry = {}
                    id_str = user["id_str"]
                    entry[id_str] = {}
                    fields = ["screen_name", "name", "created_at", "friends_count", "followers_count", "favourites_count", "statuses_count"]
                    for f in fields:
                        if f in user:
                            entry[id_str][f] = user[f]
                    labels[label].append(entry)
    return labels

if __name__ == "__main__":
    account_list = []
    if (len(sys.argv) > 1):
        account_list = sys.argv[1:]
    if len(account_list) < 1:
        print("No parameters supplied. Exiting.")
        sys.exit(0)
    consumer_key = "XXXXXXX"
    consumer_secret = "XXXXXX"
    access_token = "XXXXXXX"
    access_token_secret = "XXXXXXXX"
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    auth_api = API(auth)
    for target in account_list:
        print("Processing target: " + target)
        # Get a list of Twitter ids for followers of target account and save it
        filename = target + "_follower_ids.json"
        follower_ids = try_load_or_process(filename, get_follower_ids, target)
        # Fetch Twitter User objects from each Twitter id found and save the data
        filename = target + "_followers.json"
        user_objects = try_load_or_process(filename, get_user_objects, follower_ids)
        total_objects = len(user_objects)
        # Record a few details about each account that falls between specified age ranges
        ranges = make_ranges(user_objects)
        filename = target + "_ranges.json"
        save_json(ranges, filename)
        # Print a few summaries
        print
        print("\t\tFollower age ranges")
        print("\t\t===================")
        total = 0
        following_counter = Counter()
        for label, entries in sorted(ranges.iteritems()):
            print("\t\t" + str(len(entries)) + " accounts were created within " + label)
            total += len(entries)
            for entry in entries:
                for id_str, values in entry.iteritems():
                    if "friends_count" in values:
                        following_counter[values["friends_count"]] += 1
        print("\t\tTotal: " + str(total) + "/" + str(total_objects))
        print
        print("\t\tMost common friends counts")
        print("\t\t==========================")
        total = 0
        for num, count in following_counter.most_common(20):
            total += count
            print("\t\t" + str(count) + " accounts are following " + str(num) + " accounts")
        print("\t\tTotal: " + str(total) + "/" + str(total_objects))
        print
        print
The immediate problem is in load_json: you assume its return value is a list or dict, or something that can be passed to len. However, it can return None in a number of circumstances:
The file to read from isn't found
There is some error reading from the file
There is a problem decoding the contents of the file
The file contains just the JSON value null.
At no point after you call load_json do you check its return value.
Worse, you catch and ignore any exception that might occur in load_json, causing it to silently return None with no indication that something went wrong.
The function would be better written like
def load_json(filename):
    with io.open(filename, "r", encoding="utf-8") as f:
        return json.load(f)
At least now, any errors will raise an uncaught exception, making it more obvious that there was a problem and providing a clue as to what the problem was. The golden rule of exception handling is to only catch the exceptions you can do something about, and if you can't do anything about a caught exception, re-raise it.
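For instance (an illustrative sketch, not part of the original answer), catching only what you can act on and re-raising the rest looks like this:
def load_json(filename):
    try:
        with io.open(filename, "r", encoding="utf-8") as f:
            return json.load(f)
    except IOError:
        # We can report a missing or unreadable file, but we cannot fix it
        # here, so log the context and re-raise.
        print("Could not open " + filename)
        raise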
You could also check the resultant value and act accordingly:
# Fetch Twitter User objects from each Twitter id found and save the data
filename = target + "_followers.json"
user_objects = try_load_or_process(filename, get_user_objects, follower_ids)
if user_objects is not None:
    total_objects = len(user_objects)
else:
    # handle the missing data here (e.g. skip this target)

Python MySQL TypeError: must be str, not tuple

I have the following code which connects to a MySQL DB and checks which records are marked as 'active' by a field that contains '1'.
The code then downloads some files using the contents of the vulntype field in the URL for the download.
I think the issue lies with the MySQL query or the for loop to do the downloads.
The code is:-
import requests
import os
import MySQLdb
from hurry.filesize import size, si
import logging
import logging.handlers
from logging.config import fileConfig

logging.handlers = logging.handlers
fileConfig('data/logging_config.ini')
logger = logging.getLogger("downloader")
active_vuln_type = None

def get_active_vuln_sets():
    global active_vuln_type
    try:
        logging.info('Connecting to the database...')
        active_vuln_type = con = MySQLdb.connect(*******)
        logging.info('Database connected!')
    except FileNotFoundError as fnf:
        logging.error(fnf)
    except MySQLdb.Error as e:
        logging.error(e)
    try:
        logging.info('Getting active vulnerability sets...')
        cur = con.cursor()
        active = "1"
        cur.execute("""SELECT vulntype FROM vuln_sets WHERE active = %s""", (active))
        active_vuln_type = cur.fetchall()
    except MySQLdb.Error as e:
        logging.exception(e)

def download():
    try:
        logging.info('Downloading vulnerability set files...')
        for x in active_vuln_type:
            basepath = os.path.dirname(__file__)
            filepath = os.path.abspath(os.path.join(basepath, ".."))
            response = requests.get('https://vulners.com/api/'
                                    'v3/archive/collection/?type=' + x)
            with open(filepath + '/vuln_files/' + x + '.zip', 'wb') as f:
                f.write(response.content)
            filesize = size(os.path.getsize(filepath + '/vuln_files/' + x + '.zip'), system=si)
            files = x + ".zip - " + str(filesize)
            logging.info('Downloaded ' + x + '.zip Successfully')
            logging.info('File details: ' + files)
    except Exception as e:
        logging.exception(e)
The traceback for this is:-
Traceback (most recent call last):
File "/home/luke/projects/vuln_backend/vuln_backend/download.py", line 61, in download
'v3/archive/collection/?type=' + x)
TypeError: must be str, not tuple
active_vuln_type = cur.fetchall()
This line returns a list of rows from the database. Each row is a tuple. Of course you're only selecting one column from the table, but the interface is the same: each row is a tuple, one value per column.
for x in active_vuln_type:
Here x is a tuple like ("vulnerability of some kind",); note the trailing comma. You need to unpack it:
for db_row in active_vuln_type:
    x = db_row[0]  # The first and only column in the row.
Besides that, please consider naming x descriptively, returning active_vuln_type from one procedure and passing it to the other as a parameter. This will make your code less brittle and easier to test.
def get_active_vuln_sets(db_host, db_user, db_password):
    # Do your stuff, using the db credentials....
    return active_vuln_type

def download(active_vuln_type):
    # Same source as now.
Now you can download(get_active_vuln_sets('192.168.100.1', 'joe', 'secret'))
Or you can test / retry the same thing without touching the DB: download([("CVE-xyxxy",), ("CVE-asdfghjkl",)]).
One more thing you can do is to return a clean list of vuln names, not raw DB tuples:
def get_active_vuln_sets(...):
    # .....
    return [x[0] for x in cur.fetchall()]
Now the value returned will be a list of single, directly usable values, so your original download(...) code would work with it.
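For instance (a hypothetical usage sketch; the credentials are placeholders from above):
# With the clean-list version, each element is a plain string, so no
# tuple indexing is needed downstream.
active_vuln_type = get_active_vuln_sets('192.168.100.1', 'joe', 'secret')
for vuln_name in active_vuln_type:
    print('Will download https://vulners.com/api/v3/archive/collection/?type=' + vuln_name)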
What you get is a tuple, so you will need to get the first element with x[0]:
def download():
    try:
        logging.info('Downloading vulnerability set files...')
        for x in active_vuln_type:
            basepath = os.path.dirname(__file__)
            filepath = os.path.abspath(os.path.join(basepath, ".."))
            response = requests.get('https://vulners.com/api/'
                                    'v3/archive/collection/?type=' + x[0])
            with open(filepath + '/vuln_files/' + x[0] + '.zip', 'wb') as f:
                f.write(response.content)
            filesize = size(os.path.getsize(filepath + '/vuln_files/' + x[0] + '.zip'), system=si)
            files = x[0] + ".zip - " + str(filesize)
            logging.info('Downloaded ' + x[0] + '.zip Successfully')
            logging.info('File details: ' + files)
    except Exception as e:
        logging.exception(e)
To guard against these kinds of type errors, convert values explicitly with str(), e.g. str(function(argument)), before concatenating them, and use try/except so a TypeError is caught and reported rather than crashing:
try:
    print(x)
except TypeError:
    print("Variable x is not the expected type")
except:
    print("Something else went wrong")
Also double-check which records are actually marked as 'active' (the field containing '1') by filtering them in the query. Whether this works perfectly for your case depends on the rest of the code, but if the if/else logic is sound it should produce some results.
Thank you.

Save a file name as "date - backup"

I am currently exporting a table from BigQuery to GCS as another form of backup. This is the code I have so far, which saves the file as "firebase_connectioninfo.csv".
# Export table to GCS as a CSV
data = 'dataworks-356fa'
destination = 'gs://firebase_results/firebase_backups1/Firebase_ConnectionInfo.csv'

def export_data_to_gcs(data, Firebase_ConnectionInfo, destination):
    bigquery_client = bigquery.Client(data)
    dataset = bigquery_client.dataset('FirebaseArchive')
    table = dataset.table('Firebase_ConnectionInfo')
    job_name = str(uuid.uuid4())
    job = bigquery_client.extract_table_to_storage(
        job_name, table, 'gs://firebase_results/firebase_backups1/Firebase_ConnectionInfo.csv')
    job.source_format = 'CSV'
    job.begin()
    wait_for_job(job)

def wait_for_job(job):
    while True:
        job.reload()
        if job.state == 'DONE':
            if job.error_result:
                raise RuntimeError(job.errors)
            return
        time.sleep(1)

export_data_to_gcs(data, 'Firebase_ConnectionInfo', destination)
I want this file to be named "thedate_firebase_connectioninfo_backup". How do I do this in my Python script?
So this is your string:
gs://firebase_results/firebase_backups1/Firebase_ConnectionInfo.csv
What I would suggest is putting it into its own variable:
filename = 'gs://firebase_results/firebase_backups1/Firebase_ConnectionInfo.csv'
Additionally, we should put in a spot for the date. We can handle formatting the string a couple of different ways, but this is my preferred method:
filename = 'gs://firebase_results/firebase_backups1/{date}-Firebase_ConnectionInfo.csv'
We can then call format() on the filename with the date. Note that format() returns a new string, so the result needs to be assigned, and the directives should be %m-%d-%Y (month-day-year): %M would be minutes, and %D expands with slashes, which don't belong in a filename.
from datetime import datetime
date = datetime.now().strftime("%m-%d-%Y")
filename = filename.format(date=date)
Another way we could format the string would be the old string formatting style with %. I hate this method, but some people like it. I think it may be faster.
date = datetime.now().strftime("%m-%d-%Y")
filename = 'gs://firebase_results/firebase_backups1/%s-Firebase_ConnectionInfo.csv' % date
Or, you could use the other guy's answer and just add the strings like
"This " + "is " + "a " + "string."
outputs: "This is a string."
Try something like this:
import datetime
datestr = datetime.date.today().strftime("%B-%d-%Y")
destination = 'gs://firebase_results/firebase_backups1/' + datestr + '_Firebase_ConnectionInfo.csv'
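Note, though, that the posted export_data_to_gcs ignores its destination argument and hard-codes the URI, so the dated name also has to be passed through to extract_table_to_storage. A sketch combining the two, under the assumption that the question's bigquery/uuid/time imports are in place:
import datetime

def export_data_to_gcs(data, table_name, destination):
    bigquery_client = bigquery.Client(data)
    dataset = bigquery_client.dataset('FirebaseArchive')
    table = dataset.table(table_name)
    job_name = str(uuid.uuid4())
    # Use the destination the caller built instead of a hard-coded URI
    job = bigquery_client.extract_table_to_storage(job_name, table, destination)
    job.source_format = 'CSV'
    job.begin()
    wait_for_job(job)

datestr = datetime.date.today().strftime("%Y-%m-%d")
destination = ('gs://firebase_results/firebase_backups1/'
               + datestr + '_Firebase_ConnectionInfo.csv')
export_data_to_gcs('dataworks-356fa', 'Firebase_ConnectionInfo', destination)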

Handling Unicode string pulled from SOQL In Python

The purpose of the code is to use SOQL to query the SalesForce API, then to format the data and do some stuff before putting it into an Oracle database. My code successfully handles the first and third parts, but the second part keeps breaking.
The code is using Python 2.7 with the standard CPython interpreter on Windows 7.
The SOQL is
SELECT ID, Name, Type, Description, StartDate, EndDate, Status
FROM CAMPAIGN
ORDER BY ID
This query pulls back a few hundred results in a JSON Dict.
I have to pull each record (Record contains ID, Name, Type, Description, StartDate, EndDate, and Status) one at a time and pass them to a function that generates the proper SQL to put the data in the proper Oracle Database. All of the results of the query come back as Unicode strings.
The trouble shows up after I query the data, when I try to pass it to the function that generates the SQL to insert it into the Oracle database.
Here is the section of code where the error occurs.
keys = ['attributes', 'Id', 'Name', 'Type', 'Description', 'StartDate', 'EndDate', 'Status']
for record in SrcData['records']:  # Data cleaning in this loop.
    processedRecs = []
    if record['Description'] is not None:
        record['Description'] = encodeStr(record['Description'])
        record['Description'] = record['Description'][0:253]
    for key in keys:
        if key == 'attributes':
            continue
        elif key == 'StartDate' and record[key] is not None:
            record[key] = datetime.datetime.strptime(record[key], "%Y-%m-%d")
        elif key == 'EndDate' and record[key] is not None:
            record[key] = datetime.datetime.strptime(record[key], "%Y-%m-%d")
        else:
            pass
        processedRecs.append(record[key])
    sqlFile.seek(0)
    Query = RetrieveSQL(sqlFile, processedRecs)
The key list is there because there were issues with looping on SrcData.keys().
The encodeStr function is:
def encodeStr(strToEncode):
    if strToEncode == None:
        return ""
    else:
        try:
            tmpstr = strToEncode.encode('ascii', 'ignore')
            tmpstr = ' '.join(tmpstr.split())
            return tmpstr
        except:
            return str(strToEncode)
The error message I get is:
Traceback (most recent call last): File "XXX", line 106, in Query = ASPythonLib.RetrieveSQL(sqlFile, processedRecs), UnicodeEncodeError: 'ascii' codec can't encode character u'\u2026' in position 31: ordinal not in range(128)
The XXX is just a file path to where this code is in our file system. My boss said I must remove the path.
I have also tried multiple variations of:
record['Description'] = record['Description'].encode('ascii', 'ignore').decode(encoding='ascii', errors='strict')
I have tried swapping the order of the encode and decode functions. I have tried different codecs and different error handling schemes.
****Edit**** This code works correctly in about 20 other cycles, so it's safe to assume the error is not in RetrieveSQL().
Here is the code for RetrieveSQL:
def RetrieveSQL(SQLFile, VarList, Log = None):
    SQLQuery = SQLFile.readline()
    FileArgs = [""]
    NumArgValues = len(VarList)
    if( "{}" in SQLQuery ):
        # NumFileArgs == 0
        if (NumArgValues != 0):
            print "Number of File Arguments is zero for File " + str(SQLFile) + " is NOT equal to the number of values provided per argument (" + str(NumArgValues) + ")."
        return SQLFile.read()
    elif( SQLQuery[0] != "{" ):
        print "File " + str(SQLFile) + " is not an SQL source file."
        return -1
    elif( SQLQuery.startswith("{") ):
        FileArgs = SQLQuery.replace("{", "").replace("}", "").split(", ")
    for Arg in xrange(0, len(FileArgs)):
        FileArgs[Arg] = "&" + FileArgs[Arg].replace("\n", "").replace("\t", "") + "&"  # Add &'s for replacing
    NumFileArgs = len(FileArgs)
    if (NumFileArgs != NumArgValues):
        if (NumArgValues == 0):
            print "No values were supplied to RetrieveSQL() for File " + str(SQLFile) + " when there were supposed to be " + str(NumFileArgs) + " values."
            return -1
        elif (NumArgValues > 0):
            "Number of File Arguments (" + str(NumFileArgs) + ") for File " + str(SQLFile) + " is NOT equal to the number of values provided per argument (" + str(NumArgValues) + ")."
            return -1
    SQLQuery = SQLFile.read()
    VarList = list(VarList)
    for Arg in xrange(0, len(FileArgs)):
        if (VarList[Arg] == None):
            SQLQuery = SQLQuery.replace(FileArgs[Arg], "NULL")
        elif ("'" in str(VarList[Arg])):
            SQLQuery = SQLQuery.replace(FileArgs[Arg], "'" + VarList[Arg].replace("'", "''") + "'")
        elif ("&" in str(VarList[Arg])):
            SQLQuery = SQLQuery.replace(FileArgs[Arg], "'" + VarList[Arg].replace("&", "&'||'") + "'")
        elif (isinstance(VarList[Arg], basestring) == True):
            VarList[Arg] = VarList[Arg].replace("'", "''")
            SQLQuery = SQLQuery.replace(FileArgs[Arg], "'" + VarList[Arg] + "'")
        else:
            SQLQuery = SQLQuery.replace(FileArgs[Arg], str(VarList[Arg]))
    SQLFile.seek(0)
    return SQLQuery
****Edit #2**** I tried finding a complete traceback in the logging files, but the logging system for this script is terrible and never logs more than 'Cycle success' or 'Cycle fail'. Ah, the fun of rewriting code written by people who don't know how to code.
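One thing worth checking (an observation on the posted code, not a confirmed fix): encodeStr is applied only to Description, so other string fields such as Name can still reach RetrieveSQL as Unicode containing characters like u'\u2026', and the str() and replace() calls there will then attempt an implicit ASCII encode. A minimal sketch that runs every string value through encodeStr before building the SQL:
# Sketch: normalise every fetched value, not just Description, so no
# raw Unicode reaches RetrieveSQL's string concatenation.
for key in keys:
    if key == 'attributes':
        continue
    elif key in ('StartDate', 'EndDate') and record[key] is not None:
        record[key] = datetime.datetime.strptime(record[key], "%Y-%m-%d")
    elif isinstance(record[key], unicode):
        record[key] = encodeStr(record[key])  # ASCII-only byte string
    processedRecs.append(record[key])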
