Python 3.6 Lambda code error for os.environ variable - python

try :
sf = Salesforce(username = sfdc_username,
password = sfdc_password,
security_token = sfdc_security_token,
instance_url = sfdc_salesforce_instance_url,
domain = sfdc_sandbox)
print('salesforce login good')
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'])
sys.exit(1)
this portion of code on lambda is failing with the error:
a bytes-like object is required, not 'str': TypeError
Traceback (most recent call last):
File "/var/task/sfdc_etl/bin/sfdc_etl.py", line 80, in lambda_handler
domain = sfdc_sandbox)
File "/var/task/sfdc_etl/lib/python3.6/site-packages/simple_salesforce/api.py", line 146, in __init__
domain=self.domain)
File "/var/task/sfdc_etl/lib/python3.6/site-packages/simple_salesforce/login.py", line 80, in SalesforceLogin
username = escape(username)
File "/var/lang/lib/python3.6/html/__init__.py", line 19, in escape
s = s.replace("&", "&") # Must be done first!
TypeError: a bytes-like object is required, not 'str'
When I move this code to my test environment on an EC2 amazon linux and set the sfdc_sandox to 'test' in line, it works with no issues. I tried using os.environb["L_SFDC_SANDBOX"] and os.environ["L_SFDC_SANDBOX"].encode('utf8'), but that also did not help as it gave the same error. How do I fix the type error when I pull in this variable in Lambda?
Here is the entire script, maybe the error isn't because of that specific piece of code even though it seems like it is.
import os
import sys
# this adds the parent directory of bin so we can find the module
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
sys.path.append(parent_dir)
#This addes venv lib/python2.7/site-packages/ to the search path
mod_path = os.path.abspath(parent_dir+"/lib/python"+str(sys.version_info[0])+"."+str(sys.version_info[1])+"/site-packages/")
sys.path.append(mod_path)
from awsmgr.awsmgr import S3Helper
from base64 import b64decode
import boto3
from collections import OrderedDict
import datetime
from dateutil.parser import parse
import logging
import json
import math
import pandas as pd
from simple_salesforce import Salesforce, SalesforceLogin
from simple_salesforce.exceptions import SalesforceGeneralError, SalesforceMoreThanOneRecord, SalesforceMalformedRequest, SalesforceExpiredSession, SalesforceRefusedRequest, SalesforceResourceNotFound
from sqlalchemy import create_engine
from sqlalchemy import exc
current_path = os.path.dirname(os.path.realpath(__file__))
# Use this one for the parent directory
ENV_ROOT = os.path.abspath(os.path.join(current_path, os.path.pardir))
# Use this one for the current directory
#ENV_ROOT = os.path.abspath(os.path.join(current_path))
sys.path.append(ENV_ROOT)
def lambda_handler(event, context):
###############################
# Global Variable Definitions #
###############################
d_parse = parse
TMP_PATH = '/tmp'
igersUser = 'admin'
igersPwd = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["RS_PASSWORD"]))['Plaintext']
igersHost = os.environ["RS_HOST"]
igers = create_engine('postgres://{}:{}#{}/ibdrs'.format(igersUser, igersPwd, igersHost), encoding="utf-8")
igersSchema = os.environ["RS_SCHEMA"]
s3 = S3Helper(debug=os.environ["DEBUG"])
nextObjFile = s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/next_object.txt',os.path.abspath(os.path.join(TMP_PATH,'next_object.txt')))
s3Destination = 's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/json/'
s3Path = '{}_json'
s3NextObjDestination = 's3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/{}'
fileCount = 1
sfdc_username = os.environ["L_USERNAME"].encode('utf8')
sfdc_salesforce_instance_url = os.environ["L_SALESFORCE_INSTANCE_URL"].encode('utf8')
sfdc_password = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_PASSWORD"]))['Plaintext']
sfdc_security_token = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_SECURITY_TOKEN"]))['Plaintext']
sfdc_sandbox = os.environ["L_SFDC_SANDBOX"].encode('utf8')
print(type(sfdc_username), type(sfdc_password), type(sfdc_security_token), type(sfdc_salesforce_instance_url), type(sfdc_sandbox))
try :
sf = Salesforce(username = sfdc_username,
password = sfdc_password,
security_token = sfdc_security_token,
instance_url = sfdc_salesforce_instance_url,
domain = sfdc_sandbox)
print('salesforce login good')
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'])
sys.exit(1)
# get nextobj from s3
with open(nextObjFile, 'r') as f :
nextObjItem = f.read().strip().lower()
nextObj = nextObjItem.lower()
print('Processing {}'.format(nextObj))
######################################################
# get rs table group permissions, store in dataframe #
######################################################
def rsGetGroupPerms(igers, nextObj) :
global groupPerms
groupPerms = {}
existingGroupPerms = '''
SELECT
namespace, item, type, groname
FROM
(
SELECT
use.usename AS subject,
nsp.nspname AS NAMESPACE,
cls.relname AS item,
cls.relkind AS TYPE,
use2.usename AS OWNER,
cls.relacl
FROM
pg_user use
CROSS JOIN pg_class cls
LEFT JOIN pg_namespace nsp ON cls.relnamespace = nsp.oid
LEFT JOIN pg_user use2 ON cls.relowner = use2.usesysid
WHERE
cls.relowner = use.usesysid
AND nsp.nspname NOT IN ( 'pg_catalog', 'pg_toast', 'information_schema' )
AND nsp.nspname IN ( 'salesforce' )
AND relacl IS NOT NULL
ORDER BY
subject,
NAMESPACE,
item
)
JOIN pg_group pu ON array_to_string( relacl, '|' ) LIKE'%%' || pu.groname || '%%'
WHERE item = '{}'
'''.format(nextObj)
groupPerms = pd.read_sql(existingGroupPerms, igers)
print('got the group permissions')
return groupPerms
#####################################################
# get rs table user permissions, store in dataframe #
# NOT CURRENTLY IN USE #
#####################################################
#def rsGetUseerPerms(igers, nextObj) :
# existingUserPerms = '''
# SELECT *
# FROM
# (
# SELECT
# schemaname
# ,objectname
# ,usename
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'select') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS sel
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'insert') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS ins
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'update') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS upd
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'delete') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS del
# ,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'references') AND has_schema_privilege(usrs.usename, schemaname, 'usage') AS ref
# FROM
# (
# SELECT schemaname, 't' AS obj_type, tablename AS objectname, schemaname + '.' + tablename AS fullobj FROM pg_tables
# UNION
# SELECT schemaname, 'v' AS obj_type, viewname AS objectname, schemaname + '.' + viewname AS fullobj FROM pg_views
# ) AS objs
# ,(SELECT * FROM pg_user) AS usrs
# ORDER BY fullobj
# )
# WHERE (sel = true or ins = true or upd = true or del = true or ref = true)
# and objectname = '{}'
# '''.format(nextObj)
#
# userPerms = pd.read_sql_query(existingUserPerms, igers)
# return userPerms
####################################################
# Connect to Salesforce, Query JSON, and Copy to S3#
####################################################
def sfToS3(fileCount, sf, nextObj) :
# Initiate list for returned data
pulls = []
# Pull initial Query
sfobject = sf.restful('sobjects/{}/describe/'.format(nextObj), params=None)
fields_list = [record['name'] for record in sfobject['fields']]
initialQuery = sf.query("SELECT {} FROM {}".format(','.join(fields_list),nextObj))
#Send a single file or the first file to S3
data = initialQuery['records']
try :
send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)
# Append initial query data to pulls
if 'nextRecordsUrl' in initialQuery :
pulls.append(initialQuery['nextRecordsUrl'])
nextChunk = initialQuery['nextRecordsUrl']
nextQuery = sf.query_more(nextChunk,True)
if 'nextRecordsUrl' in nextQuery :
pulls.append(nextQuery['nextRecordsUrl'])
x = True
fileCount = 2
while x == True:
try:
# set up while loop to re-query salesforce until returned
# query does not have a 'nextRecordsUrl' return value
# Query new 'nextRecordsUrl'
nextQuery = sf.query_more(nextQuery['nextRecordsUrl'],True)
# append new query to pulls
pulls.append(nextQuery['nextRecordsUrl'])
except: # This triggers when nextQuery['nextRecordsUrl'] does not exist
# set x to False to end loop
x = False
#if there was a follow on set of records, query it and add to S3
if len(pulls) >= 1 :
for i in range(len(pulls)) :
data = sf.query_more(str(pulls[i].split('/')[5]))['records']
send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path)
fileCount += 1
print('completed sending JSON files to S3')
except :
print('Salesforce Object Empty, ending execution')
updateNextObj(nextObj, s3NextObjDestination)
sys.exit(1)
####################
# JSONL to S3 #
####################
def send_temp_jsonl_to_s3(data, nextObj, s3, s3Destination, fileCount, s3Path) :
fileName = '{}_file{}.json'
localFilePath ='/tmp/'
for element in data :
item = data.pop()
item.pop('attributes', None)
tempdict = OrderedDict({})
for k,v in item.items() :
if 'date' in k.lower() or 'stamp' in k.lower() :
if not v is None :
d = d_parse(v)
v = d.strftime('%Y-%m-%d %I:%M:%S')
tempdict[k.lower()] = v
else :
tempdict[k.lower()] = v
with open(localFilePath+fileName.format(nextObj,fileCount), 'a') as outfile :
outfile.write(json.dumps(tempdict))
outfile.write('\n')
s3.put_s3_file_datedpath(localFilePath+fileName.format(nextObj,fileCount),s3Destination+s3Path.format(nextObj))
os.remove(localFilePath+fileName.format(nextObj,fileCount))
#################################################
# maps SFDC type to SQL type - used for ddl #
#################################################
def map_data_type(sfdc_type, length):
"""
Definition to map Salesforce datatype to Redshift datatype.
"""
__MULTIPLIER = 1.3 # may not be Zero!
if length == 0:
length = 1092
if length == 4095:
length = 15000
if length > 65535:
length = 65534
if sfdc_type == u'boolean':
return u'varchar(5)'
elif sfdc_type == u'date':
return u'timestamp'
elif sfdc_type == u'datetime':
return u'timestamp'
elif sfdc_type == u'currency':
return u'decimal(38,6)'
elif sfdc_type == u'double':
return u'decimal(38,6)'
elif sfdc_type == u'int':
return u'numeric(10)'
elif sfdc_type == u'picklist':
return u'varchar({})'.format(length)
elif sfdc_type == u'id':
return u'varchar({})'.format(length)
elif sfdc_type == u'reference':
return u'varchar({})'.format(length)
elif sfdc_type == u'textarea':
if length >= (65535/length*__MULTIPLIER):
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format( math.ceil(length*__MULTIPLIER))
elif sfdc_type == u'email':
return u'varchar({})'.format(length)
elif sfdc_type == u'phone':
return u'varchar({})'.format(length)
elif sfdc_type == u'url':
return u'varchar({})'.format(length)
elif sfdc_type == u'multipicklist':
return u'varchar({})'.format(length)
elif sfdc_type == u'anyType':
if length >= 65535:
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
elif sfdc_type == u'percent':
return u'numeric(38,6)'
elif sfdc_type == u'combobox':
return u'varchar({})'.format(length)
elif sfdc_type == u'base64':
return u'varchar({})'.format(length)
elif sfdc_type == u'time':
return u'varchar(255)'
elif sfdc_type == u'string':
if length >= 65535:
return u'varchar({})'.format(65534)
else:
return u'varchar({})'.format(math.ceil(length*__MULTIPLIER))
else:
return u'varchar(65535)'
####################################
# Turn SFDC metadata into SQL #
####################################
def get_ddl(sf, nextObj, igersSchema, col_remove=None):
md = sf.restful("sobjects/{}/describe/".format(nextObj), params=None)
target_table=nextObj
total_field_count = 0
global ddl_str
ddl_str = ''
ddl_str += 'CREATE TABLE '+ igersSchema+"."+target_table +' ('
for x in md["fields"]:
#print x["name"]
if col_remove:
if x["name"].lower() in [element.lower() for element in col_remove]:
print("Skipping: {}".format(x["name"]))
continue
ddl_str += x["name"] + ' ' + map_data_type(x["type"],x["length"])
if x["name"] == 'Id':
ddl_str += ' NOT NULL DISTKEY'
ddl_str += ","
total_field_count = total_field_count + 1
ddl_str = ddl_str[:-1]
ddl_str += ')'
logging.info('DDL Successfully created...')
# print("Total Field Count: "+str(total_field_count))
return ddl_str
#########################
# Create Table from DDL, execute the copy query and update permissions #
#########################
def rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers) :
today = datetime.date.today()
dated_path = today.strftime('%Y/%m/%d')
perms_statement = ''
drop_table = '''
DROP TABLE IF EXISTS {}.{} CASCADE
'''.format(igersSchema, nextObj)
loadQuery = '''
COPY {}.{}
FROM '{}{}/{}/'
iam_role 'arn:aws:iam::087024238921:role/LambdaFullAccessRole'
TRUNCATECOLUMNS
FORMAT AS JSON 'auto'
'''.format(igersSchema, nextObj, s3Destination, s3Path.format(nextObj), dated_path)
grantPerms = '''
GRANT SELECT ON {}.{} TO GROUP {}
'''
with igers.connect() as conn:
try :
conn.execute(drop_table)
print('completed drop table')
conn.execute(ddl_str)
print('completed create table')
conn.execute(loadQuery)
print('completed load query')
for row in range(len(groupPerms)) :
perms_statement = grantPerms.format(groupPerms['namespace'].iloc[row],groupPerms['item'].iloc[row],groupPerms['groname'].iloc[row])
conn.execute(perms_statement)
print('completed grant group permissions')
conn.close()
except exc.SQLAlchemyError as e :
print(e)
######################################
# Update Next Object and Write to S3 #
######################################
def updateNextObj(nextObj, s3NextObjDestination) :
objectsList = []
objectsFile = s3.get_s3_file('s3://test-sfdc-sds-team/sfdc-etl-jp-test/sfdc_etl/objects/sfdc_etl_objects.txt',os.path.abspath(os.path.join(TMP_PATH,'sfdc_etl_objects.txt')))
localNobjTempFile = os.path.abspath(os.path.join(TMP_PATH,'next_object.txt'))
nextObjText = ''
with open (objectsFile, 'r') as objs :
for line in objs :
objectsList.append(line.strip("\n"))
for i in range(len(objectsList)-1) :
if objectsList[i].lower() == nextObj :
nextObjText = objectsList[i+1]
print(nextObjText)
with open (localNobjTempFile, 'w') as f :
f.write(nextObjText)
s3.put_s3_file(localNobjTempFile,s3NextObjDestination.format('next_object.txt'))
print('completed Updating the next object')
################################################
# Test if the object exists and execute #
################################################
try :
getattr(sf,nextObj).describe()
except (SalesforceGeneralError,
SalesforceMoreThanOneRecord,
SalesforceMalformedRequest,
SalesforceExpiredSession,
SalesforceRefusedRequest,
SalesforceResourceNotFound) as e :
print(e.content[0]['message'] +', writing next object and ending')
updateNextObj(nextObj, s3NextObjDestination)
sys.exit(1)
rsGetGroupPerms(igers, nextObj)
sfToS3(fileCount, sf, nextObj)
get_ddl(sf, nextObj, igersSchema, col_remove=None)
rs_operations(ddl_str, groupPerms, igersSchema, nextObj, s3Destination, s3Path, igers)
updateNextObj(nextObj, s3NextObjDestination)

The problem is that the following line
sfdc_password = boto3.client('kms').decrypt(CiphertextBlob=b64decode(os.environ["L_PASSWORD"]))['Plaintext']
is returning a bytes object, and in the other hand, the Salesforce class expects a string.
Solution:
try :
sf = Salesforce(username=sfdc_username,
password=sfdc_password.decode("utf-8"),
security_token=sfdc_security_token,
instance_url=sfdc_salesforce_instance_url,
domain=sfdc_sandbox)
print('salesforce login good')
Note that the sfdc_password variable is being converted to string using the .decode bytes method

Related

Unable to run Python script in PowerBI

I need to connect with API which requires a SHA from a token and session id.
I call for token and session id, however to make a call with that I need a Python script to run. I wanted to use the table from the authorization call as a parameter in the Python script:
let
api_key = "_api_key",
Source = Json.Document(Web.Contents("https://xxxxxxxxxxx/api/public/json/_Api/auth_call/_api_method/getToken?_api_auth=key", [ApiKeyName = "api_key"])),
#"Converted to Table" = Record.ToTable(Source),
Value = #"Converted to Table"{0}[Value],
#"Converted to Table1" = Record.ToTable(Value),
#"Transposed Table" = Table.Transpose(#"Converted to Table1"),
#"Promoted Headers" = Table.PromoteHeaders(#"Transposed Table", [PromoteAllScalars=true]),
#"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers",{{"token", type text}, {"session_id", type text}}),
#"Run Python script" = Python.Execute("# 'dataset' holds the input data for this script#(lf)#(lf)import requests#(lf)import pandas as pd#(lf)from hashlib import sha1#(lf)#(lf)def LSGetData(sufix, limit=all, object_type='contact', additionallData ={}):#(lf) host = ""https://xxxxxxxxxxxxx/api/public/json/""#(lf) authHost = ""_Api/auth_call/_api_method/getToken""#(lf) api_key = 'xxxxxxxxxxxxx'#(lf) api_secret = 'xxxxxxxxxxxxx'#(lf)#(lf) data = {'_api_auth': ""key"", '_api_key': api_key}#(lf)#(lf) token = dataset['token']#(lf) session_id = dataset['session_id']#(lf)#(lf) api_sha = sha1((api_key + token + api_secret).encode('utf-8')).hexdigest()#(lf) data = {'_api_auth': ""key"", '_api_key': api_key, '_api_sha': api_sha, '_api_session': session_id, 'limit': limit}#(lf)#(lf) if sufix == 'kontakty':#(lf) data['type'] = 'contact'#(lf) elif sufix == 'firmy':#(lf) data['type'] = 'company'#(lf) elif sufix == 'przestrzenie':#(lf) data['type'] = 'space'#(lf) elif sufix == 'tablica':#(lf) data['object type'] = object_type # 'contact', 'company'. 'deal', 'space'#(lf)#(lf) for key, data in additionallData.keys():#(lf) data[key] = data#(lf)#(lf) lsq = requests.post(host + typeOfData[sufix], data=data)#(lf) return lsq.json()#(lf)#(lf)#(lf)def CreateDataFrame(lsdata):#(lf) if sufix == 'kontakty':#(lf) df = pd.DataFrame(lsdata['data']['contact'])#(lf) elif sufix == 'firmy':#(lf) df = pd.DataFrame(lsdata['data']['company'])#(lf) elif sufix == 'szanse':#(lf) df = pd.DataFrame(lsdata['data']['deal'])#(lf) elif sufix == 'tablica':#(lf) df = pd.DataFrame(lsdata['data']['items'])#(lf) elif sufix == 'zadania':#(lf) df = pd.DataFrame(lsdata['data']['todo'])#(lf) elif sufix == 'przestrzenie':#(lf) df = pd.DataFrame(lsdata['data']['space'])#(lf) return df#(lf)#(lf)#(lf)def DataFiler(dataFrame):#(lf) #filter dataFrame here#(lf) dataFrame = dataFrame[dataFrame.owner_name.notnull()]#(lf) return dataFrame#(lf)#(lf)typeOfData = {#(lf) 'kontakty': 'Contact/getAll',#(lf) 'firmy': 'Contact/getAll',#(lf) 'szanse': 'Deal/getAll',#(lf) 'przestrzenie': 'Space/getAll',#(lf) 'zadania': 'Todo/getTodoObjects',#(lf) 'tablica': 'Wall/getList'#(lf)}#(lf)#(lf)sufix = 'firmy'#(lf)#(lf)lsdata = LSGetData(sufix)#(lf)lsDataFrame = CreateDataFrame(lsdata)#(lf)fileredlsLSDataFrame = DataFiler(lsDataFrame)#(lf)#(lf)print(lsDataFrame)",[dataset=#"Changed Type"])
in
#"Run Python script"
Unfortunately this doesn't work.
How do I use "dataset" in this script to make it work?

Selenium Webscraper Unbound local error in set [ ]

I inherited a bit of Python code and I have no background. I am getting unbound local error and is probably something really silly on my part.
UnboundLocalError Traceback (most recent call last)
Input In [1], in <cell line: 372>()
368 print('\n----------------------------------------------------------------------\n')
369 ##############################################################################
--> 372 main()
Input In [1], in main()
319 Gender = p.getGender()
320 StateRes = p.getStateRes()
--> 321 children = immunte(Fname, Lname, DOB, Gender, driver)
323 if children == []:
324 not_found += 1
Input In [1], in immunte(Fname, Lname, DOB, Gender, driver)
204 except WebDriverException:
205 al = []
--> 207 return al
UnboundLocalError: local variable 'al' referenced before assignment
I have looked at this for a few days now and I can't seem to find an answer to this problem even though it is likely simple. It seems a solution to this error is a global keyword somewhere but I am not sure if this applies here as every time I tried to apply global to al = [] i got an error or same result. Any help is appreciated, thank you.
# Imports
import csv
import datetime
import os
import os.path
import time
import pandas as pd
from dateutil.parser import parse
from pandas import DataFrame
from selenium import webdriver
from selenium.common.exceptions import (NoSuchElementException,
WebDriverException)
from selenium.webdriver.support.select import Select
from selenium.webdriver.chrome.service import Service
##############################################################################
# Classes
class Person(object):
def __init__(self, measYr, MEMID, MIDSK, fname, lname, LNMSFX, DRB, GDR, STRES, meas):
self.measYr = measYr
self.MEMID = MEMID
self.MIDSK = MIDSK
self.fname = fname
self.lname = lname
self.LNMSFX = LNMSFX
self.DRB = DRB
self.GDR = GDR
self.STRES = STRES
self.meas = meas
def GTMESYR(self):
return self.measYr
def GTMEMSKY(self):
return self.MIDSK
def GTMEMID(self):
return self.MEMID
def GTFSNM(self):
return self.fname
def GTLSNM(self):
return self.lname
def GTLSTNMSF(self):
return self.LNMSFX
def GTDRB(self):
return self.DRB
def GTGDR(self):
return self.GDR
def getStateRes(self):
return self.STRES
def getMeas(self):
return self.meas
###############################################################################
# Function
def is_date(string, fuzzy=False):
try:
parse(string, fuzzy=fuzzy)
return True
except ValueError:
return False
def immunte(Fname, Lname, DRB, GDR, driver):
# work on search button
driver.find_element_by_xpath("//*[#id='edittesttest']").click()
# work on
lastname = driver.find_element_by_id("LM")
lastname.clear()
lastname.send_keys(Lname)
# work on
firstname = driver.find_element_by_id("FN")
firstname.clear()
firstname.send_keys(Fname)
# work on
birthdate = driver.find_element_by_id("DRB")
birthdate.clear()
birthdate.send_keys(DRB)
# work on advanced search button to input GDR
try:
driver.find_element_by_xpath(
"//*[#id='queryResultsForm']/table/tbody/tr/td[2]/table/tbody/tr[3]/td/table/tbody/tr[2]/td[5]/input").click()
# work on GDR selection button
obj = Select(driver.find_element_by_name("OSC"))
if GDR == 'W':
obj.select_by_index(2)
elif GDR == 'S':
obj.select_by_index(1)
else:
obj.select_by_index(3)
# work on search button
driver.find_element_by_name("cmdFindClient").click()
# two scenarios could emerge as a search result: 1, not found 2, the found
if "No were found for the requested search criteria" in driver.find_element_by_id("queryResultsForm").text:
al = []
elif "the found" in driver.find_element_by_id("queryResultsForm").text:
# work on button
driver.find_element_by_xpath(
"//*[#id='queryResultsForm']/table[2]/tbody/tr[2]/td[2]/span/label").click()
# work on pt button
driver.find_element_by_id("redirect1").click()
# work on getting rid of opt out - header
header = driver.find_elements_by_class_name("large")[1].text
if "Access Restricted" in header:
print(Fname+' '+Lname+' '+" Opt out")
al = []
elif "Information" in header:
# find the first line
first = driver.find_element_by_xpath(
"//*[#id='container']/table[3]/tbody/tr/td[2]/table[2]/tbody/tr/td/table/tbody/tr[1]/td/table/tbody/tr[5]/td[1]").text
if (first == None):
al = []
else:
even = driver.find_elements_by_class_name("evenRow")
odd = driver.find_elements_by_class_name("oddRow")
o = []
e = []
for value in odd:
o.append(value.text)
for value in even:
e.append(value.text)
length = len(o)
i = 0
al = []
# merge odd and even row together and remove the row marked with complete
while i < length:
al.append(e[i])
al.append(o[i])
i = i+1
# parse each row of information with a comma, add group name for row that are without one
for x in range(len(al)):
if is_date(al[x][1:10]):
al[x] = al[x].replace(' ', ',')
al[x] = al[x].replace(',of,', ' of ')
al[x] = group + ',' + al[x][2:]
else:
al[x] = al[x].replace(' ', ',')
al[x] = al[x].replace(',of,', ' of ')
g = al[x].split(',', 1)
group = g[0]
# work on returning to home page
driver.find_element_by_xpath(
"//*[#id='headerMenu']/table/tbody/tr/td[2]/div/a").click()
except NoSuchElementException:
al = []
except WebDriverException:
al = []
return al
def main():
# Welcome message and input info
print('\nThis is the test.')
print('You will be prompted to type .')
print('If you need to exit the script and stop its process press \'CTRL\' + \'C\'.')
file = input("\nEnter file name: ")
user = input("\nEnter username: ")
pw = input("\nEnter password: ")
date = str(datetime.date.today())
# output file
fileOutputName = 'FILELIST' + \
date.replace('-', '_') + '.csv'
fileOutputNameNotFound = 'NOTFOUNDFILELIST' + \
date.replace('-', '_') + '.csv'
fileOutput = open(fileOutputName, 'w')
fileOutputNotFound = open(fileOutputNameNotFound, 'w')
fileOutput.write('MEAS_YR,MEMLFIDSK,MEMLFID,MEMB_FRST_NM,MEMLSTNM,' +
'DRB,GNDR,RSDNC_STATE,IMUN_RGSTRY_STATE,VCCN_GRP,VCCN_ADMN_DT,DOSE_SERIES,' +
'BRND_NM,DOSE_SIZE,RCTN\n')
fileOutputNotFound.write('MEAS_YR,MEMLFIDSK,MEMLFID,MEMB_FRST_NM,MEMLSTNM,MEMB_SUFFIX,' +
'DRB,GNDR,RSDNC_STATE,IMUN_RGSTRY_STATE,VCCN_GRP,VCCN_ADMN_DT,DOSE_SERIES,' +
'BRND_NM,DOSE_SIZE,RCTN\n')
# If the file exists
try:
os.path.isfile(file)
except:
print('File Not Found\n')
df = pd.read_excel(file)
# create array of People objects and member ID
peopleArray = []
memberIdArray = []
df.dropna()
total = len(df)
not_found = 0
found = 0
# assign each record in the data frame into Person class
for i in range(total):
measYr = str(df.loc[i, "MEAS_YR"])
MEMID = str(df.loc[i, "MEMLFID"])
MIDSK = str(df.loc[i, "MEMLFIDSK"])
fname = str(df.loc[i, "MEMLFID"])
lname = str(df.loc[i, "MEMLSTNM"])
inputDate = str(df.loc[i, "DRB"])
# If date is null then assign an impossible date
if not inputDate:
DRB = '01/01/1900'
if '-' in inputDate:
DRB = datetime.datetime.strptime(
inputDate, "%Y-%m-%d %H:%M:%S").strftime('%m/%d/%Y')
else:
DRB = datetime.datetime.strptime(
str(df.loc[i, "DRB"]), '%m/%d/%Y').strftime('%m/%d/%Y')
GDR = str(df.loc[i, "GDR"])
STRES = str(df.loc[i, "STATE_RES"])
meas = str(df.loc[i, "MEAS"])
p = Person(measYr, MEMID, MIDSK, fname, lname,
LNMSFX, DRB, GDR, STRES, meas)
# append array
m = df.loc[i, "MEMLFID"]
if (m not in memberIdArray):
peopleArray.append(p)
memberIdArray.append(m)
# work on setting up driver for md immunet - mac forward slash/windows double backward slash
PATH = os.getcwd()+'\\'+'chromedriver'
s = Service(PATH)
driver = webdriver.Chrome(service = s)
driver.get("https://www.wow2.pe.org/prd-IR/portalmanager.do")
# work on login ID
username = driver.find_element_by_id("userField")
username.clear()
username.send_keys(user)
# work on password
password = driver.find_element_by_name("password")
password.clear()
password.send_keys(pw)
# work on getting to home page - where loop will start
driver.find_element_by_xpath(
"//*[#id='loginButtonForm']/div/div/table/tbody/tr[3]/td[1]/input").click()
for n in range(total):
p = peopleArray[n]
recordToWrite = ''
print('Looking up: ' + str(n)+' ' +
p.GTLSNM() + ', ' + p.GTFSNM())
MeasYr = p.GTMESYR()
MIDSK = p.GTMEMSKY()
MEMID = p.GTMEMID()
Fname = p.GTFSNM()
Lname = p.GTLSNM()
DRB = str(p.GTDRB())
GDR = p.GTGDR()
STRES = p.getStateRes()
children = immunte(Fname, Lname, DRB, GDR, driver)
if children == []:
not_found += 1
recordToWrite = MeasYr+','+MIDSK+','+MEMID+',' + Fname + \
','+Lname + ',' + ' ' + ','+DRB+','+GDR+','+STRES+','+'MD'
fileOutputNotFound.write(recordToWrite + '\n')
elif children != []:
found += 1
for x in range(len(children)):
data_element = children[x].split(",")
# if the admin date is not valid
if is_date(data_element[1]) and is_date(data_element[3]):
children[x] = ''
elif is_date(data_element[1]) and data_element[2] == 'NOT' and data_element[3] == 'VALID':
children[x] = ''
elif is_date(data_element[1]) and is_date(data_element[3]) == False:
if data_element[5] != 'No':
data_element[4] = data_element[5]
data_element[5] = ''
children[x] = ','.join(data_element[0:6])
else:
data_element[5] = ''
children[x] = ','.join(data_element[0:6])
else:
children[x] = ''
for x in range(len(children)):
if children[x] != '':
recordToWrite = MeasYr+','+MIDSK+','+MEMID+',' + \
Fname+','+Lname + ','+DRB+','+GDR+','+STRES+','+'MD'
recordToWrite = recordToWrite+','+children[x]
fileOutput.write(recordToWrite + '\n')
n = +1
fileOutput.close()
fileOutputNotFound.close()
print('\n--------------------------------OUTPUT--------------------------------')
print("Script completed.")
##############################################################################
main()
You can try this in the 'immunte' function:
def immunte(Fname, Lname, DRB, GDR, driver):
al = []
# work on search button
driver.find_element_by_xpath("//*[#id='edittesttest']").click()
# work on
lastname = driver.find_element_by_id("LM")
lastname.clear()
lastname.send_keys(Lname)
# work on
firstname = driver.find_element_by_id("FN")
firstname.clear()
firstname.send_keys(Fname)
# work on
birthdate = driver.find_element_by_id("DRB")
birthdate.clear()
birthdate.send_keys(DRB)
# work on advanced search button to input GDR
try:
driver.find_element_by_xpath(
"//*[#id='queryResultsForm']/table/tbody/tr/td[2]/table/tbody/tr[3]/td/table/tbody/tr[2]/td[5]/input").click()
# work on GDR selection button
obj = Select(driver.find_element_by_name("OSC"))
if GDR == 'W':
obj.select_by_index(2)
elif GDR == 'S':
obj.select_by_index(1)
else:
obj.select_by_index(3)
# work on search button
driver.find_element_by_name("cmdFindClient").click()
# two scenarios could emerge as a search result: 1, not found 2, the found
if "No were found for the requested search criteria" in driver.find_element_by_id("queryResultsForm").text:
return al
if "the found" in driver.find_element_by_id("queryResultsForm").text:
# work on button
driver.find_element_by_xpath(
"//*[#id='queryResultsForm']/table[2]/tbody/tr[2]/td[2]/span/label").click()
# work on pt button
driver.find_element_by_id("redirect1").click()
# work on getting rid of opt out - header
header = driver.find_elements_by_class_name("large")[1].text
if "Access Restricted" in header:
print(Fname+' '+Lname+' '+" Opt out")
return al
if "Information" in header:
# find the first line
first = driver.find_element_by_xpath(
"//*[#id='container']/table[3]/tbody/tr/td[2]/table[2]/tbody/tr/td/table/tbody/tr[1]/td/table/tbody/tr[5]/td[1]"
).text
if (first == None):
return al
even = driver.find_elements_by_class_name("evenRow")
odd = driver.find_elements_by_class_name("oddRow")
o = []
e = []
for value in odd:
o.append(value.text)
for value in even:
e.append(value.text)
length = len(o)
i = 0
al = []
# merge odd and even row together and remove the row marked with complete
while i < length:
al.append(e[i])
al.append(o[i])
i = i+1
# parse each row of information with a comma, add group name for row that are without one
for x in range(len(al)):
if is_date(al[x][1:10]):
al[x] = al[x].replace(' ', ',')
al[x] = al[x].replace(',of,', ' of ')
al[x] = group + ',' + al[x][2:]
else:
al[x] = al[x].replace(' ', ',')
al[x] = al[x].replace(',of,', ' of ')
g = al[x].split(',', 1)
group = g[0]
# work on returning to home page
driver.find_element_by_xpath(
"//*[#id='headerMenu']/table/tbody/tr/td[2]/div/a").click()
except NoSuchElementException:
pass
except WebDriverException:
pass
return al

Duplicate inserts to Database Table

My code works perfectly. What it does is it scans a text file and retrieves user defined information and stores it into access. The only issue I'm having is that is when the code is ran again, it inserts data that has already been inserted.
Is there an easy way to fix this? One thing that the data all have in common is that they all have a timestamp. Is it possible to use this to prevent a duplicate?
cur.execute("SELECT * FROM Main_Setup order by ID");
rows = cur.fetchall()
# outFileName = "out4.txt"
# The regex pattern that is used to extract timestamp from file
# it will search for timestamps like this 2017-06-13-22.31.30.978293
dateRegEx_1 = r"[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}\.[0-9]{2}\.[0-9]{2}\.[0-9]+"
dateRegEx_2 = r"[0-9]{4}-[0-9]{2}-[0-9]{2} \/ [0-9]+:[0-9]+:[0-9]+"
# Compile the pattern
regdExPtrn_1 = re.compile(dateRegEx_1)
regdExPtrn_2 = re.compile(dateRegEx_2)
field_names = ''
# Call insertToAccess function to insert into access database
cur.execute('SELECT * FROM lastran order by ID')
tlran = cur.fetchall()
def insertLastran(JobName,timeStamp):
print(JobName,timeStamp)
def insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names):
# try:
params = (JobName, timeStamp, TableSeq, accessTableValues[0], accessTableValues[1], accessTableValues[2],
accessTableValues[3], accessTableValues[4], \
accessTableValues[5], accessTableValues[6], accessTableValues[7], accessTableValues[8],
accessTableValues[9], field_names)
cur.execute("INSERT INTO Report_Table (Job_Name,Run_TS,Seq_Num,Field1,Field2,Field3,Field4,Field5,Field6,Field7,Field8,Field9,Field10,Field11) VALUES \
(?,?,?,?,?,?,?,?,?,?,?,?,?,?)", params);
conn.commit()
# except:
# conn.rollback()
# Extract the current job fields
def field_Extract(fileLines, fieldsArray, JobName, timeStamp, delimit):
# Empty string in which we will append the
# extracted fields
matchStr = ""
count = 0
TableSeq = 0
accessTableValues = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
field_names = ''
for line in fileLines:
for field in fieldsArray:
if field in line:
key, value = line.split(delimit)
matchStr += key.strip() + "\t\t : " + value.strip() + "\n"
accessTableValues[count] = value.strip()
field_names += key.strip() + ';'
count += 1
if count == 10:
TableSeq += 1
insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names)
count = 0
accessTableValues = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
field_names = ''
if count > 0 and count < 10:
TableSeq += 1
insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names)
# Return the string hold the extracted fields
# Each field is onn a separate line
return matchStr
# Open input and output files
test_file = open(r'C:\Users\cqt7wny\Desktop\new\SAVERS_REPT_DT0712.txt', 'r+')
# outFile = open(outFileName, 'w')
# Initialize used variables
currentJobData = []
startAppending = False
currentJobFound = False # Gaurav note
fields_To_Extract = []
outFileStr = ""
for line in test_file:
if startAppending == False:
# for jobStart in job_start:
if currentJobFound == False:
# Find the job name for the current report and exit the loop #====##########===== Gaurav note
for rowx in rows:
if rowx[1] in line:
currentJobName = rowx[1]
search_Start_Point = rowx[2]
search_End_Point = rowx[3]
fields_To_Extract = rowx[4].split(';')
currentJobFound = True
break
if currentJobName == 'xx':
currentJobName = previousJobName
search_Start_Point = previous_search_Start_Point
search_End_Point = previous_search_End_Point
fields_To_Extract = previous_fields_To_Extract
if search_Start_Point in line:
startAppending = True
if startAppending == True:
currentJobData.append(line)
if len(search_End_Point) > 1 and (search_End_Point in line):
# As a job end found, stop gathering lines
startAppending = False
# Get the time stamp
# We search for it in the currnet line using the previously
# compiled regex pattern
txt = "".join(currentJobData)
# Find all occurance of timestamps on the current job lines
timeStamp = regdExPtrn_1.findall(txt)
# Check that a timestamp found
if len(timeStamp) >= 1:
# If there is more than one timestamp in the current
# job lines, get only the first one
timeStamp = timeStamp[0]
else:
timeStamp = regdExPtrn_2.findall(txt)
if len(timeStamp) >= 1:
timeStamp = timeStamp[0]
# Append the found output to the output string
outFileStr += '########============ NEW JOB STARTS HERE ===========#########'
outFileStr += "\n"
outFileStr += "job# " + str(currentJobName)
outFileStr += "\n"
outFileStr += "Timestamp: " + timeStamp
outFileStr += "\n"
outFileStr = field_Extract(currentJobData, fields_To_Extract, currentJobName, timeStamp, ':')
insertLastran(currentJobName,timeStamp)
print('Current job Name :', currentJobName, ' : ', timeStamp)
print(outFileStr)
previousJobName = currentJobName
previous_search_Start_Point = search_Start_Point
previous_search_End_Point = search_End_Point
previous_fields_To_Extract = fields_To_Extract
currentJobName = 'xx'
currentJobFound = False
currentJobData = []
fields_To_Extract = []
search_Start_Point = ' '
search_End_Point = ' '
test_file.close()
There can be different possible ways which can prevent duplicate inserts .
1.Check if already data is inserted/present in the Database/table.If it does not exists then Insert the data else ignore/do not insert the data .
2.You can Create a UNIQUE CONSTRAINT on which you think the duplicacy exist .
like ALTER TABLE MYTABLE ADD CONSTRAINT constraint1 UNIQUE(column1)

Python tnsnames.ora parser

I need a dict containing all database connections from tnsnames.ora file.
I need to go from this :
(DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=mydbserver.mydomain.com)(PORT=1521)))(CONNECT_DATA=(SID=CATAL)(SERVER=DEDICATED)(SERVICE_NAME=mydb.mydomain.com)))
to this :
{'DESCRIPTION': [{'ADDRESS_LIST': [{'ADDRESS': [{'PROTOCOL': 'TCP'},
{'HOST': 'mydbserver.mydomain.com'},
{'PORT': '1521'}
]
}]
},
{'CONNECT_DATA': [{'SID': 'CATAL'},
{'SERVER': 'DEDICATED'},
{'SERVICE_NAME': 'mydb.mydomain.com'}
]
}
]
}
So far, my code is :
def get_param(param_string):
print("get_param input:", param_string)
if param_string.count("(") != param_string.count(")"):
raise Exception("Number of '(' is not egal to number of ')' : " + str(param_string.count("(")) + " and " + str(param_string.count(")")))
else:
param_string = param_string[1:-1]
splitted = param_string.split("=")
keywork = splitted[0]
if len(splitted) == 2:
return {keywork: splitted[1]}
else:
splitted.remove(keywork)
values = "=".join(splitted)
return {keywork: get_value_list(values)}
def get_value_list(value_string):
print("get_value_list input:", value_string)
to_return = list()
if "=" not in value_string and "(" not in value_string and ")" not in value_string:
to_return.append(value_string)
elif value_string[0] != "(":
raise Exception("[ERROR] Format error '(' is not the first char: " + repr(value_string))
else:
parenth_count = 0
strlen = len(value_string)
current_value = ""
for i in range(0,strlen):
current_char = value_string[i]
current_value += current_char
if current_char == "(":
parenth_count += 1
elif current_char == ")":
parenth_count += -1
if parenth_count == 0:
to_return.append(get_param(current_value))
if i != (strlen - 1):
if value_string[i+1] == "(":
to_return += get_value_list(value_string[i+1:])
else:
raise Exception("Format error - Next char should be a '('. value_string[i+1]:" + repr(value_string[i+1]) )
break
print("get_value_list return:", to_return)
if len(to_return) == 0:
to_return = ""
elif len(to_return) == 1:
to_return = to_return[0]
return to_return
connection_infos = "(DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=mydbserver.mydomain.com)(PORT=1521)))(CONNECT_DATA=(SID=CATAL)(SERVER=DEDICATED)(SERVICE_NAME=mydb.mydomain.com)))"
current_connection = get_param(connection_infos)
print("current_connection:", current_connection)
pprint(current_connection)
And I got this :
{'DESCRIPTION': [{'ADDRESS_LIST': {'ADDRESS': [{'PROTOCOL': 'TCP'},
{'HOST': 'mydbserver.mydomain.com'},
'PORT']
}
},
'CONNECT_DATA'
]
}
So I'm doing something wrong. And I feel I'm doing something too complicated. Would anyone point some mistake I made or help me find a simpler way to do this ?
I have a working code now, but I'm not really satisfied with it. It's too long, not flexible, and will not work with some other tnsnames.ora possible formats :
class Tnsnames():
def __init__(self, file_path, file_name='tnsnames.ora'):
self.file_path = file_path
self.file_name = file_name
self.load_file()
def load_file(self):
try:
fhd = open(os.path.join(self.file_path, self.file_name), 'rt', encoding='utf-8')
except:
raise
else:
#Oracle doc : https://docs.oracle.com/cd/B28359_01/network.111/b28317/tnsnames.htm#NETRF007
file_content = list()
for l in fhd:
l = " ".join(l.split()).strip(" \n")
if len(l) > 0:
if "#" not in l:
file_content.append(l)
fhd.close()
file_content = " ".join(file_content)
connections_list = dict()
current_depth = 0
current_word = ""
current_keyword = ""
name_to_register = ""
is_in_add_list = False
current_addr = dict()
connections_aliases = dict()
stop_registering = False
connections_duplicates = list()
for c in file_content:
if c == " ":
pass
elif c == "=":
current_keyword = str(current_word)
current_word = ""
if current_keyword == "ADDRESS_LIST":
is_in_add_list = True
elif c == "(":
if current_depth == 0:
current_keyword = current_keyword.upper()
names_list = current_keyword.replace(" ","").split(",")
if len(names_list) == 1:
name_to_register = names_list[0]
else:
name_to_register = None
# We use either the first name with at least
# a dot in it, or the longest one.
for n in names_list:
if "." in n:
name_to_register = n
break
else:
name_to_register = max(names_list, key=len)
names_list.remove(name_to_register)
for n in names_list:
if n in connections_aliases.keys():
print("[ERROR] already registered alias:", n,
". Registered to:", connections_aliases[n],
". New:", name_to_register,
". This possible duplicate will not be registered.")
connections_duplicates.append(n)
stop_registering = True
else:
connections_aliases[n] = name_to_register
if not stop_registering:
connections_list[name_to_register] = {"ADDRESS_LIST": list(),
"CONNECT_DATA": dict(),
"LAST_TEST_TS": None}
current_depth += 1
elif current_depth in [1,2,3]:
current_depth += 1
else:
print("[ERROR] Incorrect depth:", repr(current_depth), ". Current connection will not be registered" )
del connections_list[name_to_register]
stop_registering = True
elif c == ")":
if current_depth == 1:
if stop_registering:
stop_registering = False
else:
# Before moving to next connection,
# we check that current connection
# have at least a HOST, and a SID or
# SERVICE_NAME
connection_is_valid = True
if isinstance(connections_list[name_to_register]["ADDRESS_LIST"], dict):
if "HOST" not in connections_list[name_to_register]["ADDRESS_LIST"].keys():
print("[ERROR] Only one address defined, and no HOST defined. Current connection will not be registered:", name_to_register)
connection_is_valid = False
elif isinstance(connections_list[name_to_register]["ADDRESS_LIST"], list):
for current_address in connections_list[name_to_register]["ADDRESS_LIST"]:
if "HOST" in current_address.keys():
break
else:
print("[ERROR] Multiple addresses but none with HOST. Current connection will not be registered:", name_to_register)
connection_is_valid = False
else:
print("[ERROR] Incorrect address format:", connections_list[name_to_register]["ADDRESS_LIST"], " Connection:", name_to_register)
connection_is_valid = False
if not connection_is_valid:
del connections_list[name_to_register]
else:
if "SERVICE_NAME" not in connections_list[name_to_register]["CONNECT_DATA"].keys() and \
"SID" not in connections_list[name_to_register]["CONNECT_DATA"].keys():
print("[ERROR] Missing SERVICE_NAME / SID for connection:", name_to_register)
del connections_list[name_to_register]
elif current_depth == 2:
if is_in_add_list:
is_in_add_list = False
if not stop_registering:
if len(connections_list[name_to_register]["ADDRESS_LIST"]) == 1:
connections_list[name_to_register]["ADDRESS_LIST"] = connections_list[name_to_register]["ADDRESS_LIST"][0]
elif current_depth == 3:
if is_in_add_list:
if not stop_registering:
connections_list[name_to_register]["ADDRESS_LIST"].append(current_addr)
current_addr = dict()
elif current_keyword.upper() in ["SID", "SERVER", "SERVICE_NAME"]:
if not stop_registering:
connections_list[name_to_register]["CONNECT_DATA"][current_keyword.upper()] = current_word.upper()
elif current_depth == 4:
if is_in_add_list:
if not stop_registering:
current_addr[current_keyword.upper()] = current_word.upper()
current_keyword = ""
current_word = ""
current_depth += -1
else:
current_word += c
self.connections = connections_list
self.aliases = connections_aliases
self.duplicates = connections_duplicates
Test tnsnames.ora :
########################################
# This is a sample tnsnames.ora #
########################################
###################################################
# PRODDB
###################################################
proddb.mydbs.domain.com, PRODDB =
(DESCRIPTION =
(ADDRESS_LIST =
(ADDRESS = (PROTOCOL = TCP)(HOST = proddb1.mydbs.domain.com)(PORT = 1522))
(ADDRESS = (PROTOCOL = TCP)(HOST = proddb2.mydbs.domain.com)(PORT = 1522))
(ADDRESS = (PROTOCOL = TCP)(HOST = proddb3.mydbs.domain.com)(PORT = 1522))
(ADDRESS = (PROTOCOL = TCP)(HOST = proddb4.mydbs.domain.com)(PORT = 1522))
)
(CONNECT_DATA =
(SID = PRODDB)
(SERVER = DEDICATED)
(SERVICE_NAME = proddb.mydbs.domain.com)
)
)
###################################################
# DEVDBA : Test database for DBA usage
###################################################
devdba.mydbs.domain.com, DEVDBA =
(DESCRIPTION =
(ADDRESS_LIST =
(ADDRESS = (PROTOCOL = TCP)(HOST = devdba.mydbs.domain.com)(PORT = 1521))
)
(CONNECT_DATA =
(SID = DEVDBA)
)
)
Test code :
from pprint import pprint
from lib_database import Tnsnames
tnsnnames = Tnsnames('/usr/lib/oracle/12.2/client64/network/admin')
print('Connexions:')
pprint(tnsnnames.connections)
print('Aliases:')
pprint(tnsnnames.aliases)
print('Duplicates:')
pprint(tnsnnames.duplicates)
Output :
Connexions:
{'DEVDBA.MYDBS.DOMAIN.COM': {'ADDRESS_LIST': {'HOST': 'DEVDBA.MYDBS.DOMAIN.COM',
'PORT': '1521',
'PROTOCOL': 'TCP'},
'CONNECT_DATA': {'SID': 'DEVDBA'},
'PRODDB.MYDBS.DOMAIN.COM': {'ADDRESS_LIST': [{'HOST': 'PRODDB1.MYDBS.DOMAIN.COM',
'PORT': '1522',
'PROTOCOL': 'TCP'},
{'HOST': 'PRODDB2.MYDBS.DOMAIN.COM',
'PORT': '1522',
'PROTOCOL': 'TCP'},
{'HOST': 'PRODDB3.MYDBS.DOMAIN.COM',
'PORT': '1522',
'PROTOCOL': 'TCP'},
{'HOST': 'PRODDB4.MYDBS.DOMAIN.COM',
'PORT': '1522',
'PROTOCOL': 'TCP'}],
'CONNECT_DATA': {'SERVER': 'DEDICATED',
'SERVICE_NAME': 'PRODDB.MYDBS.DOMAIN.COM',
'SID': 'PRODDB'}}
Aliases:
{'DEVDBA': 'DEVDBA.MYDBS.DOMAIN.COM', 'PRODDB': 'PRODDB.MYDBS.DOMAIN.COM'}
Duplicates:
[]
I could not find other Python parser for tnsnames.ora files. If you know about one, please point me to it.
You can do this with pyparsing:
import pyparsing as pp
# 1. Literals
VAR = pp.Word(pp.alphas + "_", pp.alphanums + "_").setName('variable')
SPACE = pp.Suppress(pp.Optional(pp.White()))
EQUALS = SPACE + pp.Suppress('=') + SPACE
OPEN = pp.Suppress('(') + SPACE
CLOSE = pp.Suppress(')') + SPACE
INTEGER = pp.Optional('-') + pp.Word(pp.nums) + ~pp.Char(".")
INTEGER.setParseAction(lambda t: int(t[0]))
FLOAT = pp.Optional('-') + pp.Word(pp.nums) + pp.Char('.') + pp.Optional(pp.Word(pp.nums))
FLOAT.setParseAction(lambda t: float(t[0]))
STRING = pp.Word(pp.alphanums + r'_.-')
# 2. Literal assignment expressions: (IDENTIFIER = VALUE)
INTEGER_ASSIGNMENT = pp.Group(OPEN + VAR + EQUALS + INTEGER + CLOSE)
FLOAT_ASSIGNMENT = pp.Group(OPEN + VAR + EQUALS + FLOAT + CLOSE)
STRING_ASSIGNMENT = pp.Group(OPEN + VAR + EQUALS + STRING + CLOSE)
# 3. Nested object assignment
ASSIGNMENT = pp.Forward()
NESTED_ASSIGNMENT = pp.Group(OPEN + VAR + EQUALS + ASSIGNMENT + CLOSE)
ASSIGNMENT << pp.OneOrMore(INTEGER_ASSIGNMENT |
FLOAT_ASSIGNMENT |
STRING_ASSIGNMENT |
NESTED_ASSIGNMENT)
# 4. Net service name(s): NAME(.DOMAIN)[, NAME(.DOMAIN)...]
NET_SERVICE_NAME = pp.OneOrMore(pp.Word(pp.alphas + '_' + '.', pp.alphanums + '_' + '.')
+ pp.Optional(pp.Suppress(',')))
# 5. Full TNS entry
TNS_ENTRY = NET_SERVICE_NAME + EQUALS + ASSIGNMENT
Here is some example data:
TNS_NAMES_ORA = """
MYDB =
(DESCRIPTION =
(ADDRESS_LIST =
(ADDRESS = (PROTOCOL = TCP)(HOST = server01)(PORT = 25881))
)
(CONNECT_DATA =
(SID = MYDB01)
)
)
OTHERDB.DOMAIN, ALIAS_FOR_OTHERDB.DOMAIN =
(DESCRIPTION_LIST =
(DESCRIPTION =
(ADDRESS_LIST = (ADDRESS = (PROTOCOL = TCP)
(HOST = server02)
(PORT = 25881)
))
(CONNECT_DATA = (SID = MYDB02))
)
)
"""
This part is a little one-off-ish, but here's an example of putting some additional parsing on top of that in order to extract all the data:
def _parse_addresses(tns_entry, addresses):
"""
Parse ADDRESS keywords from the a TNS entry
:param definition: Unparsed part of the TNS entry
:param addresses: List of addresses parsed
"""
keyword = tns_entry[0]
# Base Case: We found an ADDRESS, so extract the data
# and do not recurse into it
if keyword.upper() == 'ADDRESS':
port = None
host = None
for k, v in tns_entry[1:]:
if k == 'PORT':
port = v
elif k == 'HOST':
host = v
if port is None:
print('WARNING: Ignoring ADDRESS due to missing PORT')
elif host is None:
print('WARNING: Ignoring ADDRESS due to missing HOST')
addresses.append({'host': host, 'port': port})
# Else recursively descend through the definition
for d in tns_entry[1:]:
# Only parse sub-lists, not literals
if isinstance(d, list):
_parse_addresses(d, addresses)
def _parse_connect_data(tns_entry, sids):
"""
Parse CONNECT_DATA keywords from the a TNS entry
:param definition: Unparsed part of the TNS entry
:param sids: List of Oracle SIDs
"""
keyword = tns_entry[0]
# Base Case: We found a CONNECT_DATA, so extract the data
# and do not recurse into it
if keyword.upper() == 'CONNECT_DATA':
sid = None
for k, v in tns_entry[1:]:
if k == 'SID':
sid = v
if sid is None:
print('WARNING: Ignoring CONNECT_DATA due to missing SID')
sids.append(sid)
for d in tns_entry[1:]:
# Only parse sub-lists, not literals
if isinstance(d, list):
_parse_connect_data(d, sids)
def get_connection_info(net_service_name: str, tns_string: str):
"""
Generator over all simple connections inferred from a TNS entry
:param net_service_name: Net service name to return connection info for
:param tns_string: tnsnames.ora file contents
"""
# Parse the TNS entries and keep the requested definition
definition = None
for tokens, _start, _end in TNS_ENTRY.scanString(tns_string):
if net_service_name in tokens.asList()[0]:
definition = tokens.asList()[1]
break
# Check if we found a definition
if definition is None:
raise KeyError(f'No net service named {net_service_name}')
# Look for all the ADDRESS keywords
addresses = []
_parse_addresses(definition, addresses)
# Look for all CONNECT_DATA keywords
sids = []
_parse_connect_data(definition, sids)
# Emit all combinations
for address in addresses:
for sid in sids:
yield {'sid': sid, **address}
# Try it out!
for connection_info in get_connection_info('MYDB', TNS_NAMES_ORA):
print(connection_info)
I wrote a blog post about it here for "fun":
https://unparameterized.blogspot.com/2021/02/parsing-oracle-tns-files-in-python.html
import re
def find_match(tns_regex, y):
x1 = re.match(tns_regex, y, re.M + re.I + re.S)
if x1 is not None:
x1 = x1.groups(1)[0] # Only first match is returned
x1 = x1.strip('\n')
return(x1)
# Removing commented text
with open("C:\\Oracle\\product\\11.2.0\\client_1\\network\\admin\\tnsnames.ora") as tns_file:
with open("test_tns.ora", 'w+') as output:
lines =tns_file.readlines()
for line in lines:
if not line.startswith('#'):
output.write(line)
with open('test_tns.ora') as tns_file:
tnsnames = tns_file.read()
tnsnames1 = re.split(r"\){3,}\n\n", tnsnames)
# Regex matches
tns_name = '^(.+?)\s?\=\s+\(DESCRIPTION.*'
tns_host = '.*?HOST\s?=\s?(.+?)\)'
tns_port = '.*?PORT\s?=\s?(\d+?)\)'
tns_sname = '.*?SERVICE_NAME\s?=\s?(.+?)\)'
tns_sid = '.*?SID\s?=\s?(.+?)\)'
easy_connects = []
for y in tnsnames1:
y = '%s))' % y
l = [find_match(x, y) for x in [tns_name, tns_host, tns_port, tns_sname, tns_sid]]
d = {
'name': l[0],
'host': l[1],
'port': l[2],
'service_name': l[3],
'sid': l[4]
}
easy_connects.append(d)
print(easy_connects)
I have written this small code. It parses tnsnames.ora. It is fast and works great.
def parse_ora_tns_file(fpath,tnskey=None,return_all_keys=False,view_file=False,logger=None)->str:
"""
This function parse oracle tns file
parms: fpath : full file path like
fpath=filepath\tnsnames.ora
param: tnskey: find tns entry for given tns key like tnskey='ABC.WORLD'
param: return_all_keys: if True it will return all tns key names
param: view_file : if True it returns tnsnames.ora as str
"""
clean_tns_file=''
if logger:
logger.info('Reading tnsnames ora file at {} ...'.format(fpath))
with open(fpath,mode='r') as tns_file:
lines =tns_file.readlines()
for line in lines:
if not line.startswith('#'):
clean_tns_file=clean_tns_file+line
#clean file
clean_str = clean_tns_file.replace('\n','')
clean_str = clean_str.replace('\t','')
#replace = with ' ' so later I can split with ' '
#with below it becomes like ABC.WORLD = (DESCRIPTION) to ABC.WORLD ' ' (DESCRIPTION)
clean_str = clean_str.replace('=',' ')
#Below one output to ['ABC.WORLD','',' (DESCRIPTION)']
lstresult= clean_str.split(" ")
#Below code remove extra space from char list like it becomes ['ABC.WORLD','','(DESCRIPTION)']
lstresult = [txt.strip() for txt in lstresult]
#Below code to replace () chars to '' from list so output as ['ABC.WORLD','','DESCRIPTION']
removetable = str.maketrans('', '', '()')
out_list = [s.translate(removetable) for s in lstresult]
#Below code remove any empty list items so output as ['ABC.WORLD','DESCRIPTION']
out_list = list(filter(None, out_list))
#find index of DESCRIPTION words
indices = [i for i, x in enumerate(out_list ) if x.upper() == "DESCRIPTION"]
tns_keys= ""
for i in indices:
#use index of DESCRIPTION to tns keys which is required for regx pattern below.
tns_keys=tns_keys+out_list[i-1]+"|"
if return_all_keys:
return tns_keys.replace('|',',')[:-1]
if logger:
logger.info('Keys found in tnsnames ora: {}'.format(tns_keys))
regex = r"\s+(?!^({}))".format(tns_keys)
result = re.sub(regex, '', clean_tns_file, 0, re.MULTILINE)
if view_file:
return result
if result:
for match in re.finditer(r'^((?:{}))(.*)'.format(tns_keys), result, re.MULTILINE):
if match.group(1) == tnskey:
#removing = sign from start of entry
if logger:
logger.info('Found tns entry: {} {}'.format(match.group(1),match.group(2)))
return match.group(2)[1:]
if logger:
logger.info('No tns entry found for {}'.format(tnskey))
return None

Parsing Event Information Table files

My dreambox compatible video recorder stores event information table ".eit" files with every recording. I'd like to work with this information to rearrange my recordings.
A similar question came up in http://www.i-have-a-dreambox.com/wbb2/thread.php?threadid=186234&sid=3b36acb1ba62e4724cb47216ce08a564
The format seems to be a binary format as outlined in:
https://de.wikipedia.org/wiki/Event_Information_Table
and
http://www.etsi.org/deliver/etsi_en/300400_300499/300468/01.14.01_60/en_300468v011401p.pdf
I am now looking for a parser for such files. Where could I find one that works with files and does not assume a broadcast stream as input?
What did i try so far?
I searched the web and found the following links and pointers:
There seems to be a java library
https://docs.oracle.com/javame/config/cdc/opt-pkgs/api/jsr927/javax/tv/service/guide/ProgramEvent.html
which is part of the JSR 927 https://jcp.org/en/jsr/detail?id=927
specification.
As it looks this libary is only available for Java-ME see https://en.wikipedia.org/wiki/Java_TV
If found some dvb related EIT code snippets e.g.
https://github.com/jinfeng-geeya/3202C/blob/master/SRC/lib/libdvb/libepg/eit_parser.c
or
http://linuxtv.org/docs/libdvbv5/eit_8h.html
as part of the Kamaelia DVB Tools Project http://www.kamaelia.org/Developers/Projects/DVBTools.html there seems to be a python solution:
http://pydoc.net/Python/Kamaelia/0.6.0/Kamaelia.Device.DVB.Parse.ParseEventInformationTable/
The closest I found so far was from a hint athttp://forums.openpli.org/topic/29141-eit-file-format/ which points to:
https://github.com/betonme/e2openplugin-EnhancedMovieCenter/blob/master/src/EitSupport.py
Currently I am pursuing to go from this Open Source Python Code.
This is a Python script that seems to be a valid start.
It's available as opensource at https://github.com/WolfgangFahl/eitparser where you'll find the latest python3 compatible version and documentation.
When you call it with
python EitParser.py SomeEitFile
it will print out the name and description of the eit file.
Add you language codes as you need e.g. from https://github.com/libo/Enigma2/blob/master/lib/python/Tools/ISO639.py
#!/usr/bin/python
# encoding: utf-8
#
# EitSupport
# Copyright (C) 2011 betonme
# Copyright (C) 2016 Wolfgang Fahl
#
# This EITParser is based on:
# https://github.com/betonme/e2openplugin-EnhancedMovieCenter/blob/master/src/EitSupport.py
#
# In case of reuse of this source code please do not remove this copyright.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# For more information on the GNU General Public License see:
# <http://www.gnu.org/licenses/>.
#
import os
import struct
import time
from datetime import datetime
#from Components.config import config
#from Components.Language import language
#from EMCTasker import emcDebugOut
#from IsoFileSupport import IsoSupport
#from MetaSupport import getInfoFile
#def crc32(data):
# poly = 0x4c11db7
# crc = 0xffffffffL
# for byte in data:
# byte = ord(byte)
# for bit in range(7,-1,-1): # MSB to LSB
# z32 = crc>>31 # top bit
# crc = crc << 1
# if ((byte>>bit)&1) ^ z32:
# crc = crc ^ poly
# crc = crc & 0xffffffffL
# return crc
decoding_charSpecHR = {u'Ć': u'\u0106', u'æ': u'\u0107', u'®': u'\u017D', u'¾': u'\u017E', u'©': u'\u0160', u'¹': u'\u0161', u'Č': u'\u010C', u'è': u'\u010D', u'ð': u'\u0111'}
decoding_charSpecCZSK = {u'Ï'+u'C': u'Č',u'Ï'+u'E': u'Ě',u'Ï'+u'L': u'Ľ',u'Ï'+u'N': u'Ň',u'Ï'+u'R': u'Ř',u'Ï'+u'S': u'Š',u'Ï'+u'T': u'Ť',u'Ï'+u'Z': u'Ž',u'Ï'+u'c': u'č',u'Ï'+u'd': u'ď',u'Ï'+u'e': u'ě',u'Ï'+u'l': u'ľ', u'Ï'+u'n': u'ň',
u'Ï'+u'r': u'ř',u'Ï'+u's': u'š',u'Ï'+u't': u'ť',u'Ï'+u'z': u'ž',u'Ï'+u'D': u'Ď',u'Â'+u'A': u'Á',u'Â'+u'E': u'É',u'Â'+u'I': u'Í',u'Â'+u'O': u'Ó',u'Â'+u'U': u'Ú',u'Â'+u'a': u'á',u'Â'+u'e': u'é',u'Â'+u'i': u'í',u'Â'+u'o': u'ó',
u'Â'+u'u': u'ú',u'Â'+u'y': u'ý',u'Ã'+u'o': u'ô',u'Ã'+u'O': u'Ô',u'Ê'+u'u': u'ů',u'Ê'+u'U': u'Ů',u'È'+u'A': u'Ä',u'È'+u'E': u'Ë',u'È'+u'I': u'Ï',u'È'+u'O': u'Ö',u'È'+u'U': u'Ü',u'È'+u'Y': u'Ÿ',u'È'+u'a': u'ä',u'È'+u'e': u'ë',
u'È'+u'i': u'ï',u'È'+u'o': u'ö',u'È'+u'u': u'ü',u'È'+u'y': u'ÿ'}
def convertCharSpecHR(text):
for i, j in decoding_charSpecHR.iteritems():
text = text.replace(i, j)
return text
def convertCharSpecCZSK(text):
for i, j in decoding_charSpecCZSK.iteritems():
text = text.replace(i, j)
return text
def parseMJD(MJD):
# Parse 16 bit unsigned int containing Modified Julian Date,
# as per DVB-SI spec
# returning year,month,day
YY = int( (MJD - 15078.2) / 365.25 )
MM = int( (MJD - 14956.1 - int(YY*365.25) ) / 30.6001 )
D = MJD - 14956 - int(YY*365.25) - int(MM * 30.6001)
K=0
if MM == 14 or MM == 15: K=1
return (1900 + YY+K), (MM-1-K*12), D
def unBCD(byte):
return (byte>>4)*10 + (byte & 0xf)
#from Tools.ISO639 import LanguageCodes
# -*- coding: iso-8859-2 -*-
LanguageCodes = { }
LanguageCodes["deu"] = LanguageCodes["ger"] = LanguageCodes["de"] = ("German", "Germanic")
LanguageCodes["fra"] = LanguageCodes["fre"] = LanguageCodes["fr"] = ("French", "Romance")
def language_iso639_2to3(alpha2):
ret = alpha2
if alpha2 in LanguageCodes:
language = LanguageCodes[alpha2]
for alpha, name in LanguageCodes.items():
if name == language:
if len(alpha) == 3:
return alpha
return ret
#TEST
#print LanguageCodes["sv"]
#print language_iso639_2to3("sv")
# Eit File support class
# Description
# http://de.wikipedia.org/wiki/Event_Information_Table
class EitList():
EIT_SHORT_EVENT_DESCRIPTOR = 0x4d
EIT_EXTENDED_EVENT_DESCRIPOR = 0x4e
def __init__(self, path=None):
self.eit_file = None
#TODO
# The dictionary implementation could be very slow
self.eit = {}
self.iso = None
self.__newPath(path)
self.__readEitFile()
def __newPath(self, path):
name = None
if path:
if self.eit_file != path:
self.eit_file = path
def __mk_int(self, s):
return int(s) if s else 0
def __toDate(self, d, t):
if d and t:
#TODO Is there another fast and safe way to get the datetime
try:
return datetime(int(d[0]), int(d[1]), int(d[2]), int(t[0]), int(t[1]))
except ValueError:
return None
else:
return None
##############################################################################
## Get Functions
def getEitsid(self):
return self.eit.get('service', "") #TODO
def getEitTsId(self):
return self.eit.get('transportstream', "") #TODO
def getEitWhen(self):
return self.eit.get('when', "")
def getEitStartDate(self):
return self.eit.get('startdate', "")
def getEitStartTime(self):
return self.eit.get('starttime', "")
def getEitDuration(self):
return self.eit.get('duration', "")
def getEitName(self):
return self.eit.get('name', "").strip()
def getEitDescription(self):
return self.eit.get('description', "").strip()
# Wrapper
def getEitShortDescription(self):
return self.getEitName()
def getEitExtendedDescription(self):
return self.getEitDescription()
def getEitLengthInSeconds(self):
length = self.eit.get('duration', "")
#TODO Is there another fast and safe way to get the length
if len(length)>2:
return self.__mk_int((length[0]*60 + length[1])*60 + length[2])
elif len(length)>1:
return self.__mk_int(length[0]*60 + length[1])
else:
return self.__mk_int(length)
def getEitDate(self):
return self.__toDate(self.getEitStartDate(), self.getEitStartTime())
##############################################################################
## File IO Functions
def __readEitFile(self):
data = ""
path = self.eit_file
#lang = language.getLanguage()[:2]
lang = language_iso639_2to3( "de" )
#print lang + str(path)
if path and os.path.exists(path):
#print "Reading Event Information Table " + str(path)
# Read data from file
# OE1.6 with Pyton 2.6
#with open(self.eit_file, 'r') as file: lines = file.readlines()
f = None
try:
f = open(path, 'rb')
#lines = f.readlines()
data = f.read()
except Exception, e:
emcDebugOut("[META] Exception in readEitFile: " + str(e))
finally:
if f is not None:
f.close()
# Parse the data
if data and 12 <= len(data):
# go through events
pos = 0
e = struct.unpack(">HHBBBBBBH", data[pos:pos+12])
event_id = e[0]
date = parseMJD(e[1]) # Y, M, D
time = unBCD(e[2]), unBCD(e[3]), unBCD(e[4]) # HH, MM, SS
duration = unBCD(e[5]), unBCD(e[6]), unBCD(e[7]) # HH, MM, SS
running_status = (e[8] & 0xe000) >> 13
free_CA_mode = e[8] & 0x1000
descriptors_len = e[8] & 0x0fff
if running_status in [1,2]:
self.eit['when'] = "NEXT"
elif running_status in [3,4]:
self.eit['when'] = "NOW"
self.eit['startdate'] = date
self.eit['starttime'] = time
self.eit['duration'] = duration
pos = pos + 12
short_event_descriptor = []
short_event_descriptor_multi = []
extended_event_descriptor = []
extended_event_descriptor_multi = []
component_descriptor = []
content_descriptor = []
linkage_descriptor = []
parental_rating_descriptor = []
endpos = len(data) - 1
while pos < endpos:
rec = ord(data[pos])
length = ord(data[pos+1]) + 2
if rec == 0x4D:
descriptor_tag = ord(data[pos+1])
descriptor_length = ord(data[pos+2])
ISO_639_language_code = str(data[pos+3:pos+5])
event_name_length = ord(data[pos+5])
short_event_description = data[pos+6:pos+6+event_name_length]
if ISO_639_language_code == lang:
short_event_descriptor.append(short_event_description)
short_event_descriptor_multi.append(short_event_description)
elif rec == 0x4E:
ISO_639_language_code = str(data[pos+3:pos+5])
extended_event_description = ""
extended_event_description_multi = ""
for i in range (pos+8,pos+length):
if str(ord(data[i]))=="138":
extended_event_description += '\n'
extended_event_description_multi += '\n'
else:
if data[i]== '\x10' or data[i]== '\x00' or data[i]== '\x02':
pass
else:
extended_event_description += data[i]
extended_event_description_multi += data[i]
if ISO_639_language_code == lang:
extended_event_descriptor.append(extended_event_description)
extended_event_descriptor_multi.append(extended_event_description)
elif rec == 0x50:
component_descriptor.append(data[pos+8:pos+length])
elif rec == 0x54:
content_descriptor.append(data[pos+8:pos+length])
elif rec == 0x4A:
linkage_descriptor.append(data[pos+8:pos+length])
elif rec == 0x55:
parental_rating_descriptor.append(data[pos+2:pos+length])
else:
#print "unsopported descriptor: %x %x" %(rec, pos + 12)
#print data[pos:pos+length]
pass
pos += length
# Very bad but there can be both encodings
# User files can be in cp1252
# Is there no other way?
if short_event_descriptor:
short_event_descriptor = "".join(short_event_descriptor)
else:
short_event_descriptor = "".join(short_event_descriptor_multi)
if short_event_descriptor:
#try:
# short_event_descriptor = short_event_descriptor.decode("iso-8859-1").encode("utf-8")
#except UnicodeDecodeError:
# pass
try:
short_event_descriptor.decode('utf-8')
except UnicodeDecodeError:
try:
short_event_descriptor = short_event_descriptor.decode("cp1252").encode("utf-8")
except UnicodeDecodeError:
# do nothing, otherwise cyrillic wont properly displayed
#short_event_descriptor = short_event_descriptor.decode("iso-8859-1").encode("utf-8")
pass
if (lang == "cs") or (lang == "sk"):
short_event_descriptor = str(convertCharSpecCZSK(short_event_descriptor))
if (lang == "hr"):
short_event_descriptor = str(convertCharSpecHR(short_event_descriptor))
self.eit['name'] = short_event_descriptor
# Very bad but there can be both encodings
# User files can be in cp1252
# Is there no other way?
if extended_event_descriptor:
extended_event_descriptor = "".join(extended_event_descriptor)
else:
extended_event_descriptor = "".join(extended_event_descriptor_multi)
if extended_event_descriptor:
#try:
# extended_event_descriptor = extended_event_descriptor.decode("iso-8859-1").encode("utf-8")
#except UnicodeDecodeError:
# pass
try:
extended_event_descriptor.decode('utf-8')
except UnicodeDecodeError:
try:
extended_event_descriptor = extended_event_descriptor.decode("cp1252").encode("utf-8")
except UnicodeDecodeError:
# do nothing, otherwise cyrillic wont properly displayed
#extended_event_descriptor = extended_event_descriptor.decode("iso-8859-1").encode("utf-8")
pass
if (lang == "cs") or (lang == "sk"):
extended_event_descriptor = str(convertCharSpecCZSK(extended_event_descriptor))
if (lang == "hr"):
extended_event_descriptor = str(convertCharSpecHR(extended_event_descriptor))
self.eit['description'] = extended_event_descriptor
else:
# No date clear all
self.eit = {}
"""Module docstring.
Read Eit File and show the information.
"""
import sys
import getopt
def readeit(eitfile):
eitlist=EitList(eitfile)
print eitlist.getEitName();
print eitlist.getEitStartDate();
print eitlist.getEitDescription();
def main():
# parse command line options
try:
opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
except getopt.error, msg:
print msg
print "for help use --help"
sys.exit(2)
# process options
for o, a in opts:
if o in ("-h", "--help"):
print __doc__
sys.exit(0)
# process arguments
for arg in args:
readeit(arg) # process() is defined elsewhere
if __name__ == "__main__":
main()

Categories

Resources