Is it ok to use a global variable in this case? - python

So I have a scheduled script that runs continuously. Here's that scheduler script:
import os
import schedule
import time

os.chdir("B:\Scheduled_Scripts")

def DSE():
    print("")
    print(colors.OK + '***DSE***')
    try:
        start = time.time()
        exec(open('B:/Scheduled_Scripts/DSE.py').read())
        print("")
        print(colors.OK + "DSE completed successfully in", round(time.time() - start, 2), 'seconds!')
    except Exception as e:
        logger.error(e)
        print(colors.WARNING + 'An error has occurred in DSE.py. It was a ' + type(e).__name__ + '-' + format(e))
        print('')
        pass
It then runs my DSE.py script, where at one point I use a pandas SQL query. I then create a function to start mapping some values.
query = '''(my query)'''
price_levels = pd.read_sql_query(query, KORE, params={my_params})

def get_price_level(seat):
    pl = price_levels[price_levels['seatsid'] == seat]['priceleveldesc'].values()
    return str(pl)

DSE_avs['priceleveldesc'] = DSE_avs['seatsid'].map(get_price_level)
The problem is that, inside the function, it won't recognize that 'price_levels' is defined. How frowned upon would it be if I just made price_levels a global variable?

Use a lambda to pass price_levels as a parameter.
query = '''(my query)'''
price_levels = pd.read_sql_query(query, KORE, params={my_params})

def get_price_level(seat, price_levels):
    pl = price_levels[price_levels['seatsid'] == seat]['priceleveldesc'].values()
    return str(pl)

DSE_avs['priceleveldesc'] = DSE_avs['seatsid'].map(lambda s: get_price_level(s, price_levels))
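An equivalent way to avoid both the global and the lambda is functools.partial, which binds price_levels once and lets Series.map supply only the seat id. A minimal sketch, assuming the same price_levels and DSE_avs as above:

from functools import partial

# price_levels is bound once as a keyword argument; map() passes each
# seatsid value as the remaining positional argument (seat).
get_pl = partial(get_price_level, price_levels=price_levels)
DSE_avs['priceleveldesc'] = DSE_avs['seatsid'].map(get_pl)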


How to apply changes to ontology saved on SQLite Database?

Every time I create a new instance in my ontology, something goes wrong if I try to read from the same database again.
PS: these are all part of different views in Django.
This is how I am adding instances to my ontology:
# OWLREADY2
try:
    myworld = World(filename='backup.db', exclusive=False)
    kiposcrum = myworld.get_ontology(os.path.dirname(__file__) + '/kipo.owl').load()
except:
    print("Error opening ontology")

# Sync
# --------------------------------------------------------------------------
sync_reasoner()

seed = str(time.time())
id_unico = faz_id(seed)

try:
    with kiposcrum:
        # here I am creating my instance, these are all strings I got from the user
        kiposcrum[input_classe](input_nome + id_unico)
        if input_observacao != "":
            kiposcrum[input_nome + id_unico].Observacao.append(input_observacao)
        sync_reasoner()
        status = "OK!"
    myworld.close()
    myworld.save()
except:
    print("Mistakes were made!")
    status = "Error!"
    input_nome = "Mistakes were made!"
    input_classe = "Mistakes were made!"
finally:
    print(input_nome + " " + id_unico)
    print(input_classe)
    print(status)
This is how I am reading from it:
# OWLREADY2
try:
    myworld = World(filename='backup.db', exclusive=False)
    kiposcrum = myworld.get_ontology(os.path.dirname(__file__) + '/kipo_fialho.owl').load()
except:
    print("Error")

sync_reasoner()

try:
    with kiposcrum:
        num_inst = 0
        # gets a list of properties given an instance informed by the user
        propriedades = kiposcrum[instancia].get_properties()
        num_prop = len(propriedades)
    myworld.close()
I am 100% able to read from my ontology, but if I try to create an instance and then try to read the database again, something goes wrong.

Execute statement 500 times ignoring exceptions

import random
import string
from bitcoinrpc.authproxy import AuthServiceProxy, JSONRPCException

rpc_port = 18444
rpc_user = 'user3'
rpc_password = 'pass3'

def wallet_name(size):
    generate_wallet = ''.join([random.choice(string.punctuation + string.ascii_letters)
                               for n in range(size)])
    return generate_wallet

try:
    rpc_connection = AuthServiceProxy("http://%s:%s@127.0.0.1:%s" % (rpc_user, rpc_password, rpc_port))
    i = 0
    while i < 500:
        wallet = wallet_name(20)
        result = rpc_connection.createwallet(wallet)
        i += 1
except Exception:
    pass
I want this code to try to create 500 wallets, but it stops after 2-3. If I print the exception, it shows an error related to an incorrect file name or file path, but the exception should be ignored and the code should try to create a wallet with the next string.
What's the point of creating 500 randomly named wallets when you're not even saving the names? Either way, move the try/except inside the loop, so a single failure doesn't abort the remaining iterations:
for i in range(500):
    wallet = wallet_name(20)
    try:
        result = rpc_connection.createwallet(wallet)
    except:
        pass
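If you do want to keep the names (and see what failed), a small variation under the same rpc_connection might look like this; restricting the name to letters is an assumption, since punctuation such as '/' or '\' in string.punctuation could plausibly explain the file-name/path errors:

created, failed = [], []
for _ in range(500):
    # letters only -- punctuation characters can produce invalid wallet paths (assumption)
    wallet = ''.join(random.choice(string.ascii_letters) for _ in range(20))
    try:
        rpc_connection.createwallet(wallet)
        created.append(wallet)
    except Exception as e:
        failed.append((wallet, str(e)))

print("created:", len(created), "failed:", len(failed))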

How can I get Google Calendar API status_code in Python when get list events?

I am trying to use the Google Calendar API:
events_result = service.events().list(calendarId=calendarId,
                                       timeMax=now,
                                       alwaysIncludeEmail=True,
                                       maxResults=100, singleEvents=True,
                                       orderBy='startTime').execute()
Everything is OK when I have permission to access the calendarId, but it raises an error when I don't have permission for that calendarId.
I built an autoload.py function with the schedule package to load events every 10 minutes; this function stops whenever an error occurs, and I have to use an SSH terminal to restart autoload.py manually.
So I want to know: how can I get the status_code so that, for example, if it is 404, Python will pass and continue?
Answer:
You can use a try/except block within a loop to go through all your calendars, and skip over accesses which throw an error.
Code Example:
To get the error code, make sure to import json:
import json
and then you can get the error code out of the Exception:
calendarIds = ["calendar ID 1", "calendar ID 2", "calendar Id 3", "etc"]

for i in calendarIds:
    try:
        events_result = service.events().list(calendarId=i,
                                              timeMax=now,
                                              alwaysIncludeEmail=True,
                                              maxResults=100, singleEvents=True,
                                              orderBy='startTime').execute()
    except Exception as e:
        print(json.loads(e.content)['error']['code'])
        continue
Further Reading:
Python Try Except - w3schools
Python For Loops - w3schools
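If you would rather not parse the raw error body, the client library raises googleapiclient.errors.HttpError for failed requests, and the HTTP status is available on the exception; a minimal sketch of the same loop (service, now, and calendarIds assumed from above):

from googleapiclient.errors import HttpError

for i in calendarIds:
    try:
        events_result = service.events().list(calendarId=i,
                                              timeMax=now,
                                              maxResults=100, singleEvents=True,
                                              orderBy='startTime').execute()
    except HttpError as e:
        # e.resp.status holds the HTTP status code, e.g. 404
        if e.resp.status == 404:
            continue
        raise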
Thanks to @Rafa Guillermo. I uploaded the full code of the autoload.py program, but I also wanted to know how to get the response JSON or status_code for a Google API request.
The solution:
try:
    code here
except Exception as e:
    continue
import schedule
import time
from datetime import datetime
import dir
import sqlite3
from project.function import cmsCalendar as cal

db_file = str(dir.dir) + '/admin.sqlite'

def get_list_shop_from_db(db_file):
    cur = sqlite3.connect(db_file).cursor()
    query = cur.execute('SELECT * FROM Shop')
    colname = [d[0] for d in query.description]
    result_list = [dict(zip(colname, r)) for r in query.fetchall()]
    cur.close()
    cur.connection.close()
    return result_list

def auto_load_google_database(list_shop, calendarError=False):
    shopId = 0
    for shop in list_shop:
        try:
            shopId = shopId + 1
            print("dang ghi vao shop", shopId)  # "writing to shop", shopId
            service = cal.service_build()
            shop_step_time_db = list_shop[shopId]['shop_step_time']
            shop_duration_db = list_shop[shopId]['shop_duration']
            slot_available = list_shop[shopId]['shop_slots']
            slot_available = int(slot_available)
            workers = list_shop[shopId]['shop_workers']
            workers = int(workers)
            calendarId = list_shop[shopId]['shop_calendarId']
            if slot_available > workers:
                a = workers
            else:
                a = slot_available
            if shop_duration_db == None:
                shop_duration_db = '30'
            if shop_step_time_db == None:
                shop_step_time_db = '15'
            shop_duration = int(shop_duration_db)
            shop_step_time = int(shop_step_time_db)
            shop_start_time = list_shop[shopId]['shop_start_time']
            shop_start_time = datetime.strptime(shop_start_time, "%H:%M:%S.%f").time()
            shop_end_time = list_shop[shopId]['shop_end_time']
            shop_end_time = datetime.strptime(shop_end_time, "%H:%M:%S.%f").time()
            # capacity for each time slot, taken from the JSON file WorkShop.js
            booking_status = cal.auto_load_listtimes(service, shopId, calendarId, shop_step_time, shop_duration, a,
                                                     shop_start_time,
                                                     shop_end_time)
        except Exception as e:
            continue

def main():
    list_shop = get_list_shop_from_db(db_file)
    auto_load_google_database(list_shop)

if __name__ == '__main__':
    main()
    schedule.every(5).minutes.do(main)
    while True:
        # Checks whether a scheduled task
        # is pending to run or not
        schedule.run_pending()
        time.sleep(1)

Python multicore CSV short program, advice/help needed

I'm a hobby coder who started with AHK, then some Java, and now I'm trying to learn Python. I have searched and found some tips, but I have not yet been able to implement them in my own code.
Hopefully someone here can help me; it's a very short program.
I'm using a .txt CSV database with ";" as the separator.
DATABASE EXAMPLE:
Which color is normally a cat?;Black
How tall was the longest man on earth?;272 cm
Is the earth round?;Yes
The database now consists of 20,000 lines, which makes the program too slow; it only uses 25% CPU (1 core).
If I can make it use all 4 cores (100%), I guess it would perform the task a lot faster. The task is basically to compare the clipboard contents with the database and, if there is a match, return an answer. Perhaps I can also split the database into 4 pieces?
The code right now looks like this! It's no more than 65 lines and does its job (but too slowly). I need advice on how to make this process multi-core.
import time
import pyperclip as pp
import pandas as pd
import pymsgbox as pmb
from fuzzywuzzy import fuzz
import numpy

ratio_threshold = 90
fall_back_time = 1
db_file_path = 'database.txt'
db_separator = ';'
db_encoding = 'latin-1'

def load_db():
    while True:
        try:
            # Read and create database
            db = pd.read_csv(db_file_path, sep=db_separator, encoding=db_encoding)
            db = db.drop_duplicates()
            return db
        except:
            print("Error in load_db(). Will sleep for %i seconds..." % fall_back_time)
            time.sleep(fall_back_time)

def top_answers(db, question):
    db['ratio'] = db['question'].apply(lambda q: fuzz.ratio(q, question))
    db_sorted = db.sort_values(by='ratio', ascending=False)
    db_sorted = db_sorted[db_sorted['ratio'] >= ratio_threshold]
    return db_sorted

def write_txt(top):
    result = top.apply(lambda row: "%s" % (row['answer']), axis=1).tolist()
    result = '\n'.join(result)
    fileHandle = open("svar.txt", "w")
    fileHandle.write(result)
    fileHandle.close()
    pp.copy("")

def main():
    try:
        db = load_db()
        last_db_reload = time.time()
        while True:
            # Get contents of clipboard
            question = pp.paste()
            # Rank answer
            top = top_answers(db, question)
            # If answer was found, show results
            if len(top) > 0:
                write_txt(top)
            time.sleep(fall_back_time)
    except:
        print("Error in main(). Will sleep for %i seconds..." % fall_back_time)
        time.sleep(fall_back_time)

if __name__ == '__main__':
    main()
If you could divide the db into four equally large parts, you could process them in parallel like this:
import time
import pyperclip as pp
import pandas as pd
import pymsgbox as pmb
from fuzzywuzzy import fuzz
import numpy
import threading

ratio_threshold = 90
fall_back_time = 1
db_file_path = 'database.txt'
db_separator = ';'
db_encoding = 'latin-1'

def worker(thread_id, question):
    thread_id = str(thread_id)
    db = pd.read_csv(db_file_path + thread_id, sep=db_separator, encoding=db_encoding)
    db = db.drop_duplicates()
    db['ratio'] = db['question'].apply(lambda q: fuzz.ratio(q, question))
    db_sorted = db.sort_values(by='ratio', ascending=False)
    db_sorted = db_sorted[db_sorted['ratio'] >= ratio_threshold]
    top = db_sorted
    result = top.apply(lambda row: "%s" % (row['answer']), axis=1).tolist()
    result = '\n'.join(result)
    fileHandle = open("svar" + thread_id + ".txt", "w")
    fileHandle.write(result)
    fileHandle.close()
    pp.copy("")
    return

def main():
    question = pp.paste()
    threads = []
    for i in range(1, 5):  # four chunk files: database.txt1 .. database.txt4
        t = threading.Thread(target=worker, args=(i, question))
        t.start()
        threads.append(t)
    # join only after all threads have started, so they actually run in parallel
    for t in threads:
        t.join()

if __name__ == '__main__':
    main()
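To produce the four chunk files (database.txt1 .. database.txt4) that this worker expects, one option is a small one-off split; a sketch, assuming database.txt has a header row with 'question' and 'answer' columns as the code above implies:

import numpy as np
import pandas as pd

db = pd.read_csv('database.txt', sep=';', encoding='latin-1')
# np.array_split tolerates a row count that is not divisible by 4
for i, chunk in enumerate(np.array_split(db, 4), start=1):
    # keep the header row so the worker's read_csv still sees 'question'/'answer'
    chunk.to_csv('database.txt' + str(i), sep=';', encoding='latin-1', index=False)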
The solution with multiprocessing:
import time
import pyperclip as pp
import pandas as pd
#import pymsgbox as pmb
from fuzzywuzzy import fuzz
import numpy as np
# pathos uses a better pickle to transfer more complicated objects
from pathos.multiprocessing import Pool
from functools import reduce
import sys
import os
from contextlib import closing

ratio_threshold = 70
fall_back_time = 1
db_file_path = 'database.txt'
db_separator = ';'
db_encoding = 'latin-1'
chunked_db = []
NUM_PROCESSES = os.cpu_count()

def load_db():
    while True:
        try:
            # Read and create database
            db = pd.read_csv(db_file_path, sep=db_separator, encoding=db_encoding)
            db.columns = ['question', 'answer']
            #db = db.drop_duplicates()  # dropped for this experiment
            break
        except:
            print("Error in load_db(). Will sleep for %i seconds..." % fall_back_time)
            time.sleep(fall_back_time)
    # split database into equal chunks
    # (if you have a lot of RAM; otherwise you
    # need to compute ranges in db, something like
    # chunk_size = len(db)//NUM_PROCESSES
    # ranges[i] = (i*chunk_size, (i+1)*chunk_size)
    # and pass ranges into the original db to the processes)
    chunked_db = np.array_split(db, NUM_PROCESSES, axis=0)
    return chunked_db

def top_answers_multiprocessed(question, chunked_db):
    # On unix, python uses 'fork' mode by default,
    # so the process has 'copy-on-change' access to all global variables,
    # i.e. if a process changes something in db, it gets copied into it
    # with a lot of overhead.
    # Unfortunately, I've heard that on Windows only 'spawn' mode, with a full
    # copy of everything, is used.
    # The process pipeline uses pickle, which is quite slow,
    # so on a small database you may not benefit from multiprocessing.
    # If you are going to transfer big objects in or out, look
    # in the direction of multiprocessing.Array.
    # This solution is not fully efficient,
    # as the pool is recreated each time.
    # You could create daemon processes which monitor a
    # Queue for incoming questions, but that is harder to implement.
    def top_answers(idx):
        # question is in the scope of the parent function
        chunked_db[idx]['ratio'] = chunked_db[idx]['question'].apply(lambda q: fuzz.ratio(q, question))
        db_sorted = chunked_db[idx].sort_values(by='ratio', ascending=False)
        db_sorted = db_sorted[db_sorted['ratio'] >= ratio_threshold]
        return db_sorted

    with closing(Pool(processes=NUM_PROCESSES)) as pool:
        # chunked_db is a list of databases;
        # they are in global scope, so we send only an index, because
        # otherwise the whole data set would be pickled
        num_chunks = len(chunked_db)
        # apply function top_answers across generator range(num_chunks)
        res = pool.imap_unordered(top_answers, range(num_chunks))
        res = list(res)
        # now res is a list of dataframes, let's join it
        res_final = reduce(lambda left, right: pd.merge(left, right, on='ratio'), res)
    return res_final

def write_txt(top):
    result = top.apply(lambda row: "%s" % (row['answer']), axis=1).tolist()
    result = '\n'.join(result)
    fileHandle = open("svar.txt", "w")
    fileHandle.write(result)
    fileHandle.close()
    pp.copy("")

def mainfunc():
    global chunked_db
    chunked_db = load_db()
    last_db_reload = time.time()
    print('db loaded')
    last_clip = ""
    while True:
        # Get contents of clipboard
        try:
            new_clip = pp.paste()
        except:
            continue
        if (new_clip != last_clip) and (len(new_clip) > 0):
            print(new_clip)
            last_clip = new_clip
            question = new_clip.strip()
        else:
            continue
        # Rank answer
        top = top_answers_multiprocessed(question, chunked_db)
        # If answer was found, show results
        if len(top) > 0:
            #write_txt(top)
            print(top)

if __name__ == '__main__':
    mainfunc()
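One detail worth double-checking in the sketch above: reduce with pd.merge(left, right, on='ratio') joins the per-chunk results column-wise on matching ratio values, which is probably not the intent if you simply want to stack all matches from every chunk. pd.concat seems closer, e.g.:

res_final = pd.concat(res, ignore_index=True).sort_values(by='ratio', ascending=False)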

Python MySQL TypeError: must be str, not tuple

I have the following code, which connects to a MySQL DB and checks which records are marked as 'active' by a field that contains '1'.
The code then downloads some files using the contents of the vulntype field in the URL for the download.
I think the issue lies with the MySQL query or the for loop to do the downloads.
The code is:
import requests
import os
import MySQLdb
from hurry.filesize import size, si
import logging
import logging.handlers
from logging.config import fileConfig

logging.handlers = logging.handlers
fileConfig('data/logging_config.ini')
logger = logging.getLogger("downloader")

active_vuln_type = None

def get_active_vuln_sets():
    global active_vuln_type
    try:
        logging.info('Connecting to the database...')
        active_vuln_type = con = MySQLdb.connect(*******)
        logging.info('Database connected!')
    except FileNotFoundError as fnf:
        logging.error(fnf)
    except MySQLdb.Error as e:
        logging.error(e)
    try:
        logging.info('Getting active vulnerability sets...')
        cur = con.cursor()
        active = "1"
        cur.execute("""SELECT vulntype FROM vuln_sets WHERE active = %s""", (active))
        active_vuln_type = cur.fetchall()
    except MySQLdb.Error as e:
        logging.exception(e)

def download():
    try:
        logging.info('Downloading vulnerability set files...')
        for x in active_vuln_type:
            basepath = os.path.dirname(__file__)
            filepath = os.path.abspath(os.path.join(basepath, ".."))
            response = requests.get('https://vulners.com/api/'
                                    'v3/archive/collection/?type=' + x)
            with open(filepath + '/vuln_files/' + x + '.zip', 'wb') as f:
                f.write(response.content)
            filesize = size(os.path.getsize
                            (filepath + '/vuln_files/'
                             + x + '.zip'), system=si)
            files = x + ".zip - " + str(filesize)
            logging.info('Downloaded ' + x + '.zip Successfully')
            logging.info('File details: ' + files)
    except Exception as e:
        logging.exception(e)
The traceback for this is:
Traceback (most recent call last):
  File "/home/luke/projects/vuln_backend/vuln_backend/download.py", line 61, in download
    'v3/archive/collection/?type=' + x)
TypeError: must be str, not tuple
active_vuln_type = cur.fetchall()
This line returns a list of rows from the database. Each row is a tuple. Of course you're only selecting one column from the table, but the interface is the same: each row is a tuple, one value per column.
for x in active_vuln_type:
Here x is a tuple like ("vulnerability of some kind",); note the trailing comma. You need to unpack it:
for db_row in active_vuln_type:
    x = db_row[0]  # The first and only column in the row.
Besides that, please consider naming x descriptively, returning active_vuln_type from one procedure and passing it to the other as a parameter. This will make your code less brittle and easier to test.
def get_active_vuln_sets(db_host, db_user, db_password):
    # Do your stuff, using the db credentials....
    return active_vuln_type

def download(active_vuln_type):
    # Same source as now.
Now you can download(get_active_vuln_sets('192.168.100.1', 'joe', 'secret'))
Or you can test / retry the same thing without touching the DB: download([("CVE-xyxxy",), ("CVE-asdfghjkl",)]).
One more thing you can do is to return a clean list of vuln names, not raw DB tuples:
def get_active_vuln_sets(...):
    # .....
    return [x[0] for x in cur.fetchall()]
Now the value returned will be a list of single, directly usable values, so your original download(...) code would work with it.
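Put together, a minimal sketch of that refactor (table and column names taken from the question; the connection parameters are illustrative, not your real credentials):

def get_active_vuln_sets(db_host, db_user, db_password, db_name):
    con = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db=db_name)
    cur = con.cursor()
    cur.execute("""SELECT vulntype FROM vuln_sets WHERE active = %s""", ("1",))
    # return plain strings rather than one-element tuples
    return [row[0] for row in cur.fetchall()]

def download(active_vuln_type):
    for vulntype in active_vuln_type:
        response = requests.get('https://vulners.com/api/'
                                'v3/archive/collection/?type=' + vulntype)
        # ... write vulntype + '.zip' to disk as in the original download() ...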
What you get is a tuple, so you will need to get the first element with x[0]:
def download():
    try:
        logging.info('Downloading vulnerability set files...')
        for x in active_vuln_type:
            basepath = os.path.dirname(__file__)
            filepath = os.path.abspath(os.path.join(basepath, ".."))
            response = requests.get('https://vulners.com/api/'
                                    'v3/archive/collection/?type=' + x[0])
            with open(filepath + '/vuln_files/' + x[0] + '.zip', 'wb') as f:
                f.write(response.content)
            filesize = size(os.path.getsize
                            (filepath + '/vuln_files/'
                             + x[0] + '.zip'), system=si)
            files = x[0] + ".zip - " + str(filesize)
            logging.info('Downloaded ' + x[0] + '.zip Successfully')
            logging.info('File details: ' + files)
    except Exception as e:
        logging.exception(e)
To avoid these types of errors, as far as I know you have to use try: and except.
Fetching the data from another source may not be exactly what you want.
Is your MySQL DB what raises the TypeError? Do check which records are marked as 'active' by filtering on the field that contains '1'.
You can also convert the value explicitly: str(function(argument))
For example:
try:
    print(x)
except TypeError:
    print("Variable x is not defined")
except:
    print("Something else went wrong")
That way you can pass your variable out by replacing it.
I do not think it will work perfectly for what you want, but you have to understand the code as a whole.
If the if and else statements work, then they will produce some results.
Thank you.
