After executing an API in Django REST, the RAM remains high - python

After executing an API in Django REST in production mode, the following method is called and executed. After each execution of this method, RAM usage goes up and up and never comes back down, and I can't figure out where the problem is.
def download(self):
    try:
        if self.adjust:
            path = Path(UPLOAD_DIR / 'yf_history' / self.market / 'adjusted')
        else:
            path = Path(UPLOAD_DIR / 'yf_history' / self.market)
        path.mkdir(parents=True, exist_ok=True)
        data = yfinance.download(
            progress=False,
            tickers=self.ticker_list,
            period=self.period,
            interval=self.interval_period,
            group_by='ticker',
            auto_adjust=self.adjust,
            prepost=False,
            threads=True,
            proxy=None
        ).T
        for ticker in self.ticker_list:
            try:
                data.loc[(ticker,),].T.dropna().to_csv(path / f'{ticker}{self.suffix}.csv')
            except:
                pass
        del data
    except Exception as error:
        return False, error
    else:
        return True, 'Saved successfully'
I don't have this problem with any other function.
Python==3.9 Django==3.2.9 djangorestframework==3.13.1 yfinance==0.2.10
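One way to narrow down where the memory is being held is to compare allocation snapshots taken around the call. Below is a minimal diagnostic sketch using only the standard library's gc and tracemalloc; traced_download and its downloader argument are placeholders wrapping the method above, not part of the original code.

import gc
import tracemalloc

tracemalloc.start()

def traced_download(downloader):
    # Snapshot allocations before and after the suspect call.
    before = tracemalloc.take_snapshot()
    ok, msg = downloader.download()
    gc.collect()  # collect garbage first, so only objects that are really retained remain
    after = tracemalloc.take_snapshot()
    # Print the source lines that still hold the most memory after the call.
    for stat in after.compare_to(before, 'lineno')[:10]:
        print(stat)
    return ok, msg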

Related

How to fix AWS Lambda function logs error?

I'm trying to extract AWS Trusted Advisor data through a Lambda function (triggered by an event scheduler) and upload it to S3. However, part of the function throws an error. Below is my code:
## libraries
import boto3
import os
import csv
from csv import DictWriter
import time
import traceback

## bucket_name is set as env variable
bucket_name = "test-ta-reports"
fail_msg = 'Pulling Trusted Advisor data failed'
Filename = "/tmp/checks_list.csv"
obj_name = time.strftime("%Y-%m-%d-%H-%M-%S") + '/' + '.csv'

## upload to s3
def s3_upload(bucket_name, Filename, obj_name):
    if obj_name is None:
        obj_name = os.path.basename(Filename)
    try:
        s3 = boto3.client("s3", region_name="eu-west-1")
        response = s3.upload_file(Filename, bucket_name, obj_name)
        return True
    except:
        print('Data failed to upload to bucket')
        traceback.print_exc()
        return False

def lambda_handler(event, context):
    try:
        support_client = boto3.client('support', region_name='us-east-1')
        ta_checks = support_client.describe_trusted_advisor_checks(language='en')
        checks_list = {ctgs: [] for ctgs in list(set([checks['category'] for checks in ta_checks['checks']]))}
        for checks in ta_checks['checks']:
            print('Getting check:' + checks['name'] + checks['category'])
            try:
                check_summary = support_client.describe_trusted_advisor_check_summaries(
                    checkIds=[checks['id']])['summaries'][0]
                if check_summary['status'] != 'not_available':
                    checks_list[checks['category']].append(
                        [checks['name'], check_summary['status'],
                         str(check_summary['resourcesSummary']['resourcesProcessed']),
                         str(check_summary['resourcesSummary']['resourcesFlagged']),
                         str(check_summary['resourcesSummary']['resourcesSuppressed']),
                         str(check_summary['resourcesSummary']['resourcesIgnored'])
                         ])
                else:
                    print("unable to append checks")
            except:
                print('Failed to get check: ' + checks['name'])
                traceback.print_exc()
    except:
        print('Failed! Debug further.')
        traceback.print_exc()

    ## rewrite dict to csv
    with open('/tmp/checks_list.csv', 'w', newline='') as csvfile:
        csv_writer = DictWriter(csvfile, fieldnames=['status', 'hasFlaggedResources', 'timestamp', 'resourcesSummary', 'categorySpecificSummary', 'checkId'])
        csv_writer.writeheader()
        csv_writer.writerow(check_summary)
    return checks_list

    if s3_upload(bucket_name, Filename, obj_name):
        print("Successfully uploaded")

if __name__ == '__main__':
    lambda_handler(event, context)
The error logs
unable to append checks
I'm new to Python, so I'm unsure how to get traceback stacks under the else: statement. Is there any way to modify this code to get traceback logs for the append block? Also, have I made any error in the above code? I'm unable to figure out any. Please help.
response = client.describe_trusted_advisor_check_summaries(
    checkIds=[
        'string',
    ]
)
describe_trusted_advisor_check_summaries() returns summarized results for one or more Trusted Advisor checks. Here you are checking whether check_summary['status'] is not equal to not_available, i.e. whether the alert status of the check is "ok" (green), "warning" (yellow), or "error" (red); in that case you append resourcesProcessed, resourcesFlagged, resourcesSuppressed, and resourcesIgnored to checks_list for further processing.
It's printing
unable to append checks
simply because the status of the check is not_available. It is not an error log. You just need to decide what to do when the check status is not_available.
See the documentation of describe_trusted_advisor_check_summaries. https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/support.html#Support.Client.describe_trusted_advisor_check_summaries
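If you also want the log line to say which check was skipped and why, you can handle the not_available branch explicitly. A sketch is below; summarise_check is a hypothetical helper, and the boto3 calls and response keys are the same ones already used in the question.

def summarise_check(support_client, check):
    summary = support_client.describe_trusted_advisor_check_summaries(
        checkIds=[check['id']])['summaries'][0]
    if summary['status'] == 'not_available':
        # Not an error: this check simply has no data to report yet.
        print('Skipping check "{}" ({}): status is not_available'.format(
            check['name'], check['category']))
        return None
    rs = summary['resourcesSummary']
    return [check['name'], summary['status'],
            str(rs['resourcesProcessed']), str(rs['resourcesFlagged']),
            str(rs['resourcesSuppressed']), str(rs['resourcesIgnored'])]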

How to apply changes to an ontology saved in an SQLite database?

Every time I create a new instance in my ontology, something goes wrong if I try to read from the same database again.
PS: these are all parts of different views in Django.
This is how I am adding instances to my ontology:
# OWLREADY2
try:
    myworld = World(filename='backup.db', exclusive=False)
    kiposcrum = myworld.get_ontology(os.path.dirname(__file__) + '/kipo.owl').load()
except:
    print("Error opening ontology")

# Sync
# --------------------------------------------------------------------------
sync_reasoner()

seed = str(time.time())
id_unico = faz_id(seed)

try:
    with kiposcrum:
        # here I am creating my instance, these are all strings I got from the user
        kiposcrum[input_classe](input_nome + id_unico)
        if input_observacao != "":
            kiposcrum[input_nome + id_unico].Observacao.append(input_observacao)
        sync_reasoner()
        status = "OK!"
    myworld.close()
    myworld.save()
except:
    print("Mistakes were made!")
    status = "Error!"
    input_nome = "Mistakes were made!"
    input_classe = "Mistakes were made!"
finally:
    print(input_nome + " " + id_unico)
    print(input_classe)
    print(status)
This is how I am reading things from it:
# OWLREADY2
try:
    myworld = World(filename='backup.db', exclusive=False)
    kiposcrum = myworld.get_ontology(os.path.dirname(__file__) + '/kipo_fialho.owl').load()
except:
    print("Error")

sync_reasoner()

try:
    with kiposcrum:
        num_inst = 0
        # gets a list of properties given an instance informed by the user
        propriedades = kiposcrum[instancia].get_properties()
        num_prop = len(propriedades)
    myworld.close()
except:
    print("Error")
I am 100% able to read from my ontology, but if I try to create an instance and then read the database again, something goes wrong.
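For comparison, this is the write-then-persist pattern I would expect with owlready2's World API: a minimal sketch in which the class and instance names are hypothetical, and save() is called before close(), the reverse of the order in the code above.

from owlready2 import World, sync_reasoner

myworld = World(filename='backup.db', exclusive=False)
kiposcrum = myworld.get_ontology('kipo.owl').load()  # path shortened for the sketch

with kiposcrum:
    kiposcrum['MinhaClasse']('instancia_' + '123')  # hypothetical class and instance names
    sync_reasoner()

myworld.save()   # write the quadstore changes to backup.db first ...
myworld.close()  # ... and only then close the underlying SQLite connection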

How to merge/delete snapshots with libvirt-python

I'm trying to write some Python code that uses libvirt-python to manage external snapshots with the KVM APIs.
I'm not sure what to do in order to delete (i.e. merge) a given snapshot. Let's say we have two main cases in the following scenario: base <- snap_a <- snap_b <- top (as shown in this webpage).
1. I want to merge snap_a and snap_b (snap_b is the active one). The result should have base as the backing file in the snapshot chain.
2. I want to merge base and snap_a (neither of them is active). The result should keep snap_b as the active snapshot, with base as its backing file.
In the first case I lose my base backing file, while in the second one I get this error: Failed to merge snapshot: invalid argument: active commit requested but '/var/lib/nova/instances/b9c9cd3b-1102-4084-a7a9-6e85c179ac9c/disk.snap_system_1610446663' is not active (snap_system_<numbers> here plays the role of snap_a in my example).
This is my merging function:
def snapshot_merge(instance_name: str, snapshot_name: str):
    # Getting the connection to qemu
    conn, dom = open_qemu_connection(instance_name)
    if conn is not None and dom is not None:
        logging.info(
            'Merging domain {} from snapshot {}'
            .format(instance_name, snapshot_name))
        # Merge snapshot (Block commit, first phase)
        # Get the vda path from my custom function
        disk_path = get_vda_path(dom)
        top = disk_path + '.' + snapshot_name
        disk = 'vda'
        bandwith = 0
        # Check if snapshot in input is the active/current one. Also setting flags
        snapshot = dom.snapshotCurrent()
        if snapshot.getName() == snapshot_name:
            bc_flags = (libvirt.VIR_DOMAIN_BLOCK_COMMIT_ACTIVE
                        + libvirt.VIR_DOMAIN_BLOCK_COMMIT_SHALLOW)
        else:
            bc_flags = 0
        try:
            dom.blockCommit(disk,
                            None,
                            top,
                            bandwith,
                            bc_flags)
            logging.info('Snapshot merged')
        except libvirt.libvirtError as e:
            logging.info('Failed to merge snapshot: %s' % e)
            close_qemu_connection(conn)
            raise HTTPException(status_code=500, detail=ERR_MERGE)
        # Merge snapshot (Pivoting, second phase)
        piv_flags = libvirt.VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT
        # Wait for ready state
        time.sleep(5)
        try:
            dom.blockJobAbort(disk, piv_flags)
            logging.info('Snapshot pivoted')
            snapshot_delete(dom, snapshot_name, top)
            return {"snapshot_name": snapshot_name}
        except libvirt.libvirtError as e:
            logging.info('Failed to pivot snapshot: %s' % e)
            # TODO snapshot_delete(dom, snapshot_name, top)
            raise HTTPException(status_code=500, detail=ERR_PIVOT)
        finally:
            close_qemu_connection(conn)
    else:
        logging.info('Process failed')
        raise HTTPException(status_code=500, detail=ERR_CONN)
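For what it's worth, the second case (committing snap_a down into base while snap_b stays active) can in principle be issued with explicit base and top paths and no active-commit flags. A minimal sketch follows, assuming libvirt-python's blockCommit signature; the connection URI, domain name and image paths are placeholders, not values taken from the code above.

import time
import libvirt

conn = libvirt.open('qemu:///system')
dom = conn.lookupByName('my-instance')  # placeholder domain name

disk = 'vda'
base = '/var/lib/nova/instances/<uuid>/disk'        # the base image
top = '/var/lib/nova/instances/<uuid>/disk.snap_a'  # the snap_a overlay

# Non-active commit: snap_a is folded into base and snap_b is relinked on top of base.
# No VIR_DOMAIN_BLOCK_COMMIT_ACTIVE flag is set, so no pivot (blockJobAbort) is needed.
dom.blockCommit(disk, base, top, 0, 0)

# Optionally wait for the block job to finish before deleting snap_a's metadata.
while dom.blockJobInfo(disk, 0):
    time.sleep(1)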

Concurrent.futures and SQLAlchemy benchmarks vs. synchronous code

I have a project where I need to upload ~70 files to my Flask app. I'm learning concurrency right now, so this seems like perfect practice. When using print statements, the concurrent version of this function is about 2x to 2.5x faster than the synchronous one.
However, when actually writing to the SQLite database, both take about the same amount of time.
Original func:
@app.route('/test_sync')
def auto_add():
    t0 = time.time()
    # Code does not work without changing directory. better option?
    os.chdir('my_app/static/tracks')
    list_dir = os.listdir('my_app/static/tracks')
    # list_dir consists of .mp3 and .jpg files
    for filename in list_dir:
        if filename.endswith('.mp3'):
            try:
                thumbnail = [thumb for thumb in list_dir if thumb == filename[:-4] + '.jpg'][0]
            except Exception:
                print(f'ERROR - COULD NOT FIND THUMB for { filename }')
            resize_image(thumbnail)
            with open(filename, 'rb') as f, open(thumbnail, 'rb') as t:
                track = Track(
                    title=filename[15:-4],
                    artist='Sam Gellaitry',
                    description='No desc.',
                    thumbnail=t.read(),
                    binary_audio=f.read()
                )
        else:
            continue
        db.session.add(track)
    db.session.commit()
    elapsed = time.time() - t0
    return f'Uploaded all tracks in {elapsed} seconds.'
Concurrent func(s):
@app.route('/test_concurrent')
def auto_add_concurrent():
    t0 = time.time()
    MAX_WORKERS = 40
    os.chdir('/my_app/static/tracks')
    list_dir = os.listdir('/my_app/static/tracks')
    mp3_list = [x for x in list_dir if x.endswith('.mp3')]
    with futures.ThreadPoolExecutor(MAX_WORKERS) as executor:
        res = executor.map(add_one_file, mp3_list)
    for x in res:
        db.session.add(x)
    db.session.commit()
    elapsed = time.time() - t0
    return f'Uploaded all tracks in {elapsed} seconds.'
-----
def add_one_file(filename):
    list_dir = os.listdir('/my_app/static/tracks')
    try:
        thumbnail = [thumb for thumb in list_dir if thumb == filename[:-4] + '.jpg'][0]
    except Exception:
        print(f'ERROR - COULD NOT FIND THUMB for { filename }')
    resize_image(thumbnail)
    with open(filename, 'rb') as f, open(thumbnail, 'rb') as t:
        track = Track(
            title=filename[15:-4],
            artist='Sam Gellaitry',
            description='No desc.',
            thumbnail=t.read(),
            binary_audio=f.read()
        )
    return track
Here's the resize_image func for completeness:
def resize_image(thumbnail):
    with Image.open(thumbnail) as img:
        img.resize((500, 500))
        img.save(thumbnail)
    return thumbnail
And benchmarks:
/test_concurrent (with print statements)
Uploaded all tracks in 0.7054300308227539 seconds.
/test_sync
Uploaded all tracks in 1.8661110401153564 seconds.
------
/test_concurrent (with db.session.add/db.session.commit)
Uploaded all tracks in 5.303245782852173 seconds.
/test_sync
Uploaded all tracks in 6.123792886734009 seconds.
What am I doing wrong with this concurrent code, and how can I optimize it?
It seems that the DB writes dominate your timings, and they do not usually benefit from parallelization when writing many rows to the same table, or in the case of SQLite, the same DB. Instead of adding the ORM objects one by one to the session, perform a bulk insert:
db.session.bulk_save_objects(list(res))
In your current code the ORM has to insert the Track objects one at a time during flush, just before the commit, in order to fetch their primary keys after the insert. Session.bulk_save_objects does not do that by default, which means that the objects are less usable afterwards (they are not added to the session, for example), but that does not seem to be an issue in your case.
"I’m inserting 400,000 rows with the ORM and it’s really slow!" is a good read on the subject.
As a side note, when working with files it is best to avoid any TOCTOU situations if possible. In other words, don't use
thumbnail = [thumb for thumb in list_dir if thumb == filename[:-4] + '.jpg'][0]
to check whether the file exists. Use os.path.isfile() or similar if you must, but ideally just try to open the file and handle the error if it cannot be opened:
thumbnail = filename[:-4] + '.jpg'
try:
    resize_image(thumbnail)
except FileNotFoundError:
    print(f'ERROR - COULD NOT FIND THUMB for { filename }')
    # Note that the latter open attempt will fail as well, if this fails
    ...

How to re-use ZK session with KazooClient?

I have different functions calling ZK nodes and making changes to a ZK path. Currently, I am starting a ZK session (zk.start) in each function and calling zk.get / zk.stop. Is there a way to start one ZK session and keep re-using the same session until you are done?
from kazoo.client import KazooClient

zk_hosts = ['host1:2181', 'host2:2181', 'host3:2181']

def get_path_1(path):
    try:
        zk = KazooClient(hosts=zk_hosts, timeout=3000)
        zk.start()
    except Exception as e:
        log.fatal(e)
    if zk.exists(path):
        k = zk.get(path)
    else:
        print("ZK Path {0} does not exist".format(path))
    return k

def get_path_3(path2):
    try:
        zk = KazooClient(hosts=zk_hosts, timeout=3000)
        zk.start()
    except Exception as e:
        log.fatal(e)
    if zk.exists(path2):
        k = zk.get(path2)
    else:
        print("ZK Path {0} does not exist".format(path2))
    return k
Since KazooClient is thread-safe, you could share the instance:
zk_hosts = ['host1:2181', 'host2:2181', 'host3:2181']

zk = KazooClient(hosts=zk_hosts, timeout=3000)
zk.start()

def get_path(path):
    if not zk.exists(path):
        print("ZK path {0} does not exist".format(path))
        return None
    return zk.get(path)

def get_already(path):
    return zk.get(path)
And so on.
A word of advice: things can change between the zk.exists and zk.get calls (for example, if someone else deletes the entry). For this reason, you should test existence and get in a transaction.
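Another way to handle that race, shown here as a sketch rather than a change to the code above, is to drop the separate exists check and let zk.get raise kazoo's NoNodeError when the node is missing:

from kazoo.client import KazooClient
from kazoo.exceptions import NoNodeError

zk = KazooClient(hosts='host1:2181,host2:2181,host3:2181', timeout=3000)
zk.start()

def get_path(path):
    try:
        # get() returns a (data, ZnodeStat) tuple in a single call,
        # so there is no window between an exists() check and the read.
        return zk.get(path)
    except NoNodeError:
        print("ZK path {0} does not exist".format(path))
        return None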
