How to delete X% of entries from a DynamoDB Table?

How to delete X% of entries from a DynamoDB Table? - python

I want to remove 10% of the entries from a DDB table every time a script is run. So far, I have created a Python script using boto3 that deletes all items from a DDB table:
import boto3
import sys
# Positional command-line arguments: AWS region, profile name and table name.
src_region = sys.argv[1]
src_profile_name = sys.argv[2]
src_ddb_table = sys.argv[3]

# Create source session.
src_session = boto3.session.Session(profile_name=src_profile_name)
dynamoclient = src_session.client('dynamodb', region_name=src_region)

# Scan the whole table page by page (ConsistentRead is requested).
scan_kwargs = {
    'TableName': src_ddb_table,
    'Select': 'ALL_ATTRIBUTES',
    'ReturnConsumedCapacity': 'NONE',
    'ConsistentRead': True,
}
dynamoresponse = dynamoclient.get_paginator('scan').paginate(**scan_kwargs)

# Delete every scanned item, addressing it by its 'testTableDest' key.
for page in dynamoresponse:
    for item in page['Items']:
        item_key = {'testTableDest': item['testTableDest']}
        dynamoclient.delete_item(TableName=src_ddb_table, Key=item_key)
How can I modify this script to allow the user to select a percentage of entries they want to delete?
Thank you for any help!

If you want to delete them at random and are ok with a non-exact percentage you can do this easily with the python random package.
import boto3
import sys
import random
# Positional command-line arguments: region, profile, table, percentage.
src_region = sys.argv[1]
src_profile_name = sys.argv[2]
src_ddb_table = sys.argv[3]
# Percentage of items to delete, e.g. 20 (fractions such as 12.5 also work).
percent_delete = float(sys.argv[4])
if not 0 <= percent_delete <= 100:
    sys.exit('percentage must be between 0 and 100, got ' + sys.argv[4])

# Create source session.
src_session = boto3.session.Session(profile_name=src_profile_name)
dynamoclient = src_session.client('dynamodb', region_name=src_region)

# Only the key attribute is needed to delete an item, so request just that
# attribute instead of every attribute of every item.
dynamoresponse = dynamoclient.get_paginator('scan').paginate(
    TableName=src_ddb_table,
    ProjectionExpression='#k',
    ExpressionAttributeNames={'#k': 'testTableDest'},
    ReturnConsumedCapacity='NONE',
    ConsistentRead=True,
)

# Each item is deleted independently with probability percent_delete / 100,
# so the deleted share is approximately (not exactly) the requested percentage.
for page in dynamoresponse:
    for item in page['Items']:
        if random.random() * 100 < percent_delete:
            dynamoclient.delete_item(
                Key={'testTableDest': item['testTableDest']},
                TableName=src_ddb_table,
            )
This isn't "perfectly random" but will suffice.

Related

Iterating integers over a method from a range

I'm trying to create a list of CommandLinks in Revit Dynamo using Python, with a for loop that checks the length of the input list and creates an array of CommandLinks from a range generated from that list. Is it possible to insert the integer into the CommandLink method using a for loop?
`
import clr
import sys
import System
clr.AddReference("System.Windows.Forms")
from System.Windows.Forms import Clipboard
# import Revit API
clr.AddReference("RevitAPI")
import Autodesk
from Autodesk.Revit.DB import *
clr.AddReference("RevitAPIUI")
from Autodesk.Revit.UI import (TaskDialog, TaskDialogCommonButtons,
TaskDialogCommandLinkId, TaskDialogResult)
# Dynamo inputs: dialog title, button captions, and the value to output for
# each button (same order as the captions).
title = IN[0]
buttonlists = IN[1]
resultslist = IN[2]

dialog = TaskDialog(title)

# Properties
dialog.MainInstruction = title
# dialog.ExpandedContent = expanded_content

# Settings and buttons
dialog.TitleAutoPrefix = False
dialog.AllowCancellation = True
dialog.CommonButtons = TaskDialogCommonButtons.Cancel
dialog.DefaultButton = TaskDialogResult.Cancel

# Add Command Links.
# The Revit enums expose the members CommandLink1 .. CommandLink4; they cannot
# be built with "CommandLink + n", so look each member up by name instead.
MAX_COMMAND_LINKS = 4
buttonNum = min(len(buttonlists), len(resultslist), MAX_COMMAND_LINKS)
link_outputs = []  # (TaskDialogResult member, value to output) pairs
for n in range(buttonNum):
    member_name = 'CommandLink{}'.format(n + 1)
    dialog.AddCommandLink(getattr(TaskDialogCommandLinkId, member_name),
                          str(buttonlists[n]))
    link_outputs.append((getattr(TaskDialogResult, member_name), resultslist[n]))

result = dialog.Show()

# Translate the dialog result back into the matching entry of resultslist.
OUT = None
if result == TaskDialogResult.Cancel:
    OUT = 'Dialog was Cancelled'
else:
    for link_result, link_output in link_outputs:
        if result == link_result:
            OUT = link_output
            break
`
enter image description here
I need to iterate (n) into the CommandLink numbers and results, so that it creates command links based on the length of the input list.

Python code to send data from I2C sensor to a local SQL database

I am working on a system of I2C sensors connected together and communicating with a Raspberry Pi 4B. With the code below I am able to save the measurements in an Excel file.
I would like to store them in a table inside an SQL database that I have created locally on my laptop. What should I change in this code?
import time
import datetime
import bme680
from as7262 import AS7262
from datetime import date
from openpyxl import load_workbook
# One workbook location, used for loading and for every save, so the data
# always ends up in the file it was read from.
WORKBOOK_PATH = '/mypath/data.xlsx'
# Seconds to wait between two measurements.
SAMPLE_INTERVAL_S = 10

as7262 = AS7262()
as7262.set_gain(1)  # 1, 3.7, 16, 64
as7262.set_integration_time(10)  # 1 to 255 x 2.8ms exposure
# mode 0 - bank 1 only continuous, mode 1 - bank 2 only continuous,
# mode 2 - both banks continuous, mode 3 - both banks single read
as7262.set_measurement_mode(2)  # 2 all colours continuous
as7262.set_illumination_led_current(12.5)  # 12.5mA 25mA 50mA 100mA
as7262.set_illumination_led(0)
sensor_bme680 = bme680.BME680()

# Load the workbook and select the sheet
wb = load_workbook(WORKBOOK_PATH)
sheet = wb['data_log']

try:
    while True:
        values = as7262.get_calibrated_values()  # get values from scan
        spec = [float(i) for i in list(values)]  # convert results to float

        # NOTE(review): the Pimoroni bme680 driver appears to refresh `.data`
        # only when get_sensor_data() is called; without this call every row
        # would repeat the first reading. Confirm for the installed version.
        sensor_bme680.get_sensor_data()
        temperature = round(sensor_bme680.data.temperature, 2)
        pressure = round(sensor_bme680.data.pressure, 2)
        humidity = round(sensor_bme680.data.humidity, 2)
        gas_resistance = round(sensor_bme680.data.gas_resistance, 2)

        red_light = round(spec[0], 4)
        orange_light = round(spec[1], 4)
        yellow_light = round(spec[2], 4)
        green_light = round(spec[3], 4)
        blue_light = round(spec[4], 4)
        violet_light = round(spec[5], 4)

        today = date.today()
        now = datetime.datetime.now().time()

        # Inform the user!
        print('Adding this data to the spreadsheet:')
        print(today)
        print(now)
        # One placeholder per value; the previous string only had four, so
        # the six light readings were silently dropped from the output.
        print('{}*C {}hPa {}% {}res {}microM {}microM {}microM {}microM {}microM {}microM'.format(
            temperature, pressure, humidity, gas_resistance,
            red_light, orange_light, yellow_light,
            green_light, blue_light, violet_light))

        # Append data to the spreadsheet
        row = (today, now, temperature, pressure, humidity, gas_resistance,
               red_light, orange_light, yellow_light,
               green_light, blue_light, violet_light)
        sheet.append(row)

        # Save the workbook after every row so a crash loses at most one sample
        wb.save(WORKBOOK_PATH)

        # Wait before taking the next measurement
        time.sleep(SAMPLE_INTERVAL_S)
finally:
    # Make sure the workbook is saved!
    wb.save(WORKBOOK_PATH)
    print('Goodbye!')

I personally like to work with SQLAlchemy in most cases when interacting with databases from Python. It represents table definitions as classes; to add a row to your database, you only have to create an object of your class and add it to the database via SQLAlchemy commands. Therefore, you have to define your database in Python, so that its structure is known to your code.
As an example, I assume we only have one table in your database, with the same columns as your Excel sheet. The definition of your table and the creation of your database (a local SQLite database created in the same folder this script is in) would look like this as a script (let's call this script db.py):
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, Float, Date, DateTime, Time
from sqlalchemy import create_engine

# Local SQLite database file, created next to this script.
engine = create_engine('sqlite:///foo.db')
Base = declarative_base()


class Example(Base):
    """One row of sensor readings, mirroring the columns of the Excel sheet."""

    # Declarative models need an explicit table name.
    __tablename__ = 'example'

    id = Column(Integer, primary_key=True)
    temperature = Column(Float)
    pressure = Column(Float)
    humidity = Column(Float)
    gas_resistance = Column(Float)
    red_light = Column(Float)
    orange_light = Column(Float)
    yellow_light = Column(Float)
    green_light = Column(Float)
    blue_light = Column(Float)
    violet_light = Column(Float)
    today = Column(Date)
    # The logger stores datetime.now().time(), i.e. a time-of-day value, so
    # this column is Time rather than DateTime.
    now = Column(Time)


if __name__ == '__main__':
    # Create the table(s) in foo.db; run this script once before logging.
    Base.metadata.create_all(engine)
After running the above script, import your Example class in your own script (the one you posted) and replace the line where you add a row to Excel with one where you create an Example object and add it to your database.
import time
import datetime
import bme680
from as7262 import AS7262
from datetime import date
from openpyxl import load_workbook
from db import Example
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
# create a session for writing to your db
engine = create_engine('sqlite:///foo.db')
Session = sessionmaker(bind=engine)
session = Session()

as7262 = AS7262()
as7262.set_gain(1)  # 1, 3.7, 16, 64
as7262.set_integration_time(10)  # 1 to 255 x 2.8ms exposure
# mode 0 - bank 1 only continuous, mode 1 - bank 2 only continuous,
# mode 2 - both banks continuous, mode 3 - both banks single read
as7262.set_measurement_mode(2)  # 2 all colours continuous
as7262.set_illumination_led_current(12.5)  # 12.5mA 25mA 50mA 100mA
as7262.set_illumination_led(0)
sensor_bme680 = bme680.BME680()

try:
    while True:
        values = as7262.get_calibrated_values()  # get values from scan
        spec = [float(i) for i in list(values)]  # convert results to float

        # NOTE(review): the Pimoroni bme680 driver appears to refresh `.data`
        # only when get_sensor_data() is called. Confirm for the installed
        # version.
        sensor_bme680.get_sensor_data()

        example_object = Example(
            temperature=round(sensor_bme680.data.temperature, 2),
            pressure=round(sensor_bme680.data.pressure, 2),
            humidity=round(sensor_bme680.data.humidity, 2),
            gas_resistance=round(sensor_bme680.data.gas_resistance, 2),
            red_light=round(spec[0], 4),
            orange_light=round(spec[1], 4),
            yellow_light=round(spec[2], 4),
            green_light=round(spec[3], 4),
            blue_light=round(spec[4], 4),
            violet_light=round(spec[5], 4),
            today=date.today(),
            now=datetime.datetime.now().time(),
        )

        # Inform the user!
        print('Adding this data to the database:')
        # `today` and `now` only exist as attributes of the object here.
        print(example_object.today)
        print(example_object.now)
        print('{}*C {}hPa {}% {}res {}microM {}microM {}microM {}microM {}microM {}microM'.format(
            example_object.temperature, example_object.pressure,
            example_object.humidity, example_object.gas_resistance,
            example_object.red_light, example_object.orange_light,
            example_object.yellow_light, example_object.green_light,
            example_object.blue_light, example_object.violet_light))

        # Add object to database
        session.add(example_object)
        session.commit()

        # Pause between samples, as the spreadsheet version of the loop did.
        time.sleep(10)
finally:
    # Release the database connection before exiting.
    session.close()
    print('Goodbye!')

To post data directly to a database instead of an Excel sheet, you can use MariaDB from Python.
First install MariaDB on the Raspberry Pi and set up a database with the desired tables. Then add the connection code shown below to your program.
for example:
mariadb_connection = mariadb.connect(user='username', password='password', database='databasename')
cursor = mariadb_connection.cursor()
Query1 = "Your query that you want to run"
# Values for the query's placeholders are passed as a tuple; the trailing
# comma is what makes a single value a tuple rather than a plain string.
cursor.execute(Query1, ("anyvalue that will be passed",))
mariadb_connection.commit()

Python tkinter: create a dynamic dropdown menu and call different actions after selection

I am pretty new to Python and this is the first time I have used tkinter, so I hope someone can point me in the right direction.
Basically this is what I would like to achieve:
I retrieve two lists (APPs, IDs) from an XML document;
The APP list is shown in a dropdown menu;
Selecting an APP in the dropdown menu requests that APP's status using its ID.
I can't get the last point to work. I think I understand why (there is no matching between the two lists, nor a function to match them, so the selection always uses the last ID of the second list), but I have not been able to solve it.
import requests
import xml.etree.ElementTree as ET
import tkinter as tk
APP_OPTIONS = []
ID_OPTIONS = []

# Authenticated session shared by the list request and the status requests.
session = requests.Session()
session.auth = ('USER', 'PW')

# Download the application list and collect names and IDs in parallel lists.
applications = session.get('https://getapplicationslist.myurl.com/application/')
applications_xml = applications.content
root = ET.fromstring(applications_xml)
for application in root.findall('application'):
    app_name = application.find('name').text
    app_id = application.find('id').text
    APP_OPTIONS += [app_name]
    ID_OPTIONS += [app_id]


def appcall(*args):
    """Request the status for ``app_id`` and print each status found."""
    # ``app_id`` is the module-level name, i.e. the value the loop above
    # assigned last.
    response = session.get('https://getapplicationstatus.myurl.com?Id=' + app_id)
    status_root = ET.fromstring(response.content)
    for status_node in status_root.findall('appStatus'):
        print(status_node.find('status').text)


# Build the window; ``root`` is rebound from the XML tree to the Tk window.
root = tk.Tk()
root.title('Application List')
root.geometry("300x200")

# Run appcall whenever the selected value is written.
var = tk.StringVar(root)
var.set('Choose an Application')
var.trace('w', appcall)

dropDownMenu = tk.OptionMenu(root, var, *APP_OPTIONS)
dropDownMenu.pack()

root.mainloop()
print('End Request')

As mentioned in my comment, the issue is your app_id in appcall does not change. You need to get the corresponding ID from the ID_OPTIONS instead.
def appcall(*args):
    # Added: find the position of the selected name and take the ID stored at
    # the same position.
    selected_position = APP_OPTIONS.index(var.get())
    app_id = ID_OPTIONS[selected_position]
    app_status = session.get('https://getapplicationstatus.myurl.com?Id=' + app_id)
    ...
The app_id is now set to the ID_OPTIONS of the same index based on the app_name (since the insertion order is the same).
However, a better approach would be to initialize your options as a dictionary instead:
# instead of APP_OPTIONS / ID_OPTIONS, create:
apps = {}
...
for application in root.findall('application'):
    app_name = application.find('name').text
    app_id = application.find('id').text
    # add to dictionary here:
    apps[app_name] = app_id


def appcall(*args):
    # Change the app_id to apps.get(var.get())
    # (the session.get(...) call needs its own closing parenthesis)
    app_status = session.get('https://getapplicationstatus.myurl.com?Id=' + apps.get(var.get()))
    ...
See how much simpler it is to recall the same reference?
If you are feeling comfortable about the language, you might even opt for a dictionary comprehension:
...
root = ET.fromstring(applications_xml)
# Map each application name to its ID; the dictionary is named `apps`
# because that is the name appcall looks the selection up in.
apps = {
    application.find('name').text: application.find('id').text
    for application in root.findall('application')
}
...

Python / Django compare and update model objects

I've only just started with Python but have learned a lot over the last few months; now I have hit a wall trying to update objects on a model at a good speed.
I have a model called Products which is populated from a CSV file. Every day this file is updated with changes such as cost and quantity. I can compare each line of the file with the Products model, but with 120k lines this takes 3-4 hours.
What can I do to process this file faster? I only want to modify the objects if cost and quantity have changed.
Any suggestions on how to tackle this?
Version 3 of what I have tried:
from django.core.management import BaseCommand
from multiprocessing import Pool
from django.contrib.auth.models import User
from pprint import pprint
from CentralControl.models import Product, Supplier
from CentralControl.management.helpers.map_ingram import *
from CentralControl.management.helpers.helper_generic import *
from tqdm import tqdm
from CentralControl.management.helpers.config import get_ingram
import os, sys, csv, zipfile, CentralControl
# Run Script as 'SYSTEM'
# NOTE(review): this is a database query executed at import time; presumably
# the user with id=1 is the system account - confirm.
user = User.objects.get(id=1)
# Get Connection config.
# get_ingram() supplies the supplier code plus the location and name of the
# incoming file.
SUPPLIER_CODE, FILE_LOCATION, FILE_NAME = get_ingram()
class Command(BaseCommand):
    """Management command that compares the incoming file with stored products."""

    def handle(self, *args, **options):
        """Load the incoming rows and the current products, then compare them."""
        list_in = get_file()
        list_current = get_current_list()
        # The earlier `pool.map(compare_lists(list_in, list_current))` ran the
        # comparison in this process and then called pool.map without an
        # iterable, so the Pool added no parallelism. Call it directly.
        compare_lists(list_in, list_current)
def compare_lists(list_in, list_current):
    """Pair every current product row with the incoming rows sharing its order_code.

    Args:
        list_in: iterable of dict rows from the incoming file, each with an
            'order_code' key.
        list_current: iterable of dict rows for the stored products, each
            with an 'order_code' key.
    """
    # Index the incoming rows once so each current row is matched with a
    # dictionary lookup instead of a scan over the whole incoming list.
    incoming_by_code = {}
    for row_in in list_in:
        incoming_by_code.setdefault(row_in['order_code'], []).append(row_in)

    for row_current in tqdm(list_current):
        for row_in in incoming_by_code.get(row_current['order_code'], ()):
            # do more stuff here.
            pass
def get_current_list():
    """Return the stored products of supplier '440040' as a queryset of dicts.

    Prints an error and exits the process when the supplier does not exist.
    """
    try:
        supplier = Supplier.objects.get(code='440040')
    except Supplier.DoesNotExist:
        # Catch only the expected failure; a bare except would also hide
        # programming errors and KeyboardInterrupt.
        print('Error no products with supplier')
        sys.exit(1)
    return Product.objects.filter(supplier=supplier).values()
def get_file():
    """Read the supplier CSV from the incoming zip and rename its columns.

    Returns:
        A list of dict rows whose supplier column names have been replaced by
        the product field names; 'order_code' has leading zeros stripped.
    """
    import io  # local import: only needed to decode the zipped CSV

    # (new field name, column name in the supplier file), in output order.
    column_map = (
        ('order_code', 'Ingram Part Number'),
        ('name', 'Ingram Part Description'),
        ('description', 'Material Long Description'),
        ('mpn', 'Vendor Part Number'),
        ('gtin', 'EANUPC Code'),
        ('nett_cost', 'Customer Price'),
        ('retail_price', 'Retail Price'),
        ('qty_at_supplier', 'Available Quantity'),
        ('backorder_date', 'Backlog ETA'),
        ('backorder_qty', 'Backlog Information'),
    )

    archive_path = FILE_LOCATION + 'incoming/' + FILE_NAME
    # The with-blocks close both the member and the archive.
    with zipfile.ZipFile(archive_path, 'r') as archive:
        with archive.open('228688 .csv') as raw_csv:
            # ZipFile.open yields bytes, while csv needs text on Python 3.
            # NOTE(review): assumes the file is UTF-8 (with or without BOM) -
            # confirm the supplier's encoding.
            text_csv = io.TextIOWrapper(raw_csv, encoding='utf-8-sig', newline='')
            list_in = list(csv.DictReader(text_csv))

    for row in tqdm(list_in):
        for field_name, column_name in column_map:
            row[field_name] = row.pop(column_name)
        row['order_code'] = row['order_code'].lstrip("0")

    # commented out for dev process.
    # os.rename(FILE_LOCATION + 'incoming/' + FILE_NAME, FILE_LOCATION + 'processed/' + FILE_NAME)
    return list_in

I once faced a problem with slow data loading; I can tell you what I did, and maybe it will help you. I ran the code in debug mode to find out which column was causing the slow loading, and every time I found a column causing the problem I added an index on it (in the DBMS, PostgreSQL in my case), and it worked. I hope you are facing the same problem so that my answer can help you.

Here it's rough idea:
1. When reading the CSV, use pandas (as suggested by #BearBrow) to load it into array_csv.
2. Convert the object data from Django into a NumPy array, array_obj.
3. Don't compare them one by one; use a vectorized NumPy subtraction instead:
compare_index = (array_csv[['cost', 'quantity']] - array_obj[['cost', 'quantity']] == 0)
4. Find the rows where cost or quantity has changed:
obj_need_updated = array_obj[np.logical_or(~compare_index['cost'], ~compare_index['quantity'])]
Then use Django bulk update (https://github.com/aykut/django-bulk-update) to update them in bulk.
Hope this will give you hints to speed up your code

Python: Looping a certain amount of time in order different process

I made a script which is supposed to use Tkinter to let the user choose and load files, store their content in different objects and then process each of these documents.
I would like the script to process only a certain number of documents, determined by a question (the value is stored in "File_number").
For example: if, at the question "How many files do you want to compare?",
the user enters 3,
I would like the tkinter open-file window to ask for only 3 files and then keep going.
I am using the if/else statements shown below,
but it doesn't seem to work well and the code is really not Pythonic.
Is there a better/shorter way to do the same thing?
Thanks
My script looks like this:
import pandas as pd
from pandas import *
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import pylab
import pandas.io.data
import os
import Tkinter
from Tkinter import *
import tkFileDialog
import tkSimpleDialog
from tkFileDialog import askopenfilename
import sys
# Set up GUI
root = Tkinter.Tk(); root.withdraw()
# Prompt for user info
File_number = tkSimpleDialog.askinteger("File number", "How many files do you want to compare?")
# Prompt for file explorer
# Also extract the file_name
# NOTE(review): the three if/else groups below do not parse as written: `=` is
# assignment (a comparison needs `==`) and `break` is only legal inside a loop.
# process_a counts how many file dialogs have been shown so far.
process_a = 0
if process_a = File_number:
break
else:
process_a = process_a + 1
# Ask for file 1, then derive its path without extension and its bare name.
fileloc1 = tkFileDialog.askopenfilename(parent=root, title='Choose file 1')
fileloc1_name_clean = os.path.splitext(fileloc1)[0]
fileloc1_name = os.path.basename(fileloc1_name_clean)
if process_a = File_number:
break
else:
process_a = process_a + 1
# Same steps for file 2.
fileloc2 = tkFileDialog.askopenfilename(parent=root, title='Choose file 2')
fileloc2_name_clean = os.path.splitext(fileloc2)[0]
fileloc2_name = os.path.basename(fileloc2_name_clean)
if process_a = File_number:
break
else:
process_a = process_a + 1
# Same steps for file 3.
fileloc3 = tkFileDialog.askopenfilename(parent=root, title='Choose file 3')
fileloc3_name_clean = os.path.splitext(fileloc3)[0]
fileloc3_name = os.path.basename(fileloc3_name_clean)
EDIT 1
The next part of my script is:
# File 1: read the tab-separated table, keep the first row for each value of
# 'N', keep rows with more than one peptide at 95%, drop keratin entries, and
# write the result next to the script as <name>.csv.
dfa_1 = pd.read_csv(fileloc1, delimiter='\t')
# `subset=` replaces the removed `cols=` keyword; keeping the first duplicate
# is the default, which is what `take_last=False` asked for.
dfa_nodupli = dfa_1.drop_duplicates(subset='N')
dfa_nodu_2pep = dfa_nodupli[(dfa_nodupli['Peptides(95%)'] > 1)]
dfa_nodu_2pep = dfa_nodu_2pep[~dfa_nodu_2pep['Name'].str.contains('Keratin')]
dfa_nodu_2pep.to_csv(fileloc1_name + ".csv")

# File 2: same steps.
dfb_1 = pd.read_csv(fileloc2, delimiter='\t')
dfb_nodupli = dfb_1.drop_duplicates(subset='N')
dfb_nodu_2pep = dfb_nodupli[(dfb_nodupli['Peptides(95%)'] > 1)]
dfb_nodu_2pep = dfb_nodu_2pep[~dfb_nodu_2pep['Name'].str.contains('Keratin')]
dfb_nodu_2pep.to_csv(fileloc2_name + ".csv")

I modified your code so that it works the way you want (I hope).
import Tkinter
import tkFileDialog
import tkSimpleDialog
from tkFileDialog import askopenfilename
import os
# Set up GUI
def main():
    """Ask how many files to compare, then prompt for exactly that many files.

    Returns:
        A list with one [path, path_without_extension, base_name] entry per
        chosen file, or None when the count dialog is cancelled or 0 is
        entered.
    """
    root = Tkinter.Tk()
    root.withdraw()

    # Prompt for user info
    File_number = tkSimpleDialog.askinteger(
        "File number", "How many files do you want to compare?")
    if not File_number:
        return None

    user_files = []
    while len(user_files) < File_number:
        dialog_title = 'Choose file {}'.format(len(user_files) + 1)
        fileloc = tkFileDialog.askopenfilename(parent=root, title=dialog_title)
        if not fileloc:
            # The dialog was cancelled: ask for the same file again.
            continue
        fileloc_name_clean = os.path.splitext(fileloc)[0]
        fileloc_name = os.path.basename(fileloc_name_clean)
        user_files.append([fileloc, fileloc_name_clean, fileloc_name])

    print(user_files)
    # Hand the selection back so the caller can process the chosen files.
    return user_files


main()
I use a while loop to ask for file paths as many times as you want.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to delete X% of entries from a DynamoDB Table? - python

Related

Iterating integers over a method from a range

Python code to send data from I2C sensor to a local SQL database

Python tkinter: create a dynamic dropdown menu and call different actions after selection

Python / Django compare and update model objects

Python: Looping a certain amount of time in order different process

Categories

Resources