I have created this monitoring class that updates some Counter metrics according to some logic; the code is attached. Can someone explain why my registry appears empty even after I add the test metric?
import logging
from prometheus_client import (
    CollectorRegistry,
    Counter,
    start_http_server
)


class Reporter:
    def __init__(self):
        self._set_counters()
        start_http_server(8080, registry=self.registry)

    def _set_counters(self):
        self.registry = CollectorRegistry()
        self.bycounter = Counter(
            'bycounter',
            'blah blah',
            ['by', 'level0top'],
            registry=self.registry
        )
        self.bycounter.labels(by='test', level0top='test').inc()
I am trying to test the metrics like this:
import unittest
from sc_eol.monitoring import TodayDataReporter
from sc_eol.sc_eol_utils import generate_query_url

reporter = TodayDataReporter()


class TestTodayDataReporter(unittest.TestCase):
    @staticmethod
    def test_publish():
        by = 'level1'
        parse_query = {'level0top': 'WSJ2', 'date': '2021-11-01'}
        start = '2021-11-01'
        print(dir(reporter.registry))
        reporter.registry.collect()
        before = reporter.registry.get_sample_value('bycounter', ['level1', 'WSJ2'])
        print("BEFOREEE", before)
        reporter.registry.collect()
        generate_query_url(by, start, parse_query, reporter)
        before = reporter.registry.get_sample_value('bycounter', {'by': 'level1', 'level0top': 'WSJ2'})
        reporter.registry.collect()
        print("After", before)


if __name__ == "__main__":
    unittest.main()
Why is bycounter None here?
before = reporter.registry.get_sample_value('bycounter', ['level1', 'WSJ2'])
How do I test whether a server is running on port 8080 or not? The counter also seems to be renamed to bycounter_total.
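A minimal sketch of what I would expect to work, assuming the default prometheus_client behaviour: a Counter's sample is exported with a _total suffix, and get_sample_value takes the labels as a dict rather than a list, so looking up 'bycounter' with a list of label values returns None. The URL and port below are assumptions based on the start_http_server call above.

from urllib.request import urlopen

# Counter samples carry the _total suffix, and labels are passed as a dict
before = reporter.registry.get_sample_value(
    'bycounter_total', {'by': 'test', 'level0top': 'test'}
)

# rough check that the exporter is listening on port 8080
resp = urlopen('http://localhost:8080/metrics', timeout=2)
assert resp.status == 200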
I'm building a simple app in Streamlit. I'm struggling with building the list of choices depending on user permissions.
utils.py
Dict with pages:
pages = {
    "Start": start_page,
    "Test kabli": cable_test_page,
    "ARP": arp_page,
    "FGRestart": fg_restart_page,
    "MACTable": mac_table_page,
    "PingGateWAN": ping_gate_wan_page,
    "PInterface": psyhical_interface_page,
    "RoutingTable": routing_table_page,
    "TestŁączaWWAN(LTE)": wan_lte_test_page,
    "WanMAC": wan_mac_page,
    "TestŁączaWAN": wan_test_page,
    "ResetPortów": reset_poe_page,
    "RestartSwitcha": switch_restart_page,
}
Function for selecting items from a list
def select_page(ssh):
    page = st.selectbox("Select item", tuple(pages.keys()))
    pages[page](ssh)
Permissions for each item in the list are defined like this:
permissions = {
    'cable_diag': ["user1", "user2", "user3"],
    'ping': ["user1", "user2", "user3"],
    'arp': ["user1", "user2", "user3"],
    'fgrestart': ["user1", "user2", "user3"],
    'mactable': ["user1", "user2", "user3"],
    'Pinterface': ["user1", "user2", "user3"],
    'poe': ["user1", "user2", "user3"],
    'routingtable': ["user1", "user3"],
    'srestart': ["user1", "user2", "user3"],
    'lte': ["user2", "user3"],
    'wanmac': ["user1", "user2", "user3"],
    'wan': ["user2", "user3"],
}
def has_role(module_name):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if st.session_state["username"] in permissions[module_name]:
                print('Accessed')
                return func(*args, **kwargs)
            else:
                st.text('ACCESS DENIED')
        return wrapper
    return decorator
Each page file has a role assigned like this:
@has_role('page_name')
It's working, but if user1 doesn't have permission for the 'wan' page, I also want him not to be able to see that page in the list. I really have no idea how to solve it.
Full utils.py
import streamlit as st
import pandas as pd
import paramiko
from permissions import permissions
from modules.streamlit.pages.cable_test import cable_test_page
from modules.streamlit.pages.arp import arp_page
from modules.streamlit.pages.fg_restart import fg_restart_page
from modules.streamlit.pages.mac_table import mac_table_page
from modules.streamlit.pages.ping_gate_wan import ping_gate_wan_page
from modules.streamlit.pages.psyhical_interface import psyhical_interface_page
from modules.streamlit.pages.routing_table import routing_table_page
from modules.streamlit.pages.switch_restart import switch_restart_page
from modules.streamlit.pages.wan_lte_test import wan_lte_test_page
from modules.streamlit.pages.wan_mac import wan_mac_page
from modules.streamlit.pages.wan_test import wan_test_page
from modules.streamlit.pages.reset_poe import reset_poe_page
from modules.streamlit.pages.start import start_page
from modules.streamlit.pages.test import test_page

pages = {
    "Start": start_page,
    "Test kabli": cable_test_page,
    "ARP": arp_page,
    "FGRestart": fg_restart_page,
    "MACTable": mac_table_page,
    "PingGateWAN": ping_gate_wan_page,
    "PInterface": psyhical_interface_page,
    "RoutingTable": routing_table_page,
    "TestŁączaWWAN(LTE)": wan_lte_test_page,
    "WanMAC": wan_mac_page,
    "TestŁączaWAN": wan_test_page,
    "ResetPortów": reset_poe_page,
    "RestartSwitcha": switch_restart_page,
}

custom_options = {"sideBar": False, "enableCellTextSelection": True}


@st.cache
def load_markets_xls():
    df = pd.read_excel("path",
                       index_col=None, engine="openpyxl")
    return df


def choose_market(df):
    shop_number = st.text_input('Number', '')
    if shop_number:
        df = df[df["Host_Name"].notna()]
        df['Host_Name'] = df['Host_Name'].astype(int).astype(str)
        try:
            single_row = df.loc[df['Host_Name'] == shop_number].to_dict('records')[0]
            ip = single_row['ip']
            return ip
        except IndexError:
            st.text("No found ")


def connect_to_market(ssh, market_ip):
    print(market_ip)
    try:
        ssh.connect(hostname=str(market_ip), port=22, username='user',
                    password='password', allow_agent=False, timeout=None, compress=False)
        st.text('Connected!')
        return True
    except Exception as e:
        st.text('----- ')
        return False


def select_page(ssh):
    page = st.selectbox("Choose", tuple(pages.keys()))
    pages[page](ssh)
Full permissions.py:
from functools import wraps
import streamlit as st
import pandas as pd
import paramiko

permissions = {
    'cable_diag': ["user1", "user2", "user3"],
    'ping': ["user1", "user2", "user3"],
    'arp': ["user1", "user2", "user3"],
    'fgrestart': ["user1", "user2", "user3"],
    'mactable': ["user1", "user2", "user3"],
    'Pinterface': ["user1", "user2", "user3"],
    'poe': ["user1", "user2", "user3"],
    'routingtable': ["user1", "user3"],
    'srestart': ["user1", "user2", "user3"],
    'lte': ["user2", "user3"],
    'wanmac': ["user1", "user2", "user3"],
    'wan': ["user2", "user3"],
}


def has_role(module_name):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if st.session_state["username"] in permissions[module_name]:
                print('jest dostep')
                return func(*args, **kwargs)
            else:
                st.text('ACCESS DENIED!')
        return wrapper
    return decorator
app.py:
import streamlit as st
import pandas as pd
import paramiko
import streamlit_authenticator as stauth
from utils import load_markets_xls, choose_market, connect_to_market, select_page
from cred import hashed_passwords, names, usernames
from PIL import Image
import base64

ssh = paramiko.SSHClient()
# ssh.load_system_host_keys()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

hide_menu_style = """
<style>
#MainMenu {visibility: hidden;}
</style>
"""

custom_options = {"sideBar": False, "enableCellTextSelection": True}


def main():
    st.set_page_config(page_title='Market Tests', layout="wide")
    st.markdown(hide_menu_style, unsafe_allow_html=True)
    authenticator = stauth.Authenticate(names, usernames, hashed_passwords, 'cookie', 'some_key', cookie_expiry_days=0)
    col1, col2, col3 = st.columns(3)
    with col1:
        st.write(' ')
    with col2:
        image = Image.open('path')
        st.image(image)
    with col3:
        st.write(' ')
    name, authentication_status, username = authenticator.login('Panel Logowania', 'main')
    if authentication_status:
        authenticator.logout('Logout', 'main')
        data_load_state = st.text('Loading data...')
        df = load_markets_xls()
        data_load_state.text('Markets loaded!')
        market_ip = choose_market(df)
        if market_ip:
            if connect_to_market(ssh, market_ip):
                select_page(ssh)
    elif authentication_status == False:
        st.error('Username/password is incorrect')
    elif authentication_status == None:
        st.warning('Please enter your username and password')


def refresh_data():
    st.legacy_caching.clear_cache()
    raise st.script_runner.RerunException(st.script_request_queue.RerunData(None))


if __name__ == "__main__":
    main()
sample page:
import streamlit as st
from permissions import has_role

custom_options = {
    "enableCellTextSelection": True,
    "sideBar": True
}


@has_role('arp')
def arp_page(ssh):
    st.title('ARP')
    stdin, stdout, stderr = ssh.exec_command("get system arp")
    for line in iter(stdout.readline, ""):
        st.text(line)
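One way to hide pages the current user is not allowed to access (a sketch only, not the original code: it assumes a hypothetical PAGE_PERMISSION mapping from the display names in pages to the keys in permissions, and filters the selectbox options for the logged-in user):

PAGE_PERMISSION = {
    "ARP": "arp",
    "TestŁączaWAN": "wan",
    # ...one entry per restricted page (hypothetical mapping)
}

def select_page(ssh):
    user = st.session_state["username"]
    # keep a page if it has no permission entry, or if the user is listed for it
    allowed = [
        name for name in pages
        if name not in PAGE_PERMISSION
        or user in permissions.get(PAGE_PERMISSION[name], [])
    ]
    page = st.selectbox("Select item", allowed)
    pages[page](ssh)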
I am getting an error while implementing the below code:
from pandas.core.frame import DataFrame
import cx_Oracle
import pandas as pd
import sys


class IFTDataCore:
    def __init__(self, accountCode):
        i = 0
        all_Procedures = []
        dns_tns = cx_Oracle.makedsn("gbhenora06vd.corp.amvescap.net", "1525", "INVU")
        db = cx_Oracle.connect("CORP-SVC-IFT", "C$Rp$vc1ftUat", dns_tns)
        cursor = db.cursor()
        cursor.execute("select procedure_name from all_procedures where object_name = 'PK_IVZ_IFT_EXTRACT' ")
        rows = cursor.fetchall()
        procedureName = ['PK_IVZ_IFT_EXTRACT.' + str(list(rows[indexRow])[0]) for indexRow in range(0, len(list(rows)))]
        l_cur = cursor.var(cx_Oracle.CURSOR)
        while i < len(procedureName):
            if procedureName[i] == 'PK_IVZ_IFT_EXTRACT.SP_IVZ_IFT_EXTRACT_ACCOUNTS':
                ret_cursor = cursor.callproc(procedureName[i], (l_cur,))
                dfx = pd.DataFrame(ret_cursor[0])
                all_Procedures.append(dfx)
            else:
                ret_cursor = cursor.callproc(procedureName[i], (l_cur, accountCode))
                dfx = pd.DataFrame(ret_cursor[0])
                all_Procedures.append(dfx)
            i += 1
        self.all_Procedures = all_Procedures
        cursor.close()
        db.close()

    @property
    def getallProcedures(self):
        return self.all_Procedures


if __name__ == '__main__':
    Procedures = []
    all_Proc = IFTDataCore('TOUHI')
    Procedures = all_Proc.getallProcedures()
    print(Procedures[0])
PS: The code works fine if I do not put the logic in __init__ and instead call the logic directly. Please let me know the possible reason why, when the class is initialized in main, the call starts throwing an error.
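One detail worth checking, assuming the decorator on getallProcedures really is @property: a property is read as a plain attribute, so calling it with parentheses would raise TypeError: 'list' object is not callable.

all_Proc = IFTDataCore('TOUHI')
Procedures = all_Proc.getallProcedures   # no parentheses when it is a property
print(Procedures[0])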
The solution now works fine, as per the code below:
from pandas.core.frame import DataFrame
import cx_Oracle
import pandas as pd
import sys
import json
from pathlib import Path
import os


class IFTDataCore:
    def __init__(self):
        try:
            db = cx_Oracle.connect('invest/invest@INVD.WORLD')
            cursor = db.cursor()
            cursor.execute("select procedure_name from all_procedures where object_name = 'PK_IVZ_IFT_EXTRACT' ")
            rows = cursor.fetchall()
            procedureName = ['PK_IVZ_IFT_EXTRACT.' + str(list(rows[indexRow])[0]) for indexRow in range(0, len(list(rows)) - 1)]
            # To convert the Accounts procedure result to JSON format
            l_cur_Account = cursor.var(cx_Oracle.CURSOR)
            ret_cursor_Account = cursor.callproc(procedureName[1], (l_cur_Account,))
            self.dfx_Account = pd.DataFrame(ret_cursor_Account[0])
            self.dfx_Account.columns = ['fundCode', 'fundName', 'legalEntitiyIdentifier', 'isin']
            result_Account = self.dfx_Account.to_json(orient='records')
        except BaseException as e:
            raise


def lambda_handler(event, context):
    positional_data = IFTDataCore()
    df_acct = positional_data.dfx_Account
    df_acct = df_acct.fillna("")
    Json = df_acct.to_json(orient='records')
    lambda_response = __lambda_response__('200', Json)
    return lambda_response


def __lambda_response__(status_code, response_body):
    return {
        'statusCode': status_code,
        'headers': {
            'Access-Control-Allow-Headers': 'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token',
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': 'OPTIONS,GET'
        },
        'body': response_body
    }
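A quick local smoke test of the handler above could look like this (assuming the database is reachable from the machine running it):

if __name__ == '__main__':
    response = lambda_handler({}, None)
    print(response['statusCode'])
    print(response['body'][:200])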
I have a class called martCrawler which imports 3 other crawlers from other files.
However, the code gets longer as the number of imported crawlers increases.
Is there a better practice for managing code like this?
Thanks in advance~
from Scripts_mart.wat_ver2 import crawler_watsons
from Scripts_mart.cosmed_ver1 import crawler_cosmed
from Scripts_mart.pxmart_ver1 import crawler_pxmart
import datetime


class martCrawler():
    def __init__(self):
        self.wat = crawler_watsons()
        self.cos = crawler_cosmed()
        self.pxm = crawler_pxmart()

    def crawler_testing(self):
        result_pack = {}
        wat_result = self.wat.run_before_insert()
        cos_result = self.cos.run_before_insert()
        pxm_result = self.pxm.run_before_insert()
        result_pack['wat'] = wat_result
        result_pack['cos'] = cos_result
        result_pack['pxm'] = pxm_result
        return result_pack

...
Then why not store all the crawlers in a dict from the beginning?
As an example:
from Scripts_mart.wat_ver2 import crawler_watsons
from Scripts_mart.cosmed_ver1 import crawler_cosmed
from Scripts_mart.pxmart_ver1 import crawler_pxmart


class MartCrawler():
    def __init__(self, *args):
        self.crawlers = {}
        for crawler in args:
            # Use some introspection to get the names of the classes.
            # The names should be "crawler_watsons", etc.
            self.crawlers[crawler.__name__] = crawler()

    def crawler_testing(self):
        result_pack = {}
        for name, crawler in self.crawlers.items():
            result_pack[name] = crawler.run_before_insert()
        return result_pack


martCrawler = MartCrawler(crawler_watsons, crawler_cosmed, crawler_pxmart)
Just bear in mind that the keys in your dict will be the actual names of the imported classes, not 'wat', 'cos' and 'pxm'. But if that is a problem, you can use some string manipulation and/or regexes to fix it.
For example, you could replace crawler.__name__ with crawler.__name__[8:11].
Just put them into a dictionary:
from Scripts_mart.wat_ver2 import crawler_watsons
from Scripts_mart.cosmed_ver1 import crawler_cosmed
from Scripts_mart.pxmart_ver1 import crawler_pxmart
import datetime


class martCrawler():
    def __init__(self):
        self.crawlers = {
            "wat": crawler_watsons(),
            "cos": crawler_cosmed(),
            "pxm": crawler_pxmart()
        }

    def crawler_testing(self):
        result_pack = {}
        for key in self.crawlers:
            result_pack[key] = self.crawlers[key].run_before_insert()
        return result_pack
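Hypothetical usage of this dict-based version, keeping the same short keys as the original:

crawler = martCrawler()
results = crawler.crawler_testing()
for key, result in results.items():
    print(key, result)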
I am looking to fetch and publish data from Spark Streaming to Cloudant. My code is as follows:
from CloudantPublisher import CloudantPublisher
from CloudantFetcher import CloudantFetcher
import pyspark
from pyspark.streaming.kafka import KafkaUtils
from pyspark import SparkContext, SparkConf, SQLContext
from pyspark.streaming import StreamingContext
from pyspark.sql import SparkSession
from kafka import KafkaConsumer, KafkaProducer
import json
import logging


class SampleFramework():

    # class-level logger used by processData
    logger = logging.getLogger(__name__)

    def __init__(self):
        pass

    @staticmethod
    def messageHandler(m):
        return json.loads(m.message)

    @staticmethod
    def processData(rdd):
        if rdd.isEmpty():
            SampleFramework.logger.info("RDD is empty")
            return
        # Expand
        expanded_rdd = rdd.mapPartitions(CloudantFetcher.fetch)
        expanded_rdd.foreachPartition(CloudantPublisher.publish)

    def run(self, ssc):
        self.ssc = ssc
        directKafkaStream = KafkaUtils.createDirectStream(
            self.ssc, [SUBSCRIBE_QUEUE],
            {"metadata.broker.list": METADATA,
             "bootstrap.servers": BOOTSTRAP_SERVERS},
            messageHandler=SampleFramework.messageHandler)
        directKafkaStream.foreachRDD(SampleFramework.processData)
        ssc.start()
        ssc.awaitTermination()
Other supporting classes -
from CloudantConnector import CloudantConnector


class CloudantFetcher:
    config = Config.createConfig()
    cloudantConnector = CloudantConnector(config)

    @staticmethod
    def fetch(data):
        final_data = []
        for row in data:
            id = row["id"]
            if not CloudantFetcher.cloudantConnector.isReady():
                CloudantFetcher.cloudantConnector.open()
            data_json = CloudantFetcher.cloudantConnector.getOne({"id": id})
            row["data"] = data_json
            final_data.append(row)
        CloudantFetcher.cloudantConnector.close()
        return final_data


class CloudantPublisher:
    config = Config.createConfig()
    cloudantConnector = CloudantConnector(config)

    @staticmethod
    def publish(data):
        CloudantPublisher.cloudantConnector.open()
        CloudantPublisher.cloudantConnector.postAll(data)
        CloudantPublisher.cloudantConnector.close()
from cloudant.client import Cloudant
from cloudant.result import Result
from cloudant.result import QueryResult
from cloudant.document import Document
from cloudant.query import Query
from cloudant.database import CloudantDatabase
import json
import logging


class CloudantConnector:
    def __init__(self, config, db_name=None):
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.client = Cloudant(self.config["cloudant"]["credentials"]["username"],
                               self.config["cloudant"]["credentials"]["password"],
                               url=self.config["cloudant"]["host"]["full"])
        self.initialized = False
        self.db_name = self.config["cloudant"]["host"]["db_name"]

    def open(self):
        try:
            self.client.connect()
            self.logger.info("Connection to Cloudant established.")
            self.initialized = True
        except:
            raise Exception("Could not connect to Cloudant! Please verify credentials.")
        self.database = CloudantDatabase(self.client, self.db_name)
        if self.database.exists():
            pass
        else:
            self.database.create()

    def isReady(self):
        return self.initialized

    def close(self):
        self.client.disconnect()

    def getOne(self, query):
        new_filter = query
        query = Query(self.database, selector=query, limit=1)
        results_string = json.dumps(query.result[0][0])
        results_json = json.loads(results_string)
        return results_json

    def postAll(self, docs):
        documents = []
        quantum = self.config["cloudant"]["constants"]["bulk_quantum"]
        count = 0
        for doc in docs:
            document = Document(self.database)
            document["id"] = doc["id"]
            document["data"] = doc["data"]
            documents.append(document)
            count = count + 1
            if count % quantum == 0:
                self.database.bulk_docs(documents)
                documents = []
        if len(documents) != 0:
            self.database.bulk_docs(documents)
        self.logger.debug("Uploaded document to the Cloudant database.")
My implementation works, but it is slow compared to what I would expect if, instead of initializing the Cloudant connection in each partition, I maintained a static pool of connections that each partition could fetch and reuse.
My questions are as follows:
Do I need to create a connection pool with the Cloudant 2.0 API in Python? (It seems one already exists within the API.) If yes, how should I go about it? The closest implementation I have seen is this - link, but it is based on an outdated Cloudant API and does not work with the new one.
If the answer to the above is 'yes', how can I make this accessible to the workers? I see references to creating serializable, lazily instantiated connection-client objects here. This would mean making a lazily instantiated Cloudant connection object in the SampleFramework. How can I do this in Python, like the pattern given in the Spark documentation?
connection = ConnectionPool.getConnection()
for record in iter:
    connection.send(record)
ConnectionPool.returnConnection(connection)
If the above is not possible, how do I speed up my operations? The only alternative I can think of is maintaining a single connection on the driver program, collecting the data from all workers, and then fetching/uploading it there. This would reduce the number of times I need to connect to Cloudant, but would take away the distributed fetching/publishing architecture.
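For reference, a minimal sketch of the lazily instantiated, per-executor client idea, assuming the python-cloudant 2.x client and that the config dict is importable (or broadcast) on the workers; CONFIG, LazyCloudant and publish_partition are assumptions, not part of the original code:

from cloudant.client import Cloudant

class LazyCloudant:
    _client = None

    @classmethod
    def get(cls, config):
        # created once per executor process, then reused by every partition it handles
        if cls._client is None:
            cls._client = Cloudant(
                config["cloudant"]["credentials"]["username"],
                config["cloudant"]["credentials"]["password"],
                url=config["cloudant"]["host"]["full"],
                connect=True,
            )
        return cls._client


def publish_partition(rows):
    # intended for expanded_rdd.foreachPartition(publish_partition)
    client = LazyCloudant.get(CONFIG)   # CONFIG is a hypothetical module-level config dict
    db = client[CONFIG["cloudant"]["host"]["db_name"]]
    db.bulk_docs([{"id": row["id"], "data": row["data"]} for row in rows])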