Is there a way to read data from a Redis server using pandas? - python

I'm working on a project with IoT devices that are connected to a .NET server hosted in the Azure cloud. I'm currently using for loops to read real-time data, but I want to read some real-time stats from the Redis database using pandas. Can someone explain how to get started?
I'm using the script below to read stats, but I want to start using pandas.
import os
import re
import json
import traceback
from collections import Counter
import time
import datetime as dt

import redis
from tqdm import tqdm  # taqadum (تقدّم) == progress
from jsonpointer import resolve_pointer as j_get
from jsonpointer import JsonPointerException
import pandas as pd

os.system("color 0c")  # change console color to red

if False:
    # x Redis
    r = redis.Redis(host="****.redis.cache.windows.net",
                    port=***,
                    password="***",
                    ssl=True,)
else:
    # y Redis
    r = redis.Redis(host="***.redis.cache.windows.net",
                    port=****,
                    password="*****",
                    ssl=True,)

print(r.info())
print("Server started at: ", end="")
print(dt.datetime.now() - dt.timedelta(seconds=r.info()['uptime_in_seconds']))

print("Building pipe")
pipe = r.pipeline()
# for key in tqdm(r.scan_iter("MC:SessionInfo*")):
for key in tqdm(r.scan_iter("MC:SessionInfo*", count=2500)):
    pipe.hgetall(key)

print("Executing pipe")
responses = pipe.execute()

print("Processing effluvia")
q = {}
k = {}
first = True
last_contact = {}
for data in tqdm(responses):
    try:
        j = json.loads(data[b'LastStatusBody'])
        serial = j['System']['Serial'].lower()
        q[serial] = j
        last_contact[serial] = time.time() - int(data[b'LastContact'])
        # TODO: json searching sensibly!
        vac[serial] = j['LiveA']['Unit']['Volatge_Vac']
    except:
        if first:
            traceback.print_exc()
            first = False
        else:
            pass

for key, value in fw_versions.items():
    if value.split(',')[0] == "xx v1.0.0.0":
        x_paired.append(key)
print(x_paired)
print("Total paired :", len(x_paired))
Instead of the above procedure, I want to use pandas to read the data more easily and build some charts for daily updates to the team.
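Since the core of the question is "how do I get these Redis hashes into pandas", here is a minimal sketch (not a definitive answer). It reuses the key pattern and hash fields from the script above (MC:SessionInfo*, LastStatusBody, LastContact); the connection details are placeholders to replace with your own. The idea is simply to build a list of dicts and hand it to pd.DataFrame, after which describing, grouping and plotting for the daily team update is straightforward.

import json
import time

import pandas as pd
import redis

# Placeholder connection details -- substitute your own cache host/port/password.
r = redis.Redis(host="your-cache.redis.cache.windows.net", port=6380,
                password="***", ssl=True)

pipe = r.pipeline()
keys = list(r.scan_iter("MC:SessionInfo*", count=2500))
for key in keys:
    pipe.hgetall(key)

rows = []
for key, data in zip(keys, pipe.execute()):
    try:
        status = json.loads(data[b"LastStatusBody"])
        rows.append({
            "key": key.decode(),
            "serial": status["System"]["Serial"].lower(),
            "voltage_vac": status.get("LiveA", {}).get("Unit", {}).get("Volatge_Vac"),
            "last_contact_s": time.time() - int(data[b"LastContact"]),
        })
    except (KeyError, json.JSONDecodeError):
        continue  # skip hashes that do not carry the expected fields

df = pd.DataFrame(rows)   # one row per device
print(df.describe())      # quick stats for the daily update
# df.groupby(...), df.plot(...) or df.to_csv(...) can then feed the team charts.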

I serialize/deserialize to pyarrow or pickle and then use an additional key as metadata. This works across local, GCloud, AWS EB and Azure:
import json
import os
import pickle

import pandas as pd
import pyarrow as pa
import redis
import ebutils
from logenv import logenv
from pandas.core.frame import DataFrame
from redis.client import Redis
from typing import Union, Optional
class mycache():
    __redisClient: Redis
    CONFIGKEY = "cacheconfig"

    def __init__(self) -> None:
        try:
            ep = os.environ["REDIS_HOST"]
        except KeyError:
            if os.environ["HOST_ENV"] == "GCLOUD":
                os.environ["REDIS_HOST"] = "redis://10.0.0.3"
            elif os.environ["HOST_ENV"] == "EB":
                os.environ["REDIS_HOST"] = "redis://" + ebutils.get_redis_endpoint()
            elif os.environ["HOST_ENV"] == "AZURE":
                #os.environ["REDIS_HOST"] = "redis://ignore:password#redis-sensorvenv.redis.cache.windows.net"
                pass  # should be set in azure env variable
            elif os.environ["HOST_ENV"] == "LOCAL":
                os.environ["REDIS_HOST"] = "redis://127.0.0.1"
            else:
                raise RuntimeError("could not initialise redis")
                return  # no known redis setup

        #self.__redisClient = redis.Redis(host=os.environ["REDIS_HOST"])
        self.__redisClient = redis.Redis.from_url(os.environ["REDIS_HOST"])
        self.__redisClient.ping()
        # get config as well...
        self.config = self.get(self.CONFIGKEY)
        if self.config is None:
            self.config = {"pyarrow": True, "pickle": False}
            self.set(self.CONFIGKEY, self.config)
        self.alog = logenv.alog()

    def redis(self) -> Redis:
        return self.__redisClient

    def exists(self, key: str) -> bool:
        if self.__redisClient is None:
            return False
        return self.__redisClient.exists(key) == 1

    def get(self, key: str) -> Union[DataFrame, str]:
        keytype = "{k}.type".format(k=key)
        valuetype = self.__redisClient.get(keytype)
        if valuetype is None:
            if key.split(".")[-1] == "pickle":
                return pickle.loads(self.redis().get(key))
            else:
                ret = self.redis().get(key)
                if ret is None:
                    return ret
                else:
                    return ret.decode()
        elif valuetype.decode() == str(pd.DataFrame):
            # fallback to pickle serialized form if pyarrow fails
            # https://issues.apache.org/jira/browse/ARROW-7961
            try:
                return pa.deserialize(self.__redisClient.get(key))
            except pa.lib.ArrowIOError as err:
                self.alog.warning("using pickle from cache %s - %s - %s", key, pa.__version__, str(err))
                return pickle.loads(self.redis().get(f"{key}.pickle"))
            except OSError as err:
                if "Expected IPC" in str(err):
                    self.alog.warning("using pickle from cache %s - %s - %s", key, pa.__version__, str(err))
                    return pickle.loads(self.redis().get(f"{key}.pickle"))
                else:
                    raise err
        elif valuetype.decode() == str(type({})):
            return json.loads(self.__redisClient.get(key).decode())
        else:
            return self.__redisClient.get(key).decode()  # type: ignore

    def set(self, key: str, value: Union[DataFrame, str]) -> None:
        if self.__redisClient is None:
            return
        keytype = "{k}.type".format(k=key)
        if str(type(value)) == str(pd.DataFrame):
            self.__redisClient.set(key, pa.serialize(value).to_buffer().to_pybytes())
            if self.config["pickle"]:
                self.redis().set(f"{key}.pickle", pickle.dumps(value))
                # issue should be transient through an upgrade....
                # once switched off data can go away
                self.redis().expire(f"{key}.pickle", 60 * 60 * 24)
        elif str(type(value)) == str(type({})):
            self.__redisClient.set(key, json.dumps(value))
        else:
            self.__redisClient.set(key, value)
        self.__redisClient.set(keytype, str(type(value)))


if __name__ == '__main__':
    os.environ["HOST_ENV"] = "LOCAL"
    r = mycache()
    rr = r.redis()
    for k in rr.keys("cache*"):
        print(k.decode(), rr.ttl(k))
        print(rr.get(k.decode()))
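One caveat about the snippet above: pa.serialize / pa.deserialize were deprecated and later removed from pyarrow (that is the era of the linked ARROW-7961 issue), so on current versions the DataFrame branch needs a different wire format. A hedged sketch of one alternative, keeping the same "DataFrame as bytes under a Redis key" idea but going through the Arrow IPC/Feather format with an in-memory buffer (key names are illustrative):

import io

import pandas as pd
import redis

r = redis.Redis()  # adjust host/port/ssl for your environment


def df_to_redis(key: str, df: pd.DataFrame) -> None:
    # pandas writes Arrow IPC (Feather) via pyarrow; a file-like buffer works as the target
    buf = io.BytesIO()
    df.to_feather(buf)
    r.set(key, buf.getvalue())


def df_from_redis(key: str) -> pd.DataFrame:
    raw = r.get(key)
    if raw is None:
        raise KeyError(key)
    return pd.read_feather(io.BytesIO(raw))


df_to_redis("cache.mydf", pd.DataFrame({"a": [1, 2, 3]}))
print(df_from_redis("cache.mydf"))

Note that to_feather expects a default RangeIndex, so call reset_index() first if your frames carry a meaningful index.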

Related

Custom Python HP ILO Node Exporter not changing hostname by request

I'm trying to edit this project in Python to have an HP iLO exporter for Prometheus. So far I have read a few articles here on Stack Overflow and tried to implement some functionality, and eventually I came up with a partially working script, but the hostname is not changing after the first request. Is there a way to dump the collector?
I have tried it with try/except, but it just does not work.
The goal is to use curl like this:
curl localhost:9116/metrics?hostname=ip
And what will happen if there are 10 requests at the same time with different hostnames? Should it create some kind of queue?
Can someone help me? Thanks
Original project: https://github.com/JackWindows/ilo-exporter
My code:
#!/usr/bin/env python
import collections
import os
import time
import traceback

import flask
import redfish
import waitress
from flask import Flask, Response, request
from prometheus_client import make_wsgi_app
from prometheus_client.core import GaugeMetricFamily, REGISTRY
from werkzeug.middleware.dispatcher import DispatcherMiddleware
from werkzeug.wsgi import ClosingIterator


class AfterResponse:
    def __init__(self, app=None):
        self.callbacks = []
        if app:
            self.init_app(app)

    def __call__(self, callback):
        self.callbacks.append(callback)
        return callback

    def init_app(self, app):
        # install extension
        app.after_response = self
        # install middleware
        app.wsgi_app = AfterResponseMiddleware(app.wsgi_app, self)

    def flush(self):
        for fn in self.callbacks:
            try:
                fn()
            except Exception:
                traceback.print_exc()


class AfterResponseMiddleware:
    def __init__(self, application, after_response_ext):
        self.application = application
        self.after_response_ext = after_response_ext

    def __call__(self, environ, start_response):
        iterator = self.application(environ, start_response)
        try:
            return ClosingIterator(iterator, [self.after_response_ext.flush])
        except Exception:
            traceback.print_exc()
            return iterator
class ILOCollector(object):
    def __init__(self, hostname: str, port: int = 443, user: str = 'admin', password: str = 'password') -> None:
        self.ilo = redfish.LegacyRestClient(base_url=hostname, username=user, password=password)
        self.ilo.login()

        system = self.ilo.get('/redfish/v1/Systems/1/').obj
        self.label_names = ('hostname', 'product_name', 'sn')
        self.label_values = (hostname, system.Model, system.SerialNumber.strip())

    def collect(self):
        embedded_media = self.ilo.get('/redfish/v1/Managers/1/EmbeddedMedia/').obj
        smart_storage = self.ilo.get('/redfish/v1/Systems/1/SmartStorage/').obj
        thermal = self.ilo.get('/redfish/v1/Chassis/1/Thermal/').obj
        power = self.ilo.get('/redfish/v1/Chassis/1/Power/').obj

        g = GaugeMetricFamily('hpilo_health',
                              'iLO health status, -1: Unknown, 0: OK, 1: Degraded, 2: Failed.',
                              labels=self.label_names + ('component',))

        def status_to_code(status: str) -> int:
            status = status.lower()
            ret = -1
            if status == 'ok':
                ret = 0
            elif status == 'warning':
                ret = 1
            elif status == 'failed':
                ret = 2
            return ret

        g.add_metric(self.label_values + ('embedded_media',), status_to_code(embedded_media.Controller.Status.Health))
        g.add_metric(self.label_values + ('smart_storage',), status_to_code(smart_storage.Status.Health))
        for fan in thermal.Fans:
            g.add_metric(self.label_values + (fan.FanName,), status_to_code(fan.Status.Health))
        yield g

        g = GaugeMetricFamily('hpilo_fan_speed', 'Fan speed in percentage.',
                              labels=self.label_names + ('fan',), unit='percentage')
        for fan in thermal.Fans:
            g.add_metric(self.label_values + (fan.FanName,), fan.CurrentReading)
        yield g

        sensors_by_unit = collections.defaultdict(list)
        for sensor in thermal.Temperatures:
            if sensor.Status.State.lower() != 'enabled':
                continue
            reading = sensor.CurrentReading
            unit = sensor.Units
            sensors_by_unit[unit].append((sensor.Name, reading))

        for unit in sensors_by_unit:
            g = GaugeMetricFamily('hpilo_temperature', 'Temperature sensors reading.',
                                  labels=self.label_names + ('sensor',), unit=unit.lower())
            for sensor_name, sensor_reading in sensors_by_unit[unit]:
                g.add_metric(self.label_values + (sensor_name,), sensor_reading)
            yield g

        g = GaugeMetricFamily('hpilo_power_current', 'Current power consumption in Watts.',
                              labels=self.label_names, unit='watts')
        g.add_metric(self.label_values, power.PowerConsumedWatts)
        yield g

        label_values = self.label_values + (str(power.PowerMetrics.IntervalInMin),)
        g = GaugeMetricFamily('hpilo_power_average', 'Average power consumption in Watts.',
                              labels=self.label_names + ('IntervalInMin',), unit='watts')
        g.add_metric(label_values, power.PowerMetrics.AverageConsumedWatts)
        yield g

        g = GaugeMetricFamily('hpilo_power_min', 'Min power consumption in Watts.',
                              labels=self.label_names + ('IntervalInMin',), unit='watts')
        g.add_metric(label_values, power.PowerMetrics.MinConsumedWatts)
        yield g

        g = GaugeMetricFamily('hpilo_power_max', 'Max power consumption in Watts.',
                              labels=self.label_names + ('IntervalInMin',), unit='watts')
        g.add_metric(label_values, power.PowerMetrics.MaxConsumedWatts)
        yield g
# Create Flask app
app = Flask('iLO Exporter')


@app.route('/')
def root():
    return '''<html>
<head><title>iLO Exporter</title></head>
<body>
<h1>iLO Exporter</h1>
<p><a href='/metrics'>Metrics</a></p>
</body>
</html>'''


AfterResponse(app)


@app.after_response
def say_hi():
    print("hi")


@app.route("/metrics")
def home():
    try:
        REGISTRY.unregister(collector)
    except Exception:
        print("An exception occurred")

    port = int(os.getenv('ILO_PORT', 443))
    user = os.getenv('ILO_USER', 'admin')
    password = os.getenv('ILO_PASSWORD', 'password')
    hostname = request.args.get('hostname')

    app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {
        '/metrics': make_wsgi_app()
    })
    collector = ILOCollector(hostname, port, user, password)
    REGISTRY.register(collector)


if __name__ == '__main__':
    exporter_port = int(os.getenv('LISTEN_PORT', 9116))
    waitress.serve(app, host='0.0.0.0', port=exporter_port)
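For the "hostname only sticks on the first request" part, one pattern (a sketch, not the original project's approach) is to skip the global REGISTRY and the DispatcherMiddleware mount entirely and build a fresh CollectorRegistry per scrape, so each request can target a different iLO host. CollectorRegistry, generate_latest and CONTENT_TYPE_LATEST are standard prometheus_client APIs; ILOCollector is the class from the question:

from flask import Flask, Response, request
from prometheus_client import CollectorRegistry, generate_latest, CONTENT_TYPE_LATEST

app = Flask('iLO Exporter')


@app.route("/metrics")
def metrics():
    hostname = request.args.get("hostname")
    if not hostname:
        return Response("missing ?hostname=<ip>", status=400)
    registry = CollectorRegistry()
    # fresh collector for this request only; pass port/user/password as in the question
    registry.register(ILOCollector(hostname))
    return Response(generate_latest(registry), mimetype=CONTENT_TYPE_LATEST)

Because every request gets its own registry and collector, ten concurrent scrapes with different hostnames do not interfere and no queue is needed, although each scrape still performs several iLO REST calls, so scrape duration is the practical limit.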

FastAPI: module (pandas) not found when running with the command uvicorn main:app --reload

import json
import os
import sqlite3
from typing import Dict

import uvicorn
from fastapi import FastAPI
from starlette.requests import Request

from MockAPIAssureCare.csvToDB.csvToDB import csv_to_db_cursor

app = FastAPI()


def create_json(cur: sqlite3.Cursor, query_without_where_clause: str, path_params: Dict) -> Dict:
    columns = []
    data = cur.execute(query_without_where_clause)
    for column in data.description:
        columns.append(column[0])

    where_clause = ""
    for key, value in path_params.items():
        if key not in columns:
            continue
        if where_clause == "" and value.lstrip('-').isdigit():
            where_clause += f"WHERE {key} = {value} "
        elif where_clause == "" and not value.lstrip('-').isdigit():
            where_clause += f"WHERE {key} = '{value}' "
        elif where_clause != "" and value.lstrip('-').isdigit():
            where_clause += f"AND {key} = {value} "
        else:
            where_clause += f"AND {key} = '{value}' "

    query = query_without_where_clause + where_clause
    data = cur.execute(query)

    dict_list = []
    for row in data:
        row_dict = {}
        for i in range(len(row)):
            # omit the index part
            if i == 0:
                continue
            row_dict[columns[i]] = row[i]
        dict_list.append(row_dict)
    return dict_list


@app.get("/")
async def root():
    return {"message": "Hello World"}


@app.get("/hello")
async def hello():
    return {"message": "hello"}


def request_handler(cur, table):
    @app.get(f"/{table}")
    async def request(request: Request):
        path_param_key_value_pair = {}
        for key in request.query_params:
            path_param_key_value_pair[key] = request.query_params[key]
        return create_json(cur, f"Select * from {table} ", path_param_key_value_pair)


def main_wrapper():
    csv_file_list = list(map(lambda file: file[:file.index('.')], os.listdir('./csvFiles')))
    cur = csv_to_db_cursor(csv_file_list)
    for csv_file in csv_file_list:
        @app.get("/debug")
        async def debug():
            return "for loop has been called"

        request_handler(cur, csv_file)


main_wrapper()

# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    main_wrapper()
    uvicorn.run(app, host="127.0.0.1", port=8000)
# See PyCharm help at https://www.jetbrains.com/help/pycharm/
I created the Python code above for FastAPI. When I run the script directly, everything works as I expect. However, when I run the code with the command
"uvicorn main:app --reload" in the terminal, I get a module-not-found error:
File "..../root/csvToDB/csvToDB.py", line 2, in <module>
import pandas as pd
ModuleNotFoundError: No module named 'pandas'
I think this messes up my deployment of the API server, and I wonder if there is a way to solve it.
Thank you in advance!
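That traceback usually means the uvicorn on your PATH lives in a different Python environment from the one where pandas is installed (running the script directly uses one interpreter, the bare uvicorn command another). A small check you can run with the interpreter you use for the script:

import importlib.util
import sys

# Sketch: show which interpreter this is and whether pandas/uvicorn are visible to it.
print("interpreter:", sys.executable)
print("pandas found:", importlib.util.find_spec("pandas") is not None)
print("uvicorn found:", importlib.util.find_spec("uvicorn") is not None)

If pandas is visible there, start the server with python -m uvicorn main:app --reload so uvicorn runs under that same interpreter; otherwise, install pandas into the environment that owns the uvicorn executable (for example python -m pip install pandas inside the activated virtualenv).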

Change JIRA status on code merge with a Python script

I want to execute a Python script that closes all the JIRA tickets once my branch is merged into master. Can anyone please help me solve this problem?
from __future__ import with_statement

from jira import JIRA, JIRAError
from requests.exceptions import ConnectionError

import cProfile
import logging
import logging.handlers
import sys
import os
import shutil
import contextlib
import subprocess
import re
import collections
import getpass
import traceback
import pprint
import pdb
import stat
import cookielib
import urllib2
import ConfigParser
import string

jiraEnabled = True
dashes = "---------------------------------------------------------------------"


def main():
    global username, password, loglevel, jiraCheckEnabled, url, allowed_states, check_assignee, check_state, disabled_on_branches
    configure_logging(loglevel)
    config_file = get_config_file("config.ini")
    error_code = handle_pre_receive()
    if error_code != 0:
        logging.error("Hook failed please try later\n")
        return error_code


# Performs the git "pre-receive" hook
def handle_pre_receive():
    line = sys.stdin.read()
    try:
        (old_commit_id, new_commit_id, ref) = line.strip().split()
    except ValueError:
        logging.error("\n%s", dashes)
        return -1

    if new_commit_id == "0000000000000000000000000000000000000000":
        logging.debug("Branch was deleted, going to skip commit")
        return 0

    if disabled_on_branch(git_get_branchname_from_ref(ref)):
        return 0

    commit_id_array = git_get_array_of_commit_ids(old_commit_id, new_commit_id)
    if commit_id_array == None or len(commit_id_array) == 0:
        if old_commit_id == "0000000000000000000000000000000000000000":
            logging.debug("Branch was created, going to skip commit processing")
            return 0
        logging.error("No new commits found!")
        return -1

    if jiraEnabled:
        try:
            jira = JIRA(url, basic_auth=(username, password))
        except ConnectionError, e:
            logging.error("Failed to connect to JIRA")
            return 0
        except JIRAError, e:
            logging.error("JIRA has rejected connection")
            return 0
    else:
        jira = None


def get_shell_cmd_output(cmd):
    try:
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        return proc.stdout.read().rstrip('\n')
    except KeyboardInterrupt:
        logging.info("... interrupted")
    except Exception, e:
        logging.error("Failed trying to execute '%s'", cmd)


def disabled_on_branch(current_branchname):
    logging.debug("Test if '%s' is disabled...", current_branchname)
    if disabled_on_branches == None or string.strip(disabled_on_branches) == "":
        logging.debug("All branches enabled")
        return False
    branchlist = string.split(disabled_on_branches, ',')
    for branch in branchlist:
        branch = string.strip(branch)
        if current_branchname == branch:
            logging.debug("Current branch '%s' is disabled", current_branchname)
            return True
    logging.debug("Current branch '%s' is enabled", current_branchname)
    return False


def git_get_curr_branchname():
    buf = get_shell_cmd_output("git branch --no-color")
    # buf is a multiline output, each line containing a branch name
    # the line that starts with a "*" contains the current branch name
    m = re.search("^\* .*$", buf, re.MULTILINE)
    if m == None:
        return None
    return buf[m.start() + 2: m.end()]


def git_get_branchname_from_ref(ref):
    # "refs/heads/<branchname>"
    if string.find(ref, "refs/heads") != 0:
        logging.error("Invalid ref '%s'", ref)
        sys.exit(-1)
    return string.strip(ref[len("refs/heads/"):])


def git_get_commit_msg(commit_id):
    return get_shell_cmd_output("git rev-list --pretty --max-count=1 " + commit_id)


# ----------------------------------------------------------------------------
# python script entry point. Dispatches main()
if __name__ == "__main__":
    cProfile.run('main()')
    exit(0)
The handle_pre_receive method checks whether the branch is still enabled. If the branch is disabled, we have to close all the JIRA tickets related to that branch.
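The hook above never actually transitions any issues; with the jira library that part would look roughly like the sketch below. It assumes the issue keys (e.g. PROJ-123) have already been extracted from the commit messages and that the workflow has a transition named "Close Issue" or "Done"; both are assumptions to adapt to your project.

# Sketch (Python 3 style for brevity): close a list of JIRA issues once the merge is detected.
from jira import JIRA, JIRAError

jira = JIRA(url, basic_auth=(username, password))  # url/credentials as configured for the hook


def close_issues(issue_keys, comment="Closed automatically: branch merged into master"):
    for key in issue_keys:
        try:
            issue = jira.issue(key)
            # workflows differ per project, so look the transition up by name
            transitions = {t["name"].lower(): t["id"] for t in jira.transitions(issue)}
            transition_id = transitions.get("close issue") or transitions.get("done")
            if transition_id is None:
                continue  # no matching transition in this workflow
            jira.transition_issue(issue, transition_id, comment=comment)
        except JIRAError as err:
            # keep going; one bad ticket should not abort the hook
            print("could not close %s: %s" % (key, err))


close_issues(["PROJ-123", "PROJ-124"])  # hypothetical keys parsed from commit messages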

Only one user can connect to websocket server with tornado at a time

I am trying to develop a WebSocket server with Python and Tornado. This WebSocket server streams a large database result to the client for some visualization.
The problem I am facing is that no client can connect until the long process (send_data) is finished. It is as if only one client can connect at a time.
Are WebSocket handlers already asynchronous, or do I need to implement the asynchronous handling myself?
The following is my code:
import time
import random
import json
import datetime
import os
import sys
import cx_Oracle
import string
import re
import subprocess
import asyncio

from tornado import websocket, web, ioloop, escape
from datetime import timedelta
from random import randint
from pprint import pprint
from tornado.web import RequestHandler

os.environ['ORACLE_HOME'] = 'pathToOracleHome'
os.environ['LD_LIBRARY_PATH'] = "$ORACLE_HOME/lib"


def is_hex(a):
    printable = set(string.printable) - set("\x0b\x0c")
    return any(c not in printable for c in a)


def json_print(d):
    print(json.dumps(d, indent=4))


def printf(format, *args):
    sys.stdout.write(format % args)


def db(database_name='localhost/database'):
    return cx_Oracle.connect('user', 'pwd', database_name)


def query_db(query, args=(), one=False):
    cur = db().cursor()
    cur.arraysize = 1500
    cur.execute(query, args)
    return cur


class SummaryWebSocketHandler(websocket.WebSocketHandler):
    clients = []

    def check_origin(self, origin):
        return True

    def on_message(self, message):
        print('message received')

    def closeDbConn(self, cur):
        cur.connection.close()

    def query(self, sql):
        cursor = query_db(sql)
        self.send_data(cursor)

    ### THIS IS THE LONG PROCESS ###
    def send_data(self, cur):
        results = {}
        columns = [column[0] for column in cur.description]
        total = 0
        while True:
            Res = []
            rows = cur.fetchmany()
            if rows == []:
                print('no more rows')
                break
            for row in rows:
                results = {}
                for i, value in enumerate(row):
                    if value == None:
                        value = '-'
                    results[cur.description[i][0]] = value
                Res.append(results)
            self.write_message(json.dumps(Res))
            total = total + len(rows)
            print('total rows send', total)
        self.write_message("finished sending all data")
        self.on_close(cur)

    def open(self, table):
        print('Connection established. \n')
        print('Query string ' + table + '\n')
        p = re.compile(r'fields=')
        m = p.match(table)
        matches = table.split("&")
        print(matches)
        param_string = ''
        params = []
        if matches:
            for m in matches:
                print('m', m)
                param = ''
                items = m.split('=')
                if items[1] != '':
                    param = '--' + items[0] + ' ' + items[1]
                params.append(param)
            param_string = " ".join(params)
        script = "php getStmt.php " + param_string
        print(script)
        proc = subprocess.Popen(script, shell=True, stdout=subprocess.PIPE)
        sql = proc.stdout.read()
        print(sql)
        self.query(sql)

    def on_close(self, cursor):
        print('Connection closed.')
        cursor.close()


settings = {'auto_reload': True, 'debug': True}

if __name__ == "__main__":
    print("Starting websocket server program. Awaiting client requests to open websocket ...")
    application = web.Application([(r"/\/table\/(.*)", SummaryWebSocketHandler),
                                   ],
                                  **settings)
    application.listen(3001)
    ioloop.IOLoop.instance().start()
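To answer the "is this already async?" part: Tornado's IOLoop is single-threaded, so a handler that blocks (the Oracle fetch loop in send_data) stalls every other connection until it finishes. One common pattern, sketched below under the assumption that the blocking work can simply be pushed to a thread pool, is to await run_in_executor from a coroutine and write messages between batches; query_db is the helper already defined in the question:

import json
from concurrent.futures import ThreadPoolExecutor

from tornado import ioloop, websocket

executor = ThreadPoolExecutor(max_workers=4)


class SummaryWebSocketHandler(websocket.WebSocketHandler):
    def check_origin(self, origin):
        return True

    async def open(self, table):
        sql = "SELECT * FROM some_table"  # build this from `table` as in the question
        loop = ioloop.IOLoop.current()
        # the blocking cursor creation and fetches run on the thread pool,
        # so the IOLoop stays free to accept other websocket connections
        cur = await loop.run_in_executor(executor, query_db, sql)
        while True:
            rows = await loop.run_in_executor(executor, cur.fetchmany)
            if not rows:
                break
            await self.write_message(json.dumps([list(r) for r in rows], default=str))
        await self.write_message("finished sending all data")

With this shape, a second client can connect and start receiving data while the first one is still being streamed.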

How to capture logs of a query from HiveServer2 in real time with a Python client?

I use a modified version of pyhs2 (https://pypi.python.org/pypi/pyhs2) with the ability to run async queries and with the additional methods from TCLIService.Client (GetLog, send_GetLog, recv_GetLog) found in the Hue sources (https://github.com/cloudera/hue/blob/master/apps/beeswax/gen-py/TCLIService/TCLIService.py#L739).
But when I call the TCLIService.Client.GetLog method, there is an error:
$ python example.py
Traceback (most recent call last):
File "example.py", line 85, in <module>
rq = client.GetLog(lq)
File "/Users/toly/hive_streaming/libs/pyhs4/TCLIService/TCLIService.py", line 757, in GetLog
return self.recv_GetLog()
File "/Users/toly/hive_streaming/libs/pyhs4/TCLIService/TCLIService.py", line 773, in recv_GetLog
raise x
thrift.Thrift.TApplicationException: Invalid method name: 'GetLog'
In the script I use HiveServer2 from the Cloudera VM. The same server is, I guess, used by Hue, where it works successfully. In addition, I tried client_protocol values from 0 to 7 when creating the session.
import time
import sasl
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
from thrift.transport.TSocket import TSocket
from thrift.transport.TTransport import TBufferedTransport
from libs.pyhs4.cloudera.thrift_sasl import TSaslClientTransport
from libs.pyhs4.TCLIService import TCLIService
from libs.pyhs4.TCLIService.ttypes import TOpenSessionReq, TGetTablesReq, TFetchResultsReq,\
TStatusCode, TGetResultSetMetadataReq, TGetColumnsReq, TType, TTypeId, \
TExecuteStatementReq, TGetOperationStatusReq, TFetchOrientation, TCloseOperationReq, \
TCloseSessionReq, TGetSchemasReq, TCancelOperationReq, TGetLogReq
auth = 'PLAIN'
username = 'apanin'
password = 'none'
host = 'cloudera'
port = 10000
test_hql1 = 'select count(*) from test_text'
def sasl_factory():
    saslc = sasl.Client()
    saslc.setAttr("username", username)
    saslc.setAttr("password", password)
    saslc.init()
    return saslc


def get_type(typeDesc):
    for ttype in typeDesc.types:
        if ttype.primitiveEntry is not None:
            return TTypeId._VALUES_TO_NAMES[ttype.primitiveEntry.type]
        elif ttype.mapEntry is not None:
            return ttype.mapEntry
        elif ttype.unionEntry is not None:
            return ttype.unionEntry
        elif ttype.arrayEntry is not None:
            return ttype.arrayEntry
        elif ttype.structEntry is not None:
            return ttype.structEntry
        elif ttype.userDefinedTypeEntry is not None:
            return ttype.userDefinedTypeEntry


def get_value(colValue):
    if colValue.boolVal is not None:
        return colValue.boolVal.value
    elif colValue.byteVal is not None:
        return colValue.byteVal.value
    elif colValue.i16Val is not None:
        return colValue.i16Val.value
    elif colValue.i32Val is not None:
        return colValue.i32Val.value
    elif colValue.i64Val is not None:
        return colValue.i64Val.value
    elif colValue.doubleVal is not None:
        return colValue.doubleVal.value
    elif colValue.stringVal is not None:
        return colValue.stringVal.value


sock = TSocket(host, port)
transport = TSaslClientTransport(sasl_factory, "PLAIN", sock)
client = TCLIService.Client(TBinaryProtocol(transport))
transport.open()

res = client.OpenSession(TOpenSessionReq(username=username, password=password))
session = res.sessionHandle

query1 = TExecuteStatementReq(session, statement=test_hql1, confOverlay={}, runAsync=True)
response1 = client.ExecuteStatement(query1)
opHandle1 = response1.operationHandle

while True:
    time.sleep(1)
    q1 = TGetOperationStatusReq(operationHandle=opHandle1)
    res1 = client.GetOperationStatus(q1)

    lq = TGetLogReq(opHandle1)
    rq = client.GetLog(lq)

    if res1.operationState == 2:
        break

req = TCloseOperationReq(operationHandle=opHandle1)
client.CloseOperation(req)

req = TCloseSessionReq(sessionHandle=session)
client.CloseSession(req)
How can I capture the logs of a Hive query from HiveServer2 in real time?
UPD: Hive version is 1.2.1.
To get the logs of an operation, use the FetchResults method with the parameter fetchType=1, which returns logs instead of result rows.
Example usage:
query1 = TExecuteStatementReq(session, statement=test_hql1, confOverlay={}, runAsync=True)
response1 = client.ExecuteStatement(query1)
opHandle1 = response1.operationHandle

while True:
    time.sleep(1)
    q1 = TGetOperationStatusReq(operationHandle=opHandle1)
    res1 = client.GetOperationStatus(q1)

    request_logs = TFetchResultsReq(operationHandle=opHandle1, orientation=0, maxRows=10, fetchType=1)
    response_logs = client.FetchResults(request_logs)
    print response_logs.results

    if res1.operationState == 2:
        break
