Error retrieving data from hive using python

Error retrieving data from hive using python - python

I use Python to connect to Hive and retrieve the data into pandas, but it's giving an error:
pyhive.exc.OperationalError: TExecuteStatementResp
my code:
# -*- coding: utf-8 -*-
from pyhive import hive
from impala.util import as_pandas
from string import Template
config = {
'host': '127.0.0.1',
'database': 'default'
}
def get_conn(conf):
    """Open a Hive connection.

    ``conf`` is a dict whose items are passed to ``hive.connect`` as keyword
    arguments. Returns the connection object that ``hive.connect`` produces.
    """
    return hive.connect(**conf)
def execute_hql(hql, params=None):
    """Run an HQL template against Hive and return the result as a DataFrame.

    ``hql`` is a ``string.Template`` source: every ``$name`` placeholder is
    replaced with the matching value from ``params`` before execution.

    NOTE: substitution is plain text replacement, so values are not escaped.
    Only pass trusted values in ``params``.

    Raises ``KeyError`` when the template uses a placeholder that ``params``
    does not provide.
    """
    # Substitute before connecting so a bad template never opens a connection.
    # An empty mapping (instead of None) gives a clear KeyError for a missing
    # placeholder rather than a TypeError.
    hql = Template(hql).substitute(params or {})
    conn = get_conn(config)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(hql)
            # The rows are copied into the DataFrame here, so the cursor and
            # connection can be released as soon as this returns.
            return as_pandas(cursor)
        finally:
            cursor.close()
    finally:
        conn.close()
test.py
# -*- coding: utf-8 -*-
from pyhive import hive
from impala.util import as_pandas
import DB.hive_engines
hql = """
SELECT
keywords,
count(keywords)
FROM
table
WHERE
eventname = 'xxx' AND
cdate >= '$start_date' AND
cdate <= '$end_date'
GROUP BY
keywords
"""
if __name__ == '__main__':
    params = {'start_date': '2016-04-01', 'end_date': '2016-04-03'}
    df = DB.hive_engines.execute_hql(hql, params)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(df)
exception message:
pyhive.exc.OperationalError: TExecuteStatementResp(status=TStatus(errorCode=1, errorMessage='Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask', sqlState='08S01', infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:326', 'org.apache.hive.service.cli.operation.SQLOperation:runQuery:SQLOperation.java:146', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:173', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:268', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:410', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:391', 'sun.reflect.GeneratedMethodAccessor31:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:606', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:415', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1671', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy27:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:245', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:509', 
'org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1313', 'org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1298', 'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:285', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1145', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:615', 'java.lang.Thread:run:Thread.java:745'], statusCode=3), operationHandle=None)
Thanks!

Following this discussion, I used a valid username while creating the connection and that solved the problem.
For the sake of completeness of this answer, I am copy pasting the suggested code from the above mentioned forum. Please note the valid username there.
from pyhive import hive

conn = hive.Connection(host='<myhost>',
                       port='<myport>',
                       database='spin1',
                       username='<a valid user>')  # IMPORTANT**
cursor = conn.cursor()
# A statement has to be executed on the cursor before rows can be fetched.
cursor.execute('<my query>')
print(cursor.fetchall())
In the absence of a valid username, I was hitting the same exception mentioned in the question.

Related

Issue implementing the python code around cx_Oracle library

I am getting an error while implementing the below code:
'''
from pandas.core.frame import DataFrame
import cx_Oracle
import pandas as pd
import sys
class IFTDataCore:
    """Call every procedure of package PK_IVZ_IFT_EXTRACT and keep the results.

    After construction ``all_Procedures`` holds one DataFrame per procedure,
    in the order the procedure names were returned by the catalogue query.
    """

    def __init__(self, accountCode):
        """Connect to Oracle, run the package procedures and store their output.

        ``accountCode`` is passed as the second argument to every procedure
        except SP_IVZ_IFT_EXTRACT_ACCOUNTS, which only receives the cursor.
        Database errors propagate to the caller; the cursor and connection
        are closed either way.
        """
        all_Procedures = []
        # NOTE(review): host and credentials are hard-coded in source; they
        # should come from configuration or a secret store.
        dns_tns = cx_Oracle.makedsn("gbhenora06vd.corp.amvescap.net", "1525", "INVU")
        db = cx_Oracle.connect("CORP-SVC-IFT", "C$Rp$vc1ftUat", dns_tns)
        try:
            cursor = db.cursor()
            try:
                cursor.execute("select procedure_name from all_procedures where object_name = 'PK_IVZ_IFT_EXTRACT' ")
                procedureName = ['PK_IVZ_IFT_EXTRACT.' + str(row[0]) for row in cursor.fetchall()]
                l_cur = cursor.var(cx_Oracle.CURSOR)
                for name in procedureName:
                    if name == 'PK_IVZ_IFT_EXTRACT.SP_IVZ_IFT_EXTRACT_ACCOUNTS':
                        args = (l_cur,)
                    else:
                        args = (l_cur, accountCode)
                    ret_cursor = cursor.callproc(name, args)
                    # The first returned parameter is the cursor argument.
                    all_Procedures.append(pd.DataFrame(ret_cursor[0]))
            finally:
                cursor.close()
        finally:
            db.close()
        self.all_Procedures = all_Procedures

    # Kept as a plain method (not a property): callers invoke it with ().
    def getallProcedures(self):
        """Return the list of DataFrames collected by ``__init__``."""
        return self.all_Procedures
if __name__ == '__main__':
    all_Proc = IFTDataCore('TOUHI')
    Procedures = all_Proc.getallProcedures()
    print(Procedures[0])
PS: The code works fine if I do not put the logic in `__init__` and instead run the same logic directly in the script. Please let me know the possible reason why the method starts throwing an error when the class is instantiated in main.

The solution works fine now as per the below code:
from pandas.core.frame import DataFrame
import cx_Oracle
import pandas as pd
import sys
import json
from pathlib import Path
import os
class IFTDataCore:
    """Load the accounts extract from Oracle into ``self.dfx_Account``."""

    def __init__(self):
        """Connect, call the accounts procedure and keep its rows as a DataFrame.

        Database errors propagate to the caller unchanged; the cursor and the
        connection are closed whether or not the call succeeds.
        """
        # cx_Oracle connect strings have the form user/password@dsn.
        db = cx_Oracle.connect('invest/invest@INVD.WORLD')
        try:
            cursor = db.cursor()
            try:
                cursor.execute("select procedure_name from all_procedures where object_name = 'PK_IVZ_IFT_EXTRACT' ")
                rows = cursor.fetchall()
                # NOTE(review): the last returned row is deliberately left out,
                # as before; confirm which row that is, since the query has no
                # ORDER BY.
                procedureName = ['PK_IVZ_IFT_EXTRACT.' + str(row[0]) for row in rows[:-1]]
                # To convert Accounts procedure to JSON format
                l_cur_Account = cursor.var(cx_Oracle.CURSOR)
                # NOTE(review): index 1 is assumed to be the accounts
                # procedure; verify against the catalogue ordering.
                ret_cursor_Account = cursor.callproc(procedureName[1], (l_cur_Account,))
                self.dfx_Account = pd.DataFrame(ret_cursor_Account[0])
                self.dfx_Account.columns = ['fundCode', 'fundName', 'legalEntitiyIdentifier', 'isin']
            finally:
                cursor.close()
        finally:
            db.close()
def lambda_handler(event, context):
    """Return the accounts extract as a JSON response.

    ``event`` and ``context`` are accepted but not used. The response is the
    dict built by ``__lambda_response__`` with status '200' and the accounts
    DataFrame serialised as a JSON array of records.
    """
    positional_data = IFTDataCore()
    # Missing values become empty strings before serialisation.
    df_acct = positional_data.dfx_Account.fillna("")
    body = df_acct.to_json(orient='records')
    return __lambda_response__('200', body)
def __lambda_response__(status_code, response_body):
    """Wrap a status code and body in a response dict with fixed CORS headers.

    Returns a dict with keys ``statusCode``, ``headers`` and ``body``; the
    status code and body are passed through unchanged.
    """
    headers = {
        'Access-Control-Allow-Headers': 'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token',
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'OPTIONS,GET',
    }
    return {
        'statusCode': status_code,
        'headers': headers,
        'body': response_body,
    }

Mocking cursor.fetchone() returns None instead of returning a value in python

I wrote a function for my project which fetches data from an MSSQL server using pyodbc. The function works fine. I then wrote unit test cases using the unittest and mock libraries, mocked cursor.fetchone and had it return a predefined value, but when I run the test case it returns None instead of that value.
Here is my code.
Store.py
import os
from datetime import date
from datetime import timedelta
import logging
logging.basicConfig(filename="source_monitor.log", format='%(name)s - %(levelname)s - %(asctime)s %(message)s', filemode='a')
logger=logging.getLogger()
class KMExtracter:
    """Count URL_STORE rows for one collection date."""

    def __init__(self, metric_collected_date, technology_map=None):
        """Store the collection date and an optional technology name mapping.

        metric_collected_date: date text placed in front of ' 00:00:00' and
            ' 23:59:59' to form the bounds of the query.
        technology_map: optional dict translating the ``technology`` argument
            of ``get_metrics_by_technology`` into the value stored in the
            Technology column. Names without an entry are used unchanged.
        """
        self.metric_collected_date = metric_collected_date
        # The query reads self.technology, which was never initialised and so
        # always raised AttributeError; default to an empty mapping.
        self.technology = {} if technology_map is None else technology_map

    # argument conn is a db connection which will be passed in by a separate program
    def get_metrics_by_technology(self, conn, technology):
        """Return the number of URL_STORE rows for ``technology`` on the stored date.

        Returns None (after logging the error) if the query fails.
        """
        try:
            cursor = conn.cursor()
            # Values are bound as parameters rather than formatted into the
            # SQL text; the AND between the two conditions was missing before.
            cursor.execute(
                "SELECT COUNT(*) FROM URL_STORE "
                "WHERE Technology=? AND firstExtractionDate BETWEEN ? AND ?",
                (
                    self.technology.get(technology, technology),
                    "{0} 00:00:00".format(self.metric_collected_date),
                    "{0} 23:59:59".format(self.metric_collected_date),
                ),
            )
            count = cursor.fetchone()
            return count[0]
        except Exception as e:
            logging.error("{0} at get_metrics_by_technology()".format(e))
            return None
test_store.py
class TestKM(unittest.TestCase):
    def test_get_metrics_by_technology(self):
        mock_data_interface = Mock()
        # As posted, this line *calls* the mock reached through
        # cursor().execute().fetchone with (23987,) instead of assigning a
        # return value. The code under test also reads cursor().fetchone(),
        # not execute().fetchone(), so the canned row is never returned.
        mock_data_interface.cursor.return_value.execute.return_value.fetchone.return_value(23987,)
        km = KMExtracter('2021-04-03')
        print(km.get_metrics_by_technology(mock_data_interface, 'SOME'))
        self.assertEqual(23987,km.get_metrics_by_technology(mock_data_interface, 'SOME'))
Error I got:
AssertionError: 23987 != None

class TestKM(unittest.TestCase):
    def test_get_metrics_by_technology(self):
        mock_data_interface = Mock()
        # The code under test calls conn.cursor().fetchone(), so the canned
        # row hangs directly off cursor.return_value (no execute.return_value).
        # return_value has to be assigned; calling it configures nothing.
        mock_data_interface.cursor.return_value.fetchone.return_value = (23987,)
        km = KMExtracter('2021-04-03')
        result = km.get_metrics_by_technology(mock_data_interface, 'SOME')
        print(result)
        self.assertEqual(23987, result)

Problem converting varchar to datetime with SQLAlchemy execute

I can successfully connect to SQL Server Management Studio from my jupyter notebook with this script :
from sqlalchemy import create_engine
import pyodbc
import csv
import time
import urllib
# URL-encode the ODBC connection string so it can be embedded in the
# SQLAlchemy URL below.
params = urllib.parse.quote_plus('''DRIVER={SQL Server Native Client 11.0};
SERVER=SV;
DATABASE=DB;
TRUSTED_CONNECTION=YES;''')
# The encoded string is handed over through the odbc_connect query parameter.
engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
Just for an example, the following script perfectly works :
engine.execute("delete from table_name_X")
However, I failed to get the following script to work. For information, it works when I execute its adaptation in SQL Server Management Studio :
cde = 5
reportDate = df.loc[df.index[0],'Report Date'] # when you execute reportDate it returns 2019-11-15 00:00:00
# The DELETE statement is assembled by string concatenation: the code is
# rendered with str() and the date is embedded as a quoted 'YYYY-MM-DD'
# literal, which the server then has to convert back to a date.
req = "DELETE table_name_Y "
req+= "WHERE code = " + str(cde)
req+= " AND report_date = '" + str(reportDate.strftime('%Y-%m-%d')) + "'"
engine.execute(req)
According to the error message, there is a problem with the conversion of a varchar to a datetime, which created a value out of range. However, independently executed, the script str(reportDate.strftime('%Y-%m-%d')) works.
Could you please help me to understand why this previous script does not work ?

As @Ilja mentions in the comments to the question, you really should not be using dynamic SQL to construct your statement. It is error-prone and potentially unsafe. If you use a proper parameterized query many of your issues will simply go away.
For what it's worth, this works for me:
import datetime

import pandas as pd
import sqlalchemy as sa

# ...
with engine.begin() as conn:
    # set up test environment
    conn.execute(sa.text("CREATE TABLE #table_name_Y (code int, report_date date)"))
    conn.execute(sa.text("INSERT INTO #table_name_Y (code, report_date) VALUES (5, '2019-11-15')"))
    # verify test environment
    result = conn.execute(sa.text("SELECT * FROM #table_name_Y")).fetchall()
    print(result)  # [(5, datetime.date(2019, 11, 15))]
    # test code
    df = pd.DataFrame([(5, datetime.datetime(2019, 11, 15),), ], columns=['code', 'Report Date'])
    cde = int(df.loc[df.index[0], 'code'])
    print(type(cde))  # <class 'int'>
    reportDate = df.loc[df.index[0], 'Report Date']
    print(type(reportDate))  # <class 'pandas._libs.tslibs.timestamps.Timestamp'>
    # Values are bound as named parameters instead of being pasted into the SQL.
    sql = sa.text("DELETE FROM #table_name_Y WHERE code = :p0 AND report_date = :p1")
    params = {'p0': cde, 'p1': reportDate}
    conn.execute(sql, params)
    # verify outcome
    result = conn.execute(sa.text("SELECT * FROM #table_name_Y")).fetchall()
    print(result)  # []

CherryPy WS is not returning string in UTF-8

I'm trying to build a REST Web Service with CherryPy and Python. It works, but when I access it through Chrome, it's not displaying in UTF-8.
This web service queries a MongoDB and then gives the results in a list of dictionaries. I did a print(ticketME) and it's showing the right characters:
But when it displays in Chrome, it is not displayed correctly (and I'm also noticing that "solucion" and "problema" are not showing...):
As you can see in the code, I set the charset to UTF-8:
import cherrypy
import pymongo
import urllib
import pyodbc
import mysql.connector
from datetime import datetime
import time
import sys
import numpy as np
class HelloWorld(object):
    """CherryPy application that returns the tickets found by a MongoDB aggregate."""

    # (output key, source field) pairs copied verbatim from each document,
    # listed in the order the keys appear in the output dict.
    _FIELDS = (
        ('nrotkt', 'pTicket'),
        ('asunto', 'pAsunto'),
        ('estado', 'pEstado'),
        ('fechaCreacion', 'pFechaCreacion'),
        ('fechaCierre', 'pFechaCierre'),
        ('nodoCRM', 'pNodoCRM'),
        ('nodoCMTS', 'pNodoCMTS'),
        ('RTs', 'pRTs'),
        ('notas', 'pNotas'),
        ('asuntoCierre', 'pAsuntoCierre'),
        ('estadoEtaClick', 'pEstadoEtaClick'),
        ('afectacion', 'pAfectacion'),
        ('problema', 'pProblema'),
        ('solucion', 'pSolucion'),
        ('arbolCreacion', 'pElArbolCreacion'),
        ('arbolActual', 'pElarbolActual'),
    )

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def index(self):
        """Return the aggregated tickets as a list of dicts.

        A document that cannot be converted (for example because a field is
        missing) is skipped and a line is appended to error.log.
        """
        password = ...  # * password * (redacted in the original post)
        myclient = pymongo.MongoClient(...)  # *mongoDB connection string* (redacted)
        resultadoTicketsME = []
        try:
            mycol = myclient["moica2"]["moicaTickets"]
            myquery = ...  # *mongoDB aggregate query* (redacted)
            # Consume the cursor while the client is still open; it was
            # previously closed before the results were iterated.
            for x in mycol.aggregate(myquery):
                try:
                    ticketME = {out_key: x[src_key] for out_key, src_key in self._FIELDS}
                    if 'COMPLETO' in ticketME['nodoCMTS']:
                        ticketME['nodoCMTS'] = "Completo"
                    ticketME['idFuente'] = int(x['pFuente']['idFuente'])
                    print(ticketME)
                    resultadoTicketsME.append(ticketME)
                except Exception:
                    # Best effort: record the ticket that failed and carry on.
                    with open("error.log", "a+") as lf:
                        lf.write("MOICA2FUENTESME %s : No se pudo insertar el tkt %s\n" % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), x.get('pTicket')))
        finally:
            myclient.close()
        cherrypy.response.headers['Content-Type'] = "text/html;charset=utf-8"
        return resultadoTicketsME
USERS = {'ngabioud': 'password'}


def validate_password(realm, username, password):
    """Return True when ``username`` is in USERS and ``password`` matches it.

    ``realm`` is accepted for the callback signature and is not used.
    """
    return username in USERS and USERS[username] == password
# Global CherryPy settings: turn on the encode tool with UTF-8, the decode
# tool, and HTTP Basic authentication checked by validate_password.
cherrypy.config.update({'tools.encode.on': True,
'tools.encode.encoding': 'utf-8',
'tools.decode.on': True,
'tools.auth_basic.on': True,
'tools.auth_basic.realm': 'localhost',
'tools.auth_basic.checkpassword': validate_password,
'tools.auth_basic.accept_charset': 'UTF-8',
})
# Start the server with HelloWorld as the application.
cherrypy.quickstart(HelloWorld())
Is there anything else I could try?
Thank you,
Best regards

As stated by snakecharmerb in the comments, it was a Chrome rendering issue. I wrote a .php page setting the encoding to UTF-8 and it displayed correctly.

PyODBC SQL type error when reading query in Pandas

Looking for some help with a specific error when I write out from a pyodbc connection. How do I fix the error:
ODBC SQL type -360 is not yet supported. column-index=1 type=-360', 'HY106' error from PYODBC
Here is my code:
import pyodbc
import pandas as pd
import sqlparse
## Function created to read SQL Query
def create_query_string(sql_full_path):
    """Read a SQL file and return its text, dedented and with comments removed.

    ``sql_full_path`` is the path of the file to read.
    """
    # textwrap is not imported at the top of this script, so the call below
    # used to fail with NameError; import it locally.
    import textwrap

    with open(sql_full_path, 'r') as f_in:
        lines = f_in.read()
    # remove any common leading whitespace from every line
    query_string = textwrap.dedent(lines)
    ## remove comments from SQL Code
    return sqlparse.format(query_string, strip_comments=True)
# Load the query text, run it through pandas and write the result to CSV.
query_string = create_query_string("Bad Code from R.sql")
## initializes the connection string
# NOTE(review): `conn` is not defined anywhere in this snippet; the code that
# opens the pyodbc connection appears to have been left out.
curs = conn.cursor()
df=pd.read_sql(query_string,conn)
df.to_csv("TestSql.csv",index=None)
We are using the following SQL code in query string:
SELECT loss_yr_qtr_cd,
CASE
WHEN loss_qtr_cd <= 2 THEN loss_yr_num
ELSE loss_yr_num + 1
END AS LOSS_YR_ENDING,
snap_yr_qtr_cd,
CASE
WHEN snap_qtr_cd <= 2 THEN snap_yr_num
ELSE snap_yr_num + 1
END AS CAL_YR_ENDING,
cur_ctstrph_loss_ind,
clm_symb_grp_cd,
adbfdb_pol_form_nm,
risk_st_nm,
wrt_co_nm,
wrt_co_part_cd,
src_of_bus_cd,
rt_zip_dlv_ofc_cd,
cur_rst_rt_terr_cd,
Sum(xtra_cntrc_py_amt) AS XTRA_CNTRC_PY_AMT
FROM (SELECT DT.loss_yr_qtr_cd,
DT.loss_qtr_cd,
DT.loss_yr_num,
SNAP.snap_yr_qtr_cd,
SNAP.snap_qtr_cd,
SNAP.snap_yr_num,
CLM.cur_ctstrph_loss_ind,
CLM.clm_symb_grp_cd,
POL_SLCT.adbfdb_pol_form_nm,
POL_SLCT.adbfdb_pol_form_cd,
CVR.bsic_cvr_ind,
POL_SLCT.priv_pass_ind,
POL_SLCT.risk_st_nm,
POL_SLCT.wrt_co_nm,
POL_SLCT.wrt_co_part_cd,
POL_SLCT.src_of_bus_cd,
TERR.rt_zip_dlv_ofc_cd,
TERR.cur_rst_rt_terr_cd,
LOSS.xtra_cntrc_py_amt
FROM ahshdm1d.vmaloss_day_dt_dim DT,
ahshdm1d.vmasnap_yr_mo_dim SNAP,
ahshdm1d.tmaaclm_dim CLM,
ahshdm1d.tmaapol_slct_dim POL_SLCT,
ahshdm1d.tmaacvr_dim CVR,
ahshdm1d.tmaart_terr_dim TERR,
ahshdm1d.tmaaloss_fct LOSS,
ahshdm1d.tmaaprod_bus_dim BUS
WHERE SNAP.snap_yr_qtr_cd BETWEEN '20083' AND '20182'
AND TRIM(POL_SLCT.adbfdb_lob_cd) = 'A'
AND CVR.bsic_cvr_ind = 'Y'
AND POL_SLCT.priv_pass_ind = 'Y'
AND POL_SLCT.adbfdb_pol_form_cd = 'V'
AND POL_SLCT.src_of_bus_cd NOT IN ( 'ITC', 'INV' )
AND LOSS.xtra_cntrc_py_amt > 0
AND LOSS.loss_day_dt_id = DT.loss_day_dt_dim_id
AND LOSS.cvr_dim_id = CVR.cvr_dim_id
AND LOSS.pol_slct_dim_id = POL_SLCT.pol_slct_dim_id
AND LOSS.rt_terr_dim_id = TERR.rt_terr_dim_id
AND LOSS.prod_bus_dim_id = BUS.prod_bus_dim_id
AND LOSS.clm_dim_id = CLM.clm_dim_id
AND LOSS.snap_yr_mo_dt_id = SNAP.snap_yr_mo_dt_id) AS TABLE1
GROUP BY loss_yr_qtr_cd,
loss_qtr_cd,
loss_yr_num,
snap_yr_qtr_cd,
snap_qtr_cd,
snap_yr_num,
cur_ctstrph_loss_ind,
clm_symb_grp_cd,
adbfdb_pol_form_nm,
risk_st_nm,
wrt_co_nm,
wrt_co_part_cd,
src_of_bus_cd,
rt_zip_dlv_ofc_cd,
cur_rst_rt_terr_cd
FOR FETCH only
I'm just looking for how to properly write out the data from the database.
Thanks,
Justin

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Error retrieving data from hive using python - python

Related

Issue implementing the python code around cx_Oracle library

Mocking cursor.fetchone() returns None instead of returning a value in python

Problem converting varchar to datetime with SQLAlchemy execute

CherryPy WS is not returning string in UTF-8

PyODBC SQL type error when reading query in Pandas

Categories

Resources