cassandra.cluster.NoHostAvailable unknown exception - python

Below is my Python code. I read the keys from a CSV file and delete them in the database. It runs fine for a while and then throws the error below. I don't see any GC issue, and the health of the node looks fine.
Traceback (most recent call last):
File "/Users/XXX/Downloads/XXX/XXX", line 65, in <module>
parse_file(datafile)
File "/Users/XXX/Downloads/XXX/XXX", line 49, in parse_file
session = cluster.connect('XXX')
File "cassandra/cluster.py", line 1193, in cassandra.cluster.Cluster.connect (cassandra/cluster.c:17796)
File "cassandra/cluster.py", line 1240, in cassandra.cluster.Cluster._new_session (cassandra/cluster.c:18952)
File "cassandra/cluster.py", line 1980, in cassandra.cluster.Session.__init__ (cassandra/cluster.c:35191)
cassandra.cluster.NoHostAvailable: ("Unable to connect to any servers using keyspace 'qualys_ioc'", ['127.0.0.1'])
Python Code:
import argparse
import sys
import itertools
import codecs
import uuid
import os
import subprocess

try:
    import cassandra
    import cassandra.concurrent
except ImportError:
    sys.exit('Python Cassandra driver not installed. You might try \"pip install cassandra-driver\".')

from cassandra.cluster import Cluster, ResultSet, Session
from cassandra.policies import DCAwareRoundRobinPolicy
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import ConsistencyLevel
from cassandra import ReadTimeout

datafile = "/Users/XXX/adf.csv"
if os.path.exists(datafile):
    os.remove(datafile)

def dumptableascsv():
    os.system(
        "sh /Users/XXX/Documents/dse-5.0.14/bin/cqlsh 127.0.0.1 9042 -u cassandra -p cassandra -e \" COPY XXX.agent_delta_fragment(agent_id,delta_id ,last_fragment_id ,processed) TO \'/Users/XXX/adf.csv\' WITH HEADER = true;\"\n"
        " ")
    #print datafile

def parse_file(datafile):
    global fields
    data = []
    with open(datafile, "rb") as f:
        header = f.readline().split(",")
        # Loop through remaining lines in file object f
        for line in f:
            fields = line.split(",")  # Split line into list
            #print fields[3]
            if fields[3]:
                print "connect"
                print fields[0], fields[1], fields[2], fields[3]
                auth_provider = PlainTextAuthProvider(username='cassandra', password='cassandra')
                cluster = Cluster(['127.0.0.1'],
                                  load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='Cassandra'),
                                  port=9042, auth_provider=auth_provider, connect_timeout=10000,)
                session = cluster.connect('XXX')
                #session = cluster.connect('XXX')
                # session.execute("select * from XXX.agent_delta_fragment LIMIT 1")
                #rows = session.execute('select agent_id from XXX.agent_delta_fragment LIMIT 1')
                #for row in rows:
                #    print row.agent_id
                #batch = BatchStatement("DELETE FROM XXX.agent_delta_fragment_detail_test WHERE agent_id=%s and delta_id=%s and fragment_id=%s", (uuid.UUID(fields[0]), uuid.UUID(fields[1]), int(fields[3])))
                session.execute("DELETE FROM XXX.agent_delta_fragment_detail WHERE agent_id=%s and delta_id=%s and fragment_id=%s", (uuid.UUID(fields[0]), uuid.UUID(fields[1]), int(fields[2])), timeout=1000000)
                #session.execute(batch)
            else:
                print fields[3]
                print "connect-False"
            # print fields[3]

dumptableascsv()
parse_file(datafile)
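One detail that may matter here: the script builds a new Cluster and Session for every CSV row and never calls shutdown(), so connections accumulate as the file is processed. Below is a minimal sketch, not the confirmed fix, of connecting once and reusing the session for all deletes; it keeps the XXX placeholders from the question, and the prepared statement is an assumption added for illustration:

import csv
import uuid
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

datafile = "/Users/XXX/adf.csv"

auth_provider = PlainTextAuthProvider(username='cassandra', password='cassandra')
cluster = Cluster(['127.0.0.1'], port=9042, auth_provider=auth_provider)
session = cluster.connect('XXX')   # connect once, before reading the file

# Prepare the delete once and bind values per row.
delete_stmt = session.prepare(
    "DELETE FROM XXX.agent_delta_fragment_detail "
    "WHERE agent_id=? AND delta_id=? AND fragment_id=?")

with open(datafile, "rb") as f:
    reader = csv.reader(f)
    next(reader)                   # skip the header row
    for fields in reader:
        if fields[3]:
            session.execute(delete_stmt,
                            (uuid.UUID(fields[0]), uuid.UUID(fields[1]), int(fields[2])))

cluster.shutdown()                 # release all connections when done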

Related

Semaphore stuck in Python code, what's wrong?

I am trying to copy some files to an OCI bucket (Oracle Cloud Infrastructure).
The first 5 files are successfully copied, but then the script hangs and the processes in the task manager die, leaving only the main one.
from array import array
from pathlib import Path
import oci
import datetime
from multiprocessing import Process
import threading
import logging
from oci.object_storage import UploadManager
from oci.object_storage.models import CreateBucketDetails
from oci.object_storage.transfer.constants import MEBIBYTE

logging.basicConfig(filename=r'############', filemode='w', format='%(asctime)s - %(message)s', level=logging.INFO)

# Number of max processes allowed at a time
concurrency = 5
sema = threading.BoundedSemaphore(concurrency)

# The root directory path, Replace with your path
p = Path(r"#####")

# The Compartment OCID
compartment_id = "#######"

# The Bucket name where we will upload
bucket_name = "######"

config = oci.config.from_file()
object_storage_client = oci.object_storage.ObjectStorageClient(config)

part_size = 2 * MEBIBYTE

today = datetime.date.today()
today = str(today)

def upload_to_object_storage(path: str, name: str, namespace):
    #upload_manager = UploadManager(object_storage_client, allow_parallel_uploads=False)
    with open(path, "rb") as in_file:
        logging.info("Starting upload {}".format(name))
        object_storage_client.put_object(namespace, bucket_name, name, in_file)
        #upload_manager.upload_file(namespace, bucket_name, name, in_file.name, part_size=part_size)
        logging.info("Finished uploading {}".format(name))
        sema.release()
        return

def createUploadProcess(object: Path, object_storage_client, namespace, proc_list):
    name = object.relative_to(p).as_posix()
    sema.acquire()
    process = Process(target=upload_to_object_storage, args=(object.as_posix(), name, namespace))
    proc_list.append(process)
    process.start()

def processDirectoryObjects(object: Path, object_storage_client, namespace, proc_list):
    if object.is_file():
        createUploadProcess(object, object_storage_client, namespace, proc_list)

def processDirectory(path: Path, object_storage_client, namespace, proc_list):
    if path.exists():
        logging.info("in directory ---- " + path.relative_to(p).as_posix())
        for objects in path.iterdir():
            if objects.is_dir():
                processDirectory(objects, object_storage_client, namespace, proc_list)
            else:
                if today in objects.name:
                    processDirectoryObjects(objects, object_storage_client, namespace, proc_list)

if __name__ == '__main__':
    config = config
    object_storage_client = object_storage_client
    sema = sema
    namespace = object_storage_client.get_namespace().data
    proc_list: array = []
    if p.exists() and p.is_dir():
        processDirectory(p, object_storage_client, namespace, proc_list)
    for job in proc_list:
        job.join()
I have approximately 50 files to copy, but it uploads 5 and then hangs. The execution shows the following error for the 5 processes:
Process Process-1:
Traceback (most recent call last):
File "C:\Users\#######\AppData\Local\Programs\Python\Python36\lib\multiprocessing\process.py", line 258, in _bootstrap
self.run()
File "C:\Users\#######\AppData\Local\Programs\Python\Python36\lib\multiprocessing\process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\#######\Documents\copia_bkp_oci2.py", line 49, in upload_to_object_storage
sema.release()
File "C:\Users\#######\AppData\Local\Programs\Python\Python36\lib\threading.py", line 482, in release
raise ValueError("Semaphore released too many times")
ValueError: Semaphore released too many times
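For what it's worth, a minimal sketch of one way to keep the concurrency cap while avoiding the over-release, assuming the only requirement is to limit simultaneous upload processes: a threading.BoundedSemaphore acquired in the parent is merely copied into each spawned child on Windows, so releasing that copy (which was never acquired in the child) overflows it, whereas a multiprocessing.BoundedSemaphore passed to the worker shares one counter across processes. The upload_worker function and the empty file list below are placeholders, not code from the question:

from multiprocessing import Process, BoundedSemaphore

def upload_worker(path, name, namespace, sema):
    try:
        # the real put_object / upload_file call would go here
        pass
    finally:
        sema.release()   # shared counter, so the parent's acquire is balanced

if __name__ == '__main__':
    sema = BoundedSemaphore(5)            # process-aware, unlike threading.BoundedSemaphore
    files_to_upload = []                  # placeholder: (path, name, namespace) tuples
    procs = []
    for path, name, namespace in files_to_upload:
        sema.acquire()                    # blocks once 5 uploads are in flight
        proc = Process(target=upload_worker, args=(path, name, namespace, sema))
        procs.append(proc)
        proc.start()
    for proc in procs:
        proc.join()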

I am trying to unload data from a Snowflake internal stage to a Unix file path using the COPY INTO and GET commands, but I am getting an error

I am running all the SQL scripts under the scripts path in a for loop, copying the data into the #priya_stage area in Snowflake, and then using the GET command to unload the data from the stage area to my Unix path in CSV format. But I am getting an error.
Note: this same code works on my Mac but not on the Unix server.
import logging
import os
import snowflake.connector
from snowflake.connector import DictCursor as dict
from os import walk

try:
    conn = snowflake.connector.connect(
        account='xxx',
        user='xxx',
        password='xxx',
        database='xxx',
        schema='xxx',
        warehouse='xxx',
        role='xxx',
    )
    conn.cursor().execute('USE WAREHOUSE xxx')
    conn.cursor().execute('USE DATABASE xxx')
    conn.cursor().execute('USE SCHEMA xxx')

    take = []
    scripts = '/xxx/apps/xxx/xxx/scripts/snow/scripts/'
    os.chdir('/xxx/apps/xxx/xxx/scripts/snow/scripts/')

    for root, dirs, files in walk(scripts):
        for file in files:
            inbound = file[0:-4]
            sql = open(file, 'r').read()
            # file_number = 0
            # file_number += 1
            file_prefix = 'bridg_' + inbound
            file_name = file_prefix

            result_query = conn.cursor(dict).execute(sql)
            query_id = result_query.sfqid

            sql_copy_into = f'''
            copy into #priya_stage/{file_name}
            from (SELECT * FROM TABLE(RESULT_SCAN('{query_id}')))
            DETAILED_OUTPUT = TRUE
            HEADER = TRUE
            SINGLE = FALSE
            OVERWRITE = TRUE
            max_file_size=4900000000'''
            rs_copy_into = conn.cursor(dict).execute(sql_copy_into)

            for row_copy in rs_copy_into:
                file_name_in_stage = row_copy["FILE_NAME"]
                sql_get_to_local = f"""
                GET #priya_stage/{file_name_in_stage} file:///xxx/apps/xxx/xxx/inbound/zip_files/{inbound}/"""
                rs_get_to_local = conn.cursor(dict).execute(sql_get_to_local)

except snowflake.connector.errors.ProgrammingError as e:
    print('Error {0} ({1}): {2} ({3})'.format(e.errno, e.sqlstate, e.msg, e.sfqid))
finally:
    conn.cursor().close()
    conn.close()
Error
Traceback (most recent call last):
  File "Generic_local.py", line 52, in <module>
    rs_get_to_local = conn.cursor(dict).execute(sql_get_to_local)
  File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/cursor.py", line 746, in execute
    sf_file_transfer_agent.execute()
  File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/file_transfer_agent.py", line 379, in execute
    self._transfer_accelerate_config()
  File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/file_transfer_agent.py", line 671, in _transfer_accelerate_config
    self._use_accelerate_endpoint = client.transfer_accelerate_config()
  File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/s3_storage_client.py", line 572, in transfer_accelerate_config
    url=url, verb="GET", retry_id=retry_id, query_parts=dict(query_parts)
  File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/s3_storage_client.py", line 353, in _send_request_with_authentication_and_retry
    verb, generate_authenticated_url_and_args_v4, retry_id
  File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/storage_client.py", line 313, in _send_request_with_retry
    f"{verb} with url {url} failed for exceeding maximum retries."
snowflake.connector.errors.RequestExceedMaxRetryError: GET with url b'https://xxx-xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-stage.xx.amazonaws.com/?accelerate' failed for exceeding maximum retries.
This link redirects me to an error message:
https://xxx-xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-stage.xx.amazonaws.com/?accelerate
Access Denied error:
<Error>
<Code>AccessDenied</Code>
<Message>Access Denied</Message>
<RequestId>1X1Z8G0BTX8BAHXK</RequestId>
<HostId>QqdCqaSK7ogAEq3sNWaQVZVXUGaqZnPv78FiflvVzkF6nSYXTSKu3iSiYlUOU0ka+0IMzErwGC4=</HostId>
</Error>

PyCharm MemoryError when I read a 7GB sqlite3 file with pandas

I am trying to count the number of duplicate rows in a 7GB file "train.db". My laptop has 8GB of RAM. Below is the code I have used to obtain the results. When I run the code, I get the error below:
Traceback (most recent call last):
File "C:/Users/tahir/PycharmProjects/stopwordsremovefile/stopwordsrem.py", line 13, in <module>
df_no_dup = pd.read_sql_query('SELECT Title, Body, Tags, COUNT(*) as cnt_dup FROM trainingdata GROUP by Title, Body, Tags', con)
File "C:\Users\tahir\PycharmProjects\stopwordsremovefile\venv\lib\site-packages\pandas\io\sql.py", line 332, in read_sql_query
chunksize=chunksize,
File "C:\Users\tahir\PycharmProjects\stopwordsremovefile\venv\lib\site-packages\pandas\io\sql.py", line 1658, in read_query
data = self._fetchall_as_list(cursor)
File "C:\Users\tahir\PycharmProjects\stopwordsremovefile\venv\lib\site-packages\pandas\io\sql.py", line 1671, in _fetchall_as_list
result = cur.fetchall()
MemoryError
Process finished with exit code 1
Following is the code I am using:
import os
import sqlite3
import pandas as pd
from datetime import datetime
from pandas import DataFrame

if os.path.isfile('train.db'):
    start = datetime.now()
    con = sqlite3.connect('train.db')
    con.text_factory = lambda x: str(x, 'iso-8859-1')
    df_no_dup = pd.read_sql_query('SELECT Title, Body, Tags, COUNT(*) as cnt_dup FROM trainingdata GROUP by Title, Body, Tags', con)
    con.close()
    print("Time taken to run this cell:", datetime.now() - start)
else:
    print("Please download train.db file")

.apply(lambda ...) with strftime produces None

I am trying to change the format of the date and time values I am receiving from the sensor. I initially receive them as strings, convert them into datetime, and then try to apply strftime. When I do this in a Jupyter notebook on a set of values it works fine, but when I implement it in my code it breaks. Here is my code:
import json
import socket
from pandas.io.json import json_normalize
from sqlalchemy import create_engine
import pandas as pd
import datetime

# Establish connection with Database
engine = create_engine('sqlite:///Production.db', echo=False)

# Establish connection with Spider
server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
server_socket.bind(('192.168.130.35', 8089))

# Receive data while sensor is live
while True:
    message, address = server_socket.recvfrom(1024)
    # Create empty list to hold data of interest
    objs_json = []
    # Record only data where tracked_objects exist within json stream
    if b'tracked_objects' in message:
        stream = json.loads(message)
        if not stream:
            break
        # Append all data into list and process through parser
        objs_json += stream
        print("Recording Tracked Object")
        # Parsing json file with json_normalize object
        objs_df = json_normalize(objs_json, record_path='tracked_objects',
                                 meta=[['metadata', 'serial_number'], 'timestamp'])
        # Renaming columns
        objs_df = objs_df.rename(
            columns={"id": "object_id", "position.x": "x_pos", "position.y": "y_pos",
                     "person_data.height": "height",
                     "metadata.serial_number": "serial_number", "timestamp": "timestamp"})
        # Selecting columns of interest
        objs_df = objs_df.loc[:, ["timestamp", "serial_number", "object_id", "x_pos", "y_pos", "height"]]
        # Converting datetime into requested format
        objs_df["timestamp"] = pd.to_datetime(objs_df["timestamp"])
        objs_df["timestamp"].apply(lambda x: x.strftime("%d-%m-%Y %Hh:%Mm:%Ss.%f")[:-3])
        # Writing the data into SQLite db
        objs_df.to_sql('data_object', con=engine, if_exists='append', index=False)
    # In case there are no tracks, print No Tracks in console.
    else:
        print("No Object Tracked")
        # Empty the list and prepare it for next capture
        objs_json = []
Here is the error message I am getting:
Exception in thread Thread-1:
Traceback (most recent call last):
File "C:\Program Files (x86)\Python37-32\lib\threading.py", line 926, in _bootstrap_inner
self.run()
File "C:\Users\slavi\PycharmProjects\ProRail_FInal_POC\pythreads\runner.py", line 15, in run
self.function(*self.args, **self.kwargs)
File "C:\Users\slavi\PycharmProjects\ProRail_FInal_POC\ObjectStream.py", line 46, in objectstream
objs_df["timestamp"].apply(lambda x: x.strftime("%d-%m-%Y %Hh:%Mm:%Ss.%f")[:-3])
File "C:\Users\slavi\PycharmProjects\ProRail_FInal_POC\venv\lib\site-packages\pandas\core\series.py", line 4049, in apply
return self._constructor(mapped, index=self.index).__finalize__(self)
File "C:\Users\slavi\PycharmProjects\ProRail_FInal_POC\venv\lib\site-packages\pandas\core\series.py", line 299, in __init__
"index implies {ind}".format(val=len(data), ind=len(index))
ValueError: Length of passed values is 0, index implies 1
Any idea how I can resolve this error?
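For reference, .apply returns a new Series rather than modifying the column in place, so the formatted values above are discarded as written. Below is a minimal sketch of just the formatting step, assuming the timestamps have already been parsed with pd.to_datetime; the one-row frame is a hypothetical stand-in for the parsed sensor data, and .dt.strftime leaves missing timestamps as NaN instead of raising:

import pandas as pd

# Hypothetical one-row frame standing in for the parsed sensor data.
objs_df = pd.DataFrame({"timestamp": ["2020-03-11T14:05:07.123456Z"]})

objs_df["timestamp"] = pd.to_datetime(objs_df["timestamp"])

# Assign the result back; .dt.strftime formats the whole column at once,
# and .str[:-3] trims microseconds down to milliseconds.
objs_df["timestamp"] = objs_df["timestamp"].dt.strftime("%d-%m-%Y %Hh:%Mm:%Ss.%f").str[:-3]

print(objs_df["timestamp"].iloc[0])   # e.g. 11-03-2020 14h:05m:07s.123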

pymssql ValueError: list.remove(x): x not in list

I have a problem with the following code:
import pandas as pd
import tensorflow
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
from keras.models import model_from_json
import pymssql

# load json and create model
json_file = open('model_Messe_Dense.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("model_Messe_Dense.h5")

# Import Values
import pickle
y_scaler = pickle.load(open("y_scaler.p", "rb"))
x_scaler = pickle.load(open("x_scaler.p", "rb"))

# Connecting to server and update values
while True:
    try:
        conn = pymssql.connect(
            server='SHS_Messe',
            user='sa',
            password='sa',
            database='ChillWARE_Transfer'
        )
        stmt = "SELECT screw_speed,\
                ID,\
                Cylinder_Temperatur_Zone_1,\
                Cylinder_Temperatur_Zone_2,\
                Cylinder_Temperatur_Zone_3,\
                Cylinder_Temperatur_Zone_4,\
                Cylinder_Temperatur_Zone_5,\
                Cylinder_Temperatur_Zone_6,\
                mass_pressure,\
                Update_Done\
                FROM to_ChillWARE where ID= (SELECT MAX(ID) FROM to_ChillWARE)"
        # Execute Query here
        df = pd.read_sql(stmt, conn)
    except pymssql.Error as e:
        print(e)
        break

    feature_col_names = ['screw_speed', 'Cylinder_Temperatur_Zone_1', 'Cylinder_Temperatur_Zone_2', 'Cylinder_Temperatur_Zone_3',
                         'Cylinder_Temperatur_Zone_4', 'Cylinder_Temperatur_Zone_5', 'Cylinder_Temperatur_Zone_6']
    predicted_class_names = ['mass_pressure']
    Update = ['Update_Done']

    x = df[feature_col_names].values
    Update = df[Update].values

    x_scaled = x_scaler.transform(x)
    x_test = x_scaled

    predicted = loaded_model.predict(x_test)
    predicted = y_scaler.inverse_transform(predicted)
    predicted = predicted.reshape(-1)
    predicted.shape
    predicted = predicted * 51

    value = str(predicted)
    value = value.replace('[', '')
    value = value.replace(']', '')

    Update = str(Update)
    Update = Update.replace('[', '')
    Update = Update.replace(']', '')

    if Update == "False":
        cursor = conn.cursor()
        query = "UPDATE to_ChillWARE SET [mass_pressure] ="
        query = query + value + ",[Update_Done] = 1"
        query = query + " where ID= (SELECT MAX(ID) FROM to_ChillWARE)"
        cursor.execute(query)
        conn.commit()
I want to check whether I have a connection to the MSSQL server, and if Update == False I want to update the values.
On my PC everything works just fine; I executed the code via Python and via an exe (PyInstaller). But when I transfer this to another PC I get the error:
Traceback (most recent call last):
File "Test.py", line 29, in <module>
File "src\pymssql.pyx", line 636, in pymssql.connect
File "src\_mssql.pyx", line 1957, in _mssql.connect
File "src\_mssql.pyx", line 675, in _mssql.MSSQLConnection.__init__
ValueError: list.remove(x): x not in list
I think there is a problem with the pymssql function.
I found the same error here, but I don't understand the solution:
https://github.com/sqlmapproject/sqlmap/issues/3035
If anyone could help, that would be amazing.
Thanks, everybody
According to the comment in the link you provided, it looks like a connection error.
Have you checked that, from the machine where you are trying to run the code, you can reach the DB server with the server name and credentials provided?
Edit with the solution from the comments below:
You can reuse the connection by defining "conn = pymssql.connect..." outside the while loop and always using that variable, so you are not creating a connection on each iteration, as sketched below.
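A minimal sketch of that structure, assuming the same table as the question with the query trimmed down and the model-scoring part elided:

import pymssql
import pandas as pd

# Connect once, outside the loop, and reuse the same connection object.
conn = pymssql.connect(
    server='SHS_Messe',
    user='sa',
    password='sa',
    database='ChillWARE_Transfer'
)

try:
    while True:
        stmt = ("SELECT screw_speed, ID, mass_pressure, Update_Done "
                "FROM to_ChillWARE WHERE ID = (SELECT MAX(ID) FROM to_ChillWARE)")
        df = pd.read_sql(stmt, conn)
        # ... scale features, run the model, and issue the UPDATE as in the question ...
finally:
    conn.close()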
