twisted get cipher name - python

from twisted.internet.protocol import ClientFactory
from twisted.internet.protocol import Protocol
from twisted.internet.defer import Deferred
from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks
from twisted.internet.defer import returnValue
from twisted.internet.ssl import CertificateOptions
from twisted.internet.ssl import AcceptableCiphers
from ssl import PROTOCOL_SSLv23
from ssl import DER_cert_to_PEM_cert
from OpenSSL.crypto import FILETYPE_PEM
from OpenSSL.crypto import load_certificate
import time
import json

normalCyphers = AcceptableCiphers.fromOpenSSLCipherString(
    'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
    'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:'
    '!eNULL:!MD5'
)
normalCtxFac = CertificateOptions(acceptableCiphers=normalCyphers, method=PROTOCOL_SSLv23)

weakCiphers = AcceptableCiphers.fromOpenSSLCipherString('ALL:!aNULL:!eNULL')
weakCtxFac = CertificateOptions(acceptableCiphers=weakCiphers, method=PROTOCOL_SSLv23)

def asn1DateToTimestamp(asn1Date):
    # notAfter/notBefore are ASN.1 timestamps such as '20250101000000Z'
    expirationDate = time.strptime(asn1Date[:8], '%Y%m%d')
    return int(time.mktime(expirationDate))

class CertCheckProtocol(Protocol):
    def __init__(self, dfd, isWeakSsl):
        self.dfd = dfd
        self.isWeakSsl = isWeakSsl

    def connectionMade(self):
        # The TLS handshake may not be done yet; poll until the certificate arrives.
        reactor.callLater(0.01, self.getCert, 20)

    def getCert(self, depth):
        cert = self.transport.getPeerCertificate()
        transportHandle = self.transport.getHandle()
        if cert is None or transportHandle is None:
            if depth <= 0:
                self.transport.loseConnection()
                return
            reactor.callLater(0.01, self.getCert, depth - 1)
        else:
            cipherName = transportHandle.get_cipher_name()
            key = DER_cert_to_PEM_cert(cert)
            targetCert = load_certificate(FILETYPE_PEM, key)
            timestamp = asn1DateToTimestamp(targetCert.get_notAfter())
            expiresIn = timestamp - time.time()
            try:
                usedCipher = ' '.join(map(str, cipherName))
            except Exception:
                usedCipher = str(cipherName)
            self.dfd.callback({
                'name': 'certificate',
                'expiresIn': expiresIn,
                'sha1Digest': targetCert.digest('sha1'),
                'signatureAlgorithm': targetCert.get_signature_algorithm(),
                'issuer': targetCert.get_issuer().CN,
                'notAfter': timestamp,
                'notBefore': asn1DateToTimestamp(targetCert.get_notBefore()),
                'serialNumber': targetCert.get_serial_number(),
                'subject': targetCert.get_subject().CN,
                'sslVersion': targetCert.get_version(),
                'usedCipher': usedCipher,
                'weakCipher': self.isWeakSsl
            })

    def connectionLost(self, reason):
        if not self.dfd.called:
            self.dfd.errback(Exception('Connection lost'))

class CertCheckFactory(ClientFactory):
    def __init__(self, dfd, isWeakSsl):
        self.dfd = dfd
        self.isWeakSsl = isWeakSsl

    def clientConnectionFailed(self, connector, reason):
        self.dfd.errback(reason)

    def buildProtocol(self, addr):
        return CertCheckProtocol(self.dfd, self.isWeakSsl)

@inlineCallbacks
def getCertificateInfo(ip, port=443):
    dfd = Deferred()
    factory = CertCheckFactory(dfd, isWeakSsl=False)
    reactor.connectSSL(ip, int(port), factory, contextFactory=normalCtxFac)
    try:
        res = yield dfd
    except Exception as ex:
        if hasattr(ex, 'reason') and 'HANDSHAKE_FAILURE' in ex.reason:
            # Retry with the permissive cipher list and flag the host as weak.
            dfd = Deferred()
            factory = CertCheckFactory(dfd, isWeakSsl=True)
            reactor.connectSSL(ip, int(port), factory, contextFactory=weakCtxFac)
            res = yield dfd
        else:
            raise
    returnValue(res)

@inlineCallbacks
def testit(ip):
    res = yield getCertificateInfo(ip)
    print json.dumps(res)
    reactor.stop()

if __name__ == '__main__':
    testit('x.x.x.x')
    reactor.run()
I'm not sure whether catching the handshake failure like this is correct for Twisted. I still have to test that part against a server that only offers a weaker cipher.
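One option, instead of string-matching on ex.reason, would be to inspect the exception type directly. A minimal, untested sketch, assuming pyOpenSSL surfaces refused handshakes as SSL.Error with (library, function, reason) tuples in args[0]:

from OpenSSL import SSL

def isHandshakeFailure(ex):
    # Assumption: a refused handshake raises SSL.Error whose args[0] is a
    # list of (library, function, reason) tuples from the OpenSSL error queue.
    if not isinstance(ex, SSL.Error):
        return False
    try:
        return any('handshake failure' in (reason or '').lower()
                   for _lib, _func, reason in ex.args[0])
    except (IndexError, TypeError):
        return False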
This is the stack trace showing that self._socket is None for the transport handle:
File "C:\Python27\lib\site-packages\twisted\internet\base.py", line 825, in runUntilCurrent
call.func(*call.args, **call.kw)
File "C:\Users\sjuul\workspace\meuk\soCertQuestion.py", line 50, in getCert
cipherName = transportHandle.get_cipher_name()
File "C:\Python27\lib\site-packages\OpenSSL\SSL.py", line 838, in __getattr__
return getattr(self._socket, name)
exceptions.AttributeError: 'NoneType' object has no attribute 'get_cipher_name'

It's not exposed fully publicly - feel free to file a bug on Twisted for this - but you can get to it via the pyOpenSSL API escape-hatch, with self.transport.getHandle().get_cipher_name().
When I modify your example to remove spurious imports from the standard library ssl and pyOpenSSL OpenSSL modules, it works fine, and tells me google.com is using ECDHE-RSA-AES128-GCM-SHA256:
from twisted.internet.protocol import ClientFactory
from twisted.internet.protocol import Protocol
from twisted.internet.defer import Deferred
from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks
from twisted.internet.defer import returnValue
from twisted.internet.ssl import CertificateOptions
from twisted.internet.ssl import AcceptableCiphers
import time
import json

normalCyphers = AcceptableCiphers.fromOpenSSLCipherString(
    'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
    'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:'
    '!eNULL:!MD5'
)
normalCtxFac = CertificateOptions(acceptableCiphers=normalCyphers)

weakCiphers = AcceptableCiphers.fromOpenSSLCipherString('ALL:!aNULL:!eNULL')
weakCtxFac = CertificateOptions(acceptableCiphers=weakCiphers)

def asn1DateToTimestamp(asn1Date):
    expirationDate = time.strptime(asn1Date[:8], '%Y%m%d')
    return int(time.mktime(expirationDate))

class CertCheckProtocol(Protocol):
    def __init__(self, dfd, isWeakSsl):
        self.dfd = dfd
        self.isWeakSsl = isWeakSsl

    def connectionMade(self):
        reactor.callLater(0.01, self.getCert, 20)

    def getCert(self, depth):
        cert = self.transport.getPeerCertificate()
        transportHandle = self.transport.getHandle()
        if cert is None or transportHandle is None:
            if depth <= 0:
                self.transport.loseConnection()
                return
            reactor.callLater(0.01, self.getCert, depth - 1)
        else:
            cipherName = transportHandle.get_cipher_name()
            timestamp = asn1DateToTimestamp(cert.get_notAfter())
            expiresIn = timestamp - time.time()
            self.dfd.callback({
                'name': 'certificate',
                'expiresIn': expiresIn,
                'sha1Digest': cert.digest('sha1'),
                'signatureAlgorithm': cert.get_signature_algorithm(),
                'issuer': cert.get_issuer().CN,
                'notAfter': timestamp,
                'notBefore': asn1DateToTimestamp(cert.get_notBefore()),
                'serialNumber': cert.get_serial_number(),
                'subject': cert.get_subject().CN,
                'sslVersion': cert.get_version(),
                'usedCipher': cipherName,
                'weakCipher': self.isWeakSsl
            })

    def connectionLost(self, reason):
        if not self.dfd.called:
            self.dfd.errback(Exception('Connection lost'))

class CertCheckFactory(ClientFactory):
    def __init__(self, dfd, isWeakSsl):
        self.dfd = dfd
        self.isWeakSsl = isWeakSsl

    def clientConnectionFailed(self, connector, reason):
        self.dfd.errback(reason)

    def buildProtocol(self, addr):
        return CertCheckProtocol(self.dfd, self.isWeakSsl)

@inlineCallbacks
def getCertificateInfo(ip, port=443):
    dfd = Deferred()
    factory = CertCheckFactory(dfd, isWeakSsl=False)
    reactor.connectSSL(ip, int(port), factory, contextFactory=normalCtxFac)
    try:
        res = yield dfd
    except Exception as ex:
        if hasattr(ex, 'reason') and 'HANDSHAKE_FAILURE' in ex.reason:
            dfd = Deferred()
            factory = CertCheckFactory(dfd, isWeakSsl=True)
            reactor.connectSSL(ip, int(port), factory,
                               contextFactory=weakCtxFac)
            res = yield dfd
        else:
            raise
    returnValue(res)

@inlineCallbacks
def testit(ip):
    res = yield getCertificateInfo(ip)
    print json.dumps(res)
    reactor.stop()

if __name__ == '__main__':
    testit('google.com')
    reactor.run()

Related

Custom Python HP ILO Node Exporter not changing hostname by request

I'm trying to edit this project in Python to make an HP iLO exporter for Prometheus. So far I have read a few articles here on Stack Overflow and tried to implement some of the functionality; eventually I came up with a partially working script, but the hostname does not change after the first request. Is there a way to dump the collector?
I have tried it with try/except, but it just does not work.
The goal is to use curl like this:
curl localhost:9116/metrics?hostname=ip
And what will happen if there are 10 requests at the same time with different hostnames? Should it create some kind of a queue?
Can someone help me? Thanks
Original Project : https://github.com/JackWindows/ilo-exporter
My code:
#!/usr/bin/env python
import collections
import os
import time
import flask
import redfish
import waitress
from flask import Flask
from prometheus_client import make_wsgi_app
from prometheus_client.core import GaugeMetricFamily, REGISTRY
from werkzeug.middleware.dispatcher import DispatcherMiddleware
from flask import request
from time import sleep
from flask import Flask, Response, request
import traceback
from werkzeug.wsgi import ClosingIterator

class AfterResponse:
    def __init__(self, app=None):
        self.callbacks = []
        if app:
            self.init_app(app)

    def __call__(self, callback):
        self.callbacks.append(callback)
        return callback

    def init_app(self, app):
        # install extension
        app.after_response = self
        # install middleware
        app.wsgi_app = AfterResponseMiddleware(app.wsgi_app, self)

    def flush(self):
        for fn in self.callbacks:
            try:
                fn()
            except Exception:
                traceback.print_exc()

class AfterResponseMiddleware:
    def __init__(self, application, after_response_ext):
        self.application = application
        self.after_response_ext = after_response_ext

    def __call__(self, environ, start_response):  # standard WSGI callable
        iterator = self.application(environ, start_response)
        try:
            return ClosingIterator(iterator, [self.after_response_ext.flush])
        except Exception:
            traceback.print_exc()
            return iterator

class ILOCollector(object):
    def __init__(self, hostname: str, port: int = 443, user: str = 'admin', password: str = 'password') -> None:
        self.ilo = redfish.LegacyRestClient(base_url=hostname, username=user, password=password)
        self.ilo.login()
        system = self.ilo.get('/redfish/v1/Systems/1/').obj
        self.label_names = ('hostname', 'product_name', 'sn')
        self.label_values = (hostname, system.Model, system.SerialNumber.strip())

    def collect(self):
        embedded_media = self.ilo.get('/redfish/v1/Managers/1/EmbeddedMedia/').obj
        smart_storage = self.ilo.get('/redfish/v1/Systems/1/SmartStorage/').obj
        thermal = self.ilo.get('/redfish/v1/Chassis/1/Thermal/').obj
        power = self.ilo.get('/redfish/v1/Chassis/1/Power/').obj

        g = GaugeMetricFamily('hpilo_health',
                              'iLO health status, -1: Unknown, 0: OK, 1: Degraded, 2: Failed.',
                              labels=self.label_names + ('component',))

        def status_to_code(status: str) -> int:
            status = status.lower()
            ret = -1
            if status == 'ok':
                ret = 0
            elif status == 'warning':
                ret = 1
            elif status == 'failed':
                ret = 2
            return ret

        g.add_metric(self.label_values + ('embedded_media',), status_to_code(embedded_media.Controller.Status.Health))
        g.add_metric(self.label_values + ('smart_storage',), status_to_code(smart_storage.Status.Health))
        for fan in thermal.Fans:
            g.add_metric(self.label_values + (fan.FanName,), status_to_code(fan.Status.Health))
        yield g

        g = GaugeMetricFamily('hpilo_fan_speed', 'Fan speed in percentage.',
                              labels=self.label_names + ('fan',), unit='percentage')
        for fan in thermal.Fans:
            g.add_metric(self.label_values + (fan.FanName,), fan.CurrentReading)
        yield g

        sensors_by_unit = collections.defaultdict(list)
        for sensor in thermal.Temperatures:
            if sensor.Status.State.lower() != 'enabled':
                continue
            reading = sensor.CurrentReading
            unit = sensor.Units
            sensors_by_unit[unit].append((sensor.Name, reading))
        for unit in sensors_by_unit:
            g = GaugeMetricFamily('hpilo_temperature', 'Temperature sensors reading.',
                                  labels=self.label_names + ('sensor',), unit=unit.lower())
            for sensor_name, sensor_reading in sensors_by_unit[unit]:
                g.add_metric(self.label_values + (sensor_name,), sensor_reading)
            yield g

        g = GaugeMetricFamily('hpilo_power_current', 'Current power consumption in Watts.', labels=self.label_names,
                              unit='watts')
        g.add_metric(self.label_values, power.PowerConsumedWatts)
        yield g

        label_values = self.label_values + (str(power.PowerMetrics.IntervalInMin),)
        g = GaugeMetricFamily('hpilo_power_average', 'Average power consumption in Watts.',
                              labels=self.label_names + ('IntervalInMin',), unit='watts')
        g.add_metric(label_values, power.PowerMetrics.AverageConsumedWatts)
        yield g

        g = GaugeMetricFamily('hpilo_power_min', 'Min power consumption in Watts.',
                              labels=self.label_names + ('IntervalInMin',), unit='watts')
        g.add_metric(label_values, power.PowerMetrics.MinConsumedWatts)
        yield g

        g = GaugeMetricFamily('hpilo_power_max', 'Max power consumption in Watts.',
                              labels=self.label_names + ('IntervalInMin',), unit='watts')
        g.add_metric(label_values, power.PowerMetrics.MaxConsumedWatts)
        yield g

# Create Flask app
app = Flask('iLO Exporter')

@app.route('/')
def root():
    return '''<html>
<head><title>iLO Exporter</title></head>
<body>
<h1>iLO Exporter</h1>
<p><a href='/metrics'>Metrics</a></p>
</body>
</html>'''

AfterResponse(app)

@app.after_response
def say_hi():
    print("hi")

@app.route("/metrics")
def home():
    try:
        REGISTRY.unregister(collector)
    except:
        print("An exception occurred")
    port = int(os.getenv('ILO_PORT', 443))
    user = os.getenv('ILO_USER', 'admin')
    password = os.getenv('ILO_PASSWORD', 'password')
    hostname = request.args.get('hostname')
    app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {
        '/metrics': make_wsgi_app()
    })
    collector = ILOCollector(hostname, port, user, password)
    REGISTRY.register(collector)

if __name__ == '__main__':
    exporter_port = int(os.getenv('LISTEN_PORT', 9116))
    waitress.serve(app, host='0.0.0.0', port=exporter_port)
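(A note on the per-request hostname: one way to avoid registering and unregistering collectors on the global REGISTRY for every request is to build a fresh CollectorRegistry per scrape. A minimal sketch, assuming the ILOCollector class above; generate_latest and CONTENT_TYPE_LATEST come from prometheus_client, so concurrent scrapes with different hostnames each get their own collector and no queue is needed:)

from flask import Flask, Response, request
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
from prometheus_client.core import CollectorRegistry

app = Flask('iLO Exporter')

@app.route('/metrics')
def metrics():
    # A registry scoped to this request: nothing global is mutated.
    hostname = request.args.get('hostname')
    registry = CollectorRegistry()
    registry.register(ILOCollector(hostname))  # ILOCollector as defined above
    return Response(generate_latest(registry), mimetype=CONTENT_TYPE_LATEST)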

Stomp Consumer using deferred.inlinecallback

I am implementing a STOMP consumer as a library. By calling this library from another application, I should be able to get the data from ActiveMQ. I am implementing it as below, but I have a problem returning the frame.body: I am not able to retrieve the data from outside the class.
from twisted.internet import defer
from stompest.async import Stomp
from stompest.async.listener import SubscriptionListener
from stompest.config import StompConfig
from stompest.error import StompConnectionError, StompProtocolError  # missing in the original
from stompest.protocol import StompSpec  # missing in the original
from socket import gethostname
from uuid import uuid1
import json

class Consumer(object):
    def __init__(self, amq_uri):
        self.amq_uri = amq_uri
        self.hostname = gethostname()
        self.config = StompConfig(uri=self.amq_uri)

    @defer.inlineCallbacks
    def run(self, in_queue):
        client = yield Stomp(self.config)
        headers = {
            StompSpec.ACK_HEADER: StompSpec.ACK_CLIENT_INDIVIDUAL,
            StompSpec.ID_HEADER: self.hostname,
            'activemq.prefetchSize': '1000',
        }
        yield client.connect(headers=self._return_client_id())
        client.subscribe(
            in_queue,
            headers,
            listener=SubscriptionListener(self.consume)
        )
        try:
            client = yield client.disconnected
        except StompConnectionError:
            yield client.connect(headers=self._return_client_id())
            client.subscribe(
                in_queue,
                headers,
                listener=SubscriptionListener(self.consume)
            )
        while True:
            try:
                yield client.disconnected
            except StompProtocolError:
                pass
            except StompConnectionError:
                yield client.connect(headers=self._return_client_id())
                client.subscribe(
                    in_queue,
                    headers,
                    listener=SubscriptionListener(self.consume)
                )

    def _return_client_id(self):
        client_id = {}
        client_id['client-id'] = gethostname() + '-' + str(uuid1())
        return client_id

    def consume(self, client, frame):
        data = json.loads(frame.body)
        print 'Received Message Type {}'.format(type(data))
        print 'Received Message {}'.format(data)
        ## I want to return data here. I am able to print the frame.body here.

# Call from another application
import Queue
from twisted.internet import reactor

amq_uri = 'tcp://localhost:61613'
in_queue = '/queue/test_queue'
c = Consumer(amq_uri)
c.run(in_queue)
print "data is from outside function", data  # Should be able to get the data which is returned by consume here
reactor.run()
Can someone please let me know how I can achieve this?
Thanks
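(One Twisted-idiomatic way to get frames out of the async consumer is a DeferredQueue that the listener fills and a reader drains; a minimal sketch, not the solution the author settled on below:)

import json
from twisted.internet.defer import DeferredQueue, inlineCallbacks

messages = DeferredQueue()

class QueueingConsumer(Consumer):  # hypothetical subclass of the Consumer above
    def consume(self, client, frame):
        # Push each decoded body onto the queue instead of returning it.
        messages.put(json.loads(frame.body))

@inlineCallbacks
def reader():
    while True:
        data = yield messages.get()  # fires when a message arrives
        print "data is from outside function", data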
I found a solution to my problem. Instead of using the async stomp library, I used the sync stomp library and implemented it as below:
import json
from socket import gethostname

from stompest.config import StompConfig
from stompest.protocol import StompSpec
from stompest.sync import Stomp  # the synchronous client

class Consumer(object):
    def __init__(self, amq_uri):
        self.amq_uri = amq_uri
        self.hostname = gethostname()
        self.config = StompConfig(uri=self.amq_uri)

    def run(self, in_queue, return_dict):
        client = Stomp(self.config)
        headers = {
            StompSpec.ACK_HEADER: StompSpec.ACK_CLIENT_INDIVIDUAL,
            StompSpec.ID_HEADER: self.hostname
        }
        client.connect()
        client.subscribe(in_queue, headers)
        data = None  # guard against an exception before assignment
        try:
            frame = client.receiveFrame()
            data = json.dumps(frame.body)
        except Exception as exc:
            print exc
        client.ack(frame)
        client.disconnect()
        return_dict['data'] = data
        return data
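A hypothetical caller for this synchronous version: run() blocks, so it can be pushed into a separate process, with return_dict being a shared mapping such as multiprocessing.Manager().dict():

from multiprocessing import Manager, Process

if __name__ == '__main__':
    manager = Manager()
    return_dict = manager.dict()  # shared between parent and child
    c = Consumer('tcp://localhost:61613')
    p = Process(target=c.run, args=('/queue/test_queue', return_dict))
    p.start()
    p.join()
    print "data is from outside function", return_dict.get('data')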

How to make asyncore not block?

I am using the code below, from the link underneath. My understanding is that asyncore.loop() should print LOOP DONE instead of waiting for the data to be delivered. Instead, LOOP DONE is printed only after all the data has been read. Why is loop() blocking?
http://broadcast.oreilly.com/2009/03/pymotw-asyncore.html
import asyncore
import logging
import socket
from cStringIO import StringIO
import urlparse

class HttpClient(asyncore.dispatcher):
    def __init__(self, url):
        self.url = url
        self.logger = logging.getLogger(self.url)
        self.parsed_url = urlparse.urlparse(url)
        asyncore.dispatcher.__init__(self)
        self.write_buffer = 'GET %s HTTP/1.0\r\n\r\n' % self.url
        self.read_buffer = StringIO()
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        address = (self.parsed_url.netloc, 80)
        self.logger.debug('connecting to %s', address)
        self.connect(address)

    def handle_connect(self):
        self.logger.debug('handle_connect()')

    def handle_close(self):
        self.logger.debug('handle_close()')
        self.close()

    def writable(self):
        is_writable = (len(self.write_buffer) > 0)
        if is_writable:
            self.logger.debug('writable() -> %s', is_writable)
        return is_writable

    def readable(self):
        self.logger.debug('readable() -> True')
        return True

    def handle_write(self):
        sent = self.send(self.write_buffer)
        self.logger.debug('handle_write() -> "%s"', self.write_buffer[:sent])
        self.write_buffer = self.write_buffer[sent:]

    def handle_read(self):
        data = self.recv(8192)
        self.logger.debug('handle_read() -> %d bytes', len(data))
        self.read_buffer.write(data)

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG,
                        format='%(name)s: %(message)s',
                        )
    clients = []
    for i in range(10):
        clients.append(HttpClient('http://www.python.org/'))
    #clients = [
    #    HttpClient('http://www.python.org/'),
    #    HttpClient('http://www.doughellmann.com/PyMOTW/contents.html'),
    #]
    logging.debug('LOOP STARTING')
    asyncore.loop()
    logging.debug('LOOP DONE')
    for c in clients:
        response_body = c.read_buffer.getvalue()
        print c.url, 'got', len(response_body), 'bytes'
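As an aside: asyncore.loop() is documented to run until all open channels are closed, so the blocking observed here is the intended behavior, not a bug. If the main thread needs to stay responsive, one option is to step the poll loop manually through the count argument; a minimal sketch:

import asyncore

# One poll pass per call; other work can be interleaved between passes.
while asyncore.socket_map:
    asyncore.loop(timeout=1.0, count=1)
    # ... do other non-blocking work here ...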

Emailing items and logs with Scrapy

I'm trying to get Scrapy to send me an email when a crawler finishes or breaks. There's already a built-in extension for sending stats, but I'd like to attach the spider's errors as <spidername>-errors.log and the scraped items as <spidername>-items.json.
I've connected callbacks to each of the signals, but for some reason only the last one is firing:
from scrapy import signals
from scrapy.mail import MailSender
from scrapy.exceptions import NotConfigured
from scrapy.utils.serialize import ScrapyJSONEncoder
from collections import defaultdict
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

class StatusMailer(object):
    def __init__(self, recipients, mail, crawler):
        self.recipients = recipients
        self.mail = mail
        self.files = defaultdict(StringIO)
        self.encoder = ScrapyJSONEncoder(crawler=crawler)

    @classmethod
    def from_crawler(cls, crawler):
        recipients = crawler.settings.getlist("STATUSMAILER_RCPTS")
        if not recipients:
            raise NotConfigured
        mail = MailSender.from_settings(crawler.settings)
        instance = cls(recipients, mail, crawler)
        crawler.signals.connect(instance.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(instance.spider_error, signal=signals.spider_error)
        crawler.signals.connect(instance.spider_closed, signal=signals.spider_closed)
        return instance

    def item_scraped(self, item, response, spider):
        self.files[spider.name + '.json'].write(self.encoder.encode(item) + '\n')

    def spider_error(self, failure, response, spider):
        self.files[spider.name + '-errors.log'].write(failure.getTraceback() + '\n')

    def spider_closed(self, spider):
        return self.mail.send(
            to=self.recipients,
            subject="Crawler for %s finished" % spider.name,
            body="",
            attachs=[(name, 'text/plain', contents) for name, contents in self.files.items()]
        )
Is there any way to access the exported items and the spider's errors from within Scrapy (possibly making some kind of hook to intercept those messages before they're printed to the console)?
Well, it looks like the problem was much simpler than I had thought. You have to "rewind" StringIO instances after you're completely done writing to them:
def spider_closed(self, spider):
    files = []
    for name, contents in self.files.items():
        contents.seek(0)
        files.append((name, 'text/plain', contents))
    return self.mail.send(
        to=self.recipients,
        subject="Crawler for %s finished" % spider.name,
        body="",
        attachs=files
    )
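A quick interpreter session showing why the rewind matters: write() leaves the file position at the end of the buffer, so a read() from there returns nothing until you seek(0):

>>> from StringIO import StringIO
>>> buf = StringIO()
>>> buf.write('hello')
>>> buf.read()   # position is at the end of the buffer
''
>>> buf.seek(0)
>>> buf.read()
'hello'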
For anyone that's interested, here's my email extension:
import gzip
import datetime
from scrapy import signals
from scrapy.mail import MailSender
from scrapy.exceptions import NotConfigured
from scrapy.utils.serialize import ScrapyJSONEncoder
from collections import defaultdict
# The pure-Python StringIO is needed here: PlainCompressor subclasses it,
# which the cStringIO factory does not allow.
from StringIO import StringIO

def format_size(size):
    for x in ['bytes', 'KB', 'MB', 'GB']:
        if size < 1024.0:
            return "%3.1f %s" % (size, x)
        size /= 1024.0

class GzipCompressor(gzip.GzipFile):
    extension = '.gz'
    mimetype = 'application/gzip'

    def __init__(self):
        super(GzipCompressor, self).__init__(fileobj=PlainCompressor(), mode='w')
        self.read = self.fileobj.read

class PlainCompressor(StringIO):
    extension = ''
    mimetype = 'text/plain'

    def read(self, *args, **kwargs):
        self.seek(0)
        return StringIO.read(self, *args, **kwargs)

    @property
    def size(self):
        return len(self.getvalue())

class StatusMailer(object):
    def __init__(self, recipients, mail, compressor, crawler):
        self.recipients = recipients
        self.mail = mail
        self.encoder = ScrapyJSONEncoder(crawler=crawler)
        self.files = defaultdict(compressor)
        self.num_items = 0
        self.num_errors = 0

    @classmethod
    def from_crawler(cls, crawler):
        recipients = crawler.settings.getlist('STATUSMAILER_RECIPIENTS')
        compression = crawler.settings.get('STATUSMAILER_COMPRESSION')
        if not compression:
            compressor = PlainCompressor
        elif compression.lower().startswith('gz'):
            compressor = GzipCompressor
        else:
            raise NotConfigured
        if not recipients:
            raise NotConfigured
        mail = MailSender.from_settings(crawler.settings)
        instance = cls(recipients, mail, compressor, crawler)
        crawler.signals.connect(instance.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(instance.spider_error, signal=signals.spider_error)
        crawler.signals.connect(instance.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(instance.request_received, signal=signals.request_received)
        return instance

    def item_scraped(self, item, response, spider):
        self.files[spider.name + '-items.json'].write(self.encoder.encode(item))
        self.num_items += 1

    def spider_error(self, failure, response, spider):
        self.files[spider.name + '.log'].write(failure.getTraceback())
        self.num_errors += 1

    def request_received(self, request, spider):
        self.files[spider.name + '.log'].write(str(request) + '\n')

    def spider_closed(self, spider, reason):
        files = []
        for name, compressed in self.files.items():
            files.append((name + compressed.extension, compressed.mimetype, compressed))
        try:
            size = self.files[spider.name + '-items.json'].size
        except KeyError:
            size = 0
        body = '''Crawl statistics:
- Spider name: {0}
- Spider finished at: {1}
- Number of items scraped: {2}
- Number of errors: {3}
- Size of scraped items: {4}'''.format(
            spider.name,
            datetime.datetime.now(),
            self.num_items,
            self.num_errors,
            format_size(size)
        )
        return self.mail.send(
            to=self.recipients,
            subject='Crawler for %s: %s' % (spider.name, reason),
            body=body,
            attachs=files
        )
Add it to your settings.py:
EXTENSIONS = {
    'your_package.extensions.StatusMailer': 80
}
And configure it:
STATUSMAILER_RECIPIENTS = []
STATUSMAILER_COMPRESSION = 'gzip'
#STATUSMAILER_COMPRESSION = None
MAIL_HOST = 'smtp.gmail.com'
MAIL_PORT = 587
MAIL_USER = ''
MAIL_PASS = ''

Getting the following error while constantly scanning a database table using the reactor in Twisted

I am getting the following error after a few hours of successful running.
Traceback (most recent call last):
  File "/usr/lib/python2.6/threading.py", line 484, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/usr/lib/python2.6/dist-packages/twisted/python/threadpool.py", line 210, in _worker
    result = context.call(ctx, function, *args, **kwargs)
  File "/usr/lib/python2.6/dist-packages/twisted/python/context.py", line 59, in callWithContext
    return self.currentContext().callWithContext(ctx, func, *args, **kw)
  File "/usr/lib/python2.6/dist-packages/twisted/python/context.py", line 37, in callWithContext
    return func(*args,**kw)
--- <exception caught here> ---
  File "/usr/lib/python2.6/dist-packages/twisted/enterprise/adbapi.py", line 436, in _runInteraction
    conn.rollback()
  File "/usr/lib/python2.6/dist-packages/twisted/enterprise/adbapi.py", line 52, in rollback
    self._connection.rollback()
_mysql_exceptions.OperationalError: (2006, 'MySQL server has gone away')
My code is something like this...
from twisted.internet import reactor, defer, threads
from twisted.enterprise import adbapi

dbpool = adbapi.ConnectionPool("MySQLdb", '192.168.1.102', 'test', 'test', 'test')

class Scanner:
    def _execQuery(self, txn):
        sql = "SELECT tool_id,tool_name FROM tool_master"
        txn.execute(sql)
        result = txn.fetchall()
        return result

    def objCursor(self):
        return dbpool.runInteraction(self._execQuery)

    def printResult(self, result):
        print "resssssssssssssssssss", result
        reactor.callLater(3, self.deferExecute)

    def deferExecute(self):
        self.objCursor().addCallback(self.printResult)

Scanner()

class MyApp(object):
    reactor.callInThread(Scanner().deferExecute)
    reactor.run()

MyApp()
Can anyone tell me why I am getting this error?
"Can anyone tell me why I am getting this error?" Because you're doing it wrong.
runInteraction runs the supplied function in a worker thread, passing it a cursor on a transaction. You shouldn't be calling reactor.callInThread(Scanner().deferExecute).
It's better to use a twisted.internet.task.LoopingCall; it will make sure that the call completes before the next one is fired.
You're just running a query in your example, so you could use ConnectionPool.runQuery instead of ConnectionPool.runInteraction.
Use errback functions to report on exceptions (a minimal sketch combining these points follows).
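A minimal sketch of those suggestions put together, assuming the same table and connection parameters as the question:

from twisted.enterprise import adbapi
from twisted.internet import reactor, task

dbpool = adbapi.ConnectionPool("MySQLdb", '192.168.1.102', 'test', 'test', 'test')

def onResult(rows):
    print "rows:", rows

def onError(failure):
    failure.printTraceback()

def scan():
    d = dbpool.runQuery("SELECT tool_id, tool_name FROM tool_master")
    d.addCallbacks(onResult, onError)
    return d  # LoopingCall waits on this Deferred before the next run

loop = task.LoopingCall(scan)
loop.start(3)
reactor.run()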
Attempting to correct for your badly formatted code, I think you've got this:
from twisted.internet import reactor, defer, threads
from twisted.enterprise import adbapi

dbpool = adbapi.ConnectionPool("MySQLdb", '192.168.1.102', 'test', 'test', 'test')

class Scanner:
    def _execQuery(self, txn):
        sql = "SELECT tool_id,tool_name FROM tool_master"
        txn.execute(sql)
        result = txn.fetchall()
        return result

    def objCursor(self):
        return dbpool.runInteraction(self._execQuery)

    def printResult(self, result):
        print "resssssssssssssssssss", result
        reactor.callLater(3, self.deferExecute)

    def deferExecute(self):
        self.objCursor().addCallback(self.printResult)

Scanner()

class MyApp(object):
    reactor.callInThread(Scanner().deferExecute)
    reactor.run()

MyApp()
You probably need something like the following instead. If you're planning on writing a Twisted application, it will be easy to modify this Scanner class to inherit from twisted.application.service.Service (see the sketch after the code).
from twisted.internet import reactor, defer, task
from twisted.enterprise import adbapi

class Scanner(object):
    def __init__(self, dbpool=None):
        self.dbpool = dbpool
        self.loopCall = task.LoopingCall(self.myQuery)

    def start(self):
        print "Started scanner"
        self.loopCall.start(3)

    def stop(self):
        print "Stopping scanner"
        self.loopCall.stop()

    def myQuery(self):
        def interact(txn):
            sql = "SELECT tool_id,tool_name FROM tool_master"
            txn.execute(sql)
            return txn.fetchall()
        d = self.dbpool.runInteraction(interact)
        d.addCallbacks(self.printResult, self.printError)

    def printResult(self, result):
        print "Got Result: %r" % result

    def printError(self, error):
        print "Got Error: %r" % error
        error.printTraceback()

if __name__ == '__main__':
    from twisted.internet import reactor
    dbpool = adbapi.ConnectionPool("MySQLdb", '192.168.1.102', 'test', 'test', 'test')
    s = Scanner(dbpool)
    reactor.callWhenRunning(s.start)
    reactor.addSystemEventTrigger('before', 'shutdown', s.stop)
    reactor.run()
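To illustrate the Service remark above: a hypothetical, untested sketch of the same class plugged into twisted.application.service, for use from a .tac file instead of the __main__ block:

from twisted.application import service

class ScannerService(Scanner, service.Service):
    # startService/stopService are invoked by the application framework,
    # replacing the callWhenRunning/addSystemEventTrigger glue above.
    def startService(self):
        service.Service.startService(self)
        self.start()

    def stopService(self):
        self.stop()
        return service.Service.stopService(self)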
After all the suggestions and help from Matt, I have the following code, which is running successfully:
#!usr/bin/python
# Using the "dbmodule" from the previous example, create a ConnectionPool
from twisted.internet import reactor
from twisted.enterprise import adbapi
from twisted.internet import reactor, defer, threads
from twisted.python.threadpool import ThreadPool
import itertools
from twisted.internet.threads import deferToThread
from twisted.internet import reactor, defer, task
from tools.printTime import *
from tools.getVersion import *
from sh_log import *

concurrent = 30
finished = itertools.count(1)
reactor.suggestThreadPoolSize(concurrent)

# Creating global instance variables
path = "tools"
lo = Log()

class ToolsBuilder:
    def build(self, txn, tool, asset_id):
        if tool:
            print "\n"
            try:
                sql = "select tool_filename from tool_master where tool_id = %s" % (tool,)
                sql_asset = "select asset_url from asset_master where asset_id = %s" % (asset_id,)
                txn.execute(sql_asset)
                asset_url = txn.fetchall()
                log_date = lo.log_date()
                txn.execute(sql)
                result = txn.fetchall()
                log = '\n' + log_date + "::" + str(result[0][0]) + " tool object is created......\n"
                lo.wfile(log)
                temp = (path + '/' + str(result[0][0]))
                if result:
                    if temp:
                        f = open(temp).read()
                        obj_tool = compile(f, 'a_filename', 'exec')
                        return obj_tool
            except:
                lo.wfile("Error in creating executable tool object......")

tb = ToolsBuilder()

class ToolsVectorGenerator:
    def generate(self, txn, res_set={}):
        v1 = []
        for asset_id in res_set.iterkeys():
            try:
                obj_tools = []
                if asset_id:
                    print "asset_id..............................", asset_id
                    log_date = lo.log_date()
                    log = log_date + "::" + " \nVector generation for the asset number...:" + str(asset_id)
                    lo.wfile(log)
                    vector = []
                    tools_arr = []
                    obj_tools = []
                    for tool in res_set[asset_id]:
                        if tool:
                            print "tool..............", tool
                            temp_tool = tb.build(txn, tool, asset_id)
                            print "temp_tool..........", temp_tool
                            # fetch data of tool setting.....
                            sql_tool_setting = "select * from tool_asset_settings where tool_id =%s" % (tool,)
                            txn.execute(sql_tool_setting)
                            result_tool_setting = txn.fetchall()
                            tool_id = result_tool_setting[0][1]
                            t_id = int(tool_id)
                            tool_id_arr = []
                            tool_id_arr.append(t_id)
                            tool_id_arr.append(result_tool_setting)
                            tool_id_arr.append(temp_tool)
                            tools_arr.append(tool_id_arr)
                    # fetch data from asset master
                    sql_asset = "select asset_name from asset_master where asset_id=%s" % (asset_id,)
                    txn.execute(sql_asset)
                    result_asset = txn.fetchall()
                    vector.append(result_asset)
                    vector.append(tools_arr)
            except:
                lo.wfile("\nError in getting asset,please check your database or network connection......")
            tvm.executeVector(vector)

tvg = ToolsVectorGenerator()

class Tool:
    def exectool(self, tool):
        exec tool
        return

    def getResult(self, tool):
        return deferToThread(self.exectool, tool)

to = Tool()

class StateMachine:
    def setPriority(self, txn, tup):
        temp = []
        arr = []
        for li in tup:
            sql2 = "select tool_dependency from tool_asset_settings where tool_id =%s" % (li[1],)
            txn.execute(sql2)
            result12 = txn.fetchall()
            arr = []
            if result12[0][0] != None:
                tup12 = result12[0][0]
                arr = (li[0], tup12)
                # print "arr.........", arr
                if arr in tup:
                    print "This element is already exist......."
                else:
                    temp.append(arr)
        temp.extend(tup)
        return tuple(temp)

st = StateMachine()

class ToolsVectorExecutionManager(object):
    def executeVector(self, toolsvector):
        print "toolsvector================>", toolsvector
        if toolsvector:
            for tools in toolsvector[1]:
                if tools[2] != None:
                    to.getResult(tools[2])

tvm = ToolsVectorExecutionManager()

class ToolsToExecuteAnalyzer:
    def __init__(self, dbpool=None):
        self.dbpool = dbpool
        self.loopCall = task.LoopingCall(self.myQuery)

    def start(self):
        print "Started scanner"
        self.loopCall.start(3)

    def stop(self):
        print "Stopping scanner"
        self.loopCall.stop()

    def myQuery(self):
        def interact(txn):
            sql = "SELECT tool_asset_id,tool_execute_id FROM tool_to_execute where status='0'"
            txn.execute(sql)
            result = txn.fetchall()
            if result:
                tool_asset_id = tuple([int(e[0]) for e in result])
                tool_execute_id = tuple([int(e[1]) for e in result])
                if len(tool_asset_id) > 1:
                    sql1 = "SELECT asset_id,tool_id FROM tool_in_assets WHERE tool_asset_id IN %s" % (tool_asset_id,)
                else:
                    sql1 = "SELECT asset_id,tool_id FROM tool_in_assets WHERE tool_asset_id = (%s)" % (tool_asset_id)
                txn.execute(sql1)
                tup = txn.fetchall()
                # dependency check for the selected tool
                asset_tool = st.setPriority(txn, tup)
                log_date = lo.log_date()
                log = log_date + "::priority have been set for the tools......\n"
                lo.wfile(log)
                # creating group of asset with their tools
                res = {}
                for element in asset_tool:
                    if element[0] in res:
                        res[element[0]].append(int(element[1]))
                    else:
                        res[int(element[0])] = [int(element[1])]
                # Record deletion from tool_to_execute table
                if res != None and res.keys() != []:
                    for asset_id in res.iterkeys():
                        if len(tool_execute_id) > 1:
                            sql_del = "delete from tool_to_execute where tool_execute_id in %s " % (tool_execute_id,)
                        else:
                            sql_del = "delete from tool_to_execute where tool_execute_id = %s" % (tool_execute_id)
                        txn.execute(sql_del)
                # New addition of vector
                tvg.generate(txn, res)
                # return res
        d = self.dbpool.runInteraction(interact)
        d.addCallbacks(self.printResult, self.printError)

    def printResult(self, res):
        print "In printResult after generate...."

    def printError(self, error):
        print "Got Error: %r" % error
        error.printTraceback()

ToolsToExecuteAnalyzer()

if __name__ == '__main__':
    from twisted.internet import reactor
    dbpool = adbapi.ConnectionPool("MySQLdb", 'localhost', 'test', 'test', 'test')
    s = ToolsToExecuteAnalyzer(dbpool)
    reactor.callWhenRunning(s.start)
    reactor.addSystemEventTrigger('before', 'shutdown', s.stop)
    reactor.run()
This is my whole code. I just wanted to know how many threads are running; is a new thread created for each tool?
Anyway, thanks Matt for your help. :)
You may also want to take a look at this snippet which provides a ConnectionPool subclass that reconnects on "MySQL server has gone away".
http://www.gelens.org/2009/09/13/twisted-connectionpool-revisited/
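The core of that snippet is a ConnectionPool subclass that discards the dead connection and retries the interaction once; roughly like the sketch below (based on the linked post; it leans on adbapi internals, so treat it as illustrative rather than definitive):

import MySQLdb
from twisted.enterprise import adbapi

class ReconnectingConnectionPool(adbapi.ConnectionPool):
    def _runInteraction(self, interaction, *args, **kw):
        try:
            return adbapi.ConnectionPool._runInteraction(
                self, interaction, *args, **kw)
        except MySQLdb.OperationalError as e:
            if e.args[0] not in (2006, 2013):  # gone away / lost connection
                raise
            # Drop this thread's stale connection and retry once.
            conn = self.connections.get(self.threadID())
            self.disconnect(conn)
            return adbapi.ConnectionPool._runInteraction(
                self, interaction, *args, **kw)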
