In this case, how can I write the queue.put? - Python

I'm writing a program to get the domains hosted on the same server, and it can also scan web directories.
#!/usr/bin/env python
#encoding = utf-8
import threading
import urllib, urllib2, httplib
from urllib2 import Request, urlopen, URLError
import Queue, sys
import re

concurrent = 5
url = sys.argv[1]

class Scanner(threading.Thread):
    def __init__(self, work_q):
        threading.Thread.__init__(self)
        self.work_q = work_q

    def getdomains(self):
        doreq = Request('http://www.logontube.com/website/' + url)
        response = urlopen(doreq)
        html = response.read()
        response.close()
        domains = re.findall('<br><a href=\"(.*?)\" target=\"_blank\"', html)
        return domains

    def run(self):
        alldomains = self.getdomains()
        pathline = [line.rstrip() for line in open("path.txt")]
        while True:
            for aim in alldomains:
                for path in pathline:
                    path = self.work_q.get()
                    req = Request(aim + path)
                    try:
                        response = urlopen(req)
                    except URLError, e:
                        if hasattr(e, 'reason'):
                            print aim + path, 'Not Found'
                        elif hasattr(e, 'code'):
                            print aim + path, 'Not Found'
                    else:
                        try:
                            logs = open('log.txt', "a+")
                        except(IOError):
                            print "[x] Failed to create log file"
                        print aim + path, "Found"
                        logs.writelines(aim + path + "\n")
                        logs.close()

def main():
    work_q = Queue.Queue()
    paths = [line.rstrip() for line in open("path.txt")]
    for i in range(concurrent):
        t = Scanner(work_q)
        t.setDaemon(True)
        t.start()
    for path in paths:
        work_q.put(path)
    work_q.join()

main()
The problem is that this program only loops over the paths, so I can only get the scan result for one website.
I've found the problem:
for path in paths:
    work_q.put(path) # The program finishes when it puts all the paths
If you want to help me test this program, you may need some website directories (save them as path.txt):
/default.asp
/index.asp
/index.htm
/index.html
/index.jsp
/index.php
/admin.asp
/admin.php
/admin.shtml
/admin.txt
/admin_admin.asp
/config.asp
/inc/
/login.asp
/login.jsp
/login.php
/login/
/phpinfo.php
/readme.txt
/robots.txt
/test.asp
/test.html
/test.txt
/test.php
/news/readme.txt
/addmember/

You need a:
while 1:
    pass
or something that waits until your threads have completed before it exits.
What is happening is that you are starting the threads, but the main thread terminates, so you never get to see the results of your threads.
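One way to make the main thread wait is sketched below, under the assumption that the queue carries (domain, path) pairs and each worker calls task_done(): work_q.join() then only returns once every queued pair has been processed, so the program no longer exits right after putting the paths.
# Sketch only (Python 2, same Queue/urllib2 modules as the question).
import Queue
import threading
from urllib2 import Request, urlopen, URLError

concurrent = 5

class Scanner(threading.Thread):
    def __init__(self, work_q):
        threading.Thread.__init__(self)
        self.work_q = work_q

    def run(self):
        while True:
            aim, path = self.work_q.get()      # one (domain, path) pair per task
            try:
                urlopen(Request(aim + path))
                print aim + path, "Found"
            except URLError:
                print aim + path, "Not Found"
            finally:
                self.work_q.task_done()        # lets work_q.join() know this pair is done

def main(domains, paths):
    work_q = Queue.Queue()
    for i in range(concurrent):
        t = Scanner(work_q)
        t.setDaemon(True)
        t.start()
    for aim in domains:                        # every domain is paired with every path
        for path in paths:
            work_q.put((aim, path))
    work_q.join()                              # blocks until all pairs have been processed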

Related

Can I get the generated IP address or domain name of flask_ngrok or py-ngrok and return it to 127.0.0.1/

I'm trying to get the generated domain name or IP address of flask_ngrok or py-ngrok after it has been deployed. I want to deploy a Flask app to localhost and get the new IP address or domain name on the main page.
I.e.: if I access 127.0.0.1/, I want it to return something like
You can now log in through https://aaf8447ee878.ngrok.io/
I have tried checking through the directories and reading some help, but I still can't get it. Thanks in advance ❤
Add:
import atexit
import json
import os
import platform
import shutil
import subprocess
import tempfile
import time
import zipfile
from pathlib import Path
from threading import Timer
import requests

def _run_ngrok():
    ngrok_path = str(Path(tempfile.gettempdir(), "ngrok"))
    _download_ngrok(ngrok_path)
    system = platform.system()
    if system == "Darwin":
        command = "ngrok"
    elif system == "Windows":
        command = "ngrok.exe"
    elif system == "Linux":
        command = "ngrok"
    else:
        raise Exception(f"{system} is not supported")
    executable = str(Path(ngrok_path, command))
    os.chmod(executable, 777)
    ngrok = subprocess.Popen([executable, 'http', '5000'])
    atexit.register(ngrok.terminate)
    localhost_url = "http://localhost:4040/api/tunnels"  # Url with tunnel details
    time.sleep(1)
    tunnel_url = requests.get(localhost_url).text  # Get the tunnel information
    j = json.loads(tunnel_url)
    tunnel_url = j['tunnels'][0]['public_url']  # Do the parsing of the get
    tunnel_url = tunnel_url.replace("https", "http")
    return tunnel_url

def _download_ngrok(ngrok_path):
    if Path(ngrok_path).exists():
        return
    system = platform.system()
    if system == "Darwin":
        url = "https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-darwin-amd64.zip"
    elif system == "Windows":
        url = "https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-windows-amd64.zip"
    elif system == "Linux":
        url = "https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip"
    else:
        raise Exception(f"{system} is not supported")
    download_path = _download_file(url)
    with zipfile.ZipFile(download_path, "r") as zip_ref:
        zip_ref.extractall(ngrok_path)

def _download_file(url):
    local_filename = url.split('/')[-1]
    r = requests.get(url, stream=True)
    download_path = str(Path(tempfile.gettempdir(), local_filename))
    with open(download_path, 'wb') as f:
        shutil.copyfileobj(r.raw, f)
    return download_path

def start_ngrok():
    global ngrok_address
    ngrok_address = _run_ngrok()
    print(f" * Running on {ngrok_address}")
    print(f" * Traffic stats available on http://127.0.0.1:4040")
def run_with_ngrok(app):
    """
    The provided Flask app will be securely exposed to the public internet via ngrok when run,
    and its ngrok address will be printed to stdout.
    :param app: a Flask application object
    :return: None
    """
    old_run = app.run

    def new_run():
        thread = Timer(1, start_ngrok)
        thread.setDaemon(True)
        thread.start()
        old_run()

    app.run = new_run
####################
Don't import flask_ngrok.
At the end, before if __name__ == '__main__', add this function:
def ngrok_url():
    global tunnel_url
    while True:
        try:
            print(ngrok_address)
        except Exception as e:
            print(e)
and then, before app.run(), put:
thread = Timer(1, ngrok_url)
thread.setDaemon(True)
thread.start()
and run it. Warning: this will crash your code editor or terminal. If you don't want that, replace the print in the ngrok_url function with whatever you want to do with the URL.
And you don't need the
global tunnel_url
line:
def ngrok_url():
    while True:
        try:
            print(ngrok_address)
        except Exception as e:
            print(e)
You can also delete the threading part before if __name__ == '__main__'. After the imports, set
ngrok_address = ''
and then you can access ngrok_address anywhere in your code.
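Putting the pieces together, a minimal sketch of what the app file could look like (the route and message are illustrative, and this assumes the helper functions above live in the same file):
from flask import Flask

app = Flask(__name__)
ngrok_address = ''  # filled in by start_ngrok() once the tunnel is up

@app.route('/')
def index():
    # Empty until the Timer thread has started ngrok and fetched the tunnel URL
    return 'You can now log in through {}'.format(ngrok_address or 'ngrok (still starting...)')

if __name__ == '__main__':
    run_with_ngrok(app)  # patches app.run with the version defined above
    app.run()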
I found the easiest way to do this is to just copy the URL when the user visits the site. You can do this with:
@app.before_request
def before_request():
    global url
    url = request.url
    # url = url.replace('http://', 'https://', 1)
    url = url.split('.ngrok.io')[0]
    url += '.ngrok.io'
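As a usage sketch (the route and message are illustrative, not part of the original answer): the url global only becomes the ngrok address once a request actually arrives through the tunnel, after which any route can echo it back:
from flask import Flask, request

app = Flask(__name__)
url = ''  # set by before_request on the first incoming request

@app.before_request
def before_request():
    global url
    url = request.url.split('.ngrok.io')[0] + '.ngrok.io'

@app.route('/')
def index():
    return 'You can now log in through {}'.format(url)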

No requests module error

I'm trying to write a web parser script using the requests module. Here is my current code:
import requests
import subprocess
import json
import sys
import threading
import time
from Queue import Queue

numberOfViewers = int(sys.argv[1])
builderThreads = int(sys.argv[2])
startTime = time.time()
numberOfSockets = 0
concurrent = 25
urls = []
urlsUsed = []

def getURL(): # Get tokens
    output = subprocess.Popen(["livestreamer", "twitch.tv/CHANNEL_NAME", "-j"],
                              stdout=subprocess.PIPE).communicate()[0]
    return json.loads(output)['streams']['worst']['url'] # Parse json and return the URL parameter

def build(): # Builds a set of tokens, aka viewers
    global numberOfSockets
    global numberOfViewers
    while True:
        if numberOfSockets < numberOfViewers:
            numberOfSockets += 1
            print ("Building viewers " + str(numberOfSockets) + "/" + str(numberOfViewers))
            urls.append(getURL())

def view(): # Opens connections to send views
    global numberOfSockets
    while True:
        url = q.get()
        requests.head(url)
        if (url in urlsUsed):
            urls.remove(url)
            urlsUsed.remove(url)
            numberOfSockets -= 1
        else:
            urlsUsed.append(url)
            q.task_done()
            if __name__ == '__main__':
                for i in range(0, builderThreads):
                    threading.Thread(target = build).start()
                while True:
                    while (numberOfViewers != numberOfSockets): # Wait until sockets are built
                        time.sleep(1)
                    q = Queue(concurrent*2)
                    for i in range(concurrent):
                        try:
                            t = threading.Thread(target=view)
                            t.daemon = True
                            t.start()
                        except:
                            print ('thread error')
                    try:
                        for url in urls:
                            print (url)
                            q.put(url.strip())
                        q.join()
                    except KeyboardInterrupt:
                        sys.exit(1)
But when I run the code, it says:
Traceback (most recent call last):
  File "C:\Users\flamelier\Desktop\Twitch.py", line 1, in <module>
    import requests
ImportError: No module named 'requests'
Why am I getting this error? How do I install this module?
Will this error keep repeating for all the scripts henceforth?
How can I prevent such similar errors in the future?
Requests is a third-party module. You should first install it using pip or easy_install.
You have to run pip3 install requests, as requests doesn't come with Python by default; it is a third-party library.
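To address the last question (how to prevent similar errors), one common defensive pattern, shown here only as a sketch, is to fail with a clear message when the import is missing:
import sys

try:
    import requests
except ImportError:
    # The module is not installed in this interpreter; tell the user how to fix it
    sys.exit("The 'requests' module is not installed. Run: pip install requests")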
Even after you have pip3-installed requests, the code shown won't do anything. The
if __name__ == "__main__"
test and everything after it is part of an else block in the view function. Move this line and the block that follows back out to the left margin.
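A sketch of the corrected layout (only the structure matters; the bodies are abbreviated from the question's code, which still defines q, urls, urlsUsed and builderThreads as above):
def view(): # Opens connections to send views
    global numberOfSockets
    while True:
        url = q.get()
        requests.head(url)
        if (url in urlsUsed):
            urls.remove(url)
            urlsUsed.remove(url)
            numberOfSockets -= 1
        else:
            urlsUsed.append(url)
        q.task_done()

# Dedented to module level so it actually runs when the script is executed
if __name__ == '__main__':
    for i in range(0, builderThreads):
        threading.Thread(target = build).start()
    ...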

Python else issues making an FTP program

I am having an issue with the else statement of this program. I have checked my spacing and it seems to be correct, but I keep getting a syntax error on the else statement. The program creates a file and then attempts to upload it to an FTP server; if the upload fails, it should not say anything to the user and just continue (it will try again when the program loops). Any help you could provide would be greatly appreciated.
#IMPORTS
import ConfigParser
import os
import random
import ftplib
from ftplib import FTP
#LOOP PART 1
from time import sleep

while True:
    #READ THE CONFIG FILE SETUP.INI
    config = ConfigParser.ConfigParser()
    config.readfp(open(r'setup.ini'))
    path = config.get('config', 'path')
    name = config.get('config', 'name')
    #CREATE THE KEYFILE
    filepath = os.path.join((path), (name))
    if not os.path.exists((path)):
        os.makedirs((path))
    file = open(filepath, 'w')
    file.write('text here')
    file.close()
    #Create Full Path
    fullpath = path + name
    #Random Sleep to Accommodate FTP Server
    sleeptimer = random.randrange(1, 30+1)
    sleep((sleeptimer))
    #Upload File to FTP Server
    try:
        host = '0.0.0.0'
        port = 3700
        ftp = FTP()
        ftp.connect(host, port)
        ftp.login('user', 'pass')
        file = open(fullpath, "rb")
        ftp.cwd('/')
        ftp.storbinary('STOR ' + name, file)
        ftp.quit()
        file.close()
    else:
        print 'Something is Wrong'
    #LOOP PART 2
    sleep(180.00)
else is valid as part of an exception-handling block, but it only runs if no exception is raised, and there must be an except clause defined before it.
(edit) Most people skip the else clause and just write code after exiting (dedenting) from the try/except clauses.
The quick tutorial is:
try:
    # some statements that are executed until an exception is raised
    ...
except SomeExceptionType, e:
    # if some type of exception is raised
    ...
except SomeOtherExceptionType, e:
    # if another type of exception is raised
    ...
except Exception, e:
    # if *any* exception is raised - but this is usually evil because it hides
    # programming errors as well as the errors you want to handle. You can get
    # a feel for what went wrong with:
    traceback.print_exc()
    ...
else:
    # if no exception is raised
    ...
finally:
    # run regardless of whether exception was raised
    ...
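Applied to the upload section of the question, a minimal sketch could look like this (it assumes fullpath and name are already set inside the while loop as above; ftplib.all_errors is one reasonable catch-all for FTP and socket failures):
import ftplib
from ftplib import FTP

#Upload File to FTP Server (replaces the try block inside the while loop)
try:
    ftp = FTP()
    ftp.connect('0.0.0.0', 3700)
    ftp.login('user', 'pass')
    keyfile = open(fullpath, "rb")
    ftp.cwd('/')
    ftp.storbinary('STOR ' + name, keyfile)
    ftp.quit()
    keyfile.close()
except ftplib.all_errors:
    # Upload failed: stay quiet and let the loop retry on the next pass
    pass
else:
    # Runs only when no exception was raised
    print 'Upload succeeded'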

Mitmproxy load and unload scripts with python

I'm running a proxy as suggested in the Mitmproxy GitHub examples:
import os
from libmproxy import proxy, flow

class MitmProxy(flow.FlowMaster):
    def run(self):
        try:
            flow.FlowMaster.run(self)
        except KeyboardInterrupt:
            self.shutdown()

    def handle_request(self, r):
        f = flow.FlowMaster.handle_request(self, r)
        if f:
            r.reply()
        return f

    def handle_response(self, r):
        f = flow.FlowMaster.handle_response(self, r)
        if f:
            r.reply()
        return f

config = proxy.ProxyConfig(
    cacert = os.path.expanduser("~/.ssl/mitmproxy.pem")
)
state = flow.State()
server = proxy.ProxyServer(config, 8083)
m = MitmProxy(server, state)
try:
    m.run()
except Exception, e:
    print e.message
    m.shutdown()
I want to handle each request/response without blocking the others; for that I need to use the concurrent decorator and scripts.
My question is: how do I load and unload scripts for the proxy running in this configuration?
You can use concurrent mode with script loading. Here is an example of this kind of usage.
I preferred to implement the mitmproxy logic at the flow level. You can use this code:
def handle_response(self, r):
    reply = f.response.reply
    f.response.reply = controller.DummyReply()
    if hasattr(reply, "q"):
        f.response.reply.q = reply.q

    def run():
        pass

    threading.Thread(target=run)
You basically have to copy how handle_concurrent_reply works in libmproxy.script
f = flow.FlowMaster.handle_request(self, r)
if f:
    def run():
        r.reply()  # if you forget this you'll end up in a loop and never reply
    threading.Thread(target=run).start()  # this will start run
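Pieced together, a non-blocking handler based on that snippet might look like the sketch below (written against the old libmproxy API used in the question; do_slow_work is a hypothetical placeholder for whatever per-request processing you need):
import threading
from libmproxy import flow

class MitmProxy(flow.FlowMaster):
    def handle_request(self, r):
        f = flow.FlowMaster.handle_request(self, r)
        if f:
            def run():
                do_slow_work(f)  # hypothetical: your own per-request logic goes here
                r.reply()        # without this the flow is never replied to and hangs
            threading.Thread(target=run).start()
        return f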

Download files from URLs in parallel in Python

I have some links in a database which I want to download in parallel. I tried doing it serially, but it took too much time; I have around 1877 links.
I tried this code for running the downloads in parallel, but it throws an error: failed: 'tuple' object has no attribute 'read'
#!/usr/bin/env python
import urllib
from stream import ThreadPool

URLs = [
    'http://www.cnn.com/',
    'http://www.bbc.co.uk/',
    'http://www.economist.com/',
    'http://nonexistant.website.at.baddomain/',
    'http://slashdot.org/',
    'http://reddit.com/',
    'http://news.ycombinator.com/'
]

def retrieve(urls):
    for url in urls:
        print url, ' '
        res = urllib.urlretrieve(url).read()
        yield url, res

if __name__ == '__main__':
    retrieved = URLs >> ThreadPool(retrieve, poolsize=7)
    for url, content in retrieved:
        print '%r is %d bytes' % (url, len(content))
    for url, exception in retrieved.failure:
        print '%r failed: %s' % (url, exception)
I tried this as well:
import urllib
import tldextract
from multiprocessing.pool import ThreadPool

URLs = [
    'http://www.cnn.com/',
    'http://www.bbc.co.uk/',
    'http://www.economist.com/',
    'http://nonexistant.website.at.baddomain/',
    'http://slashdot.org/',
    'http://reddit.com/',
    'http://news.ycombinator.com/'
]

def dwld(url):
    print url
    res = urllib.urlopen(url).read()
    filename = tldextract.extract(url)
    with open(filename.domain, 'wb') as fh:
        fh.write(res)
    return url

pool = ThreadPool(processes = 4)
pool.map(dwld, URLs)
Gives me
Traceback (most recent call last):
  File "dwld_thread.py", line 26, in <module>
    pool.map(dwld, URLs)
  File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/multiprocessing/pool.py", line 148, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/multiprocessing/pool.py", line 422, in get
    raise self._value
IOError: [Errno socket error] [Errno 8] nodename nor servname provided, or not known
I have no idea what that stream.ThreadPool is that you're using, or what its API is… but the problem is obvious:
res = urllib.urlretrieve(url).read()
If you look at the doc for urlretrieve:
Return a tuple (filename, headers) where filename is the local file name under which the object can be found…
You obviously can't call read on that. If you want to download to a local file, using this legacy API, and then read that file, you can:
filename, headers = urllib.urlretrieve(url)
with open(filename) as f:
    res = f.read()
But why? Just use urllib2.urlopen, which "returns a file-like object with two additional methods", so you can just call read on it, and you won't be creating a temporary file, and you're not using an old function that wasn't quite designed right that nobody has maintained in years.
But Python has a nice ThreadPoolExecutor built into the standard library. And if you look at the very first example they show you, it's exactly what you're trying to do.
Unfortunately, you're using Python 2.x, which doesn't have the concurrent.futures module. Fortunately, there is a backport on PyPI that works with 2.5+.
Python also has multiprocessing.dummy.Pool (also available under the undocumented, but probably more readable, name multiprocessing.ThreadPool). But if you're willing to go outside the stdlib for some module that you apparently aren't sure how to use and that I've never heard of, I'm guessing you won't have any problem using futures. So:
import futures
import urllib2

URLs = [
    'http://www.cnn.com/',
    'http://www.bbc.co.uk/',
    'http://www.economist.com/',
    'http://nonexistant.website.at.baddomain/',
    'http://slashdot.org/',
    'http://reddit.com/',
    'http://news.ycombinator.com/'
]

def load_url(url):
    return urllib2.urlopen(url).read()

if __name__ == '__main__':
    with futures.ThreadPoolExecutor(max_workers=7) as executor:
        fmap = dict((executor.submit(load_url, url), url) for url in URLs)
        for f in futures.as_completed(fmap):
            url = fmap[f]
            try:
                content = f.result()
            except Exception as exception:
                print '%r failed: %s' % (url, exception)
            else:
                print '%r is %d bytes' % (url, len(content))
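For comparison, a sketch of the multiprocessing.dummy alternative mentioned above (a thread-backed Pool with the multiprocessing.Pool API). Note that, unlike the futures version, a plain pool.map re-raises the first exception instead of reporting failures per URL:
import urllib2
from multiprocessing.dummy import Pool  # threads, but with the multiprocessing.Pool interface

URLs = [
    'http://www.cnn.com/',
    'http://www.bbc.co.uk/',
]

def load_url(url):
    return urllib2.urlopen(url).read()

if __name__ == '__main__':
    pool = Pool(7)
    try:
        for url, content in zip(URLs, pool.map(load_url, URLs)):
            print '%r is %d bytes' % (url, len(content))
    finally:
        pool.close()
        pool.join()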
urllib.urlretrieve(url).read() should be urllib.urlopen(url).read()
from threading import *
from time import sleep
# if Python2:
import urllib
# if Python3:
# import urllib.request

URLs = [
    'http://www.cnn.com/',
    'http://www.bbc.co.uk/',
    'http://www.economist.com/',
    'http://nonexistant.website.at.baddomain/',
    'http://slashdot.org/',
    'http://reddit.com/',
    'http://news.ycombinator.com/'
]

class worker(Thread):
    def __init__(self, link):
        Thread.__init__(self)
        self.link = link
        self.start()

    def run(self):
        # if Python2:
        res = urllib.urlopen(self.link).read()  # as mentioned by @DhruvPathak
        # if Python3:
        # res = urllib.request.urlopen(self.link).read()
        with open(self.link.replace('/', '_'), 'wb') as fh:
            fh.write(res)  # store the fetched data in a file named after the link

for url in URLs:
    while len(enumerate()) > 500:  # threading.enumerate(): cap the number of live threads
        sleep(0.25)
    worker(url)

while len(enumerate()) > 1:
    sleep(0.25)  # wait for all threads to finish
What about using multiprocessing?
Sample code:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import urllib
from multiprocessing import Pool
import os

POOL = 8
PDFS_DOWNLOAD_DIR = 'pdfs'
PDF_LINKS = sys.argv[1]

class DownloadFiles(object):
    def __init__(self):
        self.pdf_links = self.read_links_from_file()
        self.create_download_dir()

    def create_download_dir(self):
        try:
            if not os.path.exists(PDFS_DOWNLOAD_DIR):
                os.makedirs(PDFS_DOWNLOAD_DIR)
        except IOError as e:
            exit()

    def read_links_from_file(self):
        try:
            with open(PDF_LINKS, 'r') as f:
                return list(set([x.strip() for x in f]))
        except (IndexError, IOError) as e:
            exit()

    def get_file(self, link):
        filename = link.split('/')[-2]
        print('Downloading file --> "{filename}"'.format(
            filename=filename
        ))
        urllib.urlretrieve(link, filename='{pdfs_data}/{filename}'.format(
            pdfs_data=PDFS_DOWNLOAD_DIR,
            filename=filename
        ))

    def download(self):
        pool = Pool(POOL)
        pool.map(self.get_file, self.pdf_links)
        pool.close()
        pool.join()
        print('\nSuccessfully downloaded files from given source!\n')

d = DownloadFiles()
d.download()
