I'm trying to make a Strawpoll bot. I found something on GitHub and took it, but it's not able to connect to a proxy server: I either get a timeout or it says "Couldn't connect to proxy". I took HTTPS proxies from a free list.
It's not my code; it's from here.
My Python skills are very limited.
I hope someone can help me.
This is the code (or download the whole zip here):
#!/usr/bin/env python
try:
    from xml.dom import minidom
    import xml.etree.cElementTree as ET
    from optparse import OptionParser
    import sys
    import os
except ImportError as msg:
    print("[-] Library not installed: " + str(msg))
    exit()
try:
    import requests
except ImportError:
    print("[-] Missing library 'requests'")
    print("[*] Please install missing library with: pip install requests")
    exit()  # nothing below works without requests
# Creator: Luis Hebendanz
class Main:
    # SETTINGS
    maxVotes = 1
    voteFor = ""
    surveyId = ""

    # GLOBAL VARIABLES
    # Raw string so the backslashes in the Windows path are not treated as escapes
    proxyListFile = r"D:\Feko Karels\Documents\Informatik Kram\Python\strawpoll-voting-bot-master\https-proxy-list.xml"
    saveStateFile = "saveState.xml"
    proxyTimeout = 10  # Seconds
    currentProxyPointer = 0
    successfulVotes = 0
    def __init__(self):
        try:
            ###
            # Command line argument parser
            ###
            parser = OptionParser()
            parser.add_option("-v", "--votes", action="store", type="string", dest="votes",
                              help="number of votes to give")
            parser.add_option("-s", "--survey", action="store", type="string", dest="survey",
                              help="id of the survey")
            parser.add_option("-t", "--target", action="store", type="string", dest="target",
                              help="checkbox to vote for")
            parser.add_option("-f", "--flush", action="store_true", dest="flush",
                              help="Deletes skipping proxy list")
            (options, args) = parser.parse_args()
            if len(sys.argv) > 2:
                if options.votes is None:
                    print("[-] Number of votes not defined with: -v")
                    exit(1)
                if options.survey is None:
                    print("[-] Survey id not defined with: -s")
                    exit(1)
                if options.target is None:
                    print("[-] Target to vote for is not defined with: -t")
                    exit(1)
                try:
                    self.maxVotes = int(options.votes)
                except ValueError:
                    print("[-] Please define an integer for -v")
                # Save arguments into global variable
                self.voteFor = options.target
                self.surveyId = options.survey
                # Flush saveState.xml
                if options.flush == True:
                    print("[*] Flushing saveState.xml file...")
                    os.remove(self.saveStateFile)
            # Print help
            else:
                print("[-] Not enough arguments given")
                print()
                parser.print_help()
                exit()
            # Read proxy list file
            alreadyUsedProxy = False
            xmldoc = minidom.parse(self.proxyListFile)
            taglist = xmldoc.getElementsByTagName('para')
            tagList2 = None
            # Check if saveState.xml exists and read file
            if os.path.isfile(self.saveStateFile):
                xlmSave = minidom.parse(self.saveStateFile)
                tagList2 = xlmSave.getElementsByTagName("usedProxy")
            # Print remaining proxies
            if tagList2 is not None:
                print("[*] Number of remaining proxies in list: " + str(len(taglist) - len(tagList2)))
                print()
            else:
                print("[*] Number of proxies in new list: " + str(len(taglist)))
                print()
            # Go through proxy list
            for tag in taglist:
                # Check if max votes has been reached
                if self.successfulVotes >= self.maxVotes:
                    break
                # Increase number of used proxy integer
                self.currentProxyPointer += 1
                # Read value out of proxy list
                tagValue = tag.childNodes[0].nodeValue
                # Read in saveState.xml if this proxy has already been used
                if tagList2 is not None:
                    for tag2 in tagList2:
                        if tagValue == tag2.childNodes[0].nodeValue:
                            alreadyUsedProxy = True
                            break
                # If it has been used print message and continue to next proxy
                if alreadyUsedProxy == True:
                    print("[" + str(self.currentProxyPointer) + "] Skipping proxy: " + tagValue)
                    alreadyUsedProxy = False
                    continue
                # Print current proxy information
                print("[" + str(self.currentProxyPointer) + "] New proxy: " + tagValue)
                print("[*] Connecting... ")
                # Connect to strawpoll and send vote
                self.sendToWebApi('https://' + tagValue)
                # Write used proxy into saveState.xml
                self.writeUsedProxy(tagValue)
                print()
            # Check if max votes has been reached
            if self.successfulVotes >= self.maxVotes:
                print("[+] Finished voting: " + str(self.successfulVotes))
            else:
                print("[+] Finished every proxy!")
            exit()
        except IOError as ex:
            print("[-] " + ex.strerror + ": " + ex.filename)
        except KeyboardInterrupt as ex:
            print("[*] Saving last proxy...")
            print("[*] Program aborted")
            exit()
    def getClientIp(self, httpProxy):
        proxyDictionary = {"https": httpProxy}
        rsp = requests.get("https://api.ipify.org/", proxies=proxyDictionary)
        return str(rsp.text)

    def writeUsedProxy(self, proxyIp):
        if os.path.isfile(self.saveStateFile):
            # Read file
            tree = ET.parse(self.saveStateFile)
            # Get <root> tag
            root = tree.getroot()
            child = ET.Element("usedProxy")
            child.text = str(proxyIp)
            root.append(child)
            # Write to file
            tree.write(self.saveStateFile, encoding="UTF-8")
        else:
            # Create <root> tag
            root = ET.Element("article")
            # Get element tree
            tree = ET.ElementTree(root)
            # Write to file
            tree.write(self.saveStateFile, encoding="UTF-8")
            # Now write defined entry into file
            self.writeUsedProxy(proxyIp)
    def sendToWebApi(self, httpsProxy):
        try:
            headers = {
                'Host': 'strawpoll.de',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
                'Accept': '*/*',
                'Accept-Language': 'de,en-US;q=0.7,en;q=0.3',
                'Referer': 'https://strawpoll.de/' + self.surveyId,
                'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
                'X-Requested-With': 'XMLHttpRequest',
                'Content-Length': '29',
                'Cookie': 'lang=de',
                'DNT': '1',
                'Connection': 'close'
            }
            payload = {'pid': self.surveyId, 'oids': self.voteFor}
            proxyDictionary = {"https": httpsProxy}
            # Connect to server
            r = requests.post('https://strawpoll.de/vote', data=payload, headers=headers,
                              proxies=proxyDictionary, timeout=self.proxyTimeout)
            json = r.json()
            # Check if succeeded
            if bool(json['success']):
                print("[+] Successfully voted.")
                self.successfulVotes += 1
                return True
            else:
                print("[-] Voting failed. This IP already voted.")
                return False
        except requests.exceptions.Timeout as ex:
            print("[-] Timeout")
            return False
        except requests.exceptions.ConnectionError as ex:
            print("[-] Couldn't connect to proxy")
            return False
        except Exception as ex:
            print(str(ex))
            return False

# Execute main
Main()
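For what it's worth, two things are worth checking here. First, 'https://' + tagValue tells requests to speak TLS to the proxy itself; most free-list proxies expect a plain http://host:port proxy URL even when the target URL is HTTPS. Second, free-list proxies are mostly dead, so pre-filtering the list saves a lot of timeouts. A minimal pre-check sketch, reusing the api.ipify.org endpoint and the 10-second timeout already in the script (the http:// proxy scheme is the suggested change, not what the script currently does):

import requests

def proxy_is_alive(proxy, timeout=10):
    """Return True if an HTTPS request tunneled through 'host:port' succeeds.

    Note the http:// scheme: requests connects to the proxy in plaintext
    and issues CONNECT for the HTTPS target.
    """
    try:
        rsp = requests.get("https://api.ipify.org/",
                           proxies={"https": "http://" + proxy},
                           timeout=timeout)
        return rsp.ok
    except requests.RequestException:
        return False

# Example: keep only proxies that respond before starting to vote.
# alive = [p for p in proxy_list if proxy_is_alive(p)]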
I am trying to troubleshoot a script for Mimecast's API. The script runs fine for the most part, but a few times, I have noticed that it stops pulling logs and generally appears to be a hung process. After restarting the script and manually pushing logs to the syslog server, it starts working again without issue. I am not able to reproduce this issue at will.
The script is supposed to do the following:
Authenticate against Mimecast's API
Sign responses
Download, extract, and save log files to the log directory
Use a tokenized header to determine which file was downloaded in the last request, saving the token ID to a file in the checkpoint directory
Push files to remote syslog server
Output any errors and info to console
Below is the sample code from Mimecast.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging.handlers
import json
import os
import requests
import base64
import uuid
import datetime
import hashlib
import shutil
import hmac
import time
from zipfile import ZipFile
import io
# Set up variables
APP_ID = "YOUR DEVELOPER APPLICATION ID"
APP_KEY = "YOUR DEVELOPER APPLICATION KEY"
URI = "/api/audit/get-siem-logs"
EMAIL_ADDRESS = 'EMAIL ADDRESS OF YOUR ADMINISTRATOR'
ACCESS_KEY = 'ACCESS KEY FOR YOUR ADMINISTRATOR'
SECRET_KEY = 'SECRET KEY FOR YOUR ADMINISTRATOR'
LOG_FILE_PATH = "FULLY QUALIFIED PATH TO FOLDER TO WRITE LOGS"
CHK_POINT_DIR = 'FULLY QUALIFIED PATH TO FOLDER TO WRITE PAGE TOKEN'
# Set True to output to syslog, false to only save to file
syslog_output = False
# Enter the IP address or hostname of your syslog server
syslog_server = 'localhost'
# Change this to override default port
syslog_port = 514
# delete files after fetching
delete_files = True
# Set threshold in number of files in log file directory
log_file_threshold = 10000
# Set up logging (in this case to terminal)
log = logging.getLogger(__name__)
log.root.setLevel(logging.DEBUG)
log_formatter = logging.Formatter('%(levelname)s %(message)s')
log_handler = logging.StreamHandler()
log_handler.setFormatter(log_formatter)
log.addHandler(log_handler)
# Set up syslog output
syslog_handler = logging.handlers.SysLogHandler(address=(syslog_server, syslog_port))
syslog_formatter = logging.Formatter('%(message)s')
syslog_handler.setFormatter(syslog_formatter)
syslogger = logging.getLogger(__name__)
syslogger = logging.getLogger('SysLogger')
syslogger.addHandler(syslog_handler)
# Supporting methods
def get_hdr_date():
    return datetime.datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UTC")

def read_file(file_name):
    try:
        with open(file_name, 'r') as f:
            data = f.read()
        return data
    except Exception as e:
        log.error('Error reading file ' + file_name + '. Cannot continue. Exception: ' + str(e))
        quit()

def write_file(file_name, data_to_write):
    if '.zip' in file_name:
        try:
            byte_content = io.BytesIO(data_to_write)
            zip_file = ZipFile(byte_content)
            zip_file.extractall(LOG_FILE_PATH)
        except Exception as e:
            log.error('Error writing file ' + file_name + '. Cannot continue. Exception: ' + str(e))
            quit()
    else:
        try:
            with open(file_name, 'w') as f:
                f.write(data_to_write)
        except Exception as e:
            log.error('Error writing file ' + file_name + '. Cannot continue. Exception: ' + str(e))
            quit()
def get_base_url(email_address):
    # Create post body for request
    post_body = dict()
    post_body['data'] = [{}]
    post_body['data'][0]['emailAddress'] = email_address
    # Create variables required for request headers
    request_id = str(uuid.uuid4())
    request_date = get_hdr_date()
    headers = {'x-mc-app-id': APP_ID, 'x-mc-req-id': request_id, 'x-mc-date': request_date}
    # Send request to API
    log.debug('Sending request to https://api.mimecast.com/api/discover-authentication with request Id: ' +
              request_id)
    try:
        r = requests.post(url='https://api.mimecast.com/api/login/discover-authentication',
                          data=json.dumps(post_body), headers=headers)
        # Handle Rate Limiting. The header value is a string; the original
        # sample multiplied the string itself by 1000, which fails in
        # time.sleep(). Assuming milliseconds, convert to seconds here.
        if r.status_code == 429:
            reset_ms = float(r.headers['X-RateLimit-Reset'])
            log.warning('Rate limit hit. sleeping for ' + str(reset_ms) + ' ms')
            time.sleep(reset_ms / 1000)
    except Exception as e:
        log.error('Unexpected error getting base url. Cannot continue.' + str(e))
        quit()
    # Handle error from API
    if r.status_code != 200:
        log.error('Request returned with status code: ' + str(r.status_code) + ', response body: ' +
                  r.text + '. Cannot continue.')
        quit()
    # Load response body as JSON
    resp_data = json.loads(r.text)
    # Look for api key in region object to get base url
    if 'region' in resp_data["data"][0]:
        base_url = resp_data["data"][0]["region"]["api"].split('//')
        base_url = base_url[1]
    else:
        # Handle no region found, likely the email address was entered incorrectly
        log.error(
            'No region information returned from API, please check the email address. '
            'Cannot continue')
        quit()
    return base_url
def post_request(base_url, uri, post_body, access_key, secret_key):
    # Create variables required for request headers
    request_id = str(uuid.uuid4())
    request_date = get_hdr_date()
    unsigned_auth_header = '{date}:{req_id}:{uri}:{app_key}'.format(
        date=request_date,
        req_id=request_id,
        uri=uri,
        app_key=APP_KEY
    )
    hmac_sha1 = hmac.new(
        base64.b64decode(secret_key),
        unsigned_auth_header.encode(),
        digestmod=hashlib.sha1).digest()
    sig = base64.encodebytes(hmac_sha1).rstrip()
    headers = {
        'Authorization': 'MC ' + access_key + ':' + sig.decode(),
        'x-mc-app-id': APP_ID,
        'x-mc-date': request_date,
        'x-mc-req-id': request_id,
        'Content-Type': 'application/json'
    }
    try:
        # Send request to API
        log.debug('Sending request to https://' + base_url + uri + ' with request Id: ' + request_id)
        r = requests.post(url='https://' + base_url + uri, data=json.dumps(post_body), headers=headers)
        # Handle Rate Limiting (same string-to-number conversion as in get_base_url)
        if r.status_code == 429:
            reset_ms = float(r.headers['X-RateLimit-Reset'])
            log.warning('Rate limit hit. sleeping for ' + str(reset_ms) + ' ms')
            time.sleep(reset_ms / 1000)
            r = requests.post(url='https://' + base_url + uri, data=json.dumps(post_body), headers=headers)
    # Handle errors
    except Exception as e:
        log.error('Unexpected error connecting to API. Exception: ' + str(e))
        return 'error'
    # Handle errors from API
    if r.status_code != 200:
        log.error('Request to ' + uri + ' with request id: ' + request_id + ' returned with status code: ' +
                  str(r.status_code) + ', response body: ' + r.text)
        return 'error'
    # Return response body and response headers
    return r.content, r.headers
def get_mta_siem_logs(checkpoint_dir, base_url, access_key, secret_key):
    uri = "/api/audit/get-siem-logs"
    # Set checkpoint file name to store page token
    checkpoint_filename = os.path.join(checkpoint_dir, 'get_mta_siem_logs_checkpoint')
    # Build post body for request
    post_body = dict()
    post_body['data'] = [{}]
    post_body['data'][0]['type'] = 'MTA'
    post_body['data'][0]['compress'] = True
    if os.path.exists(checkpoint_filename):
        post_body['data'][0]['token'] = read_file(checkpoint_filename)
    # Send request to API
    resp = post_request(base_url, uri, post_body, access_key, secret_key)
    now = datetime.datetime.now().strftime("%a %b %d %H:%M:%S %Y")
    # Process response
    if resp != 'error':
        resp_body = resp[0]
        resp_headers = resp[1]
        content_type = resp_headers['Content-Type']
        # End if response is JSON as there is no log file to download
        if content_type == 'application/json':
            log.info('No more logs available')
            return False
        # Process log file
        elif content_type == 'application/octet-stream':
            file_name = resp_headers['Content-Disposition'].split('=\"')
            file_name = file_name[1][:-1]
            # Save files to LOG_FILE_PATH
            write_file(os.path.join(LOG_FILE_PATH, file_name), resp_body)
            # Save mc-siem-token page token to check point directory
            write_file(checkpoint_filename, resp_headers['mc-siem-token'])
            try:
                if syslog_output is True:
                    for filename in os.listdir(LOG_FILE_PATH):
                        file_creation_time = time.ctime(os.path.getctime(LOG_FILE_PATH + "/" + filename))
                        if now < file_creation_time or now == file_creation_time:
                            log.info('Loading file: ' + filename + ' to output to ' + syslog_server + ':' + str(syslog_port))
                            with open(file=os.path.join(LOG_FILE_PATH, filename), mode='r', encoding='utf-8') as log_file:
                                lines = log_file.read().splitlines()
                            for line in lines:
                                syslogger.info(line)
                            log.info('Syslog output completed for file ' + filename)
            except Exception as e:
                log.error('Unexpected error writing to syslog. Exception: ' + str(e))
            # return true to continue loop
            return True
        else:
            # Handle errors
            log.error('Unexpected response')
            for header in resp_headers:
                log.error(header)
            return False
def run_script():
    # discover base URL
    try:
        base_url = get_base_url(email_address=EMAIL_ADDRESS)
    except Exception as e:
        log.error('Error discovering base url for ' + EMAIL_ADDRESS + ' . Exception: ' + str(e))
        quit()
    # Request log data in a loop until there are no more logs to collect
    try:
        log.info('Getting MTA log data')
        while get_mta_siem_logs(checkpoint_dir=CHK_POINT_DIR, base_url=base_url, access_key=ACCESS_KEY,
                                secret_key=SECRET_KEY) is True:
            log.info('Getting more MTA log files')
    except Exception as e:
        log.error('Unexpected error getting MTA logs ' + (str(e)))
    # Join the directory path; a bare isfile(name) would check the working directory
    file_number = len([name for name in os.listdir(LOG_FILE_PATH)
                       if os.path.isfile(os.path.join(LOG_FILE_PATH, name))])
    if delete_files or file_number >= log_file_threshold:
        for filename in os.listdir(LOG_FILE_PATH):
            file_path = os.path.join(LOG_FILE_PATH, filename)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                print('Failed to delete %s. Reason: %s' % (file_path, e))
    quit()

# Run script
run_script()
It seems like it may be a race condition, but I am not sure how to confirm that since I can't reproduce it. I notice that SumoLogic has a modified version of this script as well, with a different methodology for managing the files/paths. If that script works better than the main sample script above, would anybody be able to explain why? I haven't had any issues with it yet.
https://github.com/SumoLogic/sumologic-content/blob/master/MimeCast/SumoLogic-Mimecast-Data-Collection/siem_collection.py
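I can't say why the SumoLogic version behaves better without profiling both, but one well-known cause of exactly this symptom in requests-based pollers is that none of the requests.post() calls above pass a timeout; requests waits forever by default, so a connection that dies mid-request hangs the loop silently instead of raising. A hardening sketch (the timeout and retry values are assumptions, not from Mimecast's sample):

import requests

REQUEST_TIMEOUT = (10, 120)  # (connect, read) seconds -- assumed values

def post_with_timeout(url, body, headers, retries=3):
    """POST with a hard timeout and a simple retry, so a dead connection
    raises instead of hanging the process indefinitely."""
    for attempt in range(1, retries + 1):
        try:
            return requests.post(url=url, data=body, headers=headers,
                                 timeout=REQUEST_TIMEOUT)
        except (requests.Timeout, requests.ConnectionError) as e:
            log.warning('Attempt %d of %d failed: %s', attempt, retries, e)
    raise RuntimeError('API unreachable after %d attempts' % retries)

Using something like this in place of the bare requests.post() calls in get_base_url and post_request would at least turn a hung poller into a visible error that the script (or a supervisor) can react to.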
I have the following script working properly. It checks IP information from the NetBox API. I would like to know what to add so I can import a list of IPs and run it against the script:
#!/bin/python3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)  # Disable warning for SSL error

ip_address = input("Enter the IP Address you want to search: ")
apiBaseUrl = "https://netbox.local/api"
headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
    'Authorization': 'Token 5c915999999998ad82112b3b5880199769894421'  # Here you can add your own token
}

def get_hostInterfaceDescription(ip4):
    resp = requests.get(apiBaseUrl + '/ipam/ip-addresses/?q=' + ip4, headers=headers, verify=False).json()
    return resp['results'][0]["description"]  # this gets the description information

try:
    desc = get_hostInterfaceDescription(ip_address)
    print("")
    print("Description found in Netbox: " + desc)
except (TypeError, IndexError):  # "except TypeError or IndexError" only catches TypeError
    print("Description Not found")

def get_hostInterfaceTenant(ip4):
    resp = requests.get(apiBaseUrl + '/ipam/ip-addresses/?q=' + ip4, headers=headers, verify=False).json()
    return resp['results'][0]["tenant"]["name"]  # this gets the tenant name

try:
    tenant = get_hostInterfaceTenant(ip_address)
    print("")
    print("Tenant found in Netbox: " + tenant)
except (TypeError, IndexError):
    print("Tenant Not found")

def get_hostInterfaceVRF(ip4):
    resp = requests.get(apiBaseUrl + '/ipam/ip-addresses/?q=' + ip4, headers=headers, verify=False).json()
    return resp['results'][0]["vrf"]["name"]  # this gets the VRF name (the original returned the tenant name again)

try:
    vrf = get_hostInterfaceVRF(ip_address)
    print("")
    print("VRF found in Netbox: " + vrf)
except (TypeError, IndexError):
    print("VRF Not Found")
Loading a text file and looping over its contents is simple in Python:
with open("ip_list.txt") as ip_list:
for address in ip_list:
address = address.strip() # remove trailing newline
do_something_with(address)
In your case, do_something_with might look like this:
def do_something_with(ip4):
    try:
        desc = get_hostInterfaceDescription(ip4)
        ...
    except ...:
        ...
    try:
        tenant = get_hostInterfaceTenant(ip4)
        ...
    except ...:
        ...
    ...
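Putting the pieces together, a complete version might look like this (the file name ip_list.txt is an assumption; the lookup helpers are the ones defined in the question, and blank lines are skipped):

def do_something_with(ip4):
    try:
        print("Description found in Netbox: " + get_hostInterfaceDescription(ip4))
    except (TypeError, IndexError):
        print("Description not found for " + ip4)
    try:
        print("Tenant found in Netbox: " + get_hostInterfaceTenant(ip4))
    except (TypeError, IndexError):
        print("Tenant not found for " + ip4)
    try:
        print("VRF found in Netbox: " + get_hostInterfaceVRF(ip4))
    except (TypeError, IndexError):
        print("VRF not found for " + ip4)

with open("ip_list.txt") as ip_list:
    for address in ip_list:
        address = address.strip()
        if address:  # skip blank lines
            do_something_with(address)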
I am observing that with the Python requests module, HTTP keep-alive is not being honored.
I don't see ACKs for keep-alive being sent from the host where I am running the Python script.
Please let me know how it can be fixed. Following is my code:
import json
import requests
import logging
import sys
import time
from threading import Thread

logging.basicConfig(level=logging.DEBUG)

class NSNitro:
    def __init__(self, *args):
        if len(args) > 2:
            self.ip = args[0]
            self.username = args[1]
            self.password = args[2]
            self.session_id = None
            url = 'http://' + self.ip + '/nitro/v1/config/login'
            payload = {"login": {"username": "nsroot", "password": "nsroot"}}
            headers = {"Content-type": "application/json", 'Connection': 'keep-alive'}
            try:
                r = requests.post(url=url, headers=headers, data=json.dumps(payload), timeout=5)
                logging.info(r.json()["sessionid"])
                if r.json()["sessionid"] is not None:
                    self.session_id = r.json()["sessionid"]
            except requests.exceptions.RequestException:
                logging.critical("Some error occurred during connection")
        else:
            logging.error("Not sufficient parameters provided. Required: ipaddress, username, password")

    def install_build(self, build_url):
        url = 'http://' + self.ip + '/nitro/v1/config/install'  # the original hard-coded 'ip' here
        headers = {"Content-type": "application/json", "Connection": "keep-alive"}
        payload = {"install": {"url": build_url}}
        try:
            cookie = {"NITRO_AUTH_TOKEN": self.session_id}
            r = requests.post(timeout=5, url=url, data=json.dumps(payload), headers=headers, cookies=cookie)
        except requests.exceptions.RequestException:
            print("Connection Error occurred")
            raise  # re-raise: this will give details of the exception
        else:
            assert r.status_code == 201, "Status code seen: " + str(r.status_code) + "\n" + \
                "Error message from system: " + r.json()["message"]
            print("Successfully triggered job on device to install build")

    def __del__(self):
        logging.debug("Deleted the object")

if __name__ == '__main__':
    ns_session = NSNitro(ip, username, password)
    url_i = 'https://myupload-server.net/build-13.0-480.16.tgz'
    t1 = Thread(target=ns_session.install_build, args=(url_i,))
    t1.start()
    ''' while t1.is_alive():
        t2 = Thread(target=ns_session.get_installed_version,)
        t2.start()
        t2.join()'''
    time.sleep(100)
    logging.info("Install thread completed")
    t1.join()
    ns_session.logout()
When the request is posted using the curl command, the ACKs are sent at the specified keep-alive intervals. Without the ACKs being sent, the server resets the connection.
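The likely cause: every bare requests.post() call opens its own connection and closes it once the response is consumed, so there is nothing left to keep alive between calls, whatever the Connection header says. Connection pooling and HTTP keep-alive in requests come from requests.Session. A sketch of the login/install flow rewritten around one shared session (URLs and payloads as in the question):

import json
import requests

# One Session shared by all calls: the underlying TCP connection is pooled
# and reused between requests instead of being torn down after each one.
session = requests.Session()
session.headers.update({"Content-type": "application/json"})

def login(ip, username, password):
    url = 'http://' + ip + '/nitro/v1/config/login'
    payload = {"login": {"username": username, "password": password}}
    r = session.post(url, data=json.dumps(payload), timeout=5)
    session.cookies.set("NITRO_AUTH_TOKEN", r.json()["sessionid"])

def install_build(ip, build_url):
    url = 'http://' + ip + '/nitro/v1/config/install'
    r = session.post(url, data=json.dumps({"install": {"url": build_url}}), timeout=5)
    return r.status_code

Note also that the periodic keep-alive ACKs seen with curl are TCP-level keep-alive probes sent by the socket layer; a Session keeps the connection open long enough for them to matter, but they are not produced by the Connection: keep-alive header itself.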
I have a problem with downloading videos from my server, e.g. http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro%20USLUGE.mp4
Everything works perfectly when the internet connection is OK, and also when I disconnect the LAN cable from the Raspberry Pi for less than 10-15 seconds. But when the internet is off for more than 10-15 seconds, my download does not continue, or the videos are not properly downloaded (I merge them later with MP4Box and they need to be complete). If someone has a suggestion how to solve this problem and can help me, I would appreciate it very much.
Here is my code:
import os
import urllib
import urllib2
import time
import commands
import requests
import shutil
from urllib2 import URLError
urls = ['http://screensfiles.dbtouch.com/screens2/Companies/89/HD/00 APPS OVERVIEW.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/ILIRIJA BIOGRAD 2016.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Restoran marina.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/HT Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Croatia Full of life.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/04 PROJECTS.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/05 ATTEND.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro Hotel.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4']
directory = "/home/pi/pythonSignage/current_playlist/videos_to_merge/"
i=1
for url in urls:
    i += 1
    print("current iter: ")
    print(i)
    if len(urls) > 1:
        url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "") + " "
    else:
        url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "")
    url_formatted_name = url.split('/')[-1].replace(" ", "").replace("%20", "").rstrip()
    while True:
        print("inside while true")
        try:
            """ method 0 doesn't work """
            print("try")
            response = urllib2.urlopen(url, timeout=5)
            content = response.read()
            print("content")
            f = open(directory + url_formatted_name, 'wb')
            f.write(content)
            f.close()
            """ method 1 doesn't work """
            #video_fancy_downloader = urllib.FancyURLopener()
            #video_fancy_downloader.retrieve(url, directory + url_formatted_name)
            """ method 2 - doesn't work """
            #my_file = urllib.URLopener()
            #my_file = retrieve(url, directory + url_formatted_name)
            """ method 3 - doesn't work """
            #response = requests.get(url, stream=True)
            #response.raise_for_status()
            #with open(directory + url_formatted_name, 'wb') as handle:
            #    for block in response.iter_content(1024):
            #        handle.write(block)
            break  # without this the loop re-downloads the same file forever
        except:
            print("error download, sleep 5 sec")
            time.sleep(5)
print("end")
I have managed to solve my problem. Maybe this is not the best approach, but it works.
Here is the function for downloading a video; it returns whether the download succeeded:
import subprocess  # needed for Popen; not imported in the script above

def do_download(destination, url):
    comm = ["wget", "-c", "-O", destination, "-t", "15000", "-T", "5", url]
    proc = subprocess.Popen(comm, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    tmp = proc.stdout.read()
    if "wget: unable to resolve host address" in tmp:
        return False
    else:
        return True
The core part of the downloading script is almost the same, but now it calls do_download inside the while loop and checks its return value:
if os.path.isfile(directory + url_formatted_name) is False:
    print("must download file!")
    while downl_success is False:
        print("inside while true")
        try:
            print("try")
            while do_download(directory + url_formatted_name, url) is False:
                print(" ------- inside while for download ----------- ")
                time.sleep(5)
            downl_success = True
            print("file downloaded fully!")
            break
        except HTTPError, e:
            print "HTTPError", e.code, url
            time.sleep(5)
        except URLError, e:
            print "URL Error", e.reason, url
            time.sleep(5)
else:
    print("file already downloaded no need to download it again!")
I have a text file which contains proxy IPs, which looks like the following:
130.14.29.111:80
130.14.29.120:80
130.159.235.31:80
14.198.198.220:8909
141.105.26.183:8000
160.79.35.27:80
164.77.196.75:80
164.77.196.78:45430
164.77.196.78:80
173.10.134.173:8081
174.132.145.80:80
174.137.152.60:8080
174.137.184.37:8080
174.142.125.161:80
After checking these proxies, I want to mark them as follows:
total number of '0' = 8
total number of 'x' = 6
percentage = alive 60% , dead 40%
x 130.14.29.111:80
0 130.14.29.120:80
0 130.159.235.31:80
0 14.198.198.220:8909
0 141.105.26.183:8000
0 160.79.35.27:80
x 164.77.196.75:80
x 164.77.196.78:45430
x 164.77.196.78:80
0 173.10.134.173:8081
0 174.132.145.80:80
0 174.137.152.60:8080
x 174.137.184.37:8080
x 174.142.125.161:80
How can this be done with Python? A sample would help a lot.
If anyone could help me or enlighten me, I would much appreciate it!
Edit:
This is the script source of what I have.
The checked proxies are saved to 'proxy_alive.txt'; in this file I want to mark whether each proxy element is alive or not.
import socket
import urllib2
import threading
import sys
import Queue
import socket
socket.setdefaulttimeout(7)
print "Bobng's proxy checker. Using %s second timeout"%(socket.getdefaulttimeout())
#input_file = sys.argv[1]
#proxy_type = sys.argv[2] #options: http,s4,s5
#output_file = sys.argv[3]
input_file = 'proxylist.txt'
proxy_type = 'http'
output_file = 'proxy_alive.txt'
url = "www.seemyip.com" # Don't put http:// in here, or any /'s
check_queue = Queue.Queue()
output_queue = Queue.Queue()
threads = 20
def writer(f, rq):
    while True:
        line = rq.get()
        f.write(line + '\n')

def checker(q, oq):
    while True:
        proxy_info = q.get()  # ip:port
        if proxy_info == None:
            print "Finished"
            #quit()
            return
        #print "Checking %s" % proxy_info
        if proxy_type == 'http':
            try:
                listhandle = open("proxylist.txt").read().split('\n')
                for line in listhandle:
                    saveAlive = open("proxy_alive.txt", 'a')
                    details = line.split(':')
                    email = details[0]
                    password = details[1].replace('\n', '')
                    proxy_handler = urllib2.ProxyHandler({'http': proxy_info})
                    opener = urllib2.build_opener(proxy_handler)
                    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    urllib2.install_opener(opener)
                    req = urllib2.Request("http://www.google.com")
                    sock = urllib2.urlopen(req, timeout=7)
                    rs = sock.read(1000)
                    if '<title>Google</title>' in rs:
                        oq.put(proxy_info)
                        print '[+] alive proxy', proxy_info
                        saveAlive.write(line)
                    saveAlive.close()
            except urllib2.HTTPError, e:
                print 'url open error? slow?'
                pass
            except Exception, detail:
                print '[-] bad proxy', proxy_info
        else:
            # gotta be socks (requires the third-party 'socks' module, which is not imported above)
            try:
                s = socks.socksocket()
                if proxy_type == "s4":
                    t = socks.PROXY_TYPE_SOCKS4
                else:
                    t = socks.PROXY_TYPE_SOCKS5
                ip, port = proxy_info.split(':')
                s.setproxy(t, ip, int(port))
                s.connect((url, 80))
                oq.put(proxy_info)
                print proxy_info
            except Exception, error:
                print proxy_info

threading.Thread(target=writer, args=(open(output_file, "wb"), output_queue)).start()
for i in xrange(threads):
    threading.Thread(target=checker, args=(check_queue, output_queue)).start()
for line in open(input_file).readlines():
    check_queue.put(line.strip('\n'))
print "File reading done"
for i in xrange(threads):
    check_queue.put(None)
raw_input("PRESS ENTER TO QUIT")
sys.exit(0)
You can use a queue to hold the list of addresses and their meta information, e.g.:
[ (ip-address-1, 'x'), (ip-address-2, '0'), ... ]
After you are done with your operation on these IP addresses, you can write the list back to the same file in 'w' mode.
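A minimal sketch of that idea in Python 3 using requests (the alive-check URL is an assumption; the 7-second timeout mirrors the original script, and '0'/'x' match the desired output format):

import requests

def check_proxy(proxy, timeout=7):
    """Return True if a request through 'host:port' succeeds."""
    proxy_url = "http://" + proxy
    try:
        r = requests.get("http://www.google.com",
                         proxies={"http": proxy_url, "https": proxy_url},
                         timeout=timeout)
        return r.status_code == 200
    except requests.RequestException:
        return False

with open("proxylist.txt") as f:
    proxies = [line.strip() for line in f if line.strip()]

# '0' marks an alive proxy, 'x' a dead one.
results = [(p, '0' if check_proxy(p) else 'x') for p in proxies]
alive = sum(1 for _, mark in results if mark == '0')
dead = len(results) - alive

with open("proxy_alive.txt", "w") as f:
    f.write("total number of '0' = %d\n" % alive)
    f.write("total number of 'x' = %d\n" % dead)
    if results:
        f.write("percentage = alive %.0f%% , dead %.0f%%\n" % (
            100.0 * alive / len(results), 100.0 * dead / len(results)))
    for p, mark in results:
        f.write("%s %s\n" % (mark, p))

This checks proxies sequentially to keep the marking logic clear; the threading from the script above can be layered back on top once the output format works.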