Before I give up and go the Selenium route, I tried to automate this page (yeezysupply.com/products/mens-crepe-boot-oil) with Python requests all the way through checkout. I got stuck at the checkout page: the credit card form is loaded in an iframe and its request is submitted to a different URL, https://elb.deposit.shopifycs.com/sessions, which responds with a 500 Internal Server Error. Below is the relevant section of the code I tried, which did not work:
# Drop the query string from the last response's URL; the bare URL is reused
# below for the '/processing' POST and the follow-up GET.
payment_method_url = r.url.split('?')
payment_method_url = payment_method_url[0]
# Billing details copied into the form payload further down.
BILLING_FIRST_NAME = 'Jon'
BILLING_LAST_NAME = 'Norman'
BILLING_ADDRESS_1 = 'G-309'
BILLING_ADDRESS_2 = 'G-309'
BILLING_CITY = 'Chicago'
BILLING_COUNTRY = 'United States'
BILLING_PROVINCE = 'Illinois'
BILLING_ZIP = '60007'
# NOTE(review): BILLING_PHONE is never referenced in this snippet.
BILLING_PHONE = '149584848485'
TOTAL_PRICE = 66500
# For final Checkout
s.options('https://elb.deposit.shopifycs.com/sessions', headers=headers)
# NOTE(review): session_url and session_data are assigned but never sent
# anywhere in this snippet; only the OPTIONS request above reaches the
# sessions endpoint.
session_url = 'https://elb.deposit.shopifycs.com/sessions'
session_data = '{"credit_card":{"number":"4242 4242 4242 4242","name":"Jon Norman","month":9,"year":2019,"verification_value":"123"}}'
sleep(3)
# The referer is set on the session, so every later request made through `s`
# carries it.
s.headers.update({
'referer': 'https://checkout.shopifycs.com/number?identifier=eeb4fe88a0fd4063043eeb5730d460f4&location=https%3A%2F%2Fpurchase.yeezysupply.com%2F17655971%2Fcheckouts%2Feeb4fe88a0fd4063043eeb5730d460f4'})
# Form fields for the payment step.
# NOTE(review): a dict keeps only the last value of a repeated key, so
# 'checkout[payment_gateway]' and 'checkout[billing_address][remember_me]'
# are each submitted once ('117647559' and '0' respectively).
data = {
'utf8': 'utf8',
'_method': 'patch',
'authenticity_token': authenticity_token,
'previous_step': 'payment_method',
'step': '',
# NOTE(review): hard-coded value; presumably the card-session id issued by
# the sessions endpoint for one specific checkout -- TODO confirm.
's': 'east-50fb8458975b56217d7317847efb9280',
'checkout[payment_gateway]': '117647559',
'checkout[credit_card][vault]': 'false',
'checkout[payment_gateway]': '117647559',
'checkout[different_billing_address]': 'true',
'checkout[billing_address][first_name]': BILLING_FIRST_NAME,
'checkout[billing_address][last_name]': BILLING_LAST_NAME,
'checkout[billing_address][address1]': BILLING_ADDRESS_1,
'checkout[billing_address][address2]': BILLING_ADDRESS_2,
'checkout[billing_address][city]': BILLING_CITY,
'checkout[billing_address][country]': BILLING_COUNTRY,
'checkout[billing_address][province]': BILLING_PROVINCE,
'checkout[billing_address][zip]': BILLING_ZIP,
'checkout[billing_address][remember_me]': 'false',
'checkout[billing_address][remember_me]': '0',
'checkout[billing_address][remember_me_country_code]': '',
'checkout[billing_address][remember_me_phone]': '',
'checkout[billing_address][total_price]': TOTAL_PRICE,
'complete': '1',
'checkout[client_details][browser_width]': '1280',
'checkout[client_details][browser_height]': '150',
'checkout[client_details][javascript_enabled]': '1',
}
sleep(2)
# Submit the payment form, then re-fetch the checkout page and dump its HTML.
r = s.post(payment_method_url+'/processing', data=data, headers=headers)
r = s.get(payment_method_url, headers=headers)
print(r.text)
Related
I'm a Python beginner. I would like to ask for help with retrieving data from a response. Here's my script:
import pandas as pd
import re
import time
import requests as re
import json
# NOTE: `re` here is the `requests` alias -- the later `import requests as re`
# rebinds the name and hides the standard-library `re` module imported first.
response = re.get(url, headers=headers, auth=auth)
# Decode the JSON body of the response into Python objects.
data = response.json()
Here's a part of json response:
{'result': [{'display': '',
'closure_code': '',
'service_offer': 'Integration Platforms',
'updated_on': '2022-04-23 09:05:53',
'urgency': '2',
'business_service': 'Operations',
'updated_by': 'serviceaccount45',
'description': 'ALERT returned 400 but expected 200',
'sys_created_on': '2022-04-23 09:05:53',
'sys_created_by': 'serviceaccount45',
'subcategory': 'Integration',
'contact_type': 'Email',
'problem_type': 'Design: Availability',
'caller_id': '',
'action': 'create',
'company': 'aaaa',
'priority': '3',
'status': '1',
'opened': 'smith.j',
'assigned_to': 'doe.j',
'number': '123456',
'group': 'blabla',
'impact': '2',
'category': 'Business Application & Databases',
'caused_by_change': '',
'location': 'All Locations',
'configuration_item': 'Monitor',
},
I would like to extract the data only for one group = 'blablabla'. Then I would like to extract fields such as:
# Fields wanted from each matching record.
# NOTE(review): the sample response shows no 'created_by' key; the closest
# key it contains is 'sys_created_by'.
number = data['number']
group = data['group']
service_offer = data['service_offer']
updated = data['updated_on']
urgency = data['urgency']
username = data['created_by']
short_desc = data['description']
How it should be done?
I know that to check the first value I should use:
service_offer = data['result'][0]['service_offer']
I've tried to create a dictionary, but, I'm getting an error:
# 'result' holds a list of records in the sample response, so data_result has
# to be indexed by position (or iterated) before a key such as 'number' can
# be looked up -- indexing the list with a string is what raises the TypeError.
data_result = response.json()['result']
# NOTE(review): `name = value` is not valid inside a dict literal; entries
# are written as 'name': value and separated by commas.
payload ={
number = data_result['number']
group = data_result['group']
service_offer = data_result['service_offer']
updated = data_result['updated_on']
urgency = data_result['urgency']
username = data_result['created_by']
short_desc = data_result['description']
}
TypeError: list indices must be integers or slices, not str:
So I've started to write something like the code below, but I'm stuck:
# Copies the entries of `data` into a new list, addressing them by integer
# position.
# NOTE(review): `data` is the decoded response, whose records sit under
# data['result']; integer positions on `data` itself presumably do not reach
# them -- confirm against the real response.
get_data = []
if len(data) > 0:
for item in range(len(data)):
get_data.append(data[item])
May I ask for help?
If data is your decoded json response from the question then you can do:
# Find the first record in the result whose group is `blabla`.
# A default is passed to next() so that a missing group is reported with a
# clear error instead of a bare StopIteration, and .get() tolerates records
# that carry no 'group' key at all.
g = next((d for d in data["result"] if d.get("group") == "blabla"), None)
if g is None:
    raise LookupError("no record with group 'blabla' in data['result']")

# Pull the wanted fields out of the matching record.
number = g["number"]
group = g["group"]
service_offer = g["service_offer"]
updated = g["updated_on"]
urgency = g["urgency"]
username = g["sys_created_by"]  # the response has no plain 'created_by' key
short_desc = g["description"]

print(number, group, service_offer, updated, urgency, username, short_desc)
Prints:
123456 blabla Integration Platforms 2022-04-23 09:05:53 2 serviceaccount45 ALERT returned 400 but expected 200
I'm trying to write a parser based on the `urllib` and `beautifulsoup` libraries, but I don't understand why the same URL sometimes returns status 200 and sometimes 404. Moreover, a URL that returned 404 when requested through `urllib` opens perfectly when I visit it manually in a browser.
Could anyone explain that behavior?
# Search-results endpoint and the query parameters sent with every request.
url = 'https://zakupki.gov.ru/epz/order/extendedsearch/results.html'
params = {'searchString': 'Сакубитрил',
'morphology': 'on',
'pageNumber': 1,
'sortDirection': 'false',
'recordsPerPage': '_10',
'showLotsInfoHidden': 'false',
'sortBy': 'UPDATE_DATE',
'fz44': 'on',
'fz223': 'on',
'af': 'on',
'ca': 'on',
'pc': 'on',
'pa': 'on',
'currencyIdGeneral': -1,
'publishDateFrom': '01.02.2021',
'publishDateTo': '21.02.2021'}
def parser(url, params, max_attempts=10, delay=5, timeout=10):
    """Fetch ``url`` with ``params`` as its query string and parse the HTML.

    The request is retried when it fails at the network or HTTP level, which
    covers the intermittent 404 responses this helper was written for.

    Args:
        url: Address of the page, without a query string.
        params: Mapping of query parameters; it is URL-encoded and appended
            to ``url`` after a ``?``.
        max_attempts: How many times the request is tried before giving up.
        delay: Seconds to wait between two attempts.
        timeout: Per-request timeout in seconds, passed to ``urlopen``.

    Returns:
        A ``BeautifulSoup`` document built with the ``html.parser`` backend,
        or ``None`` when every attempt failed.
    """
    # Function-scope import so the block does not depend on the file header.
    import http.client

    # The URL does not change between attempts, so it is built only once.
    full_url = url + '?' + urllib.parse.urlencode(params)

    for attempt in range(1, max_attempts + 1):
        try:
            with urllib.request.urlopen(full_url, timeout=timeout) as response:
                the_page = response.read()
        except (OSError, http.client.HTTPException):
            # OSError covers URLError/HTTPError, timeouts and connection
            # resets; HTTPException covers truncated or malformed replies.
            # Anything else is a programming error and is allowed to surface.
            if attempt < max_attempts:
                # No point in sleeping after the final failure.
                time.sleep(delay)
            continue
        # Parsing happens outside the try block so that a parsing problem is
        # reported immediately and does not trigger pointless re-downloads.
        return BeautifulSoup(the_page, 'html.parser')

    return None
# Run the search; `data` is the parsed page, or None if every attempt failed.
data = parser(url, params=params)
My code:
# Device profile whose values are interpolated into the user agent string.
DEVICE = dict(
    instagram_version='26.0.0.10.86',
    android_version=24,
    android_release='7.0',
    dpi='640dpi',
    resolution='1440x2560',
    manufacturer='samsung',
    device='SM-G930F',
    model='herolte',
    cpu='samsungexynos8890',
)

# Template of the user agent; every placeholder is a key of DEVICE.
USER_AGENT_BASE = (
    'Instagram {instagram_version} Android '
    '({android_version}/{android_release}; {dpi}; {resolution}; '
    '{manufacturer}; {device}; {model}; {cpu}; en_US)'
)
user_agent = USER_AGENT_BASE.format_map(DEVICE)

LOGIN_URL = 'https://i.instagram.com/api/v1/accounts/login/'

# One session carries both the shared request headers and the user agent.
sess = requests.Session()
sess.headers.update(REQUEST_HEADERS)
sess.headers.update({'User-Agent': user_agent})

# Post the login form and stop if the server does not answer with 200.
response = sess.post(LOGIN_URL, data=data)
assert response.status_code == 200
It allows me to login into an Instagram account with my original ip address. How do I change or hide my ip?
I've been trying to make a scraper to get my grades from my school's website. Unfortunately, I cannot log in. When I run the program, the returned page validates the user/password fields and, since they are blank, does not let me proceed.
Also, I am not really sure if I am even coding this correctly.
from twill.commands import *
import requests
# First attempt: post the two credential fields together with hand-written
# cookies.
payload = {
'ctl00$cphMainContent$lgn$UserName':'user',
'ctl00$cphMainContent$lgn$Password':'pass',
}
# NOTE(review): the ASP.NET session id is sent as an empty string here.
cookie = {
'En_oneTime_ga_tracking_v2' : 'true',
'ASP.NET_SessionId' : ''
}
with requests.Session() as s:
p = s.post('schoolUrl', data=payload, cookies=cookie)
# Python 2 print statement: dumps the HTML of the returned page.
print p.text
Updated payload:
# Second attempt: the credential fields plus the hidden form fields and the
# login button.
payload = {
'ctl00$cphMainContent$lgnEaglesNest$UserName':'user',
'ctl00$cphMainContent$lgnEaglesNest$Password':'pass',
'__LASTFOCUS': '',
'__EVENTTARGET':'',
'__EVENTARGUMENT':'',
'__VIEWSTATE': 'LONG NUMBER',
'__VIEWSTATEGENERATOR': 'C2EE9ABB',
'__EVENTVALIDATION' : 'LONG NUMBER',
'ctl00$cphMainContent$lgnEaglesNest$RememberMe': 'on',
# NOTE(review): the complete source further down uses 'Log In' (with a space)
# for this field; requests presumably form-encodes values itself, so the
# literal '+' here may reach the server as '%2B' -- confirm.
'ctl00$cphMainContent$lgnEaglesNest$LoginButton':'Log+In'
}
How do i know if my POST was successful?
The returned page was saying that Username/Password cannot be blank.
Complete source:
from twill.commands import *
import requests
# Login form fields: credentials, the hidden form fields and the button
# that was pressed.
payload = {
'ctl00$cphMainContent$lgnEaglesNest$UserName':'user',
'ctl00$cphMainContent$lgnEaglesNest$Password':'pass',
'__LASTFOCUS': '',
'__EVENTTARGET':'',
'__EVENTARGUMENT':'',
# NOTE(review): 'LONG NUMBER' stands in for values copied by hand;
# presumably they are issued per page load and have to be read from a fresh
# GET of the login page -- confirm.
'__VIEWSTATE': 'LONG NUMBER',
'__VIEWSTATEGENERATOR': 'C2EE9ABB',
'__EVENTVALIDATION' : 'LONG NUMBER',
'ctl00$cphMainContent$lgnEaglesNest$RememberMe': 'on',
'ctl00$cphMainContent$lgnEaglesNest$LoginButton':'Log In'
}
# NOTE(review): `cookie` is defined but not passed to the request below.
cookie = {
'En_oneTime_ga_tracking_v2' : 'true',
'ASP.NET_SessionId' : ''
}
with requests.Session() as s:
loginUrl = 'http://eaglesnest.pcci.edu/Login.aspx?ReturnUrl=%2f'
# NOTE(review): gradeUrl is assigned but never requested in this snippet.
gradeUrl = 'http://eaglesnest.pcci.edu/StudentServices/ClassGrades/Default.aspx'
p = s.post( loginUrl, data=payload)
# Python 2 print statement: dumps the HTML of the returned page.
print p.text
Your payload uses the wrong keys, try
ctl00$cphMainContent$lgnEaglesNest$UserName
ctl00$cphMainContent$lgnEaglesNest$Password
You can check the names by watching the network traffic in your browser (e.g. in Firefox: inspect element --> network --> post --> params)
In addition you need to specify which command you want to perform, i.e. which button was pressed.
# Add the login button's field so the server knows which button was pressed.
# The key goes inside the brackets and the value after `=`; a colon inside
# the brackets would build a slice and look an entry up without storing one.
payload['ctl00$cphMainContent$lgnEaglesNest$LoginButton'] = 'Log In'
I'm trying to convert a module that interacts with Cleverbot. It was written for Python 2, but I would like it to work with Python 3. However, I encounter an error:
parsed = [ item.split('\r') for item in self.resp.split('\r\r\r\r\r\r')[:-1]]
TypeError: 'str' does not support the buffer interface
I don't know how to resolve it ! Please help !
Here is the entire code :
"""Python library allowing interaction with the Cleverbot API."""
import hashlib
import urllib.request, urllib.parse, urllib.error
from urllib.request import *
class Cleverbot:
    """Wrapper over the Cleverbot API.

    An instance keeps the form fields posted to the web service in ``data``,
    the running conversation in ``conversation`` and the text of the last
    reply in ``resp``.
    """

    HOST = "www.cleverbot.com"
    PROTOCOL = "http://"
    RESOURCE = "/webservicemin"
    API_URL = PROTOCOL + HOST + RESOURCE

    # Request headers sent with every POST.
    headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)',
        'Accept': 'text/html,application/xhtml+xml,'
                  'application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Language': 'en-us,en;q=0.8,en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'no-cache',
        'Host': HOST,
        'Referer': PROTOCOL + HOST + '/',
        'Pragma': 'no-cache'
    }

    def __init__(self):
        """Set up the form data and an empty conversation log."""
        # The data that will get passed to Cleverbot's web API.
        # 'stimulus' has to stay the first key: _send() slices the encoded
        # form at a fixed offset that assumes the string starts with
        # 'stimulus='.
        self.data = {
            'stimulus': '',
            'start': 'y',  # Never modified
            'sessionid': '',
            'vText8': '',
            'vText7': '',
            'vText6': '',
            'vText5': '',
            'vText4': '',
            'vText3': '',
            'vText2': '',
            'icognoid': 'wsf',  # Never modified
            'icognocheck': '',
            'fno': 0,  # Never modified
            'prevref': '',
            'emotionaloutput': '',  # Never modified
            'emotionalhistory': '',  # Never modified
            'asbotname': '',  # Never modified
            'ttsvoice': '',  # Never modified
            'typing': '',  # Never modified
            'lineref': '',
            'sub': 'Say',  # Never modified
            'islearning': 1,  # Never modified
            'cleanslate': False,  # Never modified
        }

        # The log of our conversation with Cleverbot.
        self.conversation = []
        # Text of the most recent reply.
        self.resp = ''

    def ask(self, question):
        """Ask Cleverbot a question and return its answer.

        The question and the answer are both appended to ``conversation``.

        Args:
            question (str): The question to ask.

        Returns:
            Cleverbot's answer, or an empty string when the request fails
            with an HTTP error.
        """
        # Set the current question
        self.data['stimulus'] = question

        # Connect to Cleverbot's API and remember the response
        try:
            self.resp = self._send()
        except urllib.error.HTTPError:
            # Request failed; report it as an empty answer.
            return ''

        # Add the current question to the conversation log
        self.conversation.append(question)

        parsed = self._parse()

        # NOTE(review): 'sessionid' starts out empty and is only replaced
        # when it is already non-empty, so this branch never runs unless the
        # caller sets a session id first. Kept unchanged; confirm the intent.
        if self.data['sessionid'] != '':
            self.data['sessionid'] = parsed['conversation_id']

        # Add Cleverbot's reply to the conversation log
        self.conversation.append(parsed['answer'])

        return parsed['answer']

    def _send(self):
        """POST the question and all required fields to the Cleverbot API.

        Cleverbot tries to prevent unauthorized access to its API by
        obfuscating how it generates the 'icognocheck' token, so the data is
        URL-encoded twice: once to generate the token, and once more to send
        the data together with the token.

        Returns:
            The body of the reply, decoded to ``str``.

        Raises:
            urllib.error.HTTPError: When the server answers with an error
                status; ``ask`` catches this one.
        """
        # Copy the most recent lines of the conversation, newest first, into
        # the vText2 .. vText8 fields (seven slots at most).
        for slot, line in zip(range(2, 9), reversed(self.conversation)):
            self.data['vText' + str(slot)] = line

        # Generate the token: an MD5 digest over 26 characters of the encoded
        # form, starting right after the 9-character 'stimulus=' prefix.
        enc_data = urllib.parse.urlencode(self.data)
        digest_txt = enc_data[9:35]
        token = hashlib.md5(digest_txt.encode('utf-8')).hexdigest()
        self.data['icognocheck'] = token

        # Encode again, this time with the token included.
        enc_data = urllib.parse.urlencode(self.data)
        binary_data = enc_data.encode('utf-8')
        req = urllib.request.Request(self.API_URL, binary_data, self.headers)

        # POST the data; the context manager closes the connection even when
        # reading fails.
        with urllib.request.urlopen(req) as conn:
            charset = conn.headers.get_content_charset()
            raw = conn.read()

        # urlopen() hands back bytes, while _parse() works on str.
        return self._to_text(raw, charset)

    @staticmethod
    def _to_text(raw, charset=None):
        """Return ``raw`` as ``str``, decoding it first when it is bytes.

        ``charset`` falls back to UTF-8 when it is missing or unknown;
        undecodable bytes are replaced rather than raising.
        """
        if not isinstance(raw, bytes):
            return raw
        try:
            return raw.decode(charset or 'utf-8', errors='replace')
        except LookupError:
            # The server announced a codec Python does not know.
            return raw.decode('utf-8', errors='replace')

    def _parse(self):
        """Parse Cleverbot's response stored in ``resp``.

        The response is a list of groups separated by six carriage returns;
        the fields inside a group are separated by a single carriage return.

        Returns:
            A dict with the keys 'answer', 'conversation_id',
            'conversation_log_id' and 'unknown'.

        Raises:
            IndexError: When the response has fewer groups or fields than
                expected.
        """
        # ``resp`` is a public attribute, so tolerate bytes assigned by hand.
        text = self._to_text(self.resp)
        parsed = [
            item.split('\r')
            for item in text.split('\r\r\r\r\r\r')[:-1]
        ]
        return {
            'answer': parsed[0][0],
            'conversation_id': parsed[0][1],
            'conversation_log_id': parsed[0][2],
            'unknown': parsed[1][-1]
        }
# Ask one question read from the keyboard and show Cleverbot's reply.
cb = Cleverbot()
quest = cb.ask(input("Question: "))
print(quest)
Your `self.resp` is a bytestring (in Python 3, `urlopen(...).read()` returns `bytes`), so you need to decode it, e.g. with `.decode('utf-8')`, before splitting it with `str` separators.