I'm trying to write a Python script that can submit responses to Google Forms like this one:
https://docs.google.com/forms/d/152CTd4VY9pRvLfeACOf6SmmtFAp1CL750Sx72Rh6HJ8/viewform
But how do I actually send the POST, and how can I find out what this POST should actually contain?
First pip install requests.
You have to post some specific form data to a specific URL, and you can use requests for that. The keys in the form_data dict correspond to the form's fields; the checkbox options are passed as a list, and if you don't need some of the options, just remove them from that list.
import requests
url = 'https://docs.google.com/forms/d/152CTd4VY9pRvLfeACOf6SmmtFAp1CL750Sx72Rh6HJ8/formResponse'
form_data = {
    # A Python dict cannot hold duplicate keys, so the checkbox options are
    # passed as a list; requests sends one repeated entry.2020959411 field
    # per list element.
    'entry.2020959411': ['18+ sollte absolute Pflicht sein',
                         'Alter sollte garkeine Rolle spielen',
                         '17+ wäre für mich vertretbar',
                         '16+ wäre für mich vertretbar',
                         '15+ wäre für mich vertretbar',
                         'Ausnahmen von der Regel - Dafür?',
                         'Ausnahmen von der Regel - Dagegen?',
                         '__other_option__'],
    'entry.2020959411.other_option_response': 'test',
    'draftResponse': [],
    'pageHistory': 0}
headers = {'Referer': 'https://docs.google.com/forms/d/152CTd4VY9pRvLfeACOf6SmmtFAp1CL750Sx72Rh6HJ8/viewform',
           'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.52 Safari/537.36"}
r = requests.post(url, data=form_data, headers=headers)
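To verify the submission, a simple status check is usually enough (a sketch; a 200 from the formResponse endpoint normally means Google accepted the answer and returned the confirmation page):
# 200 means the POST was accepted and the confirmation page was returned.
print(r.status_code)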
Based on the answer from @pigletfly, I wrote a little script for harvesting the field names (for a text-field-only form):
import urllib.request
from bs4 import BeautifulSoup
import requests, warnings
def get_questions(in_url):
    res = urllib.request.urlopen(in_url)
    soup = BeautifulSoup(res.read(), 'html.parser')
    get_names = lambda f: [v for k, v in f.attrs.items() if 'label' in k]
    get_name = lambda f: get_names(f)[0] if len(get_names(f)) > 0 else 'unknown'
    all_questions = soup.form.findChildren(attrs={'name': lambda x: x and x.startswith('entry.')})
    return {get_name(q): q['name'] for q in all_questions}
def submit_response(form_url, cur_questions, verbose=False, **answers):
    submit_url = form_url.replace('/viewform', '/formResponse')
    form_data = {'draftResponse': [],
                 'pageHistory': 0}
    for v in cur_questions.values():
        form_data[v] = ''
    for k, v in answers.items():
        if k in cur_questions:
            form_data[cur_questions[k]] = v
        else:
            warnings.warn('Unknown Question: {}'.format(k), RuntimeWarning)
    if verbose:
        print(form_data)
    user_agent = {'Referer': form_url,
                  'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.52 Safari/537.36"}
    return requests.post(submit_url, data=form_data, headers=user_agent)
You can then use the get_questions function to get the fields you can fill in:
TEST_FORM_URL = "https://docs.google.com/forms/d/e/1FAIpQLSfBmvqCVeDA7IZP2_mw_HZ0OTgDk2a0JN4VlY5KScECWC-_yw/viewform"
anno_questions = get_questions(TEST_FORM_URL)
To get the questions (fields) as a dict
{'annotator': 'entry.756364489',
'task': 'entry.1368373366',
'item_id': 'entry.84713541',
'label': 'entry.2072511216',
'session': 'entry.2021127767',
'time': 'entry.1122475936'}
then use submit_response with keyword arguments to submit:
submit_response(TEST_FORM_URL, anno_questions, annotator="TestUser", item_id = 0)
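Since submit_response returns the requests Response object, you can also check that the POST went through:
# A 200 status from formResponse indicates the submission was accepted.
resp = submit_response(TEST_FORM_URL, anno_questions, annotator="TestUser", item_id=0)
print(resp.status_code)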
Here is my script which works:
import urllib
import urllib2

user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
header = {'User-Agent': user_agent}
url = "http://....Your google form"
# values from your form. You will need to include any hidden variables if you want to..
values = {
    'entry.asdfsdfsdasd': 'asdfasdfsd',
    'draftResponse': '[,,"-asdfasdasdf"]',
    'pageHistory': '0',
    'fbzx': '-asdfasdfsd'
}
data = urllib.urlencode(values)
# Build the request and actually send it (Request alone submits nothing).
req = urllib2.Request(url, data, header)
response = urllib2.urlopen(req)
I would use urllib2 and urllib to send the POST.
Do something like this:
import urllib2, urllib
import cookielib

cookieJar = cookielib.LWPCookieJar()
# Create an opener that handles cookies and redirects
opener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cookieJar),
    urllib2.HTTPRedirectHandler(),
    urllib2.HTTPHandler(debuglevel=0))
# Add headers
opener.addheaders = [('User-agent', "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36")]
forms = {
    "formname": value,   # the form field's name and the value you want to submit
    "formname2": value2,
}
data = urllib.urlencode(forms)                         # Encode data
req = urllib2.Request('http://www.example.com', data)  # Build Request
res = opener.open(req)                                 # Send Request
html = res.read()                                      # Read Response
You should structure it a bit like that.
To get the form names, you need to look at the page's source code and find the names of the fields you want to fill in and submit.
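For example, here is a minimal sketch (www.example.com is a placeholder for the real page, and BeautifulSoup is assumed to be installed) that prints the name of every field in the first form:
import urllib2
from bs4 import BeautifulSoup

# Fetch the page and list the name attribute of each field in the first <form>.
html = urllib2.urlopen('http://www.example.com').read()
soup = BeautifulSoup(html, 'html.parser')
for field in soup.form.find_all(['input', 'select', 'textarea']):
    print(field.get('name'))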
Hope this helps.
Good luck :)
This code is from a separate submission.
If you look at these lines:
each['AuthorString']
each['Title']
I'm wondering where the user got these variables from. I navigated to the JSON page (Link) and could not find these variables. Maybe I'm on the wrong page? Screenshots would help.
Here is the code:
import requests

session_ids = ['13619', '13736']
for session_id in session_ids:
    url = 'https://cdn-solr.asco.org/solr/ml/mlselect'
    payload = '?_format=json&wt=json&indent=true&q=SessionId:' + session_id + '&start=0&rows=30&sort=score%20desc,%20SessionId%20asc&fq=RecordType:sessions&facet=true&f.Year.facet.sort=index&facet.field={!key=Year}Year&facet.field={!key=subject_thes}subject_thes&facet.field={!key=MediaTypes}MediaTypes&facet.field={!key=fctSessionType}fctSessionType&facet.pivot={!key=MeetingName}fctMeetingName,fctTrack&spellcheck.maxCollationTries=100'
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36'}
    jsonData = requests.get(url + payload, headers=headers).json()
    sessionParticipationID = jsonData['response']['docs'][0]['SessionParticipationID']
    session_id_list = '%20OR%20'.join(sessionParticipationID)
    payload = '?_format=json&wt=json&indent=true&sort=PresentationOrderWithinSession%20asc,%20ISODateString%20asc,%20ISODateStringEnd%20asc&fl=_id,%20score,%20ISODateString,%20ISODateStringEnd,%20ISODateString_1,%20ISODateStringEnd_1,%20Year,%20Title,%20tempAbstractID,%20MediaID,%20VideoID,%20EdBookID,%20edBookTitle,%20PosterID,%20edBookTitle,%20SessionTitle,%20SessionTypeId,%20AuthorString,%20AbstID,%20Role,%20FullName,%20PosterBoard,%20Institution,%20ProgramTitle,%20MeetingName,%20FirstAuthor&q=_id:(' + session_id_list + ')&rows=' + str(len(sessionParticipationID))
    jsonData = requests.get(url + payload, headers=headers).json()

    title_auth = []  # <-- to make a list of {title: author} dictionaries
    for each in jsonData['response']['docs']:
        title = each['Title']          # this line
        author = each['AuthorString']  # and this
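For what it's worth, Title and AuthorString are among the fields requested explicitly via the fl= parameter in the second payload above. One way to see everything a returned document contains is to print its keys:
# Inspect one Solr document to see which fields it actually carries.
print(sorted(jsonData['response']['docs'][0].keys()))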
I'm getting an 'HTTP Error 405: Method Not Allowed' error. My code is:
import urllib.request
import urllib.parse

try:
    url = 'https://www.google.com/search'
    values = {'q': 'python programming tutorials'}
    data = urllib.parse.urlencode(values)
    data = data.encode('utf-8')  # data should be bytes
    headers = {}
    headers['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
    req = urllib.request.Request(url, data, headers=headers)
    resp = urllib.request.urlopen(req)
    print("HERE")
    respData = resp.read()
    saveFile = open('withHeaders.txt', 'w')
    saveFile.write(str(respData))
    saveFile.close()
except Exception as e:
    print(e)
The error, I guess, is in req = urllib.request.Request(url, data, headers=headers). What is the error, is it syntactical? What should be changed in the code? And if I have made a conceptual mistake, please correct me.
EDIT
Concept: passing a data argument makes urllib send a POST request, and Google's /search endpoint only accepts GET, which is why it responds with 405. A helper that chooses between the two:
def URLRequest(url, params, method="GET"):
    if method == "POST":
        return urllib2.Request(url, data=urllib.urlencode(params))
    else:
        return urllib2.Request(url + "?" + urllib.urlencode(params))
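For reference, here is a sketch of the same concept in Python 3's urllib (the snippet above is Python 2):
import urllib.parse
import urllib.request

def url_request(url, params, method="GET"):
    data = urllib.parse.urlencode(params)
    if method == "POST":
        # Parameters travel in the request body (as bytes), so urlopen sends a POST.
        return urllib.request.Request(url, data=data.encode('utf-8'))
    # Parameters travel in the query string; with no data argument urlopen sends a GET.
    return urllib.request.Request(url + "?" + data)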
You can use the Requests library instead. It's much cleaner than urllib:
import requests

q = 'Whatever you want to search'
url = 'https://www.google.com/search'
# Let requests build and percent-encode the query string.
response = requests.get(url, params={'q': q})

saveFile = open('response.txt', 'w')
saveFile.write(response.text)
saveFile.close()
Or, if you want to stick to urllib, you can do this:
import urllib.parse
import urllib.request

url = 'https://www.google.com/search'
q = 'Search Query'
headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"}
# Encode the query so spaces and special characters are escaped,
# and send it as a GET (no data argument).
query = urllib.parse.urlencode({'q': q})
request = urllib.request.Request(url + '?' + query, headers=headers)
response = urllib.request.urlopen(request).read()  # the body of the response, as bytes

saveFile = open('withHeaders.txt', 'w')
saveFile.write(response.decode('utf-8'))
saveFile.close()
Here is an example, in reference to www.pythonforbeginners:
# Importing the modules
import urllib.parse
import urllib.request
# your search text
text = "hi google"
# Define the url; the query must go after /search?q= (a fragment after #
# is never sent to the server) and must be percent-encoded.
url = 'http://www.google.com/search?q=' + urllib.parse.quote(text)
# Add your headers
headers = {'User-Agent': 'Mozilla 5.10'}
# Create the Request.
request = urllib.request.Request(url, None, headers)
# Getting the response
response = urllib.request.urlopen(request)
# Print the response body
print(response.read())
How can I log in to tumblr using requests in Python 3?
Here is my code, but it doesn't work well and just goes back to the login page.
I used requests.post to post the log-in form data, and it failed.
import requests
from bs4 import BeautifulSoup

start_url = 'https://www.tumblr.com'
# set a session for the requests
s = requests.Session()
s.headers.update({'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0',
                  'accept-language': 'zh-CN,zh;'})
# get the form_key for the log-in
r = s.get(start_url)
login_soup = BeautifulSoup(r.text, 'lxml')
hidden_div = login_soup.find('div', class_='form_row_hidden').find_all('input')
key_dict = {}
for input_tag in hidden_div:
    tmp_dict = input_tag.attrs
    key_dict.update({tmp_dict['name']: tmp_dict['value']})
user_data_dict = {'determine_email': '×××××××××',
                  'user[email]': '××××××××',
                  'user[password]': '××××××××',
                  'user[age]': '',
                  'tumblelog[name]': ''}
key_dict.update(user_data_dict)
# log in to tumblr; the session already carries the headers set above
r_login = s.post(start_url, data=key_dict)
home_soup = BeautifulSoup(r_login.text, 'lxml')
print(home_soup)
# the output is still the log-in page.
You're nearly there.
Firstly, you have to make a request to the tumblr login page (https://tumblr.com/login). (You did.)
Then, you have to parse the HTML page and get the form_key value. This value is needed to make a real login.
Finally, make a POST request with the payload:
{'user[email]': your_mail,
'user[password]': your_pass,
'form_key': form_key
}
Below is sample code in Python 2, but I'm not using BeautifulSoup (you asked to use requests only ;)
In [1]: import requests
In [2]: from lxml import html
In [3]: url = 'https://www.tumblr.com/login'
In [4]: ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36'
In [5]: headers = {'User-Agent': ua}
In [6]: s = requests.session()
In [7]: lg = s.post(url, headers=headers)
In [8]: lg_html = html.fromstring(str(lg.text))
In [9]: form_key = lg_html.xpath("//meta[@name='tumblr-form-key']/@content")[0]
In [10]: payload = {'user[email]': 'your_mail',
....: 'user[password]': 'your_pass',
....: 'form_key': form_key}
In [11]: # real login
In [12]: s.post(url, headers=headers, data=payload)
Out[12]: <Response [200]>
In [13]: print s.get('https://www.tumblr.com/svc/post/get_post_form_builder_data').text
{"meta":{"status":200,"msg":"OK"},"response":{"channels":[{"name":"your_name","tags":[]}],"limits":{"videoSecondsRemaining":300,"preuploadPhotoUsed":0,"preuploadAudioUsed":0,"inlineEmbedsPerPost":5}}}
import requests
from bs4 import BeautifulSoup

def findPosts():
    url = 'http://espn.go.com/nba/scoreboard'
    headers = {}
    headers['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.3"
    soup = BeautifulSoup(requests.get(url, headers=headers).text, "html.parser")
    team1 = soup.find_all('a', {'name': "&lpos=nba:scoreboard:team"})
    score1 = soup.find_all('td', {'class': 'total'})
    print(team1)
    print(score1)

findPosts()
I am receiving an empty list, but I am sure that the URL's source code contains the elements I specified. Is there something in the BeautifulSoup documentation that I am missing?
The data on that page is dynamically created through Javascript. If you right click in your browser -> view source, and look for the anchors with the name you provided, you will find nothing.
From what I can tell, all of the JSON data for the page to be created is already on the page, so you don't need to make any extra requests to get the data you want.
To find the JSON data on the page, I searched for one of the team names (Mavericks) and saw a massive Javascript object containing what appears to be the data you want to scrape.
You can extract the JSON using a regex and access the data using dict notation:
from bs4 import BeautifulSoup
import requests
import re
import json

url = 'http://espn.go.com/nba/scoreboard'
headers = {}
headers['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.3"
soup = BeautifulSoup(requests.get(url, headers=headers).text, "html.parser")
# The page data lives in a Javascript object inside the 8th <script> tag;
# grab everything between the '=' and ';window' and parse it as JSON.
script = soup.find_all('script')[7].get_text()
map_search = re.search(r'^.*?= (\{.*);window.*', script)
mapData = map_search.group(1)
mapDataObj = json.loads(mapData)
scores = mapDataObj['events'][0]['competitions'][0]['competitors'][1]['linescores']
name = mapDataObj['events'][0]['competitions'][0]['competitors'][1]['team']['shortDisplayName']
total_score = mapDataObj['events'][0]['competitions'][0]['competitors'][1]['score']
print('Team: %s' % name)
for score in scores:
    print('Score: %s' % score['value'])
print('Total score: %s' % total_score)
Output:
Team: Pacers
Score: 19
Score: 24
Score: 27
Score: 30
Total score: 100
I feel puzzled.
My idea is to send a request to the URL, extract the hidden POST fields from the web page, and then send them back to the web page. When I used urllib.request in Python I failed, but when I used requests instead, it worked!
Please tell me why....
Here is the code; the commented-out lines are the urllib.request version I used.
import urllib.request
import http.cookiejar
import re
import requests
loginUrl='https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn'
#Here is the urllib.request code
#cookies=http.cookiejar.MozillaCookieJar()
#handler=urllib.request.HTTPCookieProcessor(cookies)
#opener=urllib.request.build_opener(handler)
headers = {
    'Origin': 'http://passport.csdn.net',
    'Referer': 'http://passport.csdn.net/account/login?from=http%3A%2F%2Fmy.csdn.net%2Fmy%2Fmycsdn',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36 LBBROWSER'
}
#Here is the requests code
s = requests.Session()
data = s.get(loginUrl)
data = data.text
#request = urllib.request.Request(loginUrl)
#response = urllib.request.urlopen(request)
#data = response.read().decode('utf-8')
# get the values of lt and execution from the web page
pattern_lt = re.compile('<input type="hidden" name="lt" value="(.*?)" />',re.S)
lt = re.findall(pattern_lt,data)
lt = lt[0]
pattern_exe = re.compile('<input type="hidden" name="execution" value="(.*?)" />',re.S)
exe = re.findall(pattern_exe,data)
exe = exe[0]
postDict = {
    'username': 'qinyufeng_hdq@163.com',
    'password': 'csdn690076598',
    'lt': lt,
    'execution': exe,
    '_eventId': 'submit'
}
r = s.post(loginUrl, data=postDict)
#postData = urllib.parse.urlencode(postDict).encode()
#request = urllib.request.Request(loginUrl, postData,headers)
#response = opener.open(request)
#data = response.read().decode('UTF-8')
print (r.text)
I'm not good at English, and I hope you get my idea. Thank you for reading my problem.