edit page on Wikipedia, badtoken - python

I want to log in and then edit a page. I can get an edit token, but when I pass it to action=edit the API returns badtoken. Am I missing anything?
import urllib.parse
import requests

__apiJson__ = "https://zh.wikipedia.org/w/api.php?format=json&"

def login():
    username = "???"  # use your own
    password = "???"  # use your own
    username = urllib.parse.quote(username)
    password = urllib.parse.quote(password)
    r = requests.post(__apiJson__ + "action=login&lgname={username}&lgpassword={password}".
                      format(username=username, password=password))
    content = r.json()
    content = content['login']
    if content['result'] == 'NeedToken':
        cookies = r.cookies
        token = content['token']
        r = requests.post(__apiJson__ + "action=login&lgname={username}&lgpassword={password}&lgtoken={token}".
                          format(username=username, password=password, token=token), cookies=cookies)
        content = r.json()
        content = content['login']
    assert content['result'] == "Success"
    return r.cookies

def getEditToken(archivePage):
    r = requests.post(__apiJson__ + "action=query&prop=info&intoken=edit&titles={title}".format(title=archivePage),
                      cookies=cookies)
    content = r.json()
    content = content['query']['pages']
    content = content[list(content.keys())[0]]
    editToken = content['edittoken']
    if editToken == '+\\':
        raise Exception()
    return editToken

cookies = login()
editToken = getEditToken("User:???/test")  # use your own
editToken = urllib.parse.quote(editToken)
r = requests.post("https://zh.wikipedia.org/w/api.php?action=edit&format=json&title=User:Gqqnb/沙盒&text=helloworld&summary=test&token={token}".format(token=editToken),
                  cookies=cookies, headers={"Content-Type": "application/x-www-form-urlencoded"})
content = r.json()

Answering my own question: use requests.Session() instead of the sessionless requests.get and requests.post, so the login cookies are carried over to the token and edit requests automatically.
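For anyone hitting the same badtoken error, here is a minimal sketch of the Session-based approach, mirroring the API calls from the question (intoken=edit is the older token API; newer MediaWiki versions use meta=tokens). The credentials and page title are placeholders:

import requests

API = "https://zh.wikipedia.org/w/api.php"
session = requests.Session()  # keeps cookies between requests automatically

def login(username, password):
    # step 1: request a login token
    r = session.post(API, data={"format": "json", "action": "login",
                                "lgname": username, "lgpassword": password})
    token = r.json()["login"]["token"]
    # step 2: confirm the login with the token; the session reuses the cookies
    r = session.post(API, data={"format": "json", "action": "login", "lgname": username,
                                "lgpassword": password, "lgtoken": token})
    assert r.json()["login"]["result"] == "Success"

def edit(title, text, summary):
    # fetch an edit token with the same session, then post the edit
    r = session.post(API, data={"format": "json", "action": "query", "prop": "info",
                                "intoken": "edit", "titles": title})
    page = next(iter(r.json()["query"]["pages"].values()))
    edit_token = page["edittoken"]
    r = session.post(API, data={"format": "json", "action": "edit", "title": title,
                                "text": text, "summary": summary, "token": edit_token})
    return r.json()

Because the parameters are passed via data=, requests handles the URL encoding, so urllib.parse.quote is no longer needed.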

Related

Export users with criteria - Python script

The script I have exports all users, but I am looking to export only users with type = b. There are two types of users in the directory, type a and type b, and I only want to export users whose type attribute matches b.
Please help me add a clause/statement to the script so it only pulls users with type "B" and ignores users with any other type.
import requests
import json
import re
import sys
import csv

orgName = ""
apiKey = ""

api_token = "SSWS " + apiKey
headers = {'Accept': 'application/json', 'Content-Type': 'application/json', 'Authorization': api_token}

def GetPaginatedResponse(url):
    response = requests.request("GET", url, headers=headers)
    returnResponseList = []
    responseJSON = json.dumps(response.json())
    responseList = json.loads(responseJSON)
    returnResponseList = returnResponseList + responseList

    if "errorCode" in responseJSON:
        print "\nYou encountered following Error: \n"
        print responseJSON
        print "\n"
        return "Error"
    else:
        headerLink = response.headers["Link"]
        while str(headerLink).find("rel=\"next\"") > -1:
            linkItems = str(headerLink).split(",")
            nextCursorLink = ""
            for link in linkItems:
                if str(link).find("rel=\"next\"") > -1:
                    nextCursorLink = str(link)
            nextLink = str(nextCursorLink.split(";")[0]).strip()
            nextLink = nextLink[1:]
            nextLink = nextLink[:-1]
            url = nextLink

            response = requests.request("GET", url, headers=headers)
            responseJSON = json.dumps(response.json())
            responseList = json.loads(responseJSON)
            returnResponseList = returnResponseList + responseList
            headerLink = response.headers["Link"]

        return returnResponseList

def DownloadSFUsers():
    url = "https://" + orgName + ".com/api/v1/users"
    responseJSON = GetPaginatedResponse(url)

    if responseJSON != "Error":
        userFile = open("Only-Okta_Users.csv", "wb")
        writer = csv.writer(userFile)
        writer.writerow(["login", "type"])
        for user in responseJSON:
            login = user[u"profile"][u"login"]
            type = user[u"credentials"][u"provider"][u"type"]
            writer.writerow([login, type])

if __name__ == "__main__":
    DownloadSFUsers()
Wrap your statement that writes a user to the csv file in an if statement that tests for the correct type.
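A minimal sketch of that filter inside DownloadSFUsers, assuming the provider type value you want is exactly "B" (check the actual casing and value in your directory):

for user in responseJSON:
    login = user[u"profile"][u"login"]
    user_type = user[u"credentials"][u"provider"][u"type"]
    # only write users whose provider type matches the one we want
    if user_type == "B":
        writer.writerow([login, user_type])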

How do I check if a certain tag is within content and if so don't access site

What I am trying to do is access a site based on input from the user. If the user's input returns no results, the browser shouldn't open. Here is the current code I have.
import requests
from bs4 import BeautifulSoup
import webbrowser

jobsearch = input("What type of job?: ")
location = input("What is your location: ")
url = ("https://ca.indeed.com/jobs?q=" + jobsearch + "&l=" + location)
newurl = url
r = requests.get(newurl)
rcontent = r.content
prettify = BeautifulSoup(rcontent, "html.parser")

def site():
    rcontent = r.content
    no_result = prettify.find("div.no_results")
    if no_result == True:
        pass
        print("nothing")
    else:
        website = webbrowser.open_new(newurl);
        return website

site()
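One way to do this check, sketched below. find("div.no_results") looks for a tag literally named div.no_results, so it never matches, and find() returns an element or None rather than True. Using a CSS selector via select_one() plus a None test works; this assumes the "no results" message lives in an element with class no_results, which you would need to confirm against the page's actual markup:

import webbrowser

import requests
from bs4 import BeautifulSoup

jobsearch = input("What type of job?: ")
location = input("What is your location: ")
url = "https://ca.indeed.com/jobs?q=" + jobsearch + "&l=" + location

r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")

# select_one returns the first matching element, or None if there is no match
no_result = soup.select_one("div.no_results")
if no_result is not None:
    print("nothing")
else:
    webbrowser.open_new(url)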

Scrape facebook AttributeError

I am a beginner with Python. How can I solve
AttributeError: module 'urllib' has no attribute 'Request'
I have looked at other posts but still can't understand how to solve the problem.
Here is the screen capture of the error.
And this is the code (adapted from https://github.com/minimaxir/facebook-page-post-scraper/blob/master/get_fb_posts_fb_page.py):
import urllib.request
import json, datetime, csv, time

app_id = "xxx"
app_secret = "xxx"  # DO NOT SHARE WITH ANYONE!
access_token = "xxx"
page_id = 'xxx'

def testFacebookPageData(page_id, access_token):
    # construct the URL string
    base = "https://graph.facebook.com/v2.4"
    node = "/" + page_id + '/feed'
    parameters = "/?access_token=%s" % access_token
    url = base + node + parameters
    # retrieve data
    response = urllib.request.urlopen(url)
    data = json.loads(response.read().decode('utf-8'))
    print (data)

def request_until_succeed(url):
    req = urllib.request.urlopen(url)
    success = False
    while success is False:
        try:
            response = urllib.urlopen(req)
            if response.getcode() == 200:
                success = True
        except Exception as e:
            print (e)
            time.sleep(5)
            print (url, datetime.datetime.now())
    return response.read()

def getFacebookPageFeedData(page_id, access_token, num_statuses):
    # construct the URL string
    base = "https://graph.facebook.com"
    node = "/" + page_id + "/feed"
    parameters = "/?fields=message,link,created_time,type,name,id,likes.limit(1).summary(true),comments.limit(1).summary(true),shares&limit=%s&access_token=%s" % (num_statuses, access_token) # changed
    url = base + node + parameters
    # retrieve data
    data = json.loads(request_until_succeed(url))
    return data

def processFacebookPageFeedStatus(status):
    # The status is now a Python dictionary, so for top-level items,
    # we can simply call the key.
    # Additionally, some items may not always exist,
    # so must check for existence first
    status_id = status['id']
    status_message = '' if 'message' not in status.keys() else status['message'].encode('utf-8')
    link_name = '' if 'name' not in status.keys() else status['name'].encode('utf-8')
    status_type = status['type']
    status_link = '' if 'link' not in status.keys() else status['link']
    # Time needs special care since a) it's in UTC and
    # b) it's not easy to use in statistical programs.
    status_published = datetime.datetime.strptime(status['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
    status_published = status_published + datetime.timedelta(hours=-5)  # EST
    status_published = status_published.strftime('%Y-%m-%d %H:%M:%S')  # best time format for spreadsheet programs
    # Nested items require chaining dictionary keys.
    num_likes = 0 if 'likes' not in status.keys() else status['likes']['summary']['total_count']
    num_comments = 0 if 'comments' not in status.keys() else status['comments']['summary']['total_count']
    num_shares = 0 if 'shares' not in status.keys() else status['shares']['count']
    # return a tuple of all processed data
    return (status_id, status_message, link_name, status_type, status_link,
            status_published, num_likes, num_comments, num_shares)

def scrapeFacebookPageFeedStatus(page_id, access_token):
    with open('%s_facebook_statuses.csv' % page_id, 'w') as file:
        w = csv.writer(file)
        w.writerow(["status_id", "status_message", "link_name", "status_type", "status_link",
                    "status_published", "num_likes", "num_comments", "num_shares"])

        has_next_page = True
        num_processed = 0  # keep a count on how many we've processed
        scrape_starttime = datetime.datetime.now()

        print (page_id, scrape_starttime)

        statuses = getFacebookPageFeedData(page_id, access_token, 100)

        while has_next_page:
            for status in statuses['data']:
                w.writerow(processFacebookPageFeedStatus(status))

                # output progress occasionally to make sure code is not stalling
                num_processed += 1
                if num_processed % 1000 == 0:
                    print (num_processed, datetime.datetime.now())

            # if there is no next page, we're done.
            if 'paging' in statuses.keys():
                statuses = json.loads(request_until_succeed(statuses['paging']['next']))
            else:
                has_next_page = False

        print (num_processed, datetime.datetime.now() - scrape_starttime)

if __name__ == '__main__':
    scrapeFacebookPageFeedStatus(page_id, access_token)
There is no urllib.Request() in Python 3 - there is urllib.request.Request().
EDIT: you have url = urllib.Request(url) in the error message, but I don't see this line in your code - maybe you are running the wrong file.
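For reference, here is a sketch of request_until_succeed that stays inside the urllib.request module, which is the usual fix for this family of AttributeErrors; the retry logic is kept from the posted code:

import datetime
import time
import urllib.request

def request_until_succeed(url):
    req = urllib.request.Request(url)  # in Python 3, Request lives in urllib.request
    success = False
    while success is False:
        try:
            response = urllib.request.urlopen(req)  # likewise urlopen
            if response.getcode() == 200:
                success = True
        except Exception as e:
            print(e)
            time.sleep(5)
            print(url, datetime.datetime.now())
    return response.read()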

Cannot update attribute in xml file

I want to ask about updating an attribute in an XML file. I've written the code below and it raises no errors; it reads the parameter data, but it does not write the updated values back.
My code:
def user_submit(request):
    baseurl = request.build_absolute_uri()
    parsed = urlparse.urlparse(baseurl)
    params = urlparse.parse_qs(parsed.query)
    param = params.get('Users', [''])[0]
    if request.method == 'POST':
        form = UsersForm(request.POST)
        passw = form.data['password']
        real = form.data['realname']
        # role = form.data['rolename']
        files = os.path.join(settings.WSS, 'users.xml')
        doc = minidom.parse(files)
        items = doc.getElementsByTagName("UserRepository")
        for items2 in items:
            for items3 in items2.getElementsByTagName("User"):
                username = items3.getAttribute('username')
                # items3 = []
                if username == param:
                    username = items3.getAttribute('username')
                    password = items3.getAttribute('password')
                    realname = items3.getAttribute('realname')
                    items3.attributes['password'].value = passw
                    items3.attributes['realname'].value = real
                    # for items4 in items3.getElementsByTagName("Role"):
                    #     results2.append({
                    #         items4.attributes['rolename'].value = role })
        xml_file = open(files, "w")
        doc.writexml(xml_file, encoding="utf-8")
        xml_file.close()
    return HttpResponseRedirect(reverse('users_list'))
The param value is shown in the attached screen capture.
Can anyone help me solve this problem?
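As a sanity check, the minidom update itself can be tested outside the Django view; here is a minimal, self-contained sketch (the file path, username, and new values are stand-ins for yours):

from xml.dom import minidom

doc = minidom.parse("users.xml")  # stand-in path
for user in doc.getElementsByTagName("User"):
    if user.getAttribute("username") == "someuser":   # stand-in username
        user.setAttribute("password", "newpass")      # same effect as .attributes['password'].value = ...
        user.setAttribute("realname", "New Name")

with open("users.xml", "w") as xml_file:
    doc.writexml(xml_file, encoding="utf-8")

If this standalone version persists the change but the view does not, the problem is likely in the view itself, for example param never matching any username attribute, so the branch that sets the new values is never entered.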

More simple way to login into multiple site/pages

I am not quite happy with the way I coded this. Is there a simpler, more convenient way to write this as one function and return the output of the multiple pages?
def login():
    url = "http://192.168.2.45/pricelogin.php"
    r = requests.get(url, auth=('pstats', 'pStats'))
    page = r.text
    return page

def loginhighpricingerror():
    pricingerrorurl = "http://192.168.2.45/airline_error.pl"
    peu = requests.get(pricingerrorurl, auth=('pstats', 'pstats'))
    peupage = peu.text
    return peupage

def loginsuccessfullbookings():
    sucurl = "http://192.168.2.45/airlinessucbookings.php"
    suc = requests.get(sucurl, auth=('pstats', 'pstats'))
    sucpage = suc.text
    return sucpage
Use session instead of sessionless module functions:
s = requests.Session()
s.auth = ('pstats', 'pStats')

def login():
    url = "http://192.168.2.45/pricelogin.php"
    r = s.get(url)
    page = r.text
    return page

def loginhighpricingerror():
    pricingerrorurl = "http://192.168.2.45/airline_error.pl"
    peu = s.get(pricingerrorurl)
    peupage = peu.text
    return peupage

def loginsuccessfullbookings():
    sucurl = "http://192.168.2.45/airlinessucbookings.php"
    suc = s.get(sucurl)
    sucpage = suc.text
    return sucpage
Of course this should be refactored, but hopefully you can see what I mean.
I would generalize the login function, passing the url as parameter:
def login(url):
    try:
        r = requests.get(url, auth=('pstats', 'pStats'))
    except requests.exceptions.RequestException as e:
        print(e)
        return ''  # but maybe you want to do something else
    page = r.text
    return page
And then you can run it for each url accumulating the pages in an array for example:
urls = ["http://192.168.2.45/pricelogin.php", "http://192.168.2.45/airline_error.pl", "http://192.168.2.45/airlinessucbookings.php"]
pages = []  # resulting array
for url in urls:
    pages.append(login(url))
Note: I added a check on an exception for requests.get since this might fail when there is a connection problem.
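Combining the two suggestions, here is a short sketch that keeps the shared credentials on a single Session and loops over the URLs (URLs and credentials taken from the question):

import requests

s = requests.Session()
s.auth = ('pstats', 'pStats')

def fetch(url):
    try:
        r = s.get(url)
        r.raise_for_status()  # treat HTTP error codes like connection errors
    except requests.exceptions.RequestException as e:
        print(e)
        return ''
    return r.text

urls = ["http://192.168.2.45/pricelogin.php",
        "http://192.168.2.45/airline_error.pl",
        "http://192.168.2.45/airlinessucbookings.php"]
pages = [fetch(url) for url in urls]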
