I have some code to check whether an Instagram account exists or not:
import requests

exist = []
url = []
for i in cli:  # cli holds the candidate usernames
    r = requests.get("https://www.instagram.com/" + i + "/")
    if r.apparent_encoding == 'Windows-1252':
        exist.append(i)
        url.append("instagram.com/" + i + "/")

After running it, exist contains:

['duolingoenglishtest',
 'duolingo',
 'duolingoespanol',
 'duolingofrance']
I want to take a screenshot of each Instagram account. I think I have found a way to screenshot each account, but I don't know how to change the screenshot's filename for each image.
for ss in exist:
    driver.get("https://www.instagram.com/" + ss + "/")
    time.sleep(5)
    screenshot = driver.save_screenshot('Pictures/Insta2.png')
driver.quit()
I really appreciate the help,
Thanks!
You could use your exist entries as filenames:
screenshot = driver.save_screenshot('Pictures/' + ss + '.png')
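Plugged into your existing loop, that would look something like this (a sketch reusing your driver, your exist list and your timing):

for ss in exist:
    driver.get("https://www.instagram.com/" + ss + "/")
    time.sleep(5)
    # each account gets its own file, e.g. Pictures/duolingo.png
    driver.save_screenshot('Pictures/' + ss + '.png')
driver.quit()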
Or set up a numbering scheme:
i = 1
for ss in exist:
    driver.get("https://www.instagram.com/" + ss + "/")
    time.sleep(5)
    screenshot = driver.save_screenshot('Pictures/Insta' + str(i) + '.png')
    i += 1
driver.quit()
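A slightly more idiomatic variant of the numbering scheme uses enumerate instead of a manual counter:

for i, ss in enumerate(exist, start=1):
    driver.get("https://www.instagram.com/" + ss + "/")
    time.sleep(5)
    driver.save_screenshot('Pictures/Insta' + str(i) + '.png')
driver.quit()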
I want to get the values for the fields shown in the attached picture. This is my sample code; it's not fetching the required fields, and any corrections are welcome.
span_xpath = "//div[@id='se-siteDetailsPanel-panel']"

name_xpath = "//div[@id='se-siteDetailsPanel-name']" + span_xpath
site_data.append(browser.find_element_by_xpath(name_xpath).text)

# address:
adrs1_xpath = "//div[@id='se-siteDetailsPanel-firstAddress']" + span_xpath
adrs2_xpath = "//div[@id='se-siteDetailsPanel-address']" + span_xpath
address = browser.find_element_by_xpath(adrs1_xpath).text + \
          browser.find_element_by_xpath(adrs2_xpath).text
site_data.append(address)

# installed:
installed_xpath = "//div[@id='se-siteDetailsPanel-installationDate']" + span_xpath
site_data.append(browser.find_element_by_xpath(installed_xpath).text)

# updated:
updated_xpath = "//div[@id='se-siteDetailsPanel-lastUpdateTime']" + span_xpath
site_data.append(browser.find_element_by_xpath(updated_xpath).text)

# peak:
peak_xpath = "//div[@id='se-siteDetailsPanel-peakPower']" + span_xpath
peak = browser.find_element_by_xpath(peak_xpath).text
site_data.append(peak.split()[0])
You can try using XPath or By.ID.
If you cannot find the XPath, try the ChroPath extension in Chrome; it makes finding the XPath easy.
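For example, a minimal sketch using the element ids from your snippet (assuming the value text sits in a nested span, which is an assumption on my part):

# Hedged sketch: locate the field container by its id, then read the
# nested <span> text (the nested-span structure is assumed).
name_el = browser.find_element_by_id("se-siteDetailsPanel-name")
site_data.append(name_el.find_element_by_tag_name("span").text)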
@itronic1990 ... your hint worked. Actually I saw that the span XPath was wrong; I corrected the span path and it started fetching the values.
I am trying to take a full page screenshot on Internet Explorer using Selenium.
Looking through the Options.py code from selenium/webdriver/ie I found these lines:
class Options(object):

    KEY = 'se:ieOptions'
    SWITCHES = 'ie.browserCommandLineSwitches'

    BROWSER_ATTACH_TIMEOUT = 'browserAttachTimeout'
    ELEMENT_SCROLL_BEHAVIOR = 'elementScrollBehavior'
    ENSURE_CLEAN_SESSION = 'ie.ensureCleanSession'
    FILE_UPLOAD_DIALOG_TIMEOUT = 'ie.fileUploadDialogTimeout'
    FORCE_CREATE_PROCESS_API = 'ie.forceCreateProcessApi'
    FORCE_SHELL_WINDOWS_API = 'ie.forceShellWindowsApi'
    FULL_PAGE_SCREENSHOT = 'ie.enableFullPageScreenshot'   # <-- this one
    IGNORE_PROTECTED_MODE_SETTINGS = 'ignoreProtectedModeSettings'
    IGNORE_ZOOM_LEVEL = 'ignoreZoomSetting'
    INITIAL_BROWSER_URL = 'initialBrowserUrl'
    NATIVE_EVENTS = 'nativeEvents'
    PERSISTENT_HOVER = 'enablePersistentHover'
    REQUIRE_WINDOW_FOCUS = 'requireWindowFocus'
    USE_PER_PROCESS_PROXY = 'ie.usePerProcessProxy'
    VALIDATE_COOKIE_DOCUMENT_TYPE = 'ie.validateCookieDocumentType'

    ...

    @property
    def full_page_screenshot(self):
        """ Returns the options Full Page Screenshot value """
        return self._options.get(self.FULL_PAGE_SCREENSHOT)

    @full_page_screenshot.setter
    def full_page_screenshot(self, value):
        """
        Sets the options Full Page Screenshot value

        :Args:
         - value: boolean value
        """
        self._options[self.FULL_PAGE_SCREENSHOT] = value
However, I cannot for the life of me figure out how to use these. Any help would be appreciated, or any other tips for taking a full-page screenshot in IE would be helpful.
Thank you.
I use maximize_window() to maximize the browser window, then use save_screenshot() to take the screenshot. You can refer to the code sample below; it works well in IE 11:
from selenium import webdriver
import time
url = "https://www.google.com/"
driver = webdriver.Ie(executable_path='IEDriverServer.exe')
driver.maximize_window()
driver.get(url)
time.sleep(3)
driver.save_screenshot("C:\\your\\path\\filename.png")
Please note to change the path to your own.
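If you specifically want to try the ie.enableFullPageScreenshot capability from the Options.py source quoted in the question, a minimal sketch could look like this (assuming a Selenium version that exposes webdriver.IeOptions; whether IE actually honors the flag is not something I have verified):

from selenium import webdriver

options = webdriver.IeOptions()
# Property shown in the Options.py source above; its effect depends on the
# IEDriverServer/Selenium versions (assumption, not verified here).
options.full_page_screenshot = True

driver = webdriver.Ie(executable_path='IEDriverServer.exe', options=options)
driver.get("https://www.google.com/")
driver.save_screenshot("C:\\your\\path\\filename.png")
driver.quit()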
I am trying to create a Python/Selenium project which checks whether the people in my WhatsApp chat list are online or offline. Basically it brute-forces one by one to check whether someone is online or not, and then it saves the data in an Excel file. It also gives a green background to the people who are online.
Here is my code:
from selenium import webdriver  # missing import added
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from openpyxl import Workbook
from openpyxl.styles import PatternFill
import time

workbook = Workbook()
sheet = workbook.active

browser = webdriver.Chrome(executable_path=r"F:\software\chromedriver_win32\chromedriver.exe")
browser.get('https://web.whatsapp.com/')
print("Loading..\n")
for x in range(5, 0, -1):
    print(x)
    time.sleep(1)

# The function below checks whether the 'online' element exists or not.
# I got the class name by inspecting the WhatsApp Web page.
def check_exists_by_xpath():
    try:
        browser.find_element_by_xpath('//span[@class="O90ur _3FXB1"]')
    except NoSuchElementException:
        return False
    return True

count = 1
# The xpath gets the names of the people in my chat list.
for iterator in browser.find_elements_by_xpath('//div[@class="_2wP_Y"]'):
    iterator.click()
    cellA = "A" + str(count)
    cellB = "B" + str(count)
    time.sleep(2)
    name = browser.find_element_by_xpath('//div[@class="_3XrHh"]/span').text
    if check_exists_by_xpath() == True:
        sheet[cellA] = name
        sheet[cellB] = " isOnline\n"
        sheet[cellA].fill = PatternFill(start_color="a4d968", end_color="a4d968", fill_type="solid")
        sheet[cellB].fill = PatternFill(start_color="a4d968", end_color="a4d968", fill_type="solid")
    if check_exists_by_xpath() == False:
        sheet[cellA] = name
        sheet[cellB] = " isOffline\n"
    count = count + 1

workbook.save(filename="WhatsApp_Data.xlsx")
print("Complete..!")
browser.close()
But I can't understand why the code stops after collecting data for 18 people. Also, can anyone suggest a better technique to achieve this, other than brute-forcing?
The code just clicks on each name in the WhatsApp Web chat list, and if the element which displays the online message (beneath the name) exists, it returns true, otherwise false.
This question already has answers here:
Website blocking Selenium : is there a way to bypass?
(2 answers)
Closed 3 years ago.
I read a lot of posts on the topic, and also tried some of this article's advice, but I am still blocked:
https://www.scraperapi.com/blog/5-tips-for-web-scraping
1. IP Rotation: done. I'm using a VPN and often changing IP (but not DURING the script, obviously).
2. Set a Real User-Agent: implemented fake-useragent, with no luck.
3. Set other request headers: tried with SeleniumWire, but how do I use it at the same time as 2.?
4. Set random intervals in between your requests: done, but at the present time I cannot even access the starting home page!
5. Set a referer: same as 3.
6. Use a headless browser: no clue.
7. Avoid honeypot traps: same as 4.
10. Irrelevant.
The website I want to scrape: https://www.winamax.fr/paris-sportifs/
Without Selenium: it goes smoothly to a page with some games and their odds, and I can navigate from here
With Selenium: the page shows a "Winamax est actuellement en maintenance" ("Winamax is currently under maintenance") message, and no games and no odds.
Try to execute this piece of code and you might get blocked quite quickly:
from selenium import webdriver
import time
from time import sleep
import json

driver = webdriver.Chrome(executable_path="chromedriver")
driver.get("https://www.winamax.fr/paris-sportifs/")  # I'm even blocked here now!!!

toto = driver.page_source.splitlines()
titi = {}
matchez = []
matchez_detail = []
resultat_1 = {}
resultat_2 = {}
taratata = 1
comptine = 1

for tut in toto:
    if tut[0:53] == "<script type=\"text/javascript\">var PRELOADED_STATE = ":
        titi = json.loads(tut[53:tut.find(";var BETTING_CONFIGURATION = ")])

for p_id in titi.items():
    if p_id[0] == "sports":
        for fufu in p_id:
            if isinstance(fufu, dict):
                for tyty in fufu.items():
                    resultat_1[tyty[0]] = tyty[1]["categories"]

for p_id in titi.items():
    if p_id[0] == "categories":
        for fufu in p_id:
            if isinstance(fufu, dict):
                for tyty in fufu.items():
                    resultat_2[tyty[0]] = tyty[1]["tournaments"]

for p_id in resultat_1.items():
    for tgtg in p_id[1]:
        for p_id2 in resultat_2.items():
            if str(tgtg) == p_id2[0]:
                for p_id3 in p_id2[1]:
                    matchez.append("https://www.winamax.fr/paris-sportifs/sports/" + str(p_id[0]) + "/" + str(tgtg) + "/" + str(p_id3))

for alisson in matchez:
    print("compet " + str(taratata) + "/" + str(len(matchez)) + " : " + alisson)
    taratata = taratata + 1
    driver.get(alisson)
    sleep(1)
    elements = driver.find_elements_by_xpath("//*[@id='app-inner']/div/div[1]/span/div/div[2]/div/section/div/div/div[1]/div/div/div/div/a")
    for elm in elements:
        matchez_detail.append(elm.get_attribute("href"))

for mat in matchez_detail:
    print("match " + str(comptine) + "/" + str(len(matchez_detail)) + " : " + mat)
    comptine = comptine + 1
    driver.get(mat)
    sleep(1)
    elements = driver.find_elements_by_xpath("//*[@id='app-inner']//button/div/span")
    for elm in elements:
        elm.click()
        sleep(1)  # and after, my specific code to scrape what I want
I recommend using requests; I don't see a reason to use Selenium, since you said requests works, and requests can work with pretty much any site as long as you are using appropriate headers. You can see the headers needed by looking at the developer console in Chrome or Firefox and checking the request headers.
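For instance, a minimal sketch of that approach (the header values below are placeholders; copy the real ones your own browser sends, visible in the Network tab of the developer console):

import requests

# Placeholder headers (assumptions): replace them with the exact values your
# browser sends, taken from the developer console's Network tab.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) ...",
    "Accept-Language": "fr-FR,fr;q=0.9,en;q=0.8",
    "Referer": "https://www.winamax.fr/",
}

response = requests.get("https://www.winamax.fr/paris-sportifs/", headers=headers)
print(response.status_code)
# The page embeds its data in the PRELOADED_STATE variable, so response.text
# can be parsed the same way driver.page_source was in the question.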
I'm writing a crawler using Selenium, Python and PhantomJS to use Google's reverse image search. So far I've successfully been able to upload an image and crawl the search results on the first page. However, when I try to click on the search results navigation, I get a StaleElementReferenceError. I have read about it in many posts, but I still could not implement the solution. Here is the code that breaks:
ele7 = browser.find_element_by_id("nav")
ele5 = ele7.find_elements_by_class_name("fl")
count = 0
for elem in ele5:
    if count <= 2:
        print str(elem.get_attribute("href"))
        elem.click()
        browser.implicitly_wait(20)
        ele6 = browser.find_elements_by_class_name("rc")
        for result in ele6:
            f = result.find_elements_by_class_name("r")
            for line in f:
                link = line.find_elements_by_tag_name("a")[0].get_attribute("href")
                links.append(link)
                parsed_uri = urlparse(link)
                domains.append('{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri))
        count += 1
The code breaks at print str(elem.get_attribute("href")). How can I solve this?
Thanks in advance.
Clicking a link causes the browser to go to another page, which makes the references to elements on the old page (ele5, elem) invalid.
Modify the code so it does not reference the invalidated elements.
For example, you can collect the URLs before you visit the other pages:
ele7 = browser.find_element_by_id("nav")
ele5 = ele7.find_elements_by_class_name("fl")
urls = [elem.get_attribute('href') for elem in ele5]  # <-- collect hrefs first

browser.implicitly_wait(20)

for url in urls[:2]:  # <-- iterate over the saved urls
    print url
    browser.get(url)  # <-- use `browser.get` instead of `click`;
                      #     clicking a stale element would cause the error
    ele6 = browser.find_elements_by_class_name("rc")
    for result in ele6:
        f = result.find_elements_by_class_name("r")
        for line in f:
            link = line.find_elements_by_tag_name("a")[0].get_attribute("href")
            links.append(link)
            parsed_uri = urlparse(link)
            domains.append('{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri))
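An alternative sketch (same assumptions about the Google results markup) is to re-locate the navigation links on every iteration instead of keeping references across page loads:

# Sketch: re-find the pagination links each time instead of reusing
# references that become stale after navigation.
nav_links = browser.find_element_by_id("nav").find_elements_by_class_name("fl")
for i in range(min(len(nav_links), 2)):
    # look the i-th link up again on the current page before using it
    link = browser.find_element_by_id("nav").find_elements_by_class_name("fl")[i]
    print(link.get_attribute("href"))
    link.click()
    browser.implicitly_wait(20)
    # ...same result-scraping loop as above...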