svg tag scraping from funnels - python

I am trying to scrape data from here but getting error.
I have taken code from here Scraping using Selenium and python
This code was working perfectly fine but now I am getting error
wait.until(EC.visibility_of_element_located((By.LINK_TEXT, "All Boards")))
raise TimeoutException(message, screen, stacktrace)

After clicking on pe-funnel link , you can try with this code :
wait.until(EC.visibility_of_element_located((By.XPATH, "//*[name()='text' and #text-anchor='end']")))
all_data = driver.find_elements_by_xpath("//*[name()='text' and #text-anchor='end']")
print(len(all_data))
for data in all_data:
print(data.text)
UPDATE1 :
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.select import Select
import time
driver = webdriver.Chrome(executable_path = r'D:/Automation/chromedriver.exe')
driver.maximize_window()
driver.get("https://eu1.dashboard.clevertap.com/login.html")
wait = WebDriverWait(driver, 20)
action = ActionChains(driver)
driver.switch_to.default_content()
wait.until(EC.element_to_be_clickable((By.NAME, "email"))).send_keys("abhishe***")
wait.until(EC.element_to_be_clickable((By.NAME,"password"))).send_keys("***")
wait.until(EC.element_to_be_clickable((By.ID,"submitBtn"))).click()
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'div.ct-breadcrumb')))
driver.switch_to.default_content()
action.move_to_element(driver.find_element_by_css_selector("div.sidebar__brand+ul>li:first-child>a")).perform()
wait.until(EC.visibility_of_element_located((By.LINK_TEXT, "All Boards")))
wait.until(EC.element_to_be_clickable((By.LINK_TEXT,"All Boards"))).click()
wait.until(EC.element_to_be_clickable((By.LINK_TEXT,"pe-funnel"))).click()
time.sleep(1)
driver.execute_script("window.scrollTo(0,100)")
wait.until(EC.presence_of_all_elements_located((By.XPATH,"//*[name()='svg' and #class='highcharts-root']")))
all_charts = driver.find_elements_by_xpath("//*[name()='svg' and #class='highcharts-root']")
length_of_list = len(all_charts)
print(length_of_list)
i=0
while(i<len(all_charts)):
wait.until(EC.presence_of_all_elements_located((By.XPATH,"//*[name()='svg' and #class='highcharts-root']")))
all_charts = driver.find_elements_by_xpath("//*[name()='svg' and #class='highcharts-root']")
all_charts[i].click()
i=i+1
try:
print("Switch to frame")
wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID,"wiz-iframe-intent")))
print("Switched to frame")
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='OK' and #class='annoucement-popup__btn']"))).click()
driver.switch_to.default_content()
print("Clicked on Ok button")
except:
print("in catch block")
pass
print("last of CATCH BLOCK")
driver.execute_script("window.scrollTo(0,1100)")
ActionChains(driver).move_to_element(driver.find_element_by_css_selector("input[data-introp='View your analysis']")).click().perform()
#wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,""))).click()
#ActionChains(driver).move_to_element(driver.find_element_by_css_selector("label[for='funnelProgressionPercent']")).send_keys(Keys.END).perform()
wait.until(EC.presence_of_all_elements_located((By.XPATH,"//*[name()='tspan' and #class='highcharts-text-outline']")))
all_values = driver.find_elements_by_xpath("//*[name()='tspan' and #class='highcharts-text-outline']")
for values in all_values:
print(values.text)
driver.execute_script("window.history.go(-1)")
driver.refresh()

Related

how to click on link one by one which are saved in list selenium python?

I want to click on link and get back to main screen. After that click on second link and get back to main page and so on
When it goes to first link wait until i extract the title, phone number, location and name of the phone
my code is below
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.action_chains import ActionChains
driver = webdriver.Chrome(executable_path=r"C:\Users\Faheem\Downloads\New folder
(22)\chromedriver.exe")
driver.get(r"https://www.olx.com.pk/mobile-phones_c1453")
driver.find_element(By.XPATH, "//span[contains(text(),'Login')]").click()
time.sleep(2)
driver.find_element(By.XPATH, '//div[#class="_1075545d _21b291bd _42f36e3b d059c029
_858a64cf"]/button[4]').click()
time.sleep(3)
driver.find_element(By.ID, "phone").send_keys("3119407012")
time.sleep(2)
driver.find_element(By.XPATH, '//form[#class="a755fcd9"]/button/span').click()
time.sleep(2)
driver.find_element(By.ID, "password").send_keys("musliminstitute1")
time.sleep(2)
driver.find_element(By.XPATH, '//form[#class="a755fcd9"]/button').click()
time.sleep(2)
a = [elem.get_attribute('href') for elem in driver.find_elements(By.XPATH, '//div[#class =
"_1075545d _96d4439a d059c029 _858a64cf"]/ul/li/article/div[2]/a')]
b = driver.find_elements(By.XPATH, '//div[#class = "_1075545d _96d4439a d059c029
_858a64cf"]/ul/li/article/div[2]/a')
print(type(b))
for i in range(0,len(b)):
b = driver.find_elements(By.XPATH,
'//div[#class = "_1075545d _96d4439a d059c029
_858a64cf"]/ul/li/article/div[2]/a')
b[i].click()
time.sleep(3)
driver.back()
i am getting the following error
selenium.common.exceptions.ElementNotInteractableException: Message: element not
interactable: element has zero size

ElementClickInterceptedException Error won't fix

Ok this is a bit embarrassing because I've asked a similar question on here sometime ago, but I tried the suggested solution ie (wait till element clickable), but it didn't work. So here's my code snipped.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import ElementClickInterceptedException, ElementNotInteractableException, TimeoutException, WebDriverException, NoSuchElementException
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from time import sleep
import re
import pandas as pd
def get_links(link):
driver = webdriver.Firefox()
driver.get(link)
driver.implicitly_wait(50)
sleep(5)
_flag = True
knt = 0
while _flag:
try:
WebDriverWait(driver, 50).until(EC.invisibility_of_element((By.XPATH, "//a[contains(class='ot-buttons-fw')]")))
WebDriverWait(driver, 50).until(EC.element_to_be_clickable((By.XPATH, "//a[#class='event__more event__more--static']")))
driver.find_element_by_xpath("//*[contains(text(), 'Show more matches')]").click()
print("works here!")
print("clicked....................................")
sleep(5)
_flag = True
#tmp = driver.find_elements_by_xpath("//span[contains(text(), 'NBA - Pre-season')]")
#if len(tmp) > 0:
#print("Found them!")
#_flag = False
if knt > 5: # For testing
print("Nuff clicked")
_flag = False
except(ElementNotInteractableException):
print("Error!")
_flag = False
driver.close()
return None
link = "https://www.flashscore.com/basketball/usa/nba/results/"
_ = get_links(link)
For some reason I keep getting an ElementClickInterceptedException Error at the driver.find_element_by_xpath("//*[contains(text(), 'Show more matches')]").click() line. Any help can do please
Your element overlap with other element, it cause the ElementClickInterceptedException error appear.
Before perform you code, please close the cookies popup with this code snippet:
def get_links(link):
driver = webdriver.Firefox()
driver.get(link)
driver.implicitly_wait(50)
sleep(5)
#here, close popup
if(len(driver.find_elements_by_id('onetrust-accept-btn-handler'))>0):
driver.find_element_by_id('onetrust-accept-btn-handler').click()
_flag = True
knt = 0
while _flag:
....
....
And remove this line:
WebDriverWait(driver, 50).until(EC.invisibility_of_element((By.XPATH, "//a[contains(class='ot-buttons-fw')]")))
This is invalid xpath expression, and no needed, have handled by if(popup accept) condition the above.

Selenium For Loop Stuck on a popup message

I have an application that is almost working as intended. The problem arises after it runs through the loop on the 5th instance. The search states there are two results which results in the same end result. When this occurs I'd like to select the first of the two.
The popup messages looks like the following:
I'm using the following code to create the list and then loop:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
#service = Service('C:\Program Files\Chrome Driver\chromedriver.exe')
URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
driver = webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe')
driver.get(URL)
category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img+a")))]
classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.propText")))]
dfObj = pd.DataFrame(category)
dfObj.columns =['Category']
dfObj.dropna(inplace = True)
new = dfObj["Category"].str.split("(", n = 1, expand = True)
dfObj["New Category"]= new[0]
dfObj["Count"]= new[1]
dfObj.drop(columns =["Category"], inplace = True)
dfObj['Count'] = dfObj['Count'].str.rstrip(')')
dfObj['IsNumber'] = dfObj['Count'].str.isnumeric()
dfObj = dfObj[(dfObj['IsNumber'] == True)]
searchcat = dfObj['New Category'].tolist()
print(searchcat)
dfObj.to_csv('tabledf.csv',index=False)
time.sleep(8)
driver.quit()
for search in searchcat:
page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
driver = webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe')
driver.get(page)
time.sleep(4)
table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))
time.sleep(4)
filename = search[0:30]+'table.csv'
pd.read_html(driver.page_source)[1].iloc[:,:-1].to_csv(filename,index=False)
time.sleep(4)
driver.quit()
The loop will continue to run if I manually click each search result. However, I would like for selenium to always select the first option. How would I go about this?
Updated Code:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException
import time
with webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe') as driver:
URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
driver.get(URL)
category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img+a")))]
dfObj = pd.DataFrame(category)
dfObj.columns =["Category"]
dfObj.dropna(inplace = True)
new = dfObj["Category"].str.split("(", n = 1, expand = True)
dfObj["New Category"]= new[0]
dfObj["Count"]= new[1]
dfObj.drop(columns =["Category"], inplace = True)
dfObj["Count"] = dfObj["Count"].str.rstrip(')')
dfObj["IsNumber"] = dfObj["Count"].str.isnumeric()
dfObj = dfObj[(dfObj["IsNumber"] == True)]
searchcat = dfObj["New Category"].tolist()
dfObj.to_csv('tabledf.csv',index=False)
time.sleep(3)
for search in searchcat:
page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
driver = webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe')
driver.get(page)
table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))
modal_wait = WebDriverWait(driver, 1)
try:
modal_el = modal_wait.until(EC.visibility_of_element_located((By.ID, 'optionModal')))
modal_el.find_element(By.CSS_SELECTOR, '.uloption').click()
except TimeoutException:
pass
filename = search[0:30]+'table.csv'
classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(2)")))]
classname = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(1)")))]
classtype = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(3)")))]
df = pd.read_html(driver.page_source)[1].iloc[:,:-1]
df["ClassID"] = pd.Series(classid)
df["ClassName"] = pd.Series(classname)
df["ClassType"] = pd.Series(classtype)
df.to_csv(filename,index=False)
time.sleep(4)
driver.quit()
First of, I will suggest that you use the with context manager. It will handle opening/closing the driver (Chrome) by itself. This ensure if any exception is raised that it will still be closed.
To do so, use:
with webdriver.Chrome() as driver:
...
In your code I see you close/open a new browser for each URL. This is not needed and not doing so will speed up your script. Just use driver.get() to change the URL.
For your main issue, just add a portion of code that will detect the modal and chose the first option. Something along those lines
modal_wait = WebDriverWait(driver, 1)
try:
modal_el = modal_wait.until(EC.element_to_be_clickable((By.ID, 'optionModal')))
modal_el.find_element(By.CSS_SELECTOR, '.uloption').click()
except TimeoutException:
pass
You must include the following imports:
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

Instagram Selenium Web scraping - #Followers

I am scraping an instagramm page where I need to get the user's:
number of posts
Number of followers
I managed to login on instagram then search for the user(in this example 'leonardodicaprio') then go to his page. I am not able to select the text though.
Can someone help please?
Thanks!
# -*- coding: utf-8 -*-
import scrapy
from scrapy_splash import SplashRequest
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
import logging
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time
class InstatestSpider(scrapy.Spider):
name = 'instatest'
allowed_domains = ['www.instagram.com']
start_urls = ['https://www.instagram.com/accounts/login']
def __init__(self):
chrome_option = Options()
#chrome_option.add_argument("--headless")
chrome_path = which("chromedriver")
driver = webdriver.Chrome(executable_path=chrome_path, options = chrome_option)
driver.set_window_size(1920, 1080)
driver.get("https://www.instagram.com/accounts/login")
logging.info('Website opened...')
# username = driver.find_element_by_name("username")
# username = driver.find_element(By.XPATH, '//input[#name="username"]')
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[#name="username"]')))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[#name="password"]')))
username.clear()
username.send_keys("username")
logging.info('Typing Username...')
password.clear()
password.send_keys("password")
logging.info('Typing Password...')
Login_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[#type="submit"]'))).click()
alert_1 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
logging.info('Do NOT save password...')
alert_2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click() #search for a text="Not Now"
logging.info('Do NOT turn notifications on...')
logging.info('Logging Successful...')
influencer = "leonardodicaprio"
driver.get("https://www.instagram.com/" + influencer + "/")
time.sleep(5)
driver.save_screenshot('Influencer_Home_Page.png')
P.S: For the number of followers I want to get the exact number to the nearest digit as found in the title attribute in the selector. Please see picture below:
insta
Getting this error when running:
error in jupyterlab
wait = WebDriverWait(driver, 20)
number_of_post = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='profile_posts'] span"))).text
print(number_of_post)
number_of_follower = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='followed_by_list'] span"))).get_attribute('title')
print(number_of_follower)
error
Updated code:
# -*- coding: utf-8 -*-
import scrapy
from scrapy_splash import SplashRequest
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
import logging
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time
class InstatestSpider(scrapy.Spider):
name = 'instatest'
allowed_domains = ['www.instagram.com']
start_urls = ['https://www.instagram.com/accounts/login']
def __init__(self):
chrome_option = Options()
#chrome_option.add_argument("--headless")
chrome_path = which("chromedriver")
driver = webdriver.Chrome(executable_path=chrome_path, options = chrome_option)
driver.set_window_size(1920, 1080)
driver.get("https://www.instagram.com/accounts/login")
logging.info('Website opened...')
# username = driver.find_element_by_name("username")
# username = driver.find_element(By.XPATH, '//input[#name="username"]')
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[#name="username"]')))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[#name="password"]')))
username.clear()
username.send_keys("username")
logging.info('Typing Username...')
password.clear()
password.send_keys("password")
logging.info('Typing Password...')
Login_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[#type="submit"]'))).click()
alert_1 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
logging.info('Do NOT save password...')
alert_2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click() #search for a text="Not Now"
logging.info('Do NOT turn notifications on...')
logging.info('Logging Successful...')
influencer = "leonardodicaprio"
driver.get("https://www.instagram.com/" + influencer + "/")
time.sleep(5)
wait = WebDriverWait(driver, 20)
number_of_post = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='profile_posts'] span"))).text
print(number_of_post)
number_of_follower = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='followed_by_list'] span"))).get_attribute('title')
print(number_of_follower)
driver.save_screenshot('Influencer_Home_Page.png')
You can use the below CSS_SELECTOR, to get number of posts, and Number of followers. to get title, you can use .get_attribute()
wait = WebDriverWait(driver, 20)
number_of_post = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='profile_posts'] span"))).text
print(number_of_post)
number_of_follower = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='followed_by_list'] span"))).get_attribute('title')
print(number_of_follower)
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Getting error while navigating to next pages and scraping all the data using selenium from the website?

Hi Here I am trying to scrape all the teacher jobs from https://www.naukri.com/ this url I want all the pages data but I am getting only one page data and getting this error
Traceback (most recent call last):
File "naukri.py", line 48, in <module>
driver.execute_script("arguments.click();", next_page)
File "/home/nyros/Documents/mypython/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
'args': converted_args})['value']
File "/home/nyros/Documents/mypython/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/home/nyros/Documents/mypython/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.JavascriptException: Message: javascript error: arguments.click is not a function
(Session info: chrome=80.0.3987.116)
The code which I wrote is:
import selenium.webdriver
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
url ='https://www.naukri.com/'
driver = webdriver.Chrome(r"mypython/bin/chromedriver_linux64/chromedriver")
driver.get(url)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#qsbClick > span.blueBtn'))).click()
driver.find_element_by_xpath('//*[#id="skill"]/div[1]/div[2]/input').send_keys("teacher")
driver.find_element_by_xpath('//*[#id="qsbFormBtn"]').click()
data = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "srp_container.fl")))
result = WebDriverWait(data, 10).until(
EC.presence_of_all_elements_located((By.CLASS_NAME, "row")))
for r in result:
data = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "srp_container.fl")))
result = WebDriverWait(data, 10).until(
EC.presence_of_all_elements_located((By.CLASS_NAME, "row")))
for r in result:
try:
title=r.find_element_by_class_name("desig").text
print('title:',title)
school=r.find_element_by_class_name("org").text
print('school:',school)
location=r.find_element_by_class_name("loc").text
print("location:",location)
salary=r.find_element_by_class_name("salary").text
print("salary:",salary)
except:
pass
print('-------')
next_page = r.find_elements_by_xpath("/html/body/div[5]/div/div[3]/div[1]/div[59]/a/button")
driver.execute_script("arguments.click();", next_page)
Please help me anyone Thanks in advance!
Since the element index of the 'next' button changes from 59 in the first page to 60 in the next pages, you can just find all elements on the page which have class "grayBtn", and click on index [-1] of the list returned, as this will always provide the next button. I removed some unnecessary parts of your code too, like repeated importations as well as unnecessary button clicks. I instantly directed to the page containing the list of results for teachers, instead of entering "teacher" into the search field on the home page. I was left with the following:
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
Category = input("Category?")
Category = re.sub(" ", "%20", Category)
Type = re.sub(" ", "-", Category.lower())
url ='https://www.naukri.com/' + Type + '-jobs?k=' + Category
driver = webdriver.Chrome(r"mypython/bin/chromedriver_linux64/chromedriver")
driver.get(url)
data = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "srp_container.fl")))
result = WebDriverWait(data, 10).until(
EC.presence_of_all_elements_located((By.CLASS_NAME, "row")))
for res in result:
data = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "srp_container.fl")))
jobs = WebDriverWait(data, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "row")))
for job in jobs:
try:
title=job.find_element_by_class_name("desig").text
print('title:',title)
school=job.find_element_by_class_name("org").text
print('school:',school)
location=job.find_element_by_class_name("loc").text
print("location:",location)
salary=job.find_element_by_class_name("salary").text
print("salary:",salary)
except:
pass
print('-------')
Button = driver.find_elements_by_class_name("grayBtn")[-1]
time.sleep(1)
driver.execute_script("window.scrollTo(0,document.body.scrollHeight - 1300)")
Button.click()
As requested, here is the modified code to append data to a pandas dataframe and convert the dataframe to excel:
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
import pandas as pd
df = pd.DataFrame(columns = ['Title', 'School', 'Location', 'Salary'])
Category = input("Category?")
Category = re.sub(" ", "%20", Category)
Type = re.sub(" ", "-", Category.lower())
url ='https://www.naukri.com/' + Type + '-jobs?k=' + Category
driver = webdriver.Chrome(r"mypython/bin/chromedriver_linux64/chromedriver")
driver.get(url)
data = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CLASS_NAME, "srp_container.fl")))
result = WebDriverWait(data, 10).until(
EC.presence_of_all_elements_located((By.CLASS_NAME, "row")))
i = 0
for res in result:
data = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "srp_container.fl")))
jobs = WebDriverWait(data, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "row")))
for job in jobs:
try:
title=job.find_element_by_class_name("desig").text
print('title:',title)
school=job.find_element_by_class_name("org").text
print('school:',school)
location=job.find_element_by_class_name("loc").text
print("location:",location)
salary=job.find_element_by_class_name("salary").text
print("salary:",salary)
df.loc[i] = [title, school, location, salary]
i += 1
except:
pass
print('-------')
Button = driver.find_elements_by_class_name("grayBtn")[-1]
time.sleep(1)
driver.execute_script("window.scrollTo(0,document.body.scrollHeight - 1300)")
Button.click()
df.to_excel("all_results.xlsx")

Categories

Resources