Python / Selenium: paste faster in browser

I have written a script in Python to extract 400-500 lines of text from one browser and paste them into another. I am using send_keys() to put the text content into the text area, but it writes line by line (about 2 lines per second), so the operation takes a few minutes to complete. Is there any other method in Selenium to write faster (like a manual paste, which takes about a second)?
My code
import time
import re
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys


def init_driver(uname, pwd):
    driver = webdriver.Chrome()
    driver.wait = WebDriverWait(driver, 5)
    driver.get("https://ops.stg1.xxxxxyyyxxxx.com/login.jsp")
    # Log in with the supplied credentials
    box = driver.wait.until(EC.presence_of_element_located((By.NAME, "j_username")))
    box.send_keys(uname)
    box = driver.wait.until(EC.presence_of_element_located((By.NAME, "j_password")))
    box.send_keys(pwd)
    button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "login")))
    button.click()
    return driver


def copy():
    with open("Tag_input.txt") as f:
        for line in f:
            url = line.strip()
            driver.get(url)
            k = re.findall(r'=(\d+)', url)  # numeric id in the URL
            print(k[0])
            a = k[0]
            driver.wait = WebDriverWait(driver, 10)
            time.sleep(10)
            # Select and copy the primary content text area
            PC = driver.find_element_by_xpath("//textarea[@name='messagingMap.PRIMARY_CONTENT.message']")
            PC.send_keys(Keys.CONTROL, "a")
            PC.send_keys(Keys.CONTROL, "c")
            print("Copied Primary content !!")
            varPC = PC.text
            # Paste into the unclosed-tag-finder page and submit
            driver.get("http://jona.ca/blog/unclosed-tag-finder")
            driver.wait = WebDriverWait(driver, 10)
            time.sleep(10)
            text_area = driver.find_element_by_id("unclosed-tag-finder-input")
            text_area.send_keys(Keys.CONTROL, "v")
            button = driver.find_element_by_xpath("//input[@value='Submit']")
            button.click()
            result = driver.find_element_by_xpath("//pre[@id='unclosed-tag-finder-results']")
            op = result.text
            print(op)
            # Append the id and the result text as one CSV row
            with open('Tag_OP.csv', 'a+', newline='') as out:
                csv.writer(out).writerow([a, op])
            driver.wait = WebDriverWait(driver, 10)
            time.sleep(10)
    return driver


if __name__ == "__main__":
    driver = init_driver("abdul.salam@xxxyyxxx.com", "xxyyxx")
    copy()
    time.sleep(25)
    driver.quit()

You might try using Ctrl+A to select the text and Ctrl+C to copy it, then move to the new browser, press Ctrl+A to select all the text in your target field (so that you replace it), and Ctrl+V to paste. I would imagine that it is faster, but I haven't done any benchmarking myself.
This question popped right up when I did a search. It has more details, but, for instance, your paste would look like this:
driver.find_element_by_id("unclosed-tag-finder-input").sendKeys(Keys.chord(Keys.CONTROL,"v"));
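If keystrokes are still too slow, another option is to bypass typing entirely and set the field's value through JavaScript, which completes in one call regardless of how many lines you paste. A minimal sketch, assuming driver is a logged-in webdriver instance and text already holds the copied content (both names are illustrative):
# Set the textarea content in one shot instead of typing it key by key.
# 'text' is assumed to hold the 400-500 lines read from the source page.
textarea = driver.find_element_by_id("unclosed-tag-finder-input")
driver.execute_script(
    "arguments[0].value = arguments[1];"  # write the whole payload at once
    "arguments[0].dispatchEvent(new Event('input', {bubbles: true}));",  # notify any JS listeners
    textarea,
    text,
)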

Related

Webscraping Multiple Pages in Python with Selenium - loop not working

I'm quite new to Python and have written a script using Selenium to scrape a website. I've tried everything but can't get the loop to cycle through pages; it currently just repeats the data from the first page five times. I want to scrape all the pages for 'BR1', so any help would be great.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

with open('rightmove.csv', 'w') as file:
    file.write('PropertyCardcontent \n')

PATH = "/usr/local/bin/chromedriver"
driver = webdriver.Chrome(PATH)
driver.get("https://www.rightmove.co.uk/house-prices.html")
print(driver.title)

elem = driver.find_element(By.NAME, 'searchLocation')  # Find the search box
elem.send_keys('BR1' + Keys.RETURN)

try:
    content = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.ID, 'content'))
    )
finally:
    time.sleep(3)

for p in range(5):
    sold = content.find_elements(By.CLASS_NAME, 'sold-prices-content-wrapper ')
    for solds in sold:
        address = solds.find_elements(By.CLASS_NAME, 'sold-prices-content ')
        for addresses in address:
            result = addresses.find_elements(By.CLASS_NAME, 'results ')
            for results in result:
                card = results.find_elements(By.CLASS_NAME, 'propertyCard')
                for propertyCard in card:
                    header = propertyCard.find_elements(By.CLASS_NAME, 'propertyCard-content')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME, 'title')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME, 'subTitle')
                        for subtitle in road:
                            bed = subtitle.find_elements(By.CLASS_NAME, 'propertyType')
    with open('rightmove.csv', 'a') as file:
        for i in range(len(result)):
            file.write(header[i].text + '\n')
    button = driver.find_element(By.XPATH, '//*[@id="content"]/div[2]/div[2]/div[4]/div[27]/div[3]/div')
    button.click()
    time.sleep(3)

driver.quit()
Since the website link has the page number in it, I recommend you use "https://www.rightmove.co.uk/house-prices/br1.html?page=1" as the base URL and loop through the pages, changing the page index at the end of the URL with something like a format string.
One other thing: you don't need all those nested for loops; you can simply assign each variable its specific value, since everything you need is inside one HTML block that is easy to navigate.
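A minimal sketch of that looping idea, assuming the total page count has already been read from the pagination label into pages:
# Build each page's URL with an f-string instead of clicking "Next".
key_word = "br1"
for page in range(1, pages + 1):  # 'pages' read from the pagination label beforehand
    driver.get(f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page={page}")
    # ... scrape the property cards on this page ...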
Update:
Sorry for the late reply; some unexpected things came up.
I've made some changes. As I use Brave, make sure you select your own browser (Chrome, I believe); the chromedriver (ver. 102) stays the same, depending on your Chrome version.
I've also grabbed the Price and Date and stored them in a tuple.
Every record is stored as a list: [Title, propertyType, tuple_of(Price_Date)].
At the end, it creates a CSV and stores everything in it with ";" as the delimiter.
If you prefer, you can split the price and date for later use; up to you.
Note: this looping method only applies to websites where the page number is included in the URL. In this case, both the search key and the page number are part of the URL.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import time
import random
import itertools

options = Options()
options.binary_location = r'C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe'
driver = webdriver.Chrome(options=options, service=Service("chromedriver.exe"))

key_word = "BR1".lower()
base_url = f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page=1"
driver.get(base_url)

# Number of pages
pages = driver.find_element(By.XPATH, '//span[@class="pagination-label"][2]').text
pages = int(pages.strip('of'))

WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'results '))
)

data = []
pc = 0
for p in range(1, pages + 1):
    driver.get(f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page={p}")
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div//div[@class="propertyCard"]'))
    )
    propertyCards = driver.find_elements(By.XPATH, '//div//div[@class="propertyCard"]')
    for propertyCard in propertyCards:
        title = propertyCard.find_element(By.CLASS_NAME, 'title').text
        propertyType = propertyCard.find_element(By.CLASS_NAME, 'propertyType').text
        price_list = propertyCard.find_elements(By.CLASS_NAME, 'price')
        date_list = propertyCard.find_elements(By.CLASS_NAME, 'date-sold')
        data.append([title, propertyType])
        # Pair each price with its sale date; pad the shorter list with None
        for price_el, date_el in itertools.zip_longest(price_list, date_list, fillvalue=None):
            try:
                price = price_el.text
                date = date_el.text
                data[pc].append((price, date))
            except Exception as e:
                print(e)
        pc += 1
    time.sleep(random.randint(1, 4))

print(data)

with open('rightmove.csv', 'w') as file:
    header = "Title;propertyType;Price_Date\n"
    file.write(header)
    for record in data:
        file.write("{};{};{}\n".format(record[0], record[1], record[2:]))

driver.quit()
You don't have to walk down the DOM element by element; you can just use an XPath or class name (if it's unique; otherwise an XPath or CSS selector is better) and get the item you are looking for.
Anyway, follow this:
import time
import selenium.webdriver as webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get("https://www.rightmove.co.uk/house-prices.html")

# send query
query = "BR1"
search_bar = driver.find_element(By.XPATH, '//input[@class="searchBox ac_input"]')
search_bar.send_keys(query)
search_bar.send_keys(Keys.ENTER)

# wait for the results to load
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'propertyCard'))
)

# get the number of pages
pages = driver.find_element(By.XPATH, '//span[@class="pagination-label"][2]').text
pages = int(pages.replace('of ', ''))

data = []
i = 1
while i <= pages:
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//div[contains(text(), "Next")]'))
    ).click()
    # wait for the page to load the results
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div//div[@class="propertyCard"]'))
    )
    propertyCards = driver.find_elements(By.XPATH, '//div//div[@class="propertyCard"]')
    # loop over the results and store the data
    for propertyCard in propertyCards:
        title = propertyCard.find_element(By.CLASS_NAME, 'title').text
        propertyType = propertyCard.find_element(By.CLASS_NAME, 'propertyType').text
        data.append((title, propertyType))
    time.sleep(1)
    i += 1

print("you reached the last page")

# print the collected results
print(data)
driver.close()
I use a list of tuples because in your example you want to store two items; if you want to store more data, you can use a dict and then convert it into CSV with csv.DictWriter directly. Enjoy.
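For instance, a minimal DictWriter sketch of that idea (the field names and sample values here are just illustrative):
import csv

# Hypothetical records: one dict per property instead of a tuple
data = [
    {"title": "Flat 1, Example Road", "propertyType": "Flat", "price": "£300,000"},
    {"title": "2 Sample Street", "propertyType": "Terraced", "price": "£450,000"},
]

with open("rightmove.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["title", "propertyType", "price"])
    writer.writeheader()   # first row: column names
    writer.writerows(data) # one row per dict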

How to get all comments in 9gag using selenium?

I'm working on scraping the memes and all their comments from 9gag.
I used the code below, but I am only getting a few extra comments.
actions = ActionChains(driver)
link = driver.find_element(By.XPATH, "//button[@class='comment-list__load-more']")
actions.move_to_element(link).click(on_element=link).perform()
I would also like to access the subcomments under a comment by simulating a click on "View more replies".
From the HTML I found that the XPath element = driver.find_element(By.XPATH, "//div[@class='vue-recycle-scroller ready page-mode direction-vertical']") holds the comments section, but I'm not sure how to iterate through each comment in this element and simulate these clicks.
The code below should work directly, provided the necessary libraries are present, in case you want to test it.
Please help me with the following tasks:
Getting all the comments from "View all comments"
Iterating through each comment section and clicking on "View more replies" to get all the subcomments
My Code
import time
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import undetected_chromedriver as uc

if __name__ == '__main__':
    options = Options()
    # options.headless = True
    options.add_argument("start-maximized")  # ensure window is full-screen
    driver = uc.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get("https://9gag.com/gag/a5EAv9O")

    # scroll down in 200px steps until the bottom of the page is reached
    prev_h = 0
    for i in range(10):
        height = driver.execute_script("""
            function getActualHeight() {
                return Math.max(
                    Math.max(document.body.scrollHeight, document.documentElement.scrollHeight),
                    Math.max(document.body.offsetHeight, document.documentElement.offsetHeight),
                    Math.max(document.body.clientHeight, document.documentElement.clientHeight)
                );
            }
            return getActualHeight();
        """)
        driver.execute_script(f"window.scrollTo({prev_h},{prev_h + 200})")
        time.sleep(1)
        prev_h += 200
        if prev_h >= height:
            break
    time.sleep(5)

    title = driver.title[:-7]
    try:
        upvotes_count = \
            driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[0]
        comments_count = \
            driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[3]
        upvotes_count = int(upvotes_count) if len(upvotes_count) <= 3 else int("".join(upvotes_count.split(',')))
        comments_count = int(comments_count) if len(comments_count) <= 3 else int("".join(comments_count.split(',')))
        date_posted = driver.find_element(By.XPATH, "//p[@class='message']")
        date_posted = date_posted.text.split("·")[1].strip()
        # actions = ActionChains(driver)
        # link = driver.find_element(By.XPATH, "//button[@class='comment-list__load-more']")
        # actions.move_to_element(link).click(on_element=link).perform()
        element = driver.find_element(By.XPATH,
                                      "//div[@class='vue-recycle-scroller ready page-mode direction-vertical']")
        print(element.text)
        driver.quit()
    except (NoSuchElementException, Exception) as err:
        print(err)
Output: (screenshot omitted)
Edit:
I managed to make the code work better: it scrolls through the page until it sees all the comments, and it also clicks on "View more replies" if there are subcomments.
But it's only able to read comments from the middle to the end; perhaps, as the page is scrolled down, the earlier comments are hidden dynamically. I do not know how to overcome this. Also, clicking on "View more replies" stops after a few clicks, throwing the error
selenium.common.exceptions.MoveTargetOutOfBoundsException: Message: move target out of bounds
Here's the updated code
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import undetected_chromedriver as uc


def scroll_page(scrl_hgt):
    # scroll down in fixed-size steps until the bottom of the page is reached
    prev_h = 0
    for i in range(10):
        height = driver.execute_script("""
            function getActualHeight() {
                return Math.max(
                    Math.max(document.body.scrollHeight, document.documentElement.scrollHeight),
                    Math.max(document.body.offsetHeight, document.documentElement.offsetHeight),
                    Math.max(document.body.clientHeight, document.documentElement.clientHeight)
                );
            }
            return getActualHeight();
        """)
        driver.execute_script(f"window.scrollTo({prev_h},{prev_h + scrl_hgt})")
        time.sleep(1)
        prev_h += scrl_hgt
        if prev_h >= height:
            break


if __name__ == '__main__':
    options = Options()
    # options.headless = True
    driver = uc.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.maximize_window()
    driver.get("https://9gag.com/gag/a5EAv9O")
    time.sleep(5)

    # click on "I accept" cookies
    actions = ActionChains(driver)
    consent_button = driver.find_element(By.XPATH, '//*[@id="qc-cmp2-ui"]/div[2]/div/button[2]')
    actions.move_to_element(consent_button).click().perform()
    scroll_page(150)
    time.sleep(2)

    # click on the fresh comments section
    fresh_comments = driver.find_element(By.XPATH, '//*[@id="page"]/div[1]/section[2]/section/header/div/button[2]')
    actions.move_to_element(fresh_comments).click(on_element=fresh_comments).perform()
    time.sleep(5)

    # getting the metadata
    title = driver.title[:-7]
    upvotes_count = driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[0]
    comments_count = driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[3]
    upvotes_count = int(upvotes_count) if len(upvotes_count) <= 3 else int("".join(upvotes_count.split(',')))
    comments_count = int(comments_count) if len(comments_count) <= 3 else int("".join(comments_count.split(',')))
    date_posted = driver.find_element(By.XPATH, "//p[@class='message']")
    date_posted = date_posted.text.split("·")[1].strip()
    time.sleep(3)

    # click on the "load more comments" button to load all the comments
    load_more_comments = driver.find_element(By.XPATH, "//button[@class='comment-list__load-more']")
    actions.move_to_element(load_more_comments).click(on_element=load_more_comments).perform()
    scroll_page(500)

    print([my_elem.text for my_elem in driver.find_elements(By.CSS_SELECTOR, "div.comment-list-item__text")])

    comments = driver.find_elements(By.CSS_SELECTOR, "div.vue-recycle-scroller__item-view")
    for item in comments:
        html = item.get_attribute("innerHTML")
        if "comment-list-item__text" in html:
            print(item.find_element(By.CSS_SELECTOR, "div.comment-list-item__text").text)
        elif "comment-list-item__deleted-text" in html:
            print(item.find_element(By.CSS_SELECTOR, "div.comment-list-item__deleted-text").text)
        # get sub comments
        if "comment-list-item__replies" in html:
            sub_comments = item.find_element(By.CSS_SELECTOR, "div.comment-list-item__replies")
            actions.move_to_element(sub_comments).click(on_element=sub_comments).perform()
            time.sleep(2)
    driver.quit()
PS: My goal is to get every single comment and all their subcomments (whether they are text, image, GIF, etc.) in the order they appear, and save them somewhere so that I can recreate the comments section again.
To extract and print the comment texts, you need to induce a WebDriverWait for visibility_of_all_elements_located(), and you can use the following locator strategies:
driver.get("https://9gag.com/gag/a5EAv9O")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.comment-list__load-more"))).click()
print([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.comment-list-item__text")))])
Console Output:
['Man, the battle of the cults is getting interesting now.', 'rent free in your head', 'Sorry saving all my money up for the Joe Biden Depends Multipack and the Karmella knee pads.', "It's basically a cult now.", "I'll take one. I'm not even American", '', 'that eagle looks familiar.', "Who doesn't want a trump card?"]
Note: you have to add the following imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
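As a side note on the MoveTargetOutOfBoundsException mentioned in the question: a common remedy is to scroll each element into view with JavaScript before building the action chain. A minimal sketch, assuming driver and a comment element item as in the question's loop:
from selenium.webdriver.common.action_chains import ActionChains

# Bring the element into the viewport first, so that
# move_to_element has valid coordinates to work with.
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", item)
ActionChains(driver).move_to_element(item).click().perform()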

How do I press load more button while scraping comments on Instagram with Selenium Python

I'm working on a project that can scrape comments off posts on Instagram and write them into an Excel file.
Here's my code:
from selenium.webdriver.common.by import By
from selenium import webdriver
import time
import sys
import pandas as pd
from pandas import ExcelWriter
import os.path
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

url = [
    "https://www.instagram.com/p/CcVTqRtJ2gj/",
    "https://www.instagram.com/p/CcXpLHepve-/",
]
user_names = []
user_comments = []

driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url[0])
time.sleep(3)

# log in
username = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
username.clear()
username.send_keys("username")
password.clear()
password.send_keys("pwd")
Login_button = (
    WebDriverWait(driver, 2)
    .until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']")))
    .click()
)
time.sleep(4)
not_now = (
    WebDriverWait(driver, 30)
    .until(
        EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))
    )
    .click()
)

for n in url:
    try:
        driver.get(n)
        time.sleep(3)
        # keep clicking "load more comments" until it disappears (at most 10 times)
        load_more_comment = driver.find_element_by_xpath("//button[@class='wpO6b ']")
        print("Found {}".format(str(load_more_comment)))
        i = 0
        while load_more_comment.is_displayed() and i < 10:
            load_more_comment.click()
            time.sleep(1.5)
            load_more_comment = driver.find_element_by_xpath(
                "//button[@class='wpO6b ']"
            )
            print("Found {}".format(str(load_more_comment)))
            i += 1
        user_names.pop(0)
        user_comments.pop(0)
    except Exception as e:
        print(e)
        pass
    comment = driver.find_elements_by_class_name("gElp9 ")
    for c in comment:
        container = c.find_element_by_class_name("C4VMK")
        name = container.find_element_by_class_name("_6lAjh ").text
        content = container.find_element_by_class_name("MOdxS ").text
        content = content.replace("\n", " ").strip()
        user_names.append(name)
        user_comments.append(content)
        print(content)
    user_names.pop(0)
    user_comments.pop(0)

# export(user_names, user_comments)
driver.close()
df = pd.DataFrame(list(zip(user_names, user_comments)), columns=["Name", "Comments"])
# df.to_excel("Anime Content Engagement.xlsx")
print(df)
And the load-more-comments part doesn't seem to work. Since there is more than one button with the same class name, I'm not able to choose the right button to click on. I'm a beginner, so if anyone has a solution for this, it would be great.
You can select it by its aria-label text:
driver.find_element_by_css_selector("svg._8-yf5[aria-label='TEXT']")
I believe the text inside changes according to the Instagram language, so set it according to what appears in your browser.
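A minimal sketch of that idea with an explicit wait; the aria-label value below is an assumption, so substitute whatever your Instagram language shows:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Click the load-more button via the aria-label on its svg icon,
# instead of relying on the generated class name.
load_more = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//button[.//svg[@aria-label='Load more comments']]")
    )
)
load_more.click()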

Selenium button not being clicked but is being highlighted

What I'm trying to do is build a Nike product auto-buyer. The problem is that after selecting the size, it doesn't let me click through Selenium; I even tried to click manually, but nothing pops up. This is my code where I try to click (not the full code):
from selenium import webdriver
from selenium.common.exceptions import JavascriptException
from selenium.webdriver import ChromeOptions
import re
from bs4 import BeautifulSoup
import requests
import json
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import os

user = os.environ['USERNAME']
snkrsurl = "https://www.nike.com/t/air-zoom-pegasus-38-womens-running-shoe-wide-gg8GBK/CW7358-500"  # input("Please input your SNKRS url \n")
size = float(input("Please input size \n"))

options = ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_experimental_option("useAutomationExtension", False)
options.add_experimental_option("detach", True)
options.add_argument("--disable-notifications")
chrome = webdriver.Chrome(options=options)

if "https://" in snkrsurl:
    pass
elif "http://" in snkrsurl:
    pass
else:
    snkrsurl = "http://" + snkrsurl

chrome.get(snkrsurl)
with requests.Session() as session:
    soup = BeautifulSoup(session.get(snkrsurl).text, features="lxml")
script = soup.find("script", string=re.compile('INITIAL_REDUX_STATE')).string
redux = json.loads(script[script.find('{'):-1])
products = redux["Threads"]["products"]
wait = WebDriverWait(chrome, 15)


def step1(i, v):
    for key, product in products.items():
        if float(product["skus"][i]["nikeSize"]) == v:
            print("Found")
            if v.is_integer():
                wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="gen-nav-footer"]/nav/button'))).click()
                wait.until(EC.element_to_be_clickable((By.XPATH, "//*[text()='{}']".format(int(v))))).click()
                chrome.execute_script("window.scroll(0,609)")
                wait.until(EC.element_to_be_clickable((By.XPATH, '//*[text()="Add to Bag"]'))).click()
                break
            else:
                wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="gen-nav-footer"]/nav/button'))).click()
                wait.until(EC.element_to_be_clickable((By.XPATH, "//*[text()='{}']".format(v)))).click()
                e = chrome.find_element_by_css_selector("#floating-atc-wrapper > div > button.ncss-btn-primary-dark.btn-lg.add-to-cart-btn")
                chrome.execute_script("arguments[0].scrollIntoView(true);", e)  # pass the element into the script
                e.click()
                break
        else:
            pass


for i, v in products.items():
    length = len(v['skus'])
    break

for i in range(length):
    length -= 1
    step1(length, size)
I use window.scroll to go to that element because, if I don't, it throws an error saying the element is not interactable; and yes, checkout is only clickable from a real Chrome.
Thanks
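One pattern worth noting for the scrolling workaround above: pass the element itself to execute_script and scroll it into view, instead of scrolling to a fixed pixel offset. A minimal sketch, assuming chrome is the webdriver instance and the add-to-cart selector from the code above:
# Locate the add-to-cart button, scroll it to the middle of the viewport,
# then click it; no hard-coded window.scroll offset needed.
e = chrome.find_element_by_css_selector(
    "#floating-atc-wrapper > div > button.ncss-btn-primary-dark.btn-lg.add-to-cart-btn"
)
chrome.execute_script("arguments[0].scrollIntoView({block: 'center'});", e)
e.click()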

How to put multiple values in a loop and write the result to CSV with Python and Selenium

I want to get the values from this page. Getting the product hyperlinks works fine; I also want to get the product information, price, etc. from each of those links in the same for loop. How do I put the resulting data into a CSV file? Please help me.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
import time

chrome_path = r"C:\Users\Venkatesh\AppData\Local\Programs\Python\Python35\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get("https://www.flipkart.com/mobiles")
search = driver.find_element_by_xpath("""//*[@id="container"]/div/div[2]/div/div[2]/div/div/div[1]/section/div[3]/div/div/a""").click()

delay = 20  # seconds
try:
    WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.XPATH, "//*[@id='container']/div/div[2]/div[2]/div/div[2]/div/div[3]/div[1]/div/div[1]/a/div[2]/div[1]/div[1]")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    print("Page is ready")
except TimeoutException:
    print("Loading took too much time")

time.sleep(10)
for post in driver.find_elements_by_class_name("_1UoZlX"):
    print(post.get_attribute("href"))
time.sleep(2)
driver.quit()
Output:
Page is ready
https://www.flipkart.com/moto-g5-plus-fine-gold-32-gb/p/itmes2zjvwfncxxr?pid=MOBEQHMGED7F9CZ2&srno=b_1_1&otracker=browse&lid=LSTMOBEQHMGED7F9CZ2KHTBI8
https://www.flipkart.com/moto-g5-plus-lunar-grey-32-gb/p/itmes2zjvwfncxxr?pid=MOBEQHMGMAUXS5BF&srno=b_1_2&otracker=browse&lid=LSTMOBEQHMGMAUXS5BFVCF0ZO
https://www.flipkart.com/moto-e3-power-black-16-gb/p/itmekgt2fbywqgcv?pid=MOBEKGT2HGDGADFW&srno=b_1_3&otracker=browse&lid=LSTMOBEKGT2HGDGADFWP5NHBY
https://www.flipkart.com/micromax-bolt-q381-plus-coffee-16-gb/p/itmeskgycnfghsat?pid=MOBESAMDG2GNUBC5&srno=b_1_4&otracker=browse&lid=LSTMOBESAMDG2GNUBC5KRPH8Q
https://www.flipkart.com/lenovo-k6-power-grey-dark-grey-32-gb/p/itmezenfhm4mvptw?pid=MOBEZENFZBPW8UMF&srno=b_1_5&otracker=browse&lid=LSTMOBEZENFZBPW8UMF7P8NY0
https://www.flipkart.com/lenovo-k6-power-gold-32-gb/p/itmezenfhm4mvptw?pid=MOBEZEMYH7FQBGBQ&srno=b_1_6&otracker=browse&lid=LSTMOBEZEMYH7FQBGBQRHVU0S
https://www.flipkart.com/lenovo-k6-power-silver-32-gb/p/itmezenfhm4mvptw?pid=MOBEZEMX6CZHCJVY&srno=b_1_7&otracker=browse&lid=LSTMOBEZEMX6CZHCJVYOIBM0E
https://www.flipkart.com/lenovo-vibe-k5-note-grey-64-gb/p/itmepcfqfdx9bdxs?pid=MOBEPCFQRJ6KFYZS&srno=b_1_8&otracker=browse&lid=LSTMOBEPCFQRJ6KFYZSI4DRRB
https://www.flipkart.com/lenovo-vibe-k5-note-gold-64-gb/p/itmepcfqfdx9bdxs?pid=MOBEPCFQ3ZSYTRUZ&srno=b_1_9&otracker=browse&lid=LSTMOBEPCFQ3ZSYTRUZGFSZCU
https://www.flipkart.com/samsung-galaxy-nxt-gold-32-gb/p/itmemzd4gepexjya?pid=MOBEMZD4KHRF5VZX&srno=b_1_10&otracker=browse&lid=LSTMOBEMZD4KHRF5VZX7FNU5S
https://www.flipkart.com/moto-e3-power-white-16-gb/p/itmekgt23fgwdgkg?pid=MOBEKGT2SVHPAHTM&srno=b_1_11&otracker=browse&lid=LSTMOBEKGT2SVHPAHTMJA8RQ1
https://www.flipkart.com/lenovo-k6-power-silver-32-gb/p/itmezenfghddrfmc?pid=MOBEZENFKXZ4HSCG&srno=b_1_12&otracker=browse&lid=LSTMOBEZENFKXZ4HSCGC1OOAM
https://www.flipkart.com/lenovo-k6-power-gold-32-gb/p/itmezenfghddrfmc?pid=MOBEZENFSZGTQGWF&srno=b_1_13&otracker=browse&lid=LSTMOBEZENFSZGTQGWFUR1LY1
https://www.flipkart.com/lenovo-k6-power-dark-gray-32-gb/p/itmezenfghddrfmc?pid=MOBEZENFG8BPDPSU&srno=b_1_14&otracker=browse&lid=LSTMOBEZENFG8BPDPSUUANLO6
https://www.flipkart.com/lava-arc-blue/p/itmezgyfszhmwfzt?pid=MOBEF6D24ZT6YHFJ&srno=b_1_15&otracker=browse&lid=LSTMOBEF6D24ZT6YHFJZ6N7XC
https://www.flipkart.com/lenovo-vibe-k5-plus-3-gb-silver-16-gb/p/itmektn3t9rg9hnn?pid=MOBEKEF8ATFZZ8GN&srno=b_1_16&otracker=browse&lid=LSTMOBEKEF8ATFZZ8GNY7WZBU
https://www.flipkart.com/lenovo-vibe-k5-plus-3-gb-gold-16-gb/p/itmektn3t9rg9hnn?pid=MOBEKEF8JYGKZCTF&srno=b_1_17&otracker=browse&lid=LSTMOBEKEF8JYGKZCTFUTCYS4
https://www.flipkart.com/lenovo-vibe-k5-plus-3-gb-dark-grey-16-gb/p/itmektn3t9rg9hnn?pid=MOBEKEF86VVUE8G2&srno=b_1_18&otracker=browse&lid=LSTMOBEKEF86VVUE8G2YCW5OP
https://www.flipkart.com/samsung-galaxy-nxt-black-32-gb/p/itmemzd4byrufyu7?pid=MOBEMZD4G83T5HKZ&srno=b_1_19&otracker=browse&lid=LSTMOBEMZD4G83T5HKZVMFKK6
https://www.flipkart.com/samsung-galaxy-on8-gold-16-gb/p/itmemvarkqg5dyay?pid=MOBEMJR2NDM4EAHQ&srno=b_1_20&otracker=browse&lid=LSTMOBEMJR2NDM4EAHQ8BMJIN
https://www.flipkart.com/samsung-galaxy-on7-black-8-gb/p/itmedhx3jgmu2gps?pid=MOBECCA5SMRSKCNY&srno=b_1_21&otracker=browse&lid=LSTMOBECCA5SMRSKCNYWC8DYC
https://www.flipkart.com/samsung-galaxy-on7-gold-8-gb/p/itmedhx3jgmu2gps?pid=MOBECCA5Y5HBYR3Q&srno=b_1_22&otracker=browse&lid=LSTMOBECCA5Y5HBYR3QPDPGLJ
https://www.flipkart.com/samsung-galaxy-on5-gold-8-gb/p/itmedhx3uy3qsfks?pid=MOBECCA5FHQD43KA&srno=b_1_23&otracker=browse&lid=LSTMOBECCA5FHQD43KAFXOZYB
https://www.flipkart.com/lenovo-p2-gold-32-gb/p/itmeq5ygvgq9vyfn?pid=MOBEZFHHURMWYSFN&srno=b_1_24&otracker=browse&lid=LSTMOBEZFHHURMWYSFNBBG6L0
https://www.flipkart.com/asus-zenfone-max-black-32-gb/p/itmege3d5pjpmknc?pid=MOBEGE3DYZM3ZYWB&srno=b_1_25&otracker=browse&lid=LSTMOBEGE3DYZM3ZYWBPCOZHP
https://www.flipkart.com/lenovo-vibe-k5-note-grey-32-gb/p/itmejj6kmhh2khk9?pid=MOBEJJ6KYARZGWJC&srno=b_1_26&otracker=browse&lid=LSTMOBEJJ6KYARZGWJCCV4LRX
https://www.flipkart.com/swipe-elite-sense-4g-volte/p/itmeh6yfycypxfdz?pid=MOBEH6YFZYZZNCZK&srno=b_1_27&otracker=browse&lid=LSTMOBEH6YFZYZZNCZKWVY6ES
https://www.flipkart.com/swipe-elite-sense-4g-volte/p/itmeh6yfycypxfdz?pid=MOBEH6YFZRTEMDBG&srno=b_1_28&otracker=browse&lid=LSTMOBEH6YFZRTEMDBGYJNCJI
https://www.flipkart.com/xolo-era-1x-4g-volte-black-gun-metal-8-gb/p/itmerhq8uhtehukg?pid=MOBEHMEKGCZCGMB8&srno=b_1_29&otracker=browse&lid=LSTMOBEHMEKGCZCGMB8DCWHIY
https://www.flipkart.com/swipe-konnect-grand-black-8-gb/p/itmeqcgxvkyfzsgj?pid=MOBEQCGXN6HTZE2C&srno=b_1_30&otracker=browse&lid=LSTMOBEQCGXN6HTZE2CXUT5W1
https://www.flipkart.com/lenovo-vibe-k5-note-gold-32-gb/p/itmejj6kczvxej4g?pid=MOBEJJ6K5A3GQ9SU&srno=b_1_31&otracker=browse&lid=LSTMOBEJJ6K5A3GQ9SUZERSAR
https://www.flipkart.com/lyf-water-f1-black-32-gb/p/itmezh76z9jqsa8z?pid=MOBEZH76AFWSZVNH&srno=b_1_32&otracker=browse&lid=LSTMOBEZH76AFWSZVNHOOBURN
https://www.flipkart.com/samsung-galaxy-j5-6-new-2016-edition-black-16-gb/p/itmegmrnzqjcpfg9?pid=MOBEG4XWHJDWMQDF&srno=b_1_33&otracker=browse&lid=LSTMOBEG4XWHJDWMQDFZIWO93
https://www.flipkart.com/samsung-galaxy-j5-6-new-2016-edition-white-16-gb/p/itmegmrnzqjcpfg9?pid=MOBEG4XWJG7F9A6Z&srno=b_1_34&otracker=browse&lid=LSTMOBEG4XWJG7F9A6ZHJOVBG
https://www.flipkart.com/samsung-galaxy-j5-6-new-2016-edition-gold-16-gb/p/itmegmrnzqjcpfg9?pid=MOBEG4XWFTBRMMBY&srno=b_1_35&otracker=browse&lid=LSTMOBEG4XWFTBRMMBYZPYEGS
https://www.flipkart.com/moto-m-grey-64-gb/p/itmenqavgcezzk2y?pid=MOBENQATHQTKG7AV&srno=b_1_36&otracker=browse&lid=LSTMOBENQATHQTKG7AVGFQI4N
https://www.flipkart.com/moto-m-gold-64-gb/p/itmenqavgcezzk2y?pid=MOBENQAVANRMEGAP&srno=b_1_37&otracker=browse&lid=LSTMOBENQAVANRMEGAPHWU47I
https://www.flipkart.com/moto-m-silver-64-gb/p/itmenqavgcezzk2y?pid=MOBENQAVFTG6FPXX&srno=b_1_38&otracker=browse&lid=LSTMOBENQAVFTG6FPXXHZBIGV
https://www.flipkart.com/apple-iphone-6-silver-16-gb/p/itme8dvfeuxxbm4r?pid=MOBEYHZ2NUZGCHKN&srno=b_1_39&otracker=browse&lid=LSTMOBEYHZ2NUZGCHKN7PMDIN
https://www.flipkart.com/samsung-galaxy-on8-black-16-gb/p/itmemvarprh8hegn?pid=MOBEMJRFZXZBESQW&srno=b_1_40&otracker=browse&lid=LSTMOBEMJRFZXZBESQWCFHWJ0
https://www.flipkart.com/panasonic-eluga-tapp-silver-grey-16-gb/p/itmezf54ey3gf8ne?pid=MOBENRHGWZWKEGGF&srno=b_1_41&otracker=browse&lid=LSTMOBENRHGWZWKEGGFMJELY2
https://www.flipkart.com/panasonic-eluga-tapp-champagne-gold-16-gb/p/itmezf54ey3gf8ne?pid=MOBENRHGEQEJHSZM&srno=b_1_42&otracker=browse&lid=LSTMOBENRHGEQEJHSZMD8R5FE
https://www.flipkart.com/apple-iphone-6s-rose-gold-32-gb/p/itmen2yymnfcrxsz?pid=MOBEN2XYK8WFEGM8&srno=b_1_43&otracker=browse&lid=LSTMOBEN2XYK8WFEGM8QJW5XA
https://www.flipkart.com/lenovo-p2-grey-graphite-grey-32-gb/p/itmeq5ygvgq9vyfn?pid=MOBEZFHH2JYGXSNF&srno=b_1_44&otracker=browse&lid=LSTMOBEZFHH2JYGXSNFNWKEAD
https://www.flipkart.com/forme-n1/p/itmeff8s2hdrfhyg?pid=MOBEFF8SHZPYKCRY&srno=b_1_45&otracker=browse&lid=LSTMOBEFF8SHZPYKCRYEKQPPR
https://www.flipkart.com/forme-n1/p/itmeff8s2hdrfhyg?pid=MOBEFF8SSZNHCUND&srno=b_1_46&otracker=browse&lid=LSTMOBEFF8SSZNHCUNDRC6GLT
https://www.flipkart.com/samsung-galaxy-on5-black-8-gb/p/itmekszmsqgpgygy?pid=MOBECCA5BJUVUGNP&srno=b_1_47&otracker=browse&lid=LSTMOBECCA5BJUVUGNPRKEGMG
https://www.flipkart.com/lenovo-p2-grey-graphite-grey-32-gb/p/itmeq5ygebzgqgfb?pid=MOBEZFHHVD8KXE7G&srno=b_1_48&otracker=browse&lid=LSTMOBEZFHHVD8KXE7GB0OS6I
https://www.flipkart.com/lenovo-p2-gold-32-gb/p/itmeq5ygebzgqgfb?pid=MOBEZFHHGE2RXQUY&srno=b_1_49&otracker=browse&lid=LSTMOBEZFHHGE2RXQUY2XDB97
https://www.flipkart.com/samsung-galaxy-j7-gold-16-gb/p/itmeafbfjhsydbpw?pid=MOBE93GWSMGZHFSK&srno=b_1_50&otracker=browse&lid=LSTMOBE93GWSMGZHFSKT6OZOB
https://www.flipkart.com/samsung-z2-gold-8-gb/p/itmenkygvprd5dwt?pid=MOBENKYGHFUHT6BH&srno=b_1_51&otracker=browse&lid=LSTMOBENKYGHFUHT6BHVSHMDE
https://www.flipkart.com/leeco-le-2-grey-32-gb/p/itmejeucxaxmnk8k?pid=MOBEJFTH4C9Z2YZR&srno=b_1_52&otracker=browse&lid=LSTMOBEJFTH4C9Z2YZRVVL0EL
https://www.flipkart.com/lyf-water-10-black-16-gb/p/itmemj7d8qfkfu4r?pid=MOBEMJ7C7YMDMVDQ&srno=b_1_53&otracker=browse&lid=LSTMOBEMJ7C7YMDMVDQPCFALX
https://www.flipkart.com/micromax-canvas-nitro-2-grey-silver-16-gb/p/itme7nhzw56hv2ga?pid=MOBE7NHZP7GHZ7SG&srno=b_1_54&otracker=browse&lid=LSTMOBE7NHZP7GHZ7SGCYGNI3
https://www.flipkart.com/moto-g-turbo-white-16-gb/p/itmecc4uhbue7ve6?pid=MOBECC4UQTJ5QZFR&srno=b_1_55&otracker=browse&lid=LSTMOBECC4UQTJ5QZFR9CAUPO
https://www.flipkart.com/moto-g-turbo-black-16-gb/p/itmecc4uhbue7ve6?pid=MOBECC4UZTSGKWWZ&srno=b_1_56&otracker=browse&lid=LSTMOBECC4UZTSGKWWZOQKAIZ
https://www.flipkart.com/apple-iphone-6-space-grey-16-gb/p/itme8dvfeuxxbm4r?pid=MOBEYHZ2YAXZMF2J&srno=b_1_57&otracker=browse&lid=LSTMOBEYHZ2YAXZMF2JEVWVNC
https://www.flipkart.com/yu-yunicorn-rush-silver-32-gb/p/itmenffyjfp8ubyg?pid=MOBEJ3MFUQAF8XJS&srno=b_1_58&otracker=browse&lid=LSTMOBEJ3MFUQAF8XJSBPC8L4
https://www.flipkart.com/yu-yunicorn-gold-rush-32-gb/p/itmenffyjfp8ubyg?pid=MOBEJ3MF23Q9MGMH&srno=b_1_59&otracker=browse&lid=LSTMOBEJ3MF23Q9MGMHZ49MG2
https://www.flipkart.com/micromax-canvas-nitro-2-white-gold-16-gb/p/itme7nhzw56hv2ga?pid=MOBE8TJBHGQYHNPT&srno=b_1_60&otracker=browse&lid=LSTMOBE8TJBHGQYHNPTVL3HS0
I used openpyxl to create a workbook for each run, named with a timestamp; the fetched links are eventually written into it.
I couldn't find the exact links given in the question, so I chose my own links, which are a similar case. This code uses different links per se, but the solution scales up to be the same for your case, @venkatesh.
One more thing: try to keep XPaths as relative as possible, and classes with gibberish like _13oc-S will not hold good, as they tend to change dynamically on each DOM refresh and each browser instance.
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import openpyxl

current_time = time.strftime('%Y%m%d%H%M%S')
xlpath = "linktracker" + current_time + ".xlsx"  # openpyxl writes xlsx-format workbooks

driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.flipkart.com/mobiles")
driver.maximize_window()

# Searches for a certain brand of phones (POCO). Inefficient way of locator finding, though.
search = driver.find_element(By.XPATH, "(//*[@alt='Shop Now'])[2]").click()
time.sleep(10)  # bad practice, but used for now; WebDriverWait should be used instead

each_element = "//a[@rel='noopener noreferrer']"  # locates each desired element on the search page (each phone block)
posts = driver.find_elements(By.XPATH, each_element)
print(len(posts))

ls = []
for post in range(len(posts) - 1):  # len-1 because the last item is a footer, not a desired link
    # concatenates the subscript to the element XPath: e.g. (//*[@element='ele'])[1] ... (//*[@element='ele'])[n]
    each_post = driver.find_element(By.XPATH, '(' + each_element + ')' + '[' + str(post + 1) + ']')
    each_link = each_post.get_attribute("href")
    ls.append(each_link)

wb = openpyxl.Workbook()  # creates a workbook
sheet = wb.active
c = 0
# loop through the collected list and write the values to the workbook
for i in ls:
    sheet.cell(row=c + 1, column=1).value = i
    c += 1  # increment the row for each iteration of i
wb.save(xlpath)  # save the workbook with the name given in the xlpath variable above

driver.quit()
Result: (screenshot of the saved file omitted)
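If a plain CSV file is actually wanted, note that openpyxl saves workbooks in xlsx format regardless of the file extension; Python's built-in csv module is the simpler fit. A minimal sketch, assuming ls is the list of links collected above:
import csv

# Write one link per row to a genuine CSV file.
with open("linktracker" + current_time + ".csv", "w", newline="") as f:
    writer = csv.writer(f)
    for link in ls:
        writer.writerow([link])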
