Python Selenium .send_keys() only sending first character of my string - python

I was trying to automate a post to Facebook using Python Selenium, and it was 90% complete. The only issue is that the string I give is "test," but when Facebook posts, it just sends the first character of "test," which is "t."
This is the code:
#libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
from time import sleep
import pyautogui
#fetching hashtags
def hashtags(hash_idea):
url = 'http://best-hashtags.com/hashtag/' + hash_idea
try:
req = Request(url, headers={'User-Agent' : 'Mozilla/5.0'})
page = urlopen(req, timeout=10)
page_html = page.read()
page.close()
page_soup = soup(page_html, 'html.parser')
result = page_soup.find('div',{'class':'tag-box tag-box-v3 margin-bottom-40'})
tags = result.decode()
start_index = tags.find('#')
end_index = tags.find('</p1>')
tags = tags[start_index:end_index]
return tags
except:
print('Something went wrong While Fetching hashtags')
def login(username, password):
try:
url = 'https://facebook.com'
driver.get(url)
user = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'email')))
user.send_keys(username)
pas = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'pass')))
pas.send_keys(password)
login_btn = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME,'login')))
login_btn.click()
except:
print('Something went wrong while login process')
def upload(img_path,caption):
try:
btn1 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[3]/div/div[2]/div/div/div/div[1]/div/div[1]')))
btn1.click()
btn2= WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[1]/div[2]/div/div[1]/div/span/div/div/div[1]/div/div/div[1]/i')))
btn2.click()
btn3 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/div/div/div[1]/div/i')))
btn3.click()
pyautogui.write(img_path)
pyautogui.press('enter')
cap = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/div/div/div[1]')))
cap.send_keys(caption)
sleep(5) # this is mandatory while doing some thing with bot
btn_post = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[2]/div/div/div[1]/div')))
btn_post.click()
except:
print('Something Went Wrong While posting the image or video')
if __name__== "__main__":
#turn for credentials, driver, and caption
username = input('username : ')
password = input('pass : ')
img_path = 'pic1.jpg'
hash_idea = 'covid'
caption = 'test' # if you want to
caption = caption + '\n' + hashtags(hash_idea)
driver = webdriver.Firefox(executable_path="C:/Users/Asus/Downloads/Compressed/geckodriver-v0.32.0-win64/geckodriver.exe")
login(username,password)
upload(img_path,caption)
I wanted to automate the post with the text I provided in the code.

You can try several alternatives
In the definition of cap replace presence_of_element_located with element_to_be_clickable.
Do what in 1. and moreover add
cap = ...
cap.clear()
cap.click()
cap.send_keys(caption)
Do what in 1. and moreover use ActionChains
from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)
cap = ...
actions.move_to_element(cap) # move the mouse to the middle of element
actions.click()
actions.send_keys(caption).perform()
If none works, then you can always send one character at a time
[cap.send_keys(c) for c in caption]

Related

Python Selenium 'post' button of Facebook is not working

Previously I asked about the Python Selenium .send_keys() only sending first character of my string. The issue has now been resolved, but the post button is no longer functional. The post button functions properly when there is a string problem, but when that problem is fixed, the post button stops functioning properly.
This is the previous code (only sending first character of my string) :
#libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
from time import sleep
import pyautogui
#fetching hashtags
def hashtags(hash_idea):
url = 'http://best-hashtags.com/hashtag/' + hash_idea
try:
req = Request(url, headers={'User-Agent' : 'Mozilla/5.0'})
page = urlopen(req, timeout=10)
page_html = page.read()
page.close()
page_soup = soup(page_html, 'html.parser')
result = page_soup.find('div',{'class':'tag-box tag-box-v3 margin-bottom-40'})
tags = result.decode()
start_index = tags.find('#')
end_index = tags.find('</p1>')
tags = tags[start_index:end_index]
return tags
except:
print('Something went wrong While Fetching hashtags')
def login(username, password):
try:
url = 'https://facebook.com'
driver.get(url)
user = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'email')))
user.send_keys(username)
pas = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'pass')))
pas.send_keys(password)
login_btn = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME,'login')))
login_btn.click()
except:
print('Something went wrong while login process')
def upload(img_path,caption):
try:
btn1 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[3]/div/div[2]/div/div/div/div[1]/div/div[1]')))
btn1.click()
btn2= WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[1]/div[2]/div/div[1]/div/span/div/div/div[1]/div/div/div[1]/i')))
btn2.click()
btn3 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/div/div/div[1]/div/i')))
btn3.click()
pyautogui.write(img_path)
pyautogui.press('enter')
cap = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/div/div/div[1]')))
cap.send_keys(caption)
sleep(5) # this is mandatory while doing some thing with bot
btn_post = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[2]/div/div/div[1]/div')))
btn_post.click()
except:
print('Something Went Wrong While posting the image or video')
if __name__== "__main__":
#turn for credentials, driver, and caption
username = input('username : ')
password = input('pass : ')
img_path = 'pic1.jpg'
hash_idea = 'covid'
caption = 'test' # if you want to
caption = caption + '\n' + hashtags(hash_idea)
driver = webdriver.Firefox(executable_path="C:/Users/Asus/Downloads/Compressed/geckodriver-v0.32.0-win64/geckodriver.exe")
login(username,password)
upload(img_path,caption)
This is the new code (String problem fixed but 'post' button is not working) :
#libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
from time import sleep
import pyautogui
#fetching hashtags
def hashtags(hash_idea):
url = 'http://best-hashtags.com/hashtag/' + hash_idea
try:
req = Request(url, headers={'User-Agent' : 'Mozilla/5.0'})
page = urlopen(req, timeout=10)
page_html = page.read()
page.close()
page_soup = soup(page_html, 'html.parser')
result = page_soup.find('div',{'class':'tag-box tag-box-v3 margin-bottom-40'})
tags = result.decode()
start_index = tags.find('#')
end_index = tags.find('</p1>')
tags = tags[start_index:end_index]
return tags
except:
print('Something went wrong While Fetching hashtags')
def login(username, password):
try:
url = 'https://facebook.com'
driver.get(url)
user = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'email')))
user.send_keys(username)
pas = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'pass')))
pas.send_keys(password)
login_btn = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME,'login')))
login_btn.click()
except:
print('Something went wrong while login process')
def upload(img_path,caption):
try:
btn1 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[3]/div/div[2]/div/div/div/div[1]/div/div[1]')))
btn1.click()
btn2= WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[1]/div[2]/div/div[1]/div/span/div/div/div[1]/div/div/div[1]/i')))
btn2.click()
btn3 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/div/div/div[1]/div/i')))
btn3.click()
pyautogui.write(img_path)
pyautogui.press('enter')
actions = ActionChains(driver)
cap = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/div/div/div[1]')))
actions.move_to_element(cap)
actions.click()
actions.send_keys(caption).perform()
sleep(5) # this is mandatory while doing some thing with bot
btn_post = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[2]/div/div/div[1]/div')))
btn_post.click()
except:
print('Something Went Wrong While posting the image or video')
if __name__== "__main__":
#turn for credentials, driver, and caption
username = input('username : ')
password = input('pass : ')
img_path = 'pic1.jpg'
hash_idea = 'covid'
caption = 'test' # if you want to
caption = caption + '\n' + hashtags(hash_idea)
driver = webdriver.Firefox(executable_path="C:/Users/Asus/Downloads/Compressed/geckodriver-v0.32.0-win64/geckodriver.exe")
login(username,password)
upload(img_path,caption)
I wanted to automate the post with the text I provided in the code. I've tried a number of different approaches, but the post button is still not functioning properly. When I execute the script, it gives the error message "Something Went Wrong While posting the image or video." Any helpful suggestion or response would be greatly appreciated.

How to solve invisible captcha on Selenium

Hi I am trying to write a sign-up bot for UEFA.com using Selenium as requests I find to be too difficult for me to try so I am just working on automating the sign-up process even if it is a lot slower.
I am able to get to the final stage where I click on Create an Account, but faced with a reCaptcha which only appears after clicking on Create an Account. And after solving the captcha there is no 'Submit' button but it will automatically submit the details for you.
I am able to get the captcha token returned from 2captcha solving service, and inputted it into the innerHTML of the g-response-token field using javascript. However I do not know how to submit the captcha and the form.
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
import pyautogui
from twocaptcha import TwoCaptcha
import random
import os
from random import randint
import sys
firstnames = []
lastnames = []
API_Key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
# Open Names File
with open('firstnames.txt', 'r') as f:
for name in f:
name = name.strip()
firstnames.append(name)
with open('lastnames.txt', 'r') as e:
for name in e:
name = name.strip()
lastnames.append(name)
with open('proxies.txt') as f:
proxy = f.readlines()
proxy_rand = randint(1, 35)
s_proxy = str(proxy[proxy_rand])
p_strip = s_proxy.rstrip()
# Proxy Input and Format
bare_proxy = p_strip.split(':')
username = bare_proxy[2]
password = bare_proxy[3]
ip = bare_proxy[0]
port = bare_proxy[1]
options = {
'proxy': {
'http': f'http://{username}:{password}#{ip}:{port}',
'https': f'https://{username}:{password}#{ip}:{port}',
'no_proxy': 'localhost,127.0.0.1'
}
}
os.environ['PATH'] += 'C:/SeleniumDrivers'
homepage_URL = 'https://www.uefa.com/tickets/'
driver = webdriver.Chrome(seleniumwire_options=options)
driver.get(homepage_URL)
# Accessing Register Page
reject_cookies = driver.find_element(By.ID, 'onetrust-reject-all-handler')
reject_cookies.click()
time.sleep(1)
login_button = driver.find_element(By.CSS_SELECTOR, "a[class='btn btn-secondary tickets__btn js-tracking-card']")
login_button.click()
time.sleep(10)
create_account = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[4]/a')
create_account.click()
time.sleep(10)
# Inputting SignUp Details
letters = 'abcdefghijklmnopqrstuvwxyz'
a = random.choice(letters)
b = random.choice(letters)
c = random.choice(letters)
d = random.choice(letters)
email = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[6]/input')
email.send_keys(f'{a}{b}{c}{d}#nottingham.pro')
time.sleep(2)
password = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[7]/input')
password.send_keys('19741002Rw!')
time.sleep(2)
first_name = driver.find_element(By.XPATH, '//*[#id="gigya-textbox-130722358975432270"]')
first_range = len(firstnames) - 1
random_first = randint(1, first_range)
f_name = firstnames[random_first]
first_name.send_keys(f'{f_name}')
time.sleep(2)
last_name = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[9]/input')
last_range = len(lastnames) - 1
random_last = randint(1, first_range)
l_name = lastnames[random_last]
last_name.send_keys(f'{l_name}')
time.sleep(2)
day_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[1]/input')
day = randint(1, 28)
day_of_birth.send_keys(f'{day}')
time.sleep(2)
month_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[2]/input')
month = randint(1, 12)
month_of_birth.send_keys(f'{month}')
time.sleep(2)
year_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[3]/input')
year = randint(1940, 2000)
year_of_birth.send_keys(f'{year}')
driver.execute_script("window.scrollTo(0, 500)")
time.sleep(2)
pyautogui.moveTo(353, 619)
time.sleep(2)
pyautogui.click()
time.sleep(5)
current_url = driver.current_url
print(current_url)
g_key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
def SolveCaptcha():
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
api_key = os.getenv(g_key, 'a733edea49a8327795d56edc9f06d391')
solver = TwoCaptcha(api_key)
try:
result = solver.recaptcha(
sitekey=g_key,
url=current_url)
except Exception as e:
print(e)
else:
return result
result = SolveCaptcha()
code = result['code']
print(code)
token = f'document.getElementById("g-recaptcha-response").innerHTML="{code}";'
driver.execute_script(token)
time.sleep(10000)
As you can see by the end of the code I have managed to input the captcha token but not sure how to submit as there is no submit button
I have tried to look for a callback function but can't seem to find it when I inspect the page.
submit the first form on the page:
driver.execute_script('document.forms[0].submit()')

How to stop selenium scraper from redirecting to another internal weblink of the scraped website?

Was wondering if anyone knows of a way for instructing a selenium script to avoid visiting/redirecting to an internal page that wasn't part of the code. Essentially, my code opens up this page:
https://cryptwerk.com/companies/?coins=1,6,11,2,3,8,17,7,13,4,25,29,24,32,9,38,15,30,43,42,41,12,40,44,20
keeps clicking on show more button until there's none (at end of page) - which by then - it should have collected the links of all the products listed on the page it scrolled through till the end, then visit each one respectively.
What happens instead, it successfully clicks on show more till the end of the page, but then it visits this weird promotion page of the same website instead of following each of the gathered links respectively and then scraping further data points located off each of those newly opened ones.
In a nutshell, would incredibly appreciate it if someone can explain how to avoid this automated redirection on its own! And this is the code in case someone can gratefully nudge me in the right direction :)
from selenium.webdriver import Chrome
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException
import json
import selenium.common.exceptions as exception
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
webdriver = '/Users/karimnabil/projects/selenium_js/chromedriver-1'
driver = Chrome(webdriver)
driver.implicitly_wait(5)
url = 'https://cryptwerk.com/companies/?coins=1,6,11,2,3,8,17,7,13,4,25,29,24,32,9,38,15,30,43,42,41,12,40,44,20'
driver.get(url)
links_list = []
coins_list = []
all_names = []
all_cryptos = []
all_links = []
all_twitter = []
all_locations = []
all_categories = []
all_categories2 = []
wait = WebDriverWait(driver, 2)
sign_in = driver.find_element_by_xpath("//li[#class='nav-item nav-guest']/a")
sign_in.click()
time.sleep(2)
user_name = wait.until(EC.presence_of_element_located((By.XPATH, "//input[#name='login']")))
user_name.send_keys("karimnsaber95#gmail.com")
password = wait.until(EC.presence_of_element_located((By.XPATH, "//input[#name='password']")))
password.send_keys("PleomaxCW#2")
signIn_Leave = driver.find_element_by_xpath("//div[#class='form-group text-center']/button")
signIn_Leave.click()
time.sleep(3)
while True:
try:
loadMoreButton = driver.find_element_by_xpath("//button[#class='btn btn-outline-primary']")
time.sleep(2)
loadMoreButton.click()
time.sleep(2)
except exception.StaleElementReferenceException:
print('stale element')
break
print('no more elements to show')
try:
company_links = driver.find_elements_by_xpath("//div[#class='companies-list items-infinity']/div[position() > 3]/div[#class='media-body']/div[#class='title']/a")
for link in company_links:
links_list.append(link.get_attribute('href'))
except:
pass
try:
with open("links_list.json", "w") as f:
json.dump(links_list, f)
with open("links_list.json", "r") as f:
links_list = json.load(f)
except:
pass
try:
for link in links_list:
driver.get(link)
name = driver.find_element_by_xpath("//div[#class='title']/h1").text
try:
show_more_coins = driver.find_element_by_xpath("//a[#data-original-title='Show more']")
show_more_coins.click()
time.sleep(1)
except:
pass
try:
categories = driver.find_elements_by_xpath("//div[contains(#class, 'categories-list')]/a")
categories_list = []
for category in categories:
categories_list.append(category.text)
except:
pass
try:
top_page_categories = driver.find_elements_by_xpath("//ol[#class='breadcrumb']/li/a")
top_page_categories_list = []
for category in top_page_categories:
top_page_categories_list.append(category.text)
except:
pass
coins_links = driver.find_elements_by_xpath("//div[contains(#class, 'company-coins')]/a")
all_coins = []
for coin in coins_links:
all_coins.append(coin.get_attribute('href'))
try:
location = driver.find_element_by_xpath("//div[#class='addresses mt-3']/div/div/div/div/a").text
except:
pass
try:
twitter = driver.find_element_by_xpath("//div[#class='links mt-2']/a[2]").get_attribute('href')
except:
pass
try:
print('-----------')
print('Company name is: {}'.format(name))
print('Potential Categories are: {}'.format(categories_list))
print('Potential top page categories are: {}'.format(top_page_categories_list))
print('Supporting Crypto is:{}'.format(all_coins))
print('Registered location is: {}'.format(location))
print('Company twitter profile is: {}'.format(twitter))
time.sleep(1)
except:
pass
all_names.append(name)
all_categories.append(categories_list)
all_categories2.append(top_page_categories_list)
all_cryptos.append(all_coins)
all_twitter.append(twitter)
all_locations.append(location)
except:
pass
df = pd.DataFrame(list(zip(all_names, all_categories, all_categories2, all_cryptos, all_twitter, all_locations)), columns=['Company name', 'Categories1', 'Categories2', 'Supporting Crypto', 'Twitter Handle', 'Registered Location'])
CryptoWerk_Data = df.to_csv('CryptoWerk4.csv', index=False)
Redirect calls happen for two reasons, in your case either by executing some javascript code when clicking the last time on the load more button or by receiving an HTTP 3xx code, which is the least likely in your case.
So you need to identify when this javascript code is executed and send an ESC_KEY before it loads and then executing the rest of your script.
You could also scrape the links and append them to your list before clicking the load more button and each time it is clicked, make an if statement the verify the link of the page you're in, if it is that of the promotion page then execute the rest of your code, else click load more.
while page_is_same:
scrape_elements_add_to_list()
click_load_more()
verify_current_page_link()
if current_link_is_same != link_of_scraped_page:
page_is_same = False
# rest of the code here

Youtube Scraping (List of Videos)

On a Youtube channel, I'm trying to get a list of videos listed in the channel. (i.e. link, title, view, etc)
Yet, my code doesn't return any object. Any help will be appreciated!
from bs4 import BeautifulSoup as bs
import requests
from selenium.webdriver import Chrome
import re
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
address = "https://www.youtube.com/channel/UCepWEz3BW6EMKA4CU-yGDMw/videos"
#driver = webdriver.Chrome('./chromedriver')
#driver.get(address)
#driver.maximize_window()
#body = driver.find_element_by_css_selector('body')
#for i in range(250):
# body.send_keys(Keys.PAGE_DOWN)
# time.sleep(1)
r = requests.get(address)
page = r.text
soup=bs(page,'html.parser')
result=soup.find_all('div', attrs={"class": 'videoId'})
print(result)
Try it:
from selenium import webdriver
url = "https://www.youtube.com/channel/UCepWEz3BW6EMKA4CU-yGDMw/videos"
browser = webdriver.Firefox()
browser.get(url)
datas = browser.find_elements_by_css_selector(".ytd-grid-renderer")
result = {"title":[], "link":[], "views":[]}
for data in datas:
try:
title = data.find_element_by_css_selector("#video-title").text
result["title"].append(title)
except:
result["title"].append("")
try:
link = data.find_element_by_css_selector("#video-title").get_attribute("href")
result["link"].append(link)
except:
result["link"].append("")
try:
views = data.find_element_by_css_selector("#metadata-line .ytd-grid-video-renderer:nth-child(1)").text
result["views"].append(views)
except:
result["views"].append("")
# print(result)
browser.close()

if or try loop for an element in a page selenium

I am trying to scrape agents data here. I am able to get the links from the first page. I am using numbered loops because I know the total number of pages. I tried to run this as long as the "next" page option is there. I tried both "try" and "if not" but wasn't able to figure it out. Any help is welcome. Here is the code.
from selenium import webdriver
import time
from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)
links_total = []
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")
def first_links():
initial_data = driver.find_elements_by_tag_name('td')
for initial in initial_data:
page_links = initial.find_elements_by_tag_name('a')
for page in page_links:
page_link = page.get_attribute("href")
links_total.append(page_link)
driver.refresh()
if driver.find_element_by_partial_link_text('next'):
next_page = driver.find_element_by_partial_link_text('next')
next_page.click()
time.sleep(2)
new_data = driver.find_elements_by_tag_name('td')
for new in new_data:
links = new.find_elements_by_tag_name('a')
for link in links:
new_link = link.get_attribute("href")
links_total.append(new_link)
for i in range(1, 23):
first_links()
for link in links_total:
print(link)
Try-catch would be better option
from selenium import webdriver
import time
from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)
driver.implicitly_wait(10)
# links_total = []
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")
def first_links(links_total=[]):
initial_data = driver.find_elements_by_tag_name('td')
for initial in initial_data:
page_links = initial.find_elements_by_tag_name('a')
for page in page_links:
page_link = page.get_attribute("href")
links_total.append(page_link)
# driver.refresh()
try:
next_page = driver.find_element_by_partial_link_text('next')
next_page.click()
time.sleep(2)
first_links(links_total)
except (TimeoutError, ElementNotVisibleException, NoSuchElementException):
print("NEXT btn not found : ")
pass
return links_total
all_links = first_links()
for link in all_links:
print(link)
You don't need to use Selenium actually. You could do it with BeautifulSoap like so :
import requests
from bs4 import BeautifulSoup
page_num=0
url_cbp = r"https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=&page={}"
def get_links(links_total=[], page_num=0):
page = requests.get(url_cbp.format(page_num))
soup = BeautifulSoup(page.content, 'html.parser')
results = soup.find(id='region-content')
table_cells = results.find_all('td', class_='views-field')
for cell in table_cells:
# print(cell )
# print('\n\n')
cell_link = cell.find('a')
page_link = cell_link["href"]
links_total.append(page_link)
next_page = results.find('li', class_='pager-next')
if next_page:
page_num += 1
get_links(links_total, page_num)
return links_total
all_links = get_links()
for link in all_links:
print(link)

Categories

Resources