Storing file on a specific path after scraping - python

I want to store my downloaded file in a specific folder after scraping data, and I don't know exactly what to add to my script:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service

# Start Chrome through an explicit chromedriver Service; recent Selenium 4
# releases no longer accept the driver path as a positional argument.
driver = webdriver.Chrome(service=Service("C:/chrome/chromedriver.exe"))
driver.get('http://www.ins.tn/statistiques/90#')

# Wait up to 20 s for the "export to Excel" link to become clickable, then
# click it. XPath addresses attributes with "@" (@class, @id).
export_link = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//div[@class='export']//a[@class='btnexport ' and starts-with(@id, 'btnExporttoExcel')]")
    )
)
export_link.click()
This is the path of the folder: C:\Users\ASUS\Documents\data
Also, how can I change the name of the file every time I scrape data? For example, by adding the time at which I last executed the code.

Selenium has an options object in the webdriver module that you can modify and pass to the driver when instantiating it.
For Chrome, I set up the options object in a function like this, so I can just pass in my destination path and get the modified object back.
def change_download_folder(dest):
    """Build Chrome options that make downloads land in ``dest``.

    Args:
        dest: Path of the folder Chrome should save downloads to. The folder
            (and any missing parent) is created when it does not exist.

    Returns:
        A ``ChromeOptions`` object carrying the download preferences, ready
        to be passed as ``options=`` when instantiating the Chrome driver.
    """
    # Imported locally so the helper is usable on its own.
    import os

    from selenium import webdriver

    # NOTE(review): Chrome is reported to ignore relative download
    # directories, so the path is made absolute — confirm on your setup.
    dest = os.path.abspath(dest)
    os.makedirs(dest, exist_ok=True)

    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs",
        {
            # presumably disables the download popup/prompt — TODO confirm
            "profile.default_content_settings.popups": 0,
            "download.default_directory": dest,
        },
    )
    return options
Then you can call this and pass the return to the driver instantiation:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Build the options that carry the download folder, then hand them to Chrome.
options = change_download_folder("YOUR_FILE_PATH")
# The chromedriver location is supplied through a Service object; the
# `executable_path` keyword is not accepted by recent Selenium 4 releases.
driver = webdriver.Chrome(
    service=Service("PATH_TO_DRIVER_FILE"),
    options=options,
)

Related

Selenium python driver doesn't click or press the key for the button all the times

I'm using selenium to get to YouTube and write something on the search bar and then press the button or press the enter key.
Both clicking or pressing a key does sometimes work, but sometimes it does not.
I tried to wait with WebDriverWait, and I even changed the waiting time from 10 to 20 seconds, but it didn't make any difference.
And if I add anything (like printing the new page title), it only shows me the first page title and not the title after the search.
Here is my code and what I tried:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def get_driver():
    """Create a Firefox WebDriver with a 9-second implicit wait.

    Returns:
        The started ``webdriver.Firefox`` instance.
    """
    # Local import: the geckodriver path is passed through a Service object,
    # because recent Selenium 4 releases no longer accept `executable_path`.
    from selenium.webdriver.firefox.service import Service

    firefox_options = Options()
    # firefox_options.add_argument("--headless")
    gecko_service = Service(r"C:\Program Files\Mozilla Firefox\geckodriver.exe")
    driver = webdriver.Firefox(service=gecko_service, options=firefox_options)
    driver.implicitly_wait(9)
    return driver
driver = get_driver()
driver.get('https://www.youtube.com/')
# XPath attribute tests are written with "@", e.g. //input[@id="search"].
search = driver.find_element(By.XPATH, '//input[@id="search"]')
search.send_keys("python")
# search.send_keys(Keys.ENTER)  # using the Enter key; works only if nothing is added after this line
# searchbutton = driver.find_element(By.XPATH, '//*[@id="search-icon-legacy"]')  # this also doesn't work
# searchbutton.click()  # using the click() method; also does not work
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search-icon-legacy"]'))).click()  # sometimes works
# driver.implicitly_wait(10)
# print(driver.title)  # this shows only the title of the first page, not the one after the search
Is it because I use the Firefox webdriver (should I change to Chrome)?
Or is it because of my internet connection?
To make this work, you need to click the search input field first, then add a short delay, and then either send Keys.ENTER or click the search-icon-legacy element.
So this is not your fault; this is how the YouTube webpage works. You may even call it a kind of bug. But since this webpage is built for human users, it works fine for them: a human will never click on the input field and insert the search value within zero time.
Anyway, the 2 following codes are working:
First.
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
options.add_argument('--disable-notifications')
# Raw string: the backslashes of the Windows path are taken literally and no
# invalid-escape warning ("\w", "\c") is raised.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.youtube.com/"
driver.get(url)
# Click into the search box first and pause briefly before typing; typing
# immediately is what makes the search unreliable on this page.
search = wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@id="search"]')))
search.click()
time.sleep(0.2)
search.send_keys("python")
# Submit the search by clicking the search button.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search-icon-legacy"]'))).click()
Second.
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
options.add_argument('--disable-notifications')
# Raw string: the backslashes of the Windows path are taken literally and no
# invalid-escape warning ("\w", "\c") is raised.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.youtube.com/"
driver.get(url)
# Click into the search box first and pause briefly before typing; typing
# immediately is what makes the search unreliable on this page.
search = wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@id="search"]')))
search.click()
time.sleep(0.2)
# Type the query and submit it with Enter in a single call.
search.send_keys("python" + Keys.ENTER)

Didn't able to locate send files button

I am trying to locate a button that uploads a file, and then get the output result by clicking the button on the page itself. I know how to upload a file with send_keys.
The website is https://huggingface.co/spaces/vaibhavsharda/semantic_clustering
My code is
import csv
import time
from selenium import webdriver
import chromedriver_autoinstaller
import datetime
from bs4 import BeautifulSoup
from selenium.webdriver import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Check whether the current version of chromedriver exists; if it does not,
# it is downloaded automatically and then added to PATH.
chromedriver_autoinstaller.install()

driver = webdriver.Chrome()
count, entire_data = 0, []

driver.get("https://huggingface.co/spaces/vaibhavsharda/semantic_clustering")
driver.maximize_window()
time.sleep(10)  # fixed pause before any element lookup

# The element lookup below is the statement where the error is raised.
upload_locator = (By.CSS_SELECTOR, '.exg6vvm15 .edgvbvh9')
s = driver.find_element(*upload_locator)
s.send_keys("small_test.txt")
I am trying to locate an element with Selenium. I don't know whether it fails to load or whether something else is wrong, but I just want to locate the "Browse Files" button. Feel free to ask me anything.
The element you are trying to click is inside an iframe, so you first need to switch into the iframe in order to access that element.
The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes of the Windows path are taken literally and no
# invalid-escape warning ("\w", "\c") is raised.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)
url = "https://huggingface.co/spaces/vaibhavsharda/semantic_clustering"
driver.get(url)
# The target button lives inside an iframe: switch into it before locating.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[title]")))
# XPath attributes are addressed with "@": an enabled button of kind "primary".
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@kind='primary'][not(@disabled)]"))).click()
When finished don't forget to switch to the default content with:
# Switch back from the iframe to the page's default content.
driver.switch_to.default_content()
UPD
Uploading a file with Selenium is done by sending the path of the file to a special element. This is not the element you click as a user in the GUI to upload files. The element that actually receives uploaded files normally matches this XPath: //input[@type='file']
This is the fully working code - I tried this on my PC uploading some text file.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes of the Windows path are taken literally and no
# invalid-escape warning ("\w", "\c") is raised.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)
url = "https://huggingface.co/spaces/vaibhavsharda/semantic_clustering"
driver.get(url)
# The upload widget lives inside an iframe: switch into it before locating.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[title]")))
# Send the file path to the <input type="file"> element; presence (not
# clickability) is awaited before the path is sent.
file_input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input[type='file']")))
file_input.send_keys("C:/project_name/.gitignore")

cannot click element in python via selenium as a variable

I want to save element data to an Excel file via Python. I have the code below, and I need some help understanding why the line
element.click()
gives an error. I already call the click() method on the line above, but I need the call to be on the line below.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service

# The chromedriver location is supplied through a Service object; recent
# Selenium 4 releases no longer accept the path as a positional argument.
driver = webdriver.Chrome(
    service=Service(r"C:\Users\Admin\Downloads\chromedriver_win32 (1)\chromedriver.exe")
)
try:
    driver.get("https://www.nba.com/schedule?pd=false&region=1")
    driver.implicitly_wait(30)
    # find_element(...).click() returns None, so keep the element itself in
    # the variable and click it in a separate statement.
    element_to_click = driver.find_element(By.ID, "onetrust-accept-btn-handler")
    element_to_click.click()
    element_to_save = driver.find_element(By.XPATH, "//div/div/div/div/h4")
    # element_to_save.to_excel("3row,3column)")
finally:
    # Close the browser even when one of the lookups above fails.
    driver.quit()
This is one way to reject/accept cookies on that website:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver")  ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(browser, 20)
url = 'https://www.nba.com/schedule?pd=false&region=1'
browser.get(url)
try:
    # Only the wait is guarded: a timeout means the cookie button never
    # became clickable within 20 s.
    cookie_button = wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler")))
except TimeoutException:
    print('no cookie button!')
else:
    cookie_button.click()
    print('accepted cookies')
Setup is selenium/chrome on linux - just observe the imports and the part after defining the browser/driver.
Selenium documentation can be found at https://www.selenium.dev/documentation/

My Selenium webdriver doesn’t work on cTrader

Why is my Selenium webdriver not working?
I would like to log in automatically on https://ct.spotware.com/. But Selenium can't find the HTML class for the login box.
For this, I wrote this little script:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Start Chrome; the driver path "./chromedriver" is passed positionally.
driver = webdriver.Chrome("./chromedriver")
driver.get("https://ct.spotware.com/")
# Fixed 10-second pause before the element lookup.
time.sleep(10)
# NOTE(review): the value given to By.CLASS_NAME is a space-separated list of
# many class names; this lookup is the call that raises NoSuchElementException.
Login = driver.find_element(By.CLASS_NAME,"_a _b _gc _gw _dq _dx _gd _cw _em _cy _gx _fu _gy _fv _fy _fw _fx _db _ge _gf _gz _gg _gh _gi _gj _gk _gl _gm _gn")
Ctrader HTM class reference
The error message is:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"._a _b _gc _gw _dq _dx _gd _cw _em _cy _gx _fu _gy _fv _fy _fw _fx _db _ge _gf _gz _gg _gh _gi _gj _gk _gl _gm _gn"}
Somehow the whole site doesn't work with Selenium. On other sites, like Wikipedia, my script works perfectly. Just not on cTrader.
Is there a solution?
There are several issues here:
All these class name values _a _b _gc _gw _dq _dx _gd _cw _em _cy _gx _fu _gy _fv _fy _fw _fx _db _ge _gf _gz _gg _gh _gi _gj _gk _gl _gm _gn are multiple separate class names. To use them together you need a CSS selector or an XPath.
The sequence of all the above class names looks fragile. You should use another, more stable and clearer locator.
Instead of a hardcoded sleep you should use WebDriverWait explicit waits.
You need to close the cookies banner.
And enter the user name and password.
Anyway, the code below clicks the login button itself.
Please see the code below:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("--start-maximized")
# Raw string: the backslashes of the Windows path are taken literally and no
# invalid-escape warning ("\w", "\c") is raised.
s = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=s)
wait = WebDriverWait(driver, 20)
driver.get("https://ct.spotware.com/")
# Wait (up to 20 s) for the submit button to become clickable, then click it.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
The spaces in your class name are not handled by Selenium. The following may help.
# Every class is chained with a leading "." and no spaces, so the selector
# requires all of them on the same element (a space would mean "descendant").
Login = driver.find_element(By.CSS_SELECTOR, "._a._b._gc._gw._dq._dx._gd._cw._em._cy._gx._fu._gy._fv._fy._fw._fx._db._ge._gf._gz._gg._gh._gi._gj._gk._gl._gm._gn")
However, upon examining your site, I'd recommend using a CSS selector such as this:
'input[placeholder="Enter your email or cTrader ID"]'
This is one way to correctly select the elements and login:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time as t
import pandas as pd
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
# chrome_options.add_argument("--headless")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1920,1080")
webdriver_service = Service("chromedriver/chromedriver")  ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
# No ActionChains object is created here: the name is never imported by this
# snippet and no action chain is used below.
wait = WebDriverWait(browser, 20)
url = 'https://ct.spotware.com/'
browser.get(url)
# Locate the fields by their placeholder text and the button by its label,
# waiting up to 20 s for each to become clickable.
login_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[placeholder="Enter your email or cTrader ID"]')))
pass_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[placeholder="Enter your password"]')))
submit_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//button[text() = "Log In"]')))
login_field.send_keys('username')
pass_field.send_keys('bad_pass')
submit_button.click()
print('clicked')
Selenium documentation can be found at https://www.selenium.dev/documentation/

Python Automatic browser without webdriver

I have this code. It opens Chrome, but it doesn't continue with the rest of the code, and I don't really know how to fix the issue. I do NOT want it to open the Selenium WebDriver browser; I want it to open my own local Chrome installation, and at the same time I want the script to read elements and print their values.
import names, time, random
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
# Opens Chrome on the sign-up URL below and types a generated first name into
# the "firstName" field. The snippet ends after that step.
# NOTE(review): indentation was lost in this paste, so the extent of the
# function body and of the `while True:` loop is not visible — TODO confirm.
def AccGen():
while True:
# *************Static***************
prefs = {"profile.managed_default_content_settings.images": 1}
options = Options()
# options.add_argument('--disable-gpu')
# options.add_argument("--disable-extensions")
# options.add_argument('--disable-notifications')
options.add_experimental_option("prefs", prefs)
options.add_argument("--window-size=1600,900")
# NOTE(review): executable_path is given Chrome.exe (the browser binary), whereas
# the other snippets on this page pass a chromedriver executable — verify.
browser = webdriver.Chrome(executable_path='C:/Users/Jonathan/AppData/Local/Google/Chrome/Application/Chrome.exe',chrome_options=options)
browser.implicitly_wait(10)
# ------------------------------------
# Access to site
browser.get(
"https://accounts.google.com/SignUp?service=mail&continue=https%3A%2F%2Fmail.google.com%2Fmail%2F&ltmpl=default"
)
###################################################################
# Generated name parts; the email local part is "<first>.<last><4-digit number>".
firstName = names.get_first_name()
lastName = names.get_last_name()
email = '{}.{}{}'.format(firstName, lastName, random.randint(1000, 9999))
password = '2001jl00'
###################################################################
# Write in random Name
# NOTE(review): the XPath below uses "#id"; XPath attribute syntax is "@id" — confirm against the original post.
WebDriverWait(browser, 20).until(
EC.visibility_of_element_located(
(By.XPATH, '//*[#id="firstName"]'))).send_keys(firstName)
https://mystb.in/vevivuneku.coffeescript

Categories

Resources