Downloading images at a particular location in selenium python - python

I am trying to download images using selenium but I don't know how to direct those files at a desired location. Can anyone tell me how to do this?

Use this code to set desire download location in Selenium-Python bindings :
executable_path = r"C:\\Selenium+Python\\chromedriver.exe"
options = webdriver.ChromeOptions()
prefs = {'download.default_directory' : '/path/to/dir'}
options.add_experimental_option('prefs', prefs)
driver = webdriver.Chrome(executable_path, options=options)
You need to change /path/to/dir to your desired location.

In case you are using a Chrome webdriver you can use these settings:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("download.default_directory=C:/Downloads")
driver = webdriver.Chrome(chrome_options=options)
Here I set it to "C:/Downloads" but you can change it to any other destination.
For the Firefox you can use this:
from selenium import webdriver
profile = webdriver.FirefoxProfile()
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.dir", 'PATH TO DESKTOP')
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/x-gzip")
driver = webdriver.Firefox(firefox_profile=profile)
Where 'PATH TO DESKTOP' is a path on your disk where you want to download your files

as per this post to download using firefox
from selenium import webdriver
profile = webdriver.FirefoxProfile()
path = 'C:\\downloads'
profile.set_preference('browser.download.folderList', 2) # custom location
profile.set_preference('browser.download.manager.showWhenStarting', False)
profile.set_preference('browser.download.dir', path)
profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'image/png', 'image/jpeg')
then select the button to download and click it.
if you only have the link of the image, i'd recommend using something like mechanize or urllib to download the contents
img = driver.find_element_by_xpath(xpath)
src = img.get_attribute('src')
# download the image
req = urllib.urlopen(src)
f = open(filename,'wb')
f.write(req.read())
f.close()

Related

Selenium Chrome: Load profile and change download folder - Python

OS: Win 10
Chrome: 81.0.4044.129
ChromeDriver: 81.0.4044.69
Goal:
Load an existing profile with extensions and preference configured AND specify default download locations.
Purpose:
I want to save images into their respective folders name.
Challenges
If I specify a Chrome profile to be loaded, then I cannot change the default download folder.
Code Snippets:
# Loading profile works!
options = webdriver.ChromeOptions()
options.add_argument(f'user-data-dir={profile_path}')
options.add_argument(f'--profile-directory={profile_name}')
driver = webdriver.Chrome(chrome_options=options)
# Changing default download location works!
options = webdriver.ChromeOptions()
prefs = {"download.default_directory" : "C:/Downloads/Folder A"}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(chrome_options=options)
# This DOESN'T work! Default download location is not changed.
options = webdriver.ChromeOptions()
options.add_argument(f'user-data-dir={profile_path}')
options.add_argument(f'--profile-directory={profile_name}')
prefs = {"download.default_directory" : "C:/Downloads/Folder A"}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(chrome_options=options)
Is it possible to BOTH load the profile and change the default download location before the driver is created?
I believe there isn't a way to both load an existing profile and also to change the default_directory option.
So instead, I used json.loads and json.dump to modify the 'Preference' file before loading the profile.
import json
from selenium import webdriver
# helper to edit 'Preferences' file inside Chrome profile directory.
def set_download_directory(profile_path, profile_name, download_path):
prefs_path = os.path.join(profile_path, profile_name, 'Preferences')
with open(prefs_path, 'r') as f:
prefs_dict = json.loads(f.read())
prefs_dict['download']['default_directory'] = download_path
prefs_dict['savefile']['directory_upgrade'] = True
prefs_dict['download']['directory_upgrade'] = True
with open(prefs_path, 'w') as f:
json.dump(prefs_dict, f)
options = webdriver.ChromeOptions()
set_download_directory(profile_path, profile_name, download_path) # Edit the Preferences first before loading the profile.
options.add_argument(f'user-data-dir={profile_path}')
options.add_argument(f'--profile-directory={profile_name}')
driver = webdriver.Chrome(chrome_options=options)

Click on a file to download Selenium Python

I use Selenium (python) and Firefox portable browser.
My goal is to download a lot of files using selenium (namely through Selenium).
When you click on the link, the file should start downloading, but this window opens.
Tell me, are there any selenium settings to avoid opening such a window?
Try to set preference neverAsk.saveToDisk
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
def example():
opt = Options()
opt.headless = False # Or True
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting", False)
fp.set_preference("browser.download.manager.showAlertOnComplete", False)
fp.set_preference("browser.helperApps.neverAsk.saveToDisk",
"application/vnd.ms-powerpoint")
fp.set_preference("browser.download.dir", "C:\\folder_name\\Downloads")
firefox_browser = webdriver.Firefox(firefox_profile=fp, options=opt)
file type https://www.freeformatter.com/mime-types-list.html

Downloading zip files using Python Selenium

I'm trying to download every country's "administrative areas" from this website: http://www.diva-gis.org/gdata.
I'm new to using the Python Selenium package, but I'm fairly certain the code below should at least download Afghanistan's data.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
profile = webdriver.FirefoxProfile()
profile.set_preference('browser.download.folderList', 2) # custom location
profile.set_preference('browser.download.manager.showWhenStarting', False)
profile.set_preference('browser.download.dir', '/tmp')
profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/csv')
driver = webdriver.Firefox(profile)
driver.get("http://www.diva-gis.org/gdata")
driver.find_element_by_name('OK').click()
driver.find_element_by_link_text('Download').click()
I'm able to open other links on the same page using this method, but I can't open the "Download" link for some reason. No error message is shown after running this code. I did look at similar SO posts, but I still don't know what's wrong.
Please check if below code works. I've replaced your find link by text with css selector. Also, updated text/csv with application/zip.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
profile = webdriver.FirefoxProfile()
profile.set_preference('browser.download.folderList', 2) # custom location
profile.set_preference('browser.download.manager.showWhenStarting', False)
profile.set_preference('browser.download.dir', '/tmp')
profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'application/zip')
driver = webdriver.Firefox(profile)
driver.get("http://www.diva-gis.org/gdata")
driver.find_element_by_name('OK').click()
driver.find_element_by_css_selector("#node-39 > div > div > div > div > a > h2").click()
I recommend it for Firefox
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
options = Options()
options.set_preference("browser.download.folderList",2)
options.set_preference("browser.download.manager.showWhenStarting", False)
options.set_preference("browser.download.dir","/data")
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/octet-stream,application/vnd.ms-excel")
driver = webdriver.Firefox(firefox_options=self.options)

Python Set Firefox Preferences for Selenium--Download Location

I use Selenium Marrionette and GeckoDriver to pull web data. I use the following to set my Firefox profile preferences:
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList", 1)
fp.set_preference("browser.helperApps.alwaysAsk.force", False)
fp.set_preference("browser.download.manager.showWhenStarting",False)
fp.set_preference("browser.download.dir", "H:\Downloads")
fp.set_preference("browser.download.downloadDir","H:\Downloads")
fp.set_preference("browser.download.defaultFolder","H:\Downloads")
binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\Firefox.exe')
firefox_capabilities = DesiredCapabilities.FIREFOX
firefox_capabilities['marionette'] = True
driver = webdriver.Firefox(capabilities=firefox_capabilities, firefox_binary=binary, firefox_profile = fp)
From what I understand after reading Unable to set firefox profile preferences and FirefoxProfile passed to FirefoxDriver, it seems that nothing is being done when using firefox_profile now. So I need to implement the new updates to firefox_capabilities, but I'm not sure how to exactly do that. Any ideas?
Ok, I believe I finally figured this mess out. Instead of using the code above, I used the following code which I point to my Firefox profile folder(if you need to update your default profile settings do that in Firefox before running this code):
from selenium.webdriver.firefox.options import Options
binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\Firefox.exe')
fp = (r'C:\Users\username\AppData\Roaming\Mozilla\Firefox\Profiles\oqmqnsih.default')
opts = Options()
opts.profile = fp
firefox_capabilities = DesiredCapabilities.FIREFOX
firefox_capabilities['marionette'] = True
driver = webdriver.Firefox(capabilities=firefox_capabilities,firefox_binary=binary, firefox_options = opts)
I ran this code along with my web-scraping code and once I clicked the "Export CSV" link, it automatically downloaded as opposed to the Download Manager window popping up. Feel free to add any feedback.
The initial code is partialy correct. You must set browser.download.folderList value as 2 :
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList", 2) # 0 means to download to the desktop, 1 means to download to the default "Downloads" directory, 2 means to use the directory
fp.set_preference("browser.helperApps.alwaysAsk.force", False)
fp.set_preference("browser.download.manager.showWhenStarting",False)
fp.set_preference("browser.download.dir", "H:\Downloads")
binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\Firefox.exe')
firefox_capabilities = DesiredCapabilities.FIREFOX
firefox_capabilities['marionette'] = True
driver = webdriver.Firefox(capabilities=firefox_capabilities,firefox_binary=binary, firefox_profile = fp)
the solution for my python script (on raspi 3):
binary = FirefoxBinary('/usr/bin/firefox')
driver = webdriver.Firefox(capabilities={'browserName': 'firefox' }, firefox_binary=binary)

Selenium pdf automatic download not working

I am new to selenium and I am writing a scraper to download pdf files automatically from a given site.
Below is my code:
from selenium import webdriver
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList",2);
fp.set_preference("browser.download.manager.showWhenStarting",False)
fp.set_preference("browser.download.dir", "/home/jill/Downloads/Dinamalar")
fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
browser = webdriver.Firefox(firefox_profile=fp)
browser.get("http://epaper.dinamalar.com/PUBLICATIONS/DM/MADHURAI/2015/05/26/PagePrint//26_05_2015_001_b2b69fda315301809dda359a6d3d9689.pdf");
webobj = browser.find_element_by_id("download").click();
I followed the steps mentioned in Selenium documentation and in the this link. I am not sure why download dialog box is getting shown every time.
Is there anyway to fix it else can there be a way to give "application/all" so that all the files can be downloaded (work-around)?
Disable the built-in pdfjs plugin and navigate to the URL - the PDF file would be downloaded automatically, the code:
from selenium import webdriver
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting",False)
fp.set_preference("browser.download.dir", "/home/jill/Downloads/Dinamalar")
fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf,application/x-pdf")
fp.set_preference("pdfjs.disabled", "true") # < KEY PART HERE
browser = webdriver.Firefox(firefox_profile=fp)
browser.get("http://epaper.dinamalar.com/PUBLICATIONS/DM/MADHURAI/2015/05/26/PagePrint//26_05_2015_001_b2b69fda315301809dda359a6d3d9689.pdf");
Update (the complete code that worked for me):
from selenium import webdriver
mime_types = "application/pdf,application/vnd.adobe.xfdf,application/vnd.fdf,application/vnd.adobe.xdp+xml"
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting", False)
fp.set_preference("browser.download.dir", "/home/aafanasiev/Downloads")
fp.set_preference("browser.helperApps.neverAsk.saveToDisk", mime_types)
fp.set_preference("plugin.disable_full_page_plugin_for_types", mime_types)
fp.set_preference("pdfjs.disabled", True)
browser = webdriver.Firefox(firefox_profile=fp)
browser.get("http://epaper.dinamalar.com/")
webobj_get_link = browser.find_element_by_id("liSavePdf")
webobj_get_object = webobj_get_link.find_element_by_tag_name("a")
webobj_get_object.click()
I tested the following code and I succesfully downloaded your pdf on Windows 7:
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting", False)
fp.set_preference("browser.download.dir", download_location)
fp.set_preference("plugin.disable_full_page_plugin_for_types", "application/pdf")
fp.set_preference("pdfjs.disabled", True)
fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
driver = webdriver.Firefox(fp)
driver.implicitly_wait(10)
driver.maximize_window()
driver.get("http://epaper.dinamalar.com/")
element = driver.find_element_by_css_selector("li#liSavePdf>a>img")
element.click()
Since there is not HTML code available, my guess is that this line
webobj = browser.find_element_by_id("download").click();
actually calls the onclick event, but you don't handle it properly. In other words, what you're missing is the location where this .pdf file will be stored. I have very little experience with python programming, but one solution could be to use HTTP webclient lib, that will allow you to automatically download files. Something like CSharp's WebClient.DownloadFile Method (String, String). And if used properly, you can skip any Selenium commands for this action.
Maybe something like this post will be a good start.

Categories

Resources