Currently, I am using this code to save a downloaded file but it is placing them in the same folder where it is being run from.
# Fetch the URL and dump the raw response body into the working directory.
response = requests.get(url)
with open('file_name.pdf', 'wb') as pdf_out:
    pdf_out.write(response.content)
How would I save the downloaded file to another directory of my choice?
Or if in Linux, try:
# Saving to an absolute destination path.
response = requests.get(url)
with open('/path/I/want/to/save/file/to/file_name.pdf', 'wb') as out:
    out.write(response.content)
# Saving to a relative path (resolved against the current working directory).
response = requests.get(url)
with open('folder1/folder2/file_name.pdf', 'wb') as out:
    out.write(response.content)
See open() function docs for more details.
You can just give open a full file path or a relative file path
# The raw string (r'...') keeps the Windows backslashes literal.
response = requests.get(url)
with open(r'C:\path\to\save\file_name.pdf', 'wb') as handle:
    handle.write(response.content)
As long as you have access to the directory, you can simply change 'file_name.pdf' to '/path_to_directory_you_want_to_save/file_name.pdf' and that should do what you want.
Here is a quicker solution:
# Fetch the file and write it to the target path.  A context manager is used
# so the file handle is closed (and the bytes flushed) even if the write
# raises — the one-line open(...).write(...) form leaks the handle and
# relies on garbage collection to close it.
r = requests.get(url)
with open('/path/to/directory/file_name.pdf', 'wb') as f:
    f.write(r.content)
Related
I am using the requests library to download a file from a URL. This is my code
# Scan every anchor tag on the page; any whose markup mentions ".zip" is
# treated as a downloadable archive link.
for tag in soup.find_all('a'):
    if '.zip' in str(tag):
        # Pull the visible link text out of the raw tag markup,
        # e.g. "<a href=...>name.zip</a>" -> "name.zip".
        file_name = str(tag).strip().split('>')[-2].split('<')[0]
        # hrefs appear to be relative; link_name is presumably the page's
        # base URL — TODO confirm against the caller.
        link = link_name+tag.get('href')
        r = requests.get(link, stream=True)
        # Stream the body to disk in 1 KiB chunks instead of buffering the
        # whole archive in memory.
        with open(os.path.join(download_path, file_name), 'wb') as fd:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    fd.write(chunk)
And then I unzip the file using this code
# Derive the extraction directory from the archive name.  os.path.splitext
# strips only the final extension, so dots inside the base name survive —
# the original file_name.split('.')[0] would truncate "my.data.zip" to "my".
unzip_path = os.path.join(download_path, os.path.splitext(file_name)[0])
# ZipFile as a context manager guarantees the archive is closed after
# extraction, even if extractall raises (e.g. BadZipFile on a short read).
with zipfile.ZipFile(os.path.join(download_path, file_name), 'r') as zip_ref:
    zip_ref.extractall(unzip_path)
This code looks if there is a zip file in the provided page and then downloads the zipped file in a directory. Then it will unzip the file using the zipFile library.
The problem with this code is that sometimes the download is not complete. So, for example, if the zipped file is 312 KB long, only part of it is downloaded, and then I get a BadZipFile error. But sometimes the entire file is downloaded correctly.
I tried the same without streaming and even that results in the same problem.
How do I check whether all the chunks are downloaded properly?
Maybe this works:
# Non-streaming variant: buffer the entire body in memory, then write once.
resp = requests.get(link)
with open(os.path.join(download_path, file_name), 'wb') as out_file:
    out_file.write(resp.content)
I'm trying to download PDFs from a list of re-directed URLs. I want to go through a list and download the PDFs into my local drive.
The code that I have so far:
# NOTE(review): local_filename is a fixed directory-like path with no file
# name and never changes per iteration, so every response is written to the
# same path — this is the defect the question is about.
for i in announcement_url:
    local_filename = r'C:\Users\jorda\Documents\University\DataScience\PDFDataScraper'
    # stream=True plus r.raw copies the undecoded byte stream straight to disk.
    with requests.get(i, stream=True, allow_redirects =True) as r:
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        print(i)
I was under the impression that this would download on to my local drive?
You can directly save the file to a given path (no copy required):
import os

# Save each PDF under its own numbered name.  os.path.join supplies the path
# separator that plain string concatenation silently dropped (the original
# produced "path_to_local_directorypdffilename_0.pdf" in the working
# directory), and the with-block guarantees the output file is closed.
for i, url in enumerate(announcement_url):
    local_dir = "path_to_local_directory"
    local_filename = os.path.join(local_dir, f"pdffilename_{i}.pdf")
    with requests.get(url, allow_redirects =True) as r:  # get the url
        print(url)
        with open(local_filename, 'wb') as f:
            f.write(r.content)  # save content with a filename
Currently I'm facing the following problem:
I have 3 download links in a list. Only the last file in the list is downloaded completely.
The others have a file size of one kilobyte.
Code:
from requests import get


def download(url, filename):
    """Fetch *url* and write the response body to *filename*."""
    # stream=True defers the body download, but .content below still reads
    # the whole body into memory before writing.
    with open(filename, "wb") as file:
        response = get(url, stream=True)
        file.write(response.content)


# f is presumably the open links file (see the full example below) — TODO confirm.
for link in f:
    url = link
    split_url = url.split("/")
    filename = split_url[-1]
    filename = filename.replace("\n", "")
    # NOTE(review): the trailing "\n" is stripped from filename but NOT from
    # url, so the request goes to ".../20MB.zip\n" — this is the bug the
    # asker later fixed with url.replace("\n", "").
    download(url,filename)
The result looks like this:
Result
How do I make sure that all files are downloaded correctly?
All links are direct download links.
Thanks in advance!
EDIT:
I discovered it only happens when I read the links from the .txt
If I create the list in python like this:
links = ["http://ipv4.download.thinkbroadband.com/20MB.zip",
"http://ipv4.download.thinkbroadband.com/10MB.zip",
"http://ipv4.download.thinkbroadband.com/5MB.zip"]
... the problem doesn't appear.
Reproducible example:
from requests import get


def download(url, filename):
    """Download *url* to *filename* (whole body buffered in memory)."""
    with open(filename, "wb") as file:
        response = get(url, stream = True)
        file.write(response.content)


# Iterating an open text file yields each line WITH its trailing newline.
f = open('links.txt','r')
for link in f:
    url = link
    split_url = url.split("/")
    filename = split_url[-1]
    # The newline is removed from the filename only; url still ends in "\n",
    # which is why downloads read from the file come back truncated (the
    # fix the asker found was url.replace("\n", "")).
    filename = filename.replace("\n", "")
    download(url,filename)
content of links.txt:
http://ipv4.download.thinkbroadband.com/20MB.zip
http://ipv4.download.thinkbroadband.com/10MB.zip
http://ipv4.download.thinkbroadband.com/5MB.zip
# Strip the trailing newline that iterating the text file left on the URL.
url = url.replace("\n", "")
solved it!
I have 1k of image urls in a csv file and I am trying to download all the images from the urls. I don't know why I am not able to download all the images. Here is my code:
print('Beginning file download with requests')
# Destination directory for the images — assumed to already exist, TODO confirm.
path = '/home/tt/image_scrap/image2'
# tqdm wraps the iterator purely to show a progress bar.
for idx, url in tqdm(enumerate(dataset['url']), total=len(dataset['url'])):
    response = requests.get(url,stream=True)
    # Crude rate limit between requests.
    time.sleep(2)
    # Use the last URL path segment as the local file name.
    filename = url.split("/")[-1]
    with open(path+'/'+filename, 'wb') as f:
        f.write(response.content)
Try/except statements are really good for these types of errors:
Try this:
try:
    with open(path+'/'+filename, 'wb') as f:
        f.write(response.content)
except Exception as error:
    # Report the failure and keep going instead of aborting the whole loop.
    # NOTE(review): catching bare Exception hides the failure cause; catching
    # OSError (and requests' exceptions around the get) would be narrower.
    print(error)
I've a lot of URL with file types .docx and .pdf I want to run a python script that downloads them from the URL and saves it in a folder. Here is what I've done for a single file I'll add them to a for loop:
# Retrieve the document, then persist the body bytes locally.
response = requests.get('http://wbesite.com/Motivation-Letter.docx')
payload = response.content
with open("my_file.docx", 'wb') as out:
    out.write(payload)
but the my_file.docx that it is saving is only 266 bytes and is corrupt but the URL is fine.
UPDATE:
Added this code and it works but I want to save it in a new folder.
import os
import shutil
import requests
def download_file(url, folder_name):
    """Download *url* into *folder_name* and return the local file name.

    The file name is taken from the last path segment of the URL.
    """
    local_filename = url.split('/')[-1]
    # Join folder and file name properly.  The original
    # os.path.join("/{}/{}".format(...)) passed ONE pre-built string, so the
    # join did nothing, and the leading "/" forced the path to the
    # filesystem root (and broke on Windows).
    path = os.path.join(folder_name, local_filename)
    # Create the destination folder on demand so open() below cannot fail
    # with FileNotFoundError.
    os.makedirs(folder_name, exist_ok=True)
    with requests.get(url, stream=True) as r:
        # Copy the raw (undecoded) response stream straight to disk.
        with open(path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    return local_filename
Try using stream option:
import os
import requests
def download(url: str, dest_folder: str):
    """Stream *url* into *dest_folder*, creating the folder when absent."""
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)  # create folder if it does not exist

    # Derive a safe local name from the URL's last segment.
    filename = url.split('/')[-1].replace(" ", "_")
    file_path = os.path.join(dest_folder, filename)

    response = requests.get(url, stream=True)
    if not response.ok:  # HTTP status code 4XX/5XX
        print("Download failed: status code {}\n{}".format(response.status_code, response.text))
        return

    print("saving to", os.path.abspath(file_path))
    with open(file_path, 'wb') as out:
        for block in response.iter_content(chunk_size=1024 * 8):
            if not block:
                continue
            out.write(block)
            # Force every chunk through the OS buffers onto disk.
            out.flush()
            os.fsync(out.fileno())
# Example: fetch the letter into ./mydir (created on demand by download()).
download("http://website.com/Motivation-Letter.docx", dest_folder="mydir")
Note that mydir in example above is the name of folder in current working directory. If mydir does not exist script will create it in current working directory and save file in it. Your user must have permissions to create directories and files in current working directory.
You can pass an absolute file path in dest_folder, but check permissions first.
P.S.: avoid asking multiple questions in one post
try:
import urllib.request
# Stdlib one-liner: fetch url and save it to filename in a single call.
urllib.request.urlretrieve(url, filename)