Downloading images from a server - python

I want to download images from a server using Python. For example, the URL is "example.com/photo/1.jpg", and I want to run the same download again and again, each time with a different image name.
here is my code
import wget
# Counter that is meant to become the image number in the URL.
fileurl = 0
# NOTE: the URL string is built once, here, while fileurl is still 0.
url = 'http://example.com/photo/' + str(fileurl) + '.jpg'
# Loop until the counter reaches 500.
while fileurl != 500:
fileurl += 1
# `url` is not rebuilt after the increment, so the same address is requested every time.
download = wget.download(url)

If you build the URL inside the loop, it will change on every iteration. In your current code it is built once, before the loop, so it never changes.
import wget
# Download http://example.com/photo/1.jpg through 500.jpg, one request per image.
# The URL is rebuilt on every iteration so each request targets a new file, and
# the range starts at 1 to match the numbering used in the question.
for fileurl in range(1, 501):
    url = 'http://example.com/photo/' + str(fileurl) + '.jpg'
    try:
        download = wget.download(url)
    except Exception as e:
        # Best effort: report the failure and carry on with the next image.
        print(e)

Related

How to reconnect in requests to continue a download

I have been making this download manager app with tkinter and requests, and I realized that sometimes, if the user is downloading multiple files at the same time, it fails to keep up and all the downloads end without any error. I also tried urllib3 and the standard urllib; the only difference was that urllib raised an error, but it still failed. I want my program to do the following when a download ends:
Firstly check if the file size is less than it is supposed to be
If it is then get the size of that file and make a range header like so: {"Range": f"bytes={current_size}-{file_size}"}
Store the rest of the file in a temp file. After it is downloaded, get the data from both of the files and write it to one (merge the files together)
I used a while loop and a temp-file counter, but when requests can't keep up and execution reaches the while loop, it creates millions of temp files of 197 bytes each and the download still doesn't complete. I also tried a plain `if` statement instead of the loop; that no longer created millions of files, but it still didn't work. Finally, I wrote a separate mock program that simply fetched the rest of the file and merged it into the half-downloaded file, and that worked, but for some reason the same approach doesn't work inside my program. Keep in mind that I don't want to create a thread for each temp file, because it can easily be written in the same thread that is downloading the file. How can I do this? My code (be aware that this function runs in a separate thread):
# Target paths already claimed by a download; entries are appended further down
# once a unique file name has been chosen.
currently_downloading = np.array([], dtype='S')
# Counter used to number the temp files written by the resume loop.
current_temp = 0
# Download the URL typed into `url_entry`, updating the progress widgets as
# chunks arrive, then try to fetch any missing tail with Range requests.
# NOTE(review): this listing is truncated -- the outer `try:` below has no
# matching `except`/`finally` here -- and its indentation was lost.
# NOTE(review): url_entry, name_entry, format_entry, output_entry, num, var and
# chum are presumably Tkinter widgets/variables defined elsewhere; confirm.
def download_files():
global files_downloading, times_clicked, currently_downloading, packed, last_temp, current_temp
try:
# Set to True by cancel_dl(); checked once per chunk in the download loops.
abort = False
win = None
available_num = 0
# Running total of bytes written, used for the progress widgets.
downloaded = 0
url = str(url_entry.get())
try:
# HEAD request only: the headers supply Content-Length and Content-Type.
# NOTE(review): the empty accept-encoding presumably asks for an uncompressed
# length so it matches the bytes written to disk -- confirm.
headers = requests.head(url, headers={'accept-encoding': ''}).headers
except ValueError:
raise InvalidURL()
try:
# NOTE(review): file_size is a float from here on; it is later interpolated
# into the Range header as-is.
# NOTE(review): a missing Content-Length header would presumably raise
# KeyError rather than TypeError -- verify against requests' header mapping.
file_size = float(headers['Content-Length'])
except TypeError:
raise NotDownloadable()
name = ""
formatname = ""
# num == 1: take the name from the last URL path segment, up to its first dot.
if num.get() == 1:
name = url.split("/")[-1].split(".")[0]
else:
# Otherwise use the typed name, rejecting empty names and banned characters.
if name_entry.get().strip() != "":
for char in str(name_entry.get()):
if char in banned_chars:
print("Usage of banned characters")
raise BannedCharsUsage()
else:
name = str(name_entry.get())
else:
raise EmptyName()
# var == 1: derive the extension from the Content-Type subtype.
if var.get() == 1:
formatname = '.' + headers['Content-Type'].split('/')[1]
else:
# A typed extension must start with '.' and be at least three characters long.
if str(format_entry.get())[0] == '.' and len(format_entry.get()) >= 3:
formatname = str(format_entry.get())
else:
raise InvalidFormat()
fullname = str(name) + formatname
# r" \ ".strip() is a single backslash: Windows separators are normalised to "/".
path = (str(output_entry.get()) + "/").replace(r" \ ".strip(), "/")
# chum == 1: use the directory flagged SELECTED_DEF = 1 in DEF_PATH.db instead.
if chum.get() == 1:
conn = sqlite3.connect("DEF_PATH.db")
c = conn.cursor()
c.execute("SELECT * FROM DIRECTORY_LIST WHERE SELECTED_DEF = 1")
crnt_default_path = np.array(c.fetchone())
path = str(crnt_default_path[0] + "/").replace(r" \ ".strip(), "/")
conn.commit()
conn.close()
else:
pass
# Collect every path that is taken: files already in the folder plus paths
# claimed by downloads in progress.
all_files_dir = np.array([], dtype='S')
for file in os.listdir(path):
all_files_dir = np.append(all_files_dir, path + file)
all_files_dir = np.concatenate((all_files_dir, currently_downloading))
# Append " (n)" with increasing n until the target path is unused.
while path + fullname in all_files_dir:
for element in currently_downloading:
if element not in all_files_dir:
all_files_dir = np.append(all_files_dir, element)
available_num += 1
if num.get() == 1:
name = url.split("/")[-1].split(".")[0] + f" ({available_num})"
else:
name = str(name_entry.get()) + f" ({available_num})"
fullname = name + formatname
if path + fullname not in all_files_dir:
currently_downloading = np.append(currently_downloading, path + fullname)
available_num = 0
break
else:
# while/else: the original name was free, so claim it unchanged.
currently_downloading = np.append(currently_downloading, path + fullname)
# Cancel callback: flips the flag that the chunk loops poll.
def cancel_dl():
nonlocal abort
abort = True
start_time = time.time()
try:
# Initial download: stream the body to disk in 4096-byte chunks.
r = requests.get(url, allow_redirects=False, stream=True)
start = last_print = time.time()
with open(path + fullname, 'wb') as fp:
for chunk in r.iter_content(chunk_size=4096):
if abort:
raise AbortException()
downloaded += fp.write(chunk)
# Show the running total in MB above 1,000,000 bytes, otherwise in KB.
if downloaded > 1000000:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000000, 2)} MB")
else:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000, 2)} KB")
pct_done = int(downloaded / file_size * 100)
lbl_percent.config(text=f"{round(pct_done, 2)} %")
download_prg["value"] = pct_done
now = time.time()
# Refresh the speed label at most once per second (average since `start`).
if now - last_print >= 1:
speed_sec = round(downloaded / (now - start))
if speed_sec > 1000000:
lbl_speed.config(text=f"{round(speed_sec / 1000000, 3)} MB/s")
else:
lbl_speed.config(text=f"{round(speed_sec / 1000, 3)} KB/s")
last_print = time.time()
# Resume loop: while the file on disk is shorter than Content-Length, request
# the remainder into a numbered temp file and append it to the main file.
while os.stat(path + fullname).st_size < file_size:
current_temp += 1
# NOTE(review): file_size is a float, so this renders as e.g.
# "bytes=1024-2048.0"; the end of a byte range is also inclusive, so the last
# byte would be file_size - 1. An open-ended "bytes=<start>-" avoids both.
# NOTE(review): the response status is never checked; if the server rejects or
# ignores the header the body is presumably not the missing bytes -- verify
# that the status is 206 before appending.
rng = {"Range": f"bytes={os.stat(path + fullname).st_size}-{file_size}"}
r = requests.get(url, allow_redirects=False, stream=True, headers=rng)
start = last_print = time.time()
with open(f"temp/Temp-{current_temp}{formatname}", 'wb') as fp:
for chunk in r.iter_content(chunk_size=4096):
if abort:
raise AbortException()
downloaded += fp.write(chunk)
if downloaded > 1000000:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000000, 2)} MB")
else:
lbl_crnt_size.config(text=f"Downloaded: {round(downloaded / 1000, 2)} KB")
pct_done = int(downloaded / file_size * 100)
lbl_percent.config(text=f"{round(pct_done, 2)} %")
download_prg["value"] = pct_done
now = time.time()
if now - last_print >= 1:
speed_sec = round(downloaded / (now - start))
if speed_sec > 1000000:
lbl_speed.config(text=f"{round(speed_sec / 1000000, 3)} MB/s")
else:
lbl_speed.config(text=f"{round(speed_sec / 1000, 3)} KB/s")
last_print = time.time()
# Merge: both files are read fully into memory and the main file is rewritten
# with the temp data appended.
with open(f"temp/Temp-{current_temp}{formatname}", 'rb') as fp:
temp_binary = fp.read()
with open(path + fullname, 'rb') as fp:
main_binary = fp.read()
with open(path + fullname, 'wb') as fp:
fp.write(main_binary + temp_binary)
except AbortException:
# Cancelled: delete the partial download if it exists.
if os.path.exists(path + fullname):
os.remove(path + fullname)
There is no built-in function for this, so you will have to do it manually.
The first thing you need to do is keep a record of how much data (how many chunks/buffers) you have written to the file.
Before the download function, declare a variable, say x = 0, to count how many bytes have been written to the file.
Then, inside the download function, check whether x == 0.
If it is, download normally.
Otherwise, resume the download using a Range header.
Read the following examples of the Range header (source):
If the web server supports the range request then you can add the Range header to your request:
Range: bytes=StartPos-StopPos
You will receive the part between StartPos and StopPos. If dont know the StopPos just use:
Range: bytes=StartPos-
So your code would be:
def resume_download(fileurl, resume_byte_position, *, verify=True):
    """Request the rest of a file, starting at a byte offset.

    Args:
        fileurl: URL of the file being downloaded.
        resume_byte_position: Number of bytes already saved locally; the
            response body is requested from this offset to the end of the file.
        verify: Passed straight to ``requests.get``. TLS certificate
            verification stays on by default; pass ``False`` only for a host
            whose certificate you have deliberately chosen not to check.

    Returns:
        The streaming response from ``requests.get``. The caller should check
        the status code: 206 means the server honoured the range, while 200
        means it ignored it and is sending the whole file again.

    Raises:
        ValueError: If ``resume_byte_position`` is negative, which would
            otherwise produce a malformed ``bytes=-N-`` header.
    """
    if resume_byte_position < 0:
        raise ValueError("resume_byte_position must be >= 0")
    # Open-ended range: no need to know the total size in advance.
    resume_header = {'Range': 'bytes=%d-' % resume_byte_position}
    return requests.get(
        fileurl,
        headers=resume_header,
        stream=True,
        verify=verify,
        allow_redirects=True,
    )
Another example :-
https://www.oreilly.com/library/view/python-cookbook/0596001673/ch11s06.html
Also update the variable x after writing each chunk (x = x + len(chunk)), so that it always holds the number of bytes actually written.
At the end of your download code, add an "if" statement that checks whether the size of the downloaded file matches the size of the file on the server (you can get that from the response headers, e.g. response.headers.get('Content-Length')). If the sizes differ, call your download function again.

How do I pass a list of links to requests.get().content in Python?

I am passing a list of URLs to requests.get(). It works when the list is written out by hand, but not when the list is extracted with XPath.
It works like this (URL1):
url = ['http://images/productos/on-line/items/large/nb/de/nbdell1tc73.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_1.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_2.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_3.jpg'
]
it does not work URL2
# XPath selects attributes with "@"; the "#" characters here were invalid.
# NOTE(review): sel is presumably a Scrapy/parsel selector, in which case the
# result is a list of selector objects rather than plain strings -- confirm.
url = sel.xpath('//section[@class="product-images js-product-images-container"]//img/@src')
result url.path
'url': ['http://images/productos/on-line/items/large/nb/de/nbdell1tc73.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_1.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_2.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_3.jpg'],
this code recepte the url
# Counter used to give every saved image a distinct file name.
i = 0
for url22 in url:
try:
# NOTE(review): this passes each item straight to requests.get(); when `url`
# comes from sel.xpath(...) the items are presumably selector objects, not
# strings -- confirm.
imagen_content = requests.get(url22).content
# Decode the downloaded bytes in memory and convert to RGB before saving as JPEG.
image_file = io.BytesIO(imagen_content)
imagen = Image.open(image_file).convert('RGB')
path = './imagenes/' + '_' + str(i) + '.jpg'
with open(path, 'wb')as f:
imagen.save(f, "JPEG", quality=85)
except Exception as ex:
# Any failure for one image is printed and the loop goes on.
print(ex)
print("ERROR")
i += 1
With the first url it works, with the second one it doesn't.
How do I make it work with url2?
It turned out that each item in the XPath result is a selector rather than a string, so I need to call .get() on it: use url22.get() instead of url22.
# Download every image referenced by the XPath result and re-encode it as JPEG.
# enumerate() supplies the file-name counter, so it advances exactly once per
# URL whether or not the download succeeds.
for i, url22 in enumerate(url):
    try:
        # Each item is a selector, so .get() is needed to obtain the URL string.
        response = requests.get(url22.get())
        # Fail here on a 4xx/5xx status instead of handing an error page to PIL.
        response.raise_for_status()
        imagen_content = response.content
        image_file = io.BytesIO(imagen_content)
        imagen = Image.open(image_file).convert('RGB')
        path = './imagenes/' + '_' + str(i) + '.jpg'
        with open(path, 'wb') as f:
            imagen.save(f, "JPEG", quality=85)
    except Exception as ex:
        # Best effort: report the problem and continue with the next image.
        print(ex)
        print("ERROR")

Copy cell images from Smartsheet using Python

I am trying to make a copy of Smartsheet data on my local disk. I am able to copy all of the sheet data except for the cell images. Below is the code I am using. It works perfectly well for copying the data, but not the cell images.
NOTE: I am not trying to copy the attachments from Smartsheet, only the cell images and the data.
Could someone help me enhance this code so that it copies the cell images as well?
import json
import os
import requests
import time
# Back up each listed sheet as an .xlsx file inside a time-stamped folder.
token = "Bearer <TOken>"
# Sheet label -> Smartsheet sheet id.
backed_up_sheets = {"Attach": 86960044478894, "test2": 6659760455684}
# Destination folder; the name `dir` (which shadows the builtin) is kept so
# that any code referring to it keeps working.
dir = r'C:\Users\\me\SmartSheetsBackup\WorkSheet' + time.strftime("-%m_%d_%Y_%H_%M")
API_URL = "https://api.smartsheet.com/2.0/sheets/"
# Request headers (despite the name): the Accept value asks for an Excel export.
payload = {"Authorization": token,
           "Accept": "application/vnd.ms-excel,image/*"}
amount = len(backed_up_sheets)
for i, el in enumerate(backed_up_sheets, start=1):
    r = requests.get(API_URL + str(backed_up_sheets[el]), headers=payload)
    if r.status_code != 200:
        print ('Some problem with connections please retry later0')
        # Skip this sheet: writing the error body would give a corrupt .xlsx.
        continue
    os.makedirs(dir, exist_ok=True)
    # os.path.join puts the file inside the backup folder; plain string
    # concatenation wrote it next to the folder and left the folder empty.
    target = os.path.join(dir, el + time.strftime("-%m_%d_%Y_%H_%M") + ".xlsx")
    with open(target, 'wb') as output:
        output.write(r.content)
    print ('Progress in sheets: ' + str(i) + '/' + str(amount))
Here's a complete code sample:
# Download an image in a cell
def download_cell_image(client, sheet_id, row_id, column_id, default_filename):
    """Save the image stored in one sheet cell to a local file.

    Args:
        client: Smartsheet SDK client; every API call goes through this
            object.
        sheet_id: Id of the sheet that holds the cell.
        row_id: Id of the row that holds the cell.
        column_id: Id of the column that holds the cell.
        default_filename: File name to use when the image has no alt text.

    Returns:
        The file name that was written, or None when the cell has no image or
        the image could not be downloaded (HTTP status other than 200).
    """
    # Get desired row
    row = client.Sheets.get_row(sheet_id, row_id)
    cell = row.get_column(column_id)
    image = getattr(cell, 'image', None)
    if image is None:
        # Nothing to download for a cell without an image.
        return None
    # Fall back to the default name when alt_text is missing or empty.
    filename = getattr(image, 'alt_text', None) or default_filename

    # Obtain a temporary image URL
    image_url = client.models.ImageUrl({"imageId": image.id})
    response = client.Images.get_image_urls([image_url])
    url = response.image_urls[0].url

    # Download the image
    import requests
    download = requests.get(url)
    if download.status_code != 200:
        return None
    with open(filename, 'wb') as f:
        f.write(download.content)
    return filename
Note that this requires SDK version 1.3.0 or later
The same steps illustrated in the cURL example should work in Python. (Apologies that we don't have a complete published sample.)
Get the image id from the cell object, as returned from get_sheet
Convert the image id to a download url, using images.get_image_urls (docs)
Download the image from the url, probably using the Requests library.

How can I improve downloading speed with python urllib.request

How can I improve download speed with urllib.request? I want to download images from the web, and it works well, but it takes too long: it took 42 seconds to execute the download_album_arts() function. What can I do about that? Can I use multiprocessing or something similar?
import os
import shutil
import requests
from bs4 import BeautifulSoup
from urllib import request
# Chart page that get_image_url() scrapes for album-art thumbnails.
URL = 'https://music.bugs.co.kr/chart/track/day/total'
# Download folder under the current working directory; the trailing slash lets
# file names be appended directly.
PATH = os.getcwd() + '/static/images/'
# Fetch the HTML source of a page
def get_html(target_url):
    """Return the body text of *target_url*, or "" when the status is not 200."""
    page = requests.get(target_url)
    return page.text if page.status_code == 200 else ""
# Collect the thumbnail image URLs from the chart page
def get_image_url():
    """Return the ``src`` of every ``a.thumbnail > img`` element on the chart page.

    Elements without a ``src`` attribute are left out.
    """
    document = BeautifulSoup(get_html(URL), 'html.parser')
    return [
        tag.get('src')
        for tag in document.select('a.thumbnail > img')
        if tag.has_attr('src')
    ]
# download album art in static/images directory
def download_album_arts(limit=100, max_workers=8):
    """Download the chart's album art into PATH as 1.png, 2.png, ...

    The downloads are network-bound, so they run on a small thread pool
    instead of one after another.

    Args:
        limit: Maximum number of images to download. Fewer are downloaded
            when the page lists fewer.
        max_workers: Number of downloads allowed to run at the same time.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises for a failed download;
        the first such error is re-raised once the pool has shut down.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Slicing never raises, unlike indexing a fixed range of 100 entries.
    images = get_image_url()[:limit]
    if not images:
        return

    def _fetch(job):
        index, url = job
        request.urlretrieve(url, PATH + str(index) + '.png')

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # list() drains the result iterator so a worker's exception surfaces here.
        list(pool.map(_fetch, enumerate(images, start=1)))
# delete all album art
def delete_album_art():
    """Empty ``<cwd>/static/images`` by removing and re-creating the folder.

    The folder, and ``static`` itself, is created when it does not exist yet,
    so the function can be called on a fresh checkout.
    """
    path = os.getcwd() + '/static/images'
    if os.path.exists(path):
        shutil.rmtree(path)
    # makedirs also creates the missing parent "static" directory.
    os.makedirs(path, exist_ok=True)

Python FTP download 550 error

I've written an ftp crawler to download specific files. It works up until it finds the specific file it wants to download, and then it throws this error:
ftplib.error_perm: 550
The file exists in my download folder, but the size of the file is 0 kb.
Do I need to convert something in order to get it to download?.
I can access the FTP server manually and download the file without any problems, so I don't think it's the login part (unless there are different ways of logging in?).
Here's my code:
import ftplib
import re
import os
class Reader:
    """Callable that accumulates every line it is called with.

    Each call appends its argument plus a newline to ``data``.
    """

    def __init__(self):
        self.data = ""

    def __call__(self, s):
        self.data = "".join((self.data, s, "\n"))
# Connect to the server; login() is called without credentials.
ftp = ftplib.FTP("my_ftp_server")
ftp.login()
# List the server's current directory into a Reader.
r = Reader()
ftp.dir(r)
def get_file_list(folder):
    """Walk *folder* on the FTP server recursively and download matching archives.

    Every file whose name ends in ``501.zip`` or ``551.zip`` is saved under
    ``C:\\download\\`` unless a file of that name is already there.

    Args:
        folder: Remote directory to list; ``""`` starts at the login directory.

    Raises:
        ftplib.error_perm and the other ``ftplib.all_errors``: re-raised after
            the partially written local file has been removed.

    NOTE(review): the parsing assumes an MS-DOS style listing
    (``date time <DIR>|size name``), as the original field indices imply.
    """
    # Kept for backward compatibility: the original published the most
    # recently parsed entry through these two module globals.
    global tpe
    global name
    listing = Reader()
    ftp.dir(folder, listing)
    print("Reading folder", folder)
    for line in listing.data.split("\n"):
        # Split on runs of whitespace, at most three times, so that a file
        # name containing spaces stays in one piece.
        fields = line.split(None, 3)
        if len(fields) < 4:
            continue
        entry_type, entry_name = fields[2], fields[3]
        tpe, name = entry_type, entry_name
        remote_path = folder + "/" + entry_name
        if entry_type == "<DIR>":
            get_file_list(remote_path)
            continue
        print(folder + entry_name)
        if not entry_name.endswith(('501.zip', '551.zip')):
            continue
        local_path = os.path.join('C:\\download\\', entry_name)
        if os.path.exists(local_path):
            print('File ', entry_name, ' Already Exists, Skipping Download')
            continue
        print('Getting ' + entry_name)
        try:
            with open(local_path, 'wb') as fhandle:
                # RETR needs the path including the folder: the working
                # directory never changes, so a bare file name only resolves
                # for files in the login directory and fails with 550 otherwise.
                ftp.retrbinary('RETR ' + remote_path, fhandle.write)
        except ftplib.all_errors:
            # Do not leave a 0-byte file behind; it would make the next run
            # skip this download as "already exists".
            if os.path.exists(local_path):
                os.remove(local_path)
            raise
# Print a 30-dash separator line.
print("-"*30)
print ("Fetching folders...")
# Start the recursive crawl with an empty folder string.
get_file_list("")
Your code is probably OK.
FTP error 550 is caused by a permission issue on the server side.
This error means 'Requested action not taken. File unavailable (e.g., file not found, no access).', as you can find out here on Wikipedia
If you expect to have access to it, you should contact the sysadmin to rectify the file permission.

Categories

Resources