How do I pass a list of links to requests.get().content in Python? - python

I am passing a list of image URLs to requests.get(). It works with a hard-coded list, but it does not work with the list I extract with XPath.
It works like this (URL1):
url = ['http://images/productos/on-line/items/large/nb/de/nbdell1tc73.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_1.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_2.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_3.jpg'
]
It does not work like this (URL2):
# XPath addresses attributes with "@"; "#class" / "#src" are not valid XPath.
# NOTE(review): `sel` is presumably a scrapy/parsel selector, in which case the
# result holds selector objects rather than plain strings — confirm.
url = sel.xpath('//section[@class="product-images js-product-images-container"]//img/@src')
The result of the XPath query (url) is:
'url': ['http://images/productos/on-line/items/large/nb/de/nbdell1tc73.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_1.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_2.jpg',
'http://images/productos/on-line/items/large/nb/de/nbdell1tc73_3.jpg'],
This is the code that receives the URLs:
# Fetch every entry of `url`, convert it to RGB and save it as a JPEG under
# ./imagenes/, numbering the files with the counter `i`.
# NOTE(review): indentation was lost in this listing; `i += 1` is assumed to sit
# at loop level (outside the try) so the counter advances on failures too — confirm.
i = 0
for url22 in url:
    try:
        # The loop item is handed to requests.get() exactly as it comes out of `url`.
        imagen_content = requests.get(url22).content
        # Wrap the downloaded bytes in a file-like object so PIL can read them.
        image_file = io.BytesIO(imagen_content)
        imagen = Image.open(image_file).convert('RGB')
        path = './imagenes/' + '_' + str(i) + '.jpg'
        with open(path, 'wb')as f:
            imagen.save(f, "JPEG", quality=85)
    except Exception as ex:
        # Any failure for this item is printed and the loop moves on.
        print(ex)
        print("ERROR")
    i += 1
With the first url it works, with the second one it doesn't.
How do I make it work with url2?

It turned out that I need to call .get() on each item, i.e. pass url22.get() instead of url22:
# Download every image referenced by the XPath result and store it as a JPEG
# under ./imagenes/. Each item of `url` needs .get() to yield the URL string
# that requests expects.
for i, url22 in enumerate(url):
    path = './imagenes/' + '_' + str(i) + '.jpg'
    try:
        # The timeout keeps one stalled server from hanging the whole run, and
        # raise_for_status() reports HTTP errors instead of handing an error
        # page to PIL.
        respuesta = requests.get(url22.get(), timeout=10)
        respuesta.raise_for_status()
        imagen = Image.open(io.BytesIO(respuesta.content)).convert('RGB')
        with open(path, 'wb') as f:
            imagen.save(f, "JPEG", quality=85)
    except Exception as ex:
        # Best effort: report the problem and continue with the next image.
        print(ex)
        print("ERROR")

Related

HTTP 403 when downloading images from a .CSV file with URLs. What can I do?

def url_to_jpg(i, url, FILE_PATH):
    """Download ``url`` and store it in ``FILE_PATH`` as ``<basename>.jpg``.

    Args:
        i: Position of the URL in the input list (accepted but not used).
        url: Address of the image to download.
        FILE_PATH: Destination prefix. It is joined to the file name by plain
            string concatenation, so it should end with a path separator.

    Returns:
        None. Failures are printed rather than raised.
    """
    try:
        url_basename = url.split("/")[-1]
        # Replace whatever extension the URL has with ".jpg".
        filename = '{}.jpg'.format(url_basename.rsplit(".", 1)[0])
        full_path = '{}{}'.format(FILE_PATH, filename)
        urllib.request.urlretrieve(url, full_path)
        print('{} saved.'.format(full_path))
    except HTTPError as err:
        print(err)
    except Exception as err:
        # This used to be a bare ``except:``, which also swallowed
        # KeyboardInterrupt/SystemExit and made a long batch impossible to
        # stop with Ctrl-C. The printed value (the exception class) is kept.
        print(type(err))
    return None
That's the central part of my code, without the other elements. What can I do to avoid that 403 error? Should I maybe try Selenium?
def url_to_jpg(i, url, filepath):
    """Download ``url`` with requests and save it in ``filepath`` as ``<basename>.jpg``.

    Args:
        i: Position of the URL in the input list (accepted but not used).
        url: Address of the image to download.
        filepath: Destination folder prefix. It is joined to the file name by
            plain string concatenation, so it should end with a path separator.

    Returns:
        None.
    """
    import os

    url_basename = url.split("/")[-1]
    imagename = '{}.jpg'.format(url_basename.rsplit(".", 1)[0])
    fullpath = '{}{}'.format(filepath, imagename)
    # The second positional argument of requests.get() is ``params``; passing
    # the save path there sent it to the server as a query string.
    response = requests.get(url)
    # Make sure the target folder exists, then write to the full path. The
    # earlier version opened ``imagename`` and so saved into the working
    # directory instead of ``filepath``.
    os.makedirs(os.path.dirname(fullpath) or '.', exist_ok=True)
    with open(fullpath, "wb") as file:
        file.write(response.content)
    print('{} saved.'.format(fullpath))
    return None


# Read the URL list from the CSV and download the first column of every row.
filename = 'libro.csv'
filepath = 'fotospython/'
urls = pd.read_csv(filename)
for i, url in enumerate(urls.values):
    url_to_jpg(i, url[0], filepath)
That was my final code, using requests. It avoids the 403 error; at first it could not put the files into the folder, but it works now.

Find all possibilities in a loop - try/except

I wrote this code, and my problem is that it finds only the first match (for example only img, even though img2 also exists; it then moves on to the next picture) and copies only that one file, although there are two candidates.
# For each ident in df_list, build six candidate file names and try to copy
# them one after another. A later candidate is only attempted when the previous
# copy raised, so at most one file is copied per ident.
for i in df_list:
    img = (filepath + i + ".jpg")
    img2 = (filepath + i + "-1" + ".jpg")
    # Adjacent string literals ("-2" ".jpg") are concatenated by Python.
    img3 = (filepath + i + "-2" ".jpg")
    img4 = (filepath + i + "-3" + ".jpg")
    img5 = (filepath + i + "-4" + ".jpg")
    # NOTE(review): this name has a space before "-5", unlike the others — confirm it is intended.
    img6 = (filepath + i + " -5" + ".jpg")
    try:
        shutil.copy(img, newpath, follow_symlinks=True)
    except:
        try:
            shutil.copy(img6, newpath, follow_symlinks=True)
        except:
            try:
                shutil.copy(img2, newpath, follow_symlinks=True)
            except:
                try:
                    shutil.copy(img3, newpath, follow_symlinks=True)
                except:
                    try:
                        shutil.copy(img4, newpath, follow_symlinks=True)
                    except:
                        try:
                            shutil.copy(img5, newpath, follow_symlinks=True)
                        except:
                            # None of the six candidates could be copied:
                            # append a line for this ident to Napake.txt.
                            with open("C:/Users/"+user+"/Desktop/J/"+datum+"/"+"Napake.txt", "a") as text_file:
                                print("Slika za ident {} ne obstaja.\n".format(i), file=text_file)
I need help; thank you for your answers.
Rather than repeating the try/except, you can loop over the file numbers, try to copy each file, and print an error if there is an exception.
# Copy every existing image variant of each ident (<ident>.jpg and
# <ident>-1.jpg ... <ident>-5.jpg) to newpath, and record in Napake.txt the
# idents for which no image at all was found.
save_path = "C:/Users/" + user + "/Desktop/J/" + datum + "/" + "Napake.txt"
for folder in df_list:
    copied = 0
    for index in range(6):
        if index == 0:
            img = filepath + folder + ".jpg"
        else:
            img = f"{filepath}{folder}-{index}.jpg"
        try:
            # Copy the name built for this index. The earlier version copied
            # `img6`, which is not defined in this snippet.
            shutil.copy(img, newpath, follow_symlinks=True)
        except FileNotFoundError:
            # This variant does not exist; the others may still be there.
            continue
        except Exception as e:
            print('could not copy file')
            print(e)
        else:
            copied += 1
    if copied == 0:
        # Log only real misses. The earlier version wrote this line after
        # every successful copy and referenced the undefined name `i`.
        with open(save_path, "a") as text_file:
            text_file.write(f"Slika za ident {folder} ne obstaja.\n")
I would also recommend having a look at this answer to see how to copy all files in a directory.

Downloading images from a server

I want to download images from a server using Python. For example, the URL is "example.com/photo/1.jpg", and I want to run the download again and again, each time with a different image name.
here is my code
import wget
fileurl = 0
# The address is built once, before the loop, from the initial value of fileurl.
url = 'http://example.com/photo/' + str(fileurl) + '.jpg'
while fileurl != 500:
    fileurl += 1
    # `url` is not rebuilt inside the loop, so every pass requests the same address.
    download = wget.download(url)
If you put the url line in the loop, it will change every iteration. In your current code it never changes.
import wget
# Request http://example.com/photo/0.jpg through .../499.jpg, one per pass.
# range() replaces the hand-maintained counter and its `!= 500` exit test.
for fileurl in range(500):
    # Build the address inside the loop so it changes on every iteration.
    url = 'http://example.com/photo/' + str(fileurl) + '.jpg'
    try:
        download = wget.download(url)
    except Exception as e:
        # A failed download should not stop the remaining ones.
        print(e)

How to download images from Google and rename them with the keyword at the same time in google-images-download

How can I download images from Google and, at the same time, rename each image with its keyword when using google-images-download? With this package, the file name is generated from the image URL.
What I expect is that each image is named after the item (keyword).
Could anyone help me?
Here is the code:
from google_images_download import google_images_download
# Create the downloader, describe the search as a dict of options, and run it.
response = google_images_download.googleimagesdownload()
arguments = {
    "keywords": "cat, dog, pig",
    "limit": 1,
    "print_urls": True,
    "image_directory": 'home1',
    "size": "large",
}
absolute_image_paths = response.download(arguments)
You need to override these methods in google_images_download: download_image, _get_all_items and download. _get_all_items and download need to call the new download_image (passing it the search term).
# Download Images
def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,search_term):
    """Download one image and save it as ``<search_term>.jpeg``.

    The file is written to ``main_directory/dir_name/`` and is named after the
    search term instead of the last segment of the image URL.

    Args:
        image_url: Address of the image to fetch.
        image_format: Accepted for interface compatibility; not used here.
        main_directory: Root output folder.
        dir_name: Sub-folder of ``main_directory`` that receives the file.
        count: Running number; used only in the returned message and name.
        print_urls: When true, print the image URL before downloading.
        socket_timeout: Download timeout in seconds; a falsy value means 10.
        prefix: Optional prefix; when set, ``prefix + " "`` precedes the name.
        print_size: When true, print ``self.file_size(path)`` after saving.
        no_numbering: Accepted for interface compatibility; not used here.
        search_term: Keyword that becomes the file name.

    Returns:
        Tuple ``(download_status, download_message, return_image_name,
        absolute_path)``. On failure the status is ``'fail'`` and the last two
        items are empty strings.
    """
    def _failed(kind, err):
        # Every failure path reports the same four-field shape.
        message = kind + " on an image...trying next one..." + " Error: " + str(err)
        return 'fail', message, '', ''

    if print_urls:
        print("Image URL: " + image_url)

    # timeout time to download an image
    timeout = float(socket_timeout) if socket_timeout else 10

    try:
        req = Request(image_url, headers={
            "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
        # The context manager closes the response even when read() fails.
        with urlopen(req, None, timeout) as response:
            data = response.read()

        # Name the file after the search term rather than the URL.
        # NOTE: the path does not contain `count`, so several results for the
        # same search term and prefix are written to the same file.
        image_name = search_term + "." + "jpeg"
        prefix = prefix + " " if prefix else ''
        path = main_directory + "/" + dir_name + "/" + prefix + image_name
        print(path)

        try:
            with open(path, 'wb') as output_file:
                output_file.write(data)
        except OSError as e:
            # Return right away: previously the 'fail' status set here was
            # overwritten by the 'success' assignments that followed.
            return _failed("OSError", e)
        absolute_path = os.path.abspath(path)

        # image size parameter
        if print_size:
            print("Image Size: " + str(self.file_size(path)))

        # return image name back to calling method to use it for thumbnail downloads
        # NOTE(review): this name includes `count`, while the file on disk does
        # not — confirm what callers expect.
        return_image_name = prefix + str(count) + ". " + image_name
        download_message = "Completed Image ====> " + return_image_name
        return 'success', download_message, return_image_name, absolute_path
    except UnicodeEncodeError as e:
        return _failed("UnicodeEncodeError", e)
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be tested first.
        return _failed("HTTPError", e)
    except URLError as e:
        return _failed("URLError", e)
    except ssl.CertificateError as e:
        return _failed("CertificateError", e)
    except IOError as e:
        return _failed("IOError", e)
    except IncompleteRead as e:
        return _failed("IncompleteReadError", e)

unable to use PIL in django view function

I am trying to make a function that makes thumbnail of an image but I am unable to open the image file.
def create_thumbnail(request):
    """Find the first <img> of a fixed post and try to open the matching file with PIL.

    Responds with the image link, or with a fixed fallback link when anything
    inside the try block raises.

    NOTE(review): indentation was lost in this listing; the final ``return`` is
    assumed to belong to the ``if post:`` body — confirm.
    """
    slug = 'yftguhjkn'  # hard-coded slug of the post to inspect
    post = get_object_or_404(Post, slug=slug)
    if post:
        # Convert post.content with the configured markdownify function and
        # parse the result with BeautifulSoup.
        markdownify = import_string(MARKDOWNX_MARKDOWNIFY_FUNCTION)
        content = BeautifulSoup(markdownify(post.content), "html5lib")
        try:
            # src attribute of the first <img>; raises IndexError when there is none.
            img_link = content.findAll('img')[0].get('src')
            print(img_link)
            # Keep the last path segment of the link, up to its first dot.
            filename = img_link.split('/')[-1]
            filename = filename.split('.')[0]
            # The path starts with settings.MEDIA_URL and always ends in ".jpg".
            file_path = settings.MEDIA_URL + settings.DRACEDITOR_UPLOAD_PATH + post.author.username + '/' + filename + '.jpg'
            print (file_path)
            #im = Image.open(img_link)
            im = Image.open(file_path)
            print (im.size)
        except:
            # Any failure above, including Image.open, ends up here and
            # replaces the link with a fixed URL.
            img_link = 'http://howtorecordpodcasts.com/wp-content/uploads/2012/10/YouTube-Background-Pop-4.jpg'
        return HttpResponse(img_link)
But execution always ends up in the except branch. Can someone please tell me how to fix it?
def create_thumbnail(request):
    """Variant with the try/except commented out, so errors from Image.open propagate.

    Responds with the computed ``file_path``.

    NOTE(review): indentation was lost in this listing; the final ``return`` is
    assumed to belong to the ``if post:`` body — confirm.
    """
    slug = 'yftguhjkn'  # hard-coded slug of the post to inspect
    post = get_object_or_404(Post, slug=slug)
    if post:
        # Convert post.content with the configured markdownify function and
        # parse the result with BeautifulSoup.
        markdownify = import_string(MARKDOWNX_MARKDOWNIFY_FUNCTION)
        content = BeautifulSoup(markdownify(post.content), "html5lib")
        #try:
        img_link = content.findAll('img')[0].get('src')
        print(img_link)
        # Keep the last path segment of the link, up to its first dot.
        filename = img_link.split('/')[-1]
        filename = filename.split('.')[0]
        # The path starts with settings.MEDIA_URL and always ends in ".jpg".
        file_path = settings.MEDIA_URL + settings.DRACEDITOR_UPLOAD_PATH + post.author.username + '/' + filename + '.jpg'
        print (file_path)
        im = Image.open(file_path)
        print (im.size)
        #except:
        #img_link = 'http://howtorecordpodcasts.com/wp-content/uploads/2012/10/YouTube-Background-Pop-4.jpg'
        return HttpResponse(file_path)
I think Pillow needs a correct (full) filesystem path to the image in order to open it.
im = Image.open(file_path)
The usual value of settings.MEDIA_URL is /media/, right?
So file_path needs to be the full path, e.g.:
/home/myuser/myenv/myproject/media/images/uploads/2017/05/29/foobar.png
If your settings.MEDIA_ROOT is:
/home/myuser/myenv/myproject/media
You can build it with:
# Build the filesystem path from MEDIA_ROOT (not MEDIA_URL).
file_path = os.path.join(settings.MEDIA_ROOT, settings.DRACEDITOR_UPLOAD_PATH, image_name)
# example:
# os.path.join('/home/myuser/myenv/myproject/media', 'images/uploads/2017/05/29/', 'foobar.png')
# -> '/home/myuser/myenv/myproject/media/images/uploads/2017/05/29/foobar.png'
# (os.path.join inserts the missing "/" between the parts; plain "+" would not.)
You can also use BASE_DIR to build an absolute path.

Categories

Resources