Download a remote image and save it to a Django model - python

I am writing a Django app that fetches all images from a particular URL and saves them in the database.
But I can't figure out how to use ImageField in Django.
settings.py
MEDIA_ROOT = os.path.join(PWD, "../downloads/")
# URL that handles the media served from MEDIA_ROOT. Make sure to use a
# trailing slash.
# Examples: "http://example.com/media/", "http://media.example.com/"
MEDIA_URL = '/downloads/'
models.py
class images_data(models.Model):
    image_id = models.IntegerField()
    source_id = models.IntegerField()
    image = models.ImageField(upload_to='images', null=True, blank=True)
    text_ind = models.NullBooleanField()
    prob = models.FloatField()
download_img.py
def spider(site):
    PWD = os.path.dirname(os.path.realpath(__file__))
    # site = "http://en.wikipedia.org/wiki/Pune"
    hdr = {'User-Agent': 'Mozilla/5.0'}
    outfolder = os.path.join(PWD, "../downloads")
    print "MAYANK:" + outfolder
    req = urllib2.Request(site, headers=hdr)
    page = urllib2.urlopen(req)
    soup = bs(page)
    tag_image = soup.findAll("img")
    count = 1
    for image in tag_image:
        print "Image: %(src)s" % image
        filename = image["src"].split("/")[-1]
        outpath = os.path.join(outfolder, filename)
        urlretrieve('http:' + image["src"], outpath)
        im = images_data(image_id=count, source_id=1, image=outpath, text_ind=None, prob=0)
        im.save()
        count = count + 1
I am calling download_img.py inside a view like this:
if form.is_valid():
    url = form.cleaned_data['url']
    spider(url)

The Django documentation is always a good place to start:
class ModelWithImage(models.Model):
    image = models.ImageField(
        upload_to='images',
    )
UPDATED
So this script works:
- Loop over the images to download
- Download the image
- Save it to a temp file
- Apply it to the model
- Save the model
import requests
import tempfile

from django.core import files

# List of images to download
image_urls = [
    'http://i.thegrindstone.com/wp-content/uploads/2013/01/how-to-get-awesome-back.jpg',
]

for image_url in image_urls:
    # Stream the image from the url
    response = requests.get(image_url, stream=True)

    # Was the request OK?
    if response.status_code != requests.codes.ok:
        # Nope, error handling, skip file etc etc etc
        continue

    # Get the filename from the url, used for saving later
    file_name = image_url.split('/')[-1]

    # Create a temporary file
    lf = tempfile.NamedTemporaryFile()

    # Read the streamed image in sections
    for block in response.iter_content(1024 * 8):
        # If no more file then stop
        if not block:
            break
        # Write image block to temporary file
        lf.write(block)

    # Create the model you want to save the image to
    image = Image()

    # Save the temporary file to the model's ImageField
    # This saves the model instance, so be sure that it is valid
    image.image.save(file_name, files.File(lf))
Some reference links:
- requests - "HTTP for Humans", I prefer this to urllib2
- tempfile - save temporary files
- Django FileField save

If you want to save downloaded images without saving them to disk first (without using NamedTemporaryFile etc) then there's an easy way to do that.
This will be slightly quicker than downloading the file and writing it to disk as it is all done in memory. Note that this example is written for Python 3 - the process is similar in Python 2 but slightly different.
from io import BytesIO

import requests
from django.core import files

url = "https://example.com/image.jpg"
resp = requests.get(url)
if resp.status_code != requests.codes.ok:
    # Error handling here
    raise ValueError("Failed to download image")

fp = BytesIO()
fp.write(resp.content)
file_name = url.split("/")[-1]  # There's probably a better way of doing this but this is just a quick example
your_model.image_field.save(file_name, files.File(fp))
Where your_model is an instance of the model you'd like to save to and .image_field is the name of the ImageField.
See the documentation for io for more info.
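For example, a minimal sketch of the whole flow (the Profile model and its avatar field are hypothetical names, not from the original answer):
from io import BytesIO

import requests
from django.core import files

url = "https://example.com/image.jpg"
resp = requests.get(url)
resp.raise_for_status()

# BytesIO can also be seeded directly with the downloaded bytes
fp = BytesIO(resp.content)

profile = Profile.objects.get(pk=1)  # hypothetical model with an ImageField named `avatar`
profile.avatar.save(url.split("/")[-1], files.File(fp))  # writes the file and saves the model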

# this is my solution
from django.core import files
from django.core.files.base import ContentFile
import requests

from .models import MyModel

def download_img():
    r = requests.get("remote_file_url", allow_redirects=True)
    filename = "remote_file_url".split("/")[-1]
    my_model = MyModel(
        file=files.File(ContentFile(r.content), filename)
    )
    my_model.save()
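Note that ContentFile is itself a subclass of Django's File, so the extra files.File wrapper is likely unnecessary; a slightly simpler variant of the same idea:
my_model = MyModel(file=ContentFile(r.content, name=filename))
my_model.save()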

As an example of what I think you're asking:
In forms.py:
imgfile = forms.ImageField(label='Choose your image', help_text='The image should be cool.')
In models.py:
imgfile = models.ImageField(upload_to='images/%m/%d')
So there will be a POST request from the user (when the user completes the form). That request will contain basically a dictionary of data. The dictionary holds the submitted files. To focus the request on the file from the field (in our case, an ImageField), you would use:
request.FILES['imgfile']
You would use that when you construct the model object (instantiating your model class):
newPic = ImageModel(imgfile=request.FILES['imgfile'])
To save that the simple way, you'd just use the save() method bestowed upon your object (because Django is that awesome):
if form.is_valid():
    newPic = Pic(imgfile=request.FILES['imgfile'])
    newPic.save()
Your image will be stored, by default, to the directory you indicate for MEDIA_ROOT in settings.py.
Accessing the image in the template:
<img src="{{ MEDIA_URL }}{{ image.imgfile.name }}">
The urls can be tricky, but here's a basic example of a simple url pattern to call the stored images:
urlpatterns += patterns('',
    url(r'^media/(?P<path>.*)$', 'django.views.static.serve', {
        'document_root': settings.MEDIA_ROOT,
    }),
)
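On newer Django versions, where patterns() has been removed, the usual development-time equivalent is the static() helper:
from django.conf import settings
from django.conf.urls.static import static

urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)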
I hope it helps.

Similar to @boltsfrombluesky's answer above, you can do this in Python 3 without any external dependencies like so:
from os.path import basename
import tempfile
import urllib.request
from urllib.parse import urlparse

from django.core.files.base import File

def handle_upload_url_file(url, obj):
    img_temp = tempfile.NamedTemporaryFile(delete=True)
    req = urllib.request.Request(
        url, data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
        }
    )
    with urllib.request.urlopen(req) as response:
        img_temp.write(response.read())
    img_temp.flush()
    filename = basename(urlparse(url).path)
    result = obj.image.save(filename, File(img_temp))
    img_temp.close()
    return result

Try doing it this way instead of assigning a path to the image:
import urllib2

from django.core.files import File
from django.core.files.temp import NamedTemporaryFile

def handle_upload_url_file(url):
    img_temp = NamedTemporaryFile()
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1')]
    img_temp.write(opener.open(url).read())
    img_temp.flush()
    return img_temp

Use the above function like this:
new_image = images_data()
# set the rest of the data on new_image, then do this:
new_image.image.save(slug_filename, File(handle_upload_url_file(url)))
# here slug_filename is just the filename that you want to save the file with

If you are saving the image by overriding the model's save method in order to modify the file name, and struggling with random invalid file names (like me) in Django, you can follow the code below (copied from the accepted answer):
lf = tempfile.NamedTemporaryFile()
for block in response.iter_content(1024 * 8):
    if not block:
        break
    lf.write(block)
lf.name = name  # Set your custom file name here
dc = ImageFile(file=files.File(lf))
dc.save()
I have configured my storage with django-storages in order to upload media content directly to S3. For some reason I wasn't able to replace the file name; after some R&D it worked.
Note: I used a FileField in the model, so a few lines of code are not needed.

def qrcodesave(request):
    import urllib2
    url = "http://chart.apis.google.com/chart?cht=qr&chs=300x300&chl=s&chld=H|0"
    opener = urllib2.urlopen(url)
    mimetype = "application/octet-stream"
    response = HttpResponse(opener.read(), mimetype=mimetype)
    response["Content-Disposition"] = "attachment; filename=aktel.png"
    return response
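Note that the mimetype argument was removed in Django 1.7; on current versions the equivalent is content_type:
response = HttpResponse(opener.read(), content_type="application/octet-stream")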

Related

Unable to save webp file to model field

I'm not sure if this is entirely Django-related, but any help would be much appreciated! I'm having trouble generating a webp file with the following code:
from io import BytesIO
from PIL import Image
import requests
I've got the following model
class UserImage(models.Model):
    user_provided_image = VersatileImageField(upload_to=folder10, null=True, blank=True)
    nextgen_image = models.FileField(upload_to=folder10, null=True, blank=True)  # for WebP images
I'm creating a webp file. This code works, but it saves the file to the root directory of my project and I'm not sure how to save it to the FileField (i.e. nextgen_image) on my model:
def create_webp_image(sender, instance, *args, **kwargs):
    image_url = instance.image.thumbnail['1920x1080'].url
    try:
        response = requests.get(image_url, stream=True)
        path = image_url
    except:  # local env
        path = "http://localhost:8000" + image_url
        response = requests.get(path, stream=True)
    img = Image.open(BytesIO(response.content))
    # build file path
    position = path.rfind("/") + 1
    newpath = path[0:position]
    # build file name
    image_name = path[position:]
    name_of_file = image_name.split('.')[0] + ".webp"
    # this creates the webp file
    img.save(name_of_file, "webp")
    # save image to model
    # instance.nextgen_image = ?

post_save.connect(create_webp_image, sender=UserImage)
Thanks!
You can use something like this (note that ContentFile takes raw bytes, not a BytesIO):
from django.core.files.base import ContentFile
...
img_content = ContentFile(response.content)
instance.nextgen_image.save(name_of_file, img_content, save=True)
...
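One caveat: response.content holds the originally downloaded image, not the WebP conversion. If the goal is to store the converted file, a sketch (reusing img and name_of_file from the question's code) would render the PIL image into an in-memory buffer first:
from io import BytesIO
from django.core.files.base import ContentFile

buffer = BytesIO()
img.save(buffer, "webp")  # write the converted image into memory
instance.nextgen_image.save(name_of_file, ContentFile(buffer.getvalue()), save=True)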
If you'd rather use a package to get the job done, try django-resized.
Based on the code provided above, this should do the trick. Hope this solves your issue. Cheers
nextgen_image = ResizedImageField(force_format="WEBP", upload_to=folder10, null=True, blank=True)

Python requests - how to save a gif scraped with requests? [duplicate]

I'm trying to download and save an image from the web using python's requests module.
Here is the (working) code I used:
img = urllib2.urlopen(settings.STATICMAP_URL.format(**data))
with open(path, 'w') as f:
    f.write(img.read())
Here is the new (non-working) code using requests:
r = requests.get(settings.STATICMAP_URL.format(**data))
if r.status_code == 200:
    img = r.raw.read()
    with open(path, 'w') as f:
        f.write(img)
Can you tell me which attribute of the requests response I should use?
You can either use the response.raw file object, or iterate over the response.
The response.raw file-like object will not, by default, decode compressed responses (GZIP or deflate). You can force it to decompress for you anyway by setting the decode_content attribute to True (requests sets it to False to control decoding itself). You can then use shutil.copyfileobj() to have Python stream the data to a file object:
import requests
import shutil

r = requests.get(settings.STATICMAP_URL.format(**data), stream=True)
if r.status_code == 200:
    with open(path, 'wb') as f:
        r.raw.decode_content = True
        shutil.copyfileobj(r.raw, f)
To iterate over the response use a loop; iterating like this ensures that data is decompressed by this stage:
r = requests.get(settings.STATICMAP_URL.format(**data), stream=True)
if r.status_code == 200:
    with open(path, 'wb') as f:
        for chunk in r:
            f.write(chunk)
This'll read the data in 128 byte chunks; if you feel another chunk size works better, use the Response.iter_content() method with a custom chunk size:
r = requests.get(settings.STATICMAP_URL.format(**data), stream=True)
if r.status_code == 200:
    with open(path, 'wb') as f:
        for chunk in r.iter_content(1024):
            f.write(chunk)
Note that you need to open the destination file in binary mode to ensure Python doesn't try to translate newlines for you. We also set stream=True so that requests doesn't download the whole image into memory first.
Get a file-like object from the request and copy it to a file. This will also avoid reading the whole thing into memory at once.
import shutil

import requests

url = 'http://example.com/img.png'
response = requests.get(url, stream=True)
with open('img.png', 'wb') as out_file:
    shutil.copyfileobj(response.raw, out_file)
del response
How about this for a quick solution:
import requests

url = "http://craphound.com/images/1006884_2adf8fc7.jpg"
response = requests.get(url)
if response.status_code == 200:
    with open("/Users/apple/Desktop/sample.jpg", 'wb') as f:
        f.write(response.content)
I have the same need for downloading images using requests. I first tried the answer of Martijn Pieters, and it works well. But when I profiled this simple function, I found that it makes a surprising number of function calls compared to urllib and urllib2.
I then tried the way recommended by the author of requests module:
import requests
from PIL import Image
# python2.x, use this instead:
# from StringIO import StringIO
# for python3.x use BytesIO, since r.content is bytes:
from io import BytesIO

r = requests.get('https://example.com/image.jpg')
i = Image.open(BytesIO(r.content))
This greatly reduced the number of function calls and thus sped up my application.
Here is the code of my profiler and the result.
#!/usr/bin/python
import profile
from StringIO import StringIO

import requests
from PIL import Image

def testRequest():
    image_name = 'test1.jpg'
    url = 'http://example.com/image.jpg'
    r = requests.get(url, stream=True)
    with open(image_name, 'wb') as f:
        for chunk in r.iter_content():
            f.write(chunk)

def testRequest2():
    image_name = 'test2.jpg'
    url = 'http://example.com/image.jpg'
    r = requests.get(url)
    i = Image.open(StringIO(r.content))
    i.save(image_name)

if __name__ == '__main__':
    profile.run('testRequest()')
    profile.run('testRequest2()')
The result for testRequest:
343080 function calls (343068 primitive calls) in 2.580 seconds
And the result for testRequest2:
3129 function calls (3105 primitive calls) in 0.024 seconds
This might be easier than using requests. This is the only time I'll ever suggest not using requests to do HTTP stuff.
Two-liner using urllib:
>>> import urllib.request
>>> urllib.request.urlretrieve("http://www.example.com/songs/mp3.mp3", "mp3.mp3")
There is also a nice Python module named wget that is pretty easy to use. Found here.
This demonstrates the simplicity of the design:
>>> import wget
>>> url = 'http://www.futurecrew.com/skaven/song_files/mp3/razorback.mp3'
>>> filename = wget.download(url)
100% [................................................] 3841532 / 3841532
>>> filename
'razorback.mp3'
Enjoy.
Edit: You can also add an out parameter to specify a path.
>>> out_filepath = <output_filepath>
>>> filename = wget.download(url, out=out_filepath)
The following code snippet downloads a file and saves it with the filename contained in the specified url.
import requests

url = "http://example.com/image.jpg"
filename = url.split("/")[-1]
r = requests.get(url, timeout=0.5)
if r.status_code == 200:
    with open(filename, 'wb') as f:
        f.write(r.content)
There are 2 main ways:
Using .content (simplest/official) (see Zhenyi Zhang's answer):
import io  # Note: io.BytesIO is StringIO.StringIO on Python2.
import requests
from PIL import Image  # needed for Image.open below

r = requests.get('http://lorempixel.com/400/200')
r.raise_for_status()
with io.BytesIO(r.content) as f:
    with Image.open(f) as img:
        img.show()
Using .raw (see Martijn Pieters's answer):
import requests
import PIL.Image  # needed for PIL.Image.open below

r = requests.get('http://lorempixel.com/400/200', stream=True)
r.raise_for_status()
r.raw.decode_content = True  # Required to decompress gzip/deflate compressed responses.
with PIL.Image.open(r.raw) as img:
    img.show()
r.close()  # Safety when stream=True to ensure the connection is released.
Timing both shows no noticeable difference.
It's as easy as importing Image and requests:
from PIL import Image
import requests

img = Image.open(requests.get(url, stream=True).raw)
img.save('img1.jpg')
This is how I did it:
import requests
from PIL import Image
from io import BytesIO

url = 'your_url'
files = {'file': ("C:/Users/shadow/Downloads/black.jpeg", open('C:/Users/shadow/Downloads/black.jpeg', 'rb'), 'image/jpg')}
response = requests.post(url, files=files)
img = Image.open(BytesIO(response.content))
img.show()
Here is a more user-friendly answer that still uses streaming.
Just define these functions and call getImage(). It will use the same file name as the url and write to the current directory by default, but both can be changed.
import requests
from StringIO import StringIO
from PIL import Image

def createFilename(url, name, folder):
    dotSplit = url.split('.')
    if name is None:
        # use the same as the url
        slashSplit = dotSplit[-2].split('/')
        name = slashSplit[-1]
    ext = dotSplit[-1]
    file = '{}{}.{}'.format(folder, name, ext)
    return file

def getImage(url, name=None, folder='./'):
    file = createFilename(url, name, folder)
    with open(file, 'wb') as f:
        r = requests.get(url, stream=True)
        for block in r.iter_content(1024):
            if not block:
                break
            f.write(block)

def getImageFast(url, name=None, folder='./'):
    file = createFilename(url, name, folder)
    r = requests.get(url)
    i = Image.open(StringIO(r.content))
    i.save(file)

if __name__ == '__main__':
    # Uses less memory
    getImage('http://www.example.com/image.jpg')
    # Faster
    getImageFast('http://www.example.com/image.jpg')
The request guts of getImage() are based on the answer here and the guts of getImageFast() are based on the answer above.
I'm going to post an answer as I don't have enough rep to make a comment, but with wget as posted by Blairg23, you can also provide an out parameter for the path.
wget.download(url, out=path)
This is the first response that comes up for google searches on how to download a binary file with requests. In case you need to download an arbitrary file with requests, you can use:
import requests
url = 'https://s3.amazonaws.com/lab-data-collections/GoogleNews-vectors-negative300.bin.gz'
open('GoogleNews-vectors-negative300.bin.gz', 'wb').write(requests.get(url, allow_redirects=True).content)
My approach was to use response.content (blob) and save it to a file in binary mode:
img_blob = requests.get(url, timeout=5).content
with open(destination + '/' + title, 'wb') as img_file:
    img_file.write(img_blob)
Check out my python project that downloads images from unsplash.com based on keywords.
You can do something like this:
import random

import requests

url = "https://images.pexels.com/photos/1308881/pexels-photo-1308881.jpeg?auto=compress&cs=tinysrgb&dpr=1&w=500"
name = random.randrange(1, 1000)
filename = str(name) + ".jpg"
response = requests.get(url)
if response.ok:
    with open(filename, 'wb') as f:  # binary mode, since response.content is bytes
        f.write(response.content)
Agree with Blairg23 that using urllib.request.urlretrieve is one of the easiest solutions.
One note I want to point out here: sometimes the download fails because the request was sent by a script (bot), and if you want to parse images from Google Images or other search engines, you need to pass a user-agent in the request headers first and then download the image; otherwise the request will be blocked and throw an error.
Pass a user-agent and download the image:
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582')]
urllib.request.install_opener(opener)
urllib.request.urlretrieve(URL, 'image_name.jpg')
Code in the online IDE that scrapes and downloads images from Google Images using requests, bs4, and urllib.request.
Alternatively, if your goal is to scrape images from search engines like Google, Bing, Yahoo!, DuckDuckGo (and other search engines), then you can use SerpApi. It's a paid API with a free plan.
The biggest difference is that there's no need to figure out how to bypass blocks from search engines or how to extract certain parts from the HTML or JavaScript since it's already done for the end-user.
Example code to integrate:
import json
import os
import urllib.request

from serpapi import GoogleSearch

params = {
    "api_key": os.getenv("API_KEY"),
    "engine": "google",
    "q": "pexels cat",
    "tbm": "isch"
}

search = GoogleSearch(params)
results = search.get_dict()

print(json.dumps(results['images_results'], indent=2, ensure_ascii=False))

# download images
for index, image in enumerate(results['images_results']):
    # print(f'Downloading {index} image...')
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582')]
    urllib.request.install_opener(opener)
    # saves original res image to the SerpApi_Images folder and adds index to the end of the file name
    urllib.request.urlretrieve(image['original'], f'SerpApi_Images/original_size_img_{index}.jpg')
Sample output (truncated):
[
  ...
  # other images
  {
    "position": 100,  # 100th image
    "thumbnail": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQK62dIkDjNCvEgmGU6GGFZcpVWwX-p3FsYSg&usqp=CAU",
    "source": "homewardboundnj.org",
    "title": "pexels-helena-lopes-1931367 - Homeward Bound Pet Adoption Center",
    "link": "https://homewardboundnj.org/upcoming-event/black-cat-appreciation-day/pexels-helena-lopes-1931367/",
    "original": "https://homewardboundnj.org/wp-content/uploads/2020/07/pexels-helena-lopes-1931367.jpg",
    "is_product": false
  }
]
Disclaimer, I work for SerpApi.
Here is a very simple example:
import requests

response = requests.get("https://i.imgur.com/ExdKOOz.png")  # fetch the image
file = open("sample_image.png", "wb")  # create the file for the image
file.write(response.content)  # save the file content
file.close()
To download an image:
import requests

picture_request = requests.get(url)
with open("picture.jpg", 'wb') as f:  # "picture.jpg" is just a placeholder name
    f.write(picture_request.content)

Save binary data get from url into Django FileField

Is there any way to save binary data fetched from an external URL (in my case, an Excel file) into a Django FileField, with the file uploaded to the destination according to the Django project settings?
class FileData(models.Model):
    excel_file = models.FileField(upload_to='excel_file_path')

import requests

url = 'https://www.example.getfile.com/file_id=234'
r = requests.get(url)
# How to store the binary data response to FileField?
Thanks for the help. Please let me know if further information is needed in my case.
You can make use of django.core.files.uploadedfile.SimpleUploadedFile to save your content as a file field of your model instance.
>>> import requests
>>> from django.core.files.uploadedfile import SimpleUploadedFile
>>> response = requests.get("https://www.example.getfile.com/file_id=234")
>>> excel_file = SimpleUploadedFile("excel.xls", response.content, content_type="application/vnd.ms-excel")
>>> file_data = FileData(excel_file=excel_file)
>>> file_data.save()
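An alternative sketch, assuming the same FileData model: django.core.files.base.ContentFile also works and skips the content_type argument; saving via the FileField writes the file according to the project's storage settings.
>>> import requests
>>> from django.core.files.base import ContentFile
>>> response = requests.get("https://www.example.getfile.com/file_id=234")
>>> file_data = FileData()
>>> file_data.excel_file.save("excel.xls", ContentFile(response.content))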

Python PIL: IOError: cannot identify image file

I am trying to get the image from the following URL:
image_url = "http://www.eatwell101.com/wp-content/uploads/2012/11/Potato-Pancakes-recipe.jpg?b14316"
When I navigate to it in a browser, it sure looks like an image. But I get an error when I try:
import urllib, cStringIO
from PIL import Image

img_file = cStringIO.StringIO(urllib.urlopen(image_url).read())
image = Image.open(img_file)
IOError: cannot identify image file
I have copied hundreds of images this way, so I'm not sure what's special here. Can I get this image?
When I open the URL directly:
In [3]: f = urllib.urlopen('http://www.eatwell101.com/wp-content/uploads/2012/11/Potato-Pancakes-recipe.jpg')
In [9]: f.code
Out[9]: 403
This is not returning an image.
You could try specifying a user-agent header to see if you can trick the server into thinking you are a browser.
Using the requests library (because it is easier to send header information):
In [7]: f = requests.get('http://www.eatwell101.com/wp-content/uploads/2012/11/Potato-Pancakes-recipe.jpg', headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:16.0) Gecko/20100101 Firefox/16.0,gzip(gfe)'})
In [8]: f.status_code
Out[8]: 200
The problem is not with the image itself.
>>> urllib.urlopen(image_url).read()
'\n<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n<html>\n <head>\n <title>403 You are banned from this site. Please contact via a different client configuration if you believe that this is a mistake.</title>\n </head>\n <body>\n <h1>Error 403 You are banned from this site. Please contact via a different client configuration if you believe that this is a mistake.</h1>\n <p>You are banned from this site. Please contact via a different client configuration if you believe that this is a mistake.</p>\n <h3>Guru Meditation:</h3>\n <p>XID: 1806024796</p>\n <hr>\n <p>Varnish cache server</p>\n </body>\n</html>\n'
Using a user-agent header will solve the problem:
import urllib2, cStringIO
from PIL import Image

opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
response = opener.open(image_url)
img_file = cStringIO.StringIO(response.read())
image = Image.open(img_file)
To get the image, you can first save it to disk and then load it into PIL. For example:
import urllib2
from PIL import Image

opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(), urllib2.HTTPCookieProcessor())
image_content = opener.open("http://www.eatwell101.com/wp-content/uploads/2012/11/Potato-Pancakes-recipe.jpg?b14316").read()
opener.close()

save_dir = r"/some/folder/to/save/image.jpg"
f = open(save_dir, 'wb')
f.write(image_content)
f.close()

image = Image.open(save_dir)
...

Unable to open file object created in Django

I'm developing an application that runs on an Apache server with the Django framework. My current script works fine when it runs on the local desktop (without Django). The script downloads all the images from a website to a folder on the desktop. However, when I run the script on the server, Django just creates a file object that apparently has something in it (it should be Google's logo), but I can't open the file. I also create an HTML file with updated image link locations; the HTML file gets created fine, I'm assuming because it's all text. I believe I may have to use a file wrapper somewhere, but I'm not sure. Any help is appreciated; my code is below. Thanks!
from django.http import HttpResponse
from bs4 import BeautifulSoup as bsoup
import urlparse
from urllib2 import urlopen
from urllib import urlretrieve
import os
import sys
import zipfile
from django.core.servers.basehttp import FileWrapper
def getdata(request):
    out = r'C:\Users\user\Desktop\images'
    if request.GET.get('q'):
        # url = str(request.GET['q'])
        url = "http://google.com"
        soup = bsoup(urlopen(url))
        parsedURL = list(urlparse.urlparse(url))
        for image in soup.findAll("img"):
            print "Old Image Path: %(src)s" % image
            # Get file name
            filename = image["src"].split("/")[-1]
            # Get full path name if url has to be parsed
            parsedURL[2] = image["src"]
            image["src"] = '%s\%s' % (out, filename)
            print 'New Path: %s' % image["src"]
            # print image
            outpath = os.path.join(out, filename)
            # retrieve images
            if image["src"].lower().startswith("http"):
                urlretrieve(image["src"], outpath)
            else:
                urlretrieve(urlparse.urlunparse(parsedURL), out)  # Constructs URL from tuple (parsedURL)
        # Create HTML file and write to it to check output (stored in same directory).
        html = soup.prettify("utf-8")
        with open("output.html", "wb") as file:
            file.write(html)
    else:
        url = 'You submitted nothing!'
    return HttpResponse(url)
My problem had to do with storing the files on the desktop. I stored the files in the Django workspace folder, changed the paths, and it worked for me.
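A minimal sketch of that fix, assuming MEDIA_ROOT is configured in settings.py (the 'images' subfolder is just an example name):
import os
from django.conf import settings

# Store downloads under the project's media folder instead of the desktop
out = os.path.join(settings.MEDIA_ROOT, 'images')
if not os.path.exists(out):
    os.makedirs(out)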
