How to read pdf file in Django - python

This is my view.. In the blank space after open I want to read a file but I dont to what to write to make it dynamic..
def home(request):
image_data = open('', 'rb').read()
return HttpResponse(image_data, mimetype='application/pdf')
This my model here file is uploaded .. Its is either pdf or .doc .. I want to read the file uploaded
from django.db import models
class CV(models.Model):
file = models.FileField(upload_to='media')

try this
def home(request):
file = request.FILES['yourHtmlFileFieldName']
image_date = open(file,'rb').read()

Related

Django REST framework - parse uploaded csv file

I have setup Django REST framework endpoint that allows me to upload a csv file.
The serializers.py looks like this:
from rest_framework import serializers
class UploadSerializer(serializers.Serializer):
file_uploaded = serializers.FileField()
class Meta:
fields = ['file_uploaded']
In my views.py file, I'm trying to read data from uploaded csv like this:
class UploadViewSet(viewsets.ViewSet):
serializer_class = UploadSerializer
def create(self, request):
file_uploaded = request.FILES.get('file_uploaded')
with open(file_uploaded, mode ='r')as file:
csvFile = csv.reader(file)
for lines in csvFile:
print(lines)
I'm getting the following error:
... line 37, in create
with open(file_uploaded, mode ='r') as file:
TypeError: expected str, bytes or os.PathLike object, not InMemoryUploadedFile
I have checked type() of file_uploaded and It is <class 'django.core.files.uploadedfile.InMemoryUploadedFile'>
How can I read this file into dictionary or dataframe so I can extract the data I need from it?
When you do request.FILES.get('file_uploaded') it returns back an InMemoryUploadedFile which is a wrapper around a file object. You can access the file object using the file attribute.
file_uploaded # <InMemoryUploadedFile: xxx (xxx/xxx)>
file_object = file_uploaded.file
This file_object can then be opened.
an InMemoryFileObject can be used pretty much in the same way as an open file., so you don't need to open it.
def create(self, request):
file_upload = request.FILES.get("file_uploaded")
csvFile = csv.reader(file_upload)
for line in csvFile:
print(line)
This has some good information on file handling in django.
https://docs.djangoproject.com/en/4.1/topics/http/file-uploads/#handling-uploaded-files-with-a-model

How to I get the path to a file with Django?

I am currently using Django's forms to create a FileField that allows the user to upload a file. However, after I select the file, it creates an object of type django.core.files.uploadedfile.InMemoryUploadedFile, which is a copy of the original file.
How would I go about to get the original path of this file so I can either modify or replace it?
# forms.py
class UploadFile(forms.Form):
file = forms.FileField(label="Select File: ", required=True)
# views.py
def get_path(response):
context = {}
if response.method == "POST":
form = UploadFile(response.FILES)
if form.is_valid():
file = form.cleaned_data["file"]
# Is there a way to get the original file path or should I just not use FileField?
print("The File Path Here")
temporary_file_path function
if form.is_valid():
file = form.cleaned_data["file"]
print(file.temporary_file_path())
Not sure why you'd want to try modifying an InMemoryUploadedFile file path. However, just keep in mind the term "InMemory".
The file has not been saved as yet, so there will be no path associated with the file before saving. To my knowledge (I stand corrected if I'm wrong here), InMemoryUploadedFile do not have a path attribute either, but content/file, a filename, etc.; in which the filename can be modified. You can see the doc here.
You could make modifications to the filename though if you wish by doing the following:
if form.is_valid():
form.cleaned_data["file"].name = "modify.txt" # or to whatever filename and extension you want.
form.save() # If you have a save method defined
However, you could get a little creative in saving the file to a specific path within the media folder though. For example:
# forms.py
import os
from django.conf import settings
class UploadFile(forms.Form):
file = forms.FileField(label="Select File: ", required=True)
# The save method that will save the file to a specific location in the media folder.
def save(self):
filename = file.name
folder = 'uploaded/'
# Create the folder if it doesn't exist.
try:
os.mkdir(os.path.join(settings.MEDIA_ROOT, folder))
except:
pass
# Save the uploaded file inside that folder.
file_path = os.path.join(settings.MEDIA_ROOT, folder, filename)
with open(file_path, 'wb+') as destination:
for chunk in file.chunks():
destination.write(chunk)
# view.py
def get_path(response):
if response.method == "POST":
form = UploadFile(response.FILES)
if form.is_valid():
file = form.cleaned_data["file"]
# file.name = "modify.txt" # Can modify here if necessary...
form.save()

how to convert format of my .pdf file to .txt in Django

I am working on a Django web-application, where I want to convert .pdf and .docx files to .txt .
I wrote a save() function where I want to get the file and then convert it into .txt format . But it is giving me the error that :
FileNotFoundError at /post/new/
[Errno 2] No such file or directory: 'Python String.pdf'
my models.py is :
from django.db import models
from django.contrib.auth.models import User
from django.urls import reverse
import PyPDF2
import os
# Create your models here.
class FileUpload(models.Model):
user = models.ForeignKey(User, on_delete=models.CASCADE, blank=True , null=True)
file = models.FileField(upload_to='files')
def save(self, *args, **kwargs):
if self.file:
pdffileobj = open(str(self.file.name), 'rb') # this is where i want to get the file but it give me the error
pdfreader = PyPDF2.PdfFileReader(pdffileobj)
x = pdfreader.numPages
pageobj = pdfreader.getPage(x + 1)
text = pageobj.extractText()
file1 = open(self.file.name.txt, "a")
file1.writelines(text)
super(FileUpload, self).save(*args, **kwargs)
def get_absolute_url(self):
return reverse('home')
The conclusion is I want to convert my .pdf to .txt before saving it .
Thank you
i have not worked with django but this code might help:
use os for file path
os.path.join(os.path.dirname(os.path.dirname(__file__)),'filename')
source : https://stackoverflow.com/a/24971898/13319197

Store an uploaded file temporarilly when there are form errors

I have a form with a FileField and other fields:
from flask_wtf.file import FileField
class FooForm(Form):
file = FileField()
#...
How I can "persist" file uploads through multiple request? For example, I attach a file to file upload field and write some text to another field. After I click submit, the text doesn't validate, so I display the form again to review the data, but the file is gone now. How can I improve the form so that the user doesn't have to upload the file again?
The simplest way is to store the file on first load and then reference it by ID after that:
class FooForm(Form):
file = FileField()
file_id = HiddenField()
Then in your handler:
if form.file_id.data:
# remove any validators from form.file
form.file.validators = []
if form.validate():
possible_file_stream = request.files.get(form.file.name)
file_data = get_file(possible_file_stream, form.file_id.data)
# Do things with file data
return redirect('/somewhere/else')
elif form.file.data:
file_data = request.files[form.file.name]
form.file_id.data = save_file(file_data)
# Optionally, if the form should not display the file input
# once one has been uploaded
# del form.file
return render_template("form.html", form=form)

Programmatically saving image to Django ImageField

Ok, I've tried about near everything and I cannot get this to work.
I have a Django model with an ImageField on it
I have code that downloads an image via HTTP (tested and works)
The image is saved directly into the 'upload_to' folder (the upload_to being the one that is set on the ImageField)
All I need to do is associate the already existing image file path with the ImageField
I've written this code about 6 different ways.
The problem I'm running into is all of the code that I'm writing results in the following behavior:
(1) Django will make a 2nd file, (2) rename the new file, adding an _ to the end of the file name, then (3) not transfer any of the data over leaving it basically an empty re-named file. What's left in the 'upload_to' path is 2 files, one that is the actual image, and one that is the name of the image,but is empty, and of course the ImageField path is set to the empty file that Django try to create.
In case that was unclear, I'll try to illustrate:
## Image generation code runs....
/Upload
generated_image.jpg 4kb
## Attempt to set the ImageField path...
/Upload
generated_image.jpg 4kb
generated_image_.jpg 0kb
ImageField.Path = /Upload/generated_image_.jpg
How can I do this without having Django try to re-store the file? What I'd really like is something to this effect...
model.ImageField.path = generated_image_path
...but of course that doesn't work.
And yes I've gone through the other questions here like this one as well as the django doc on File
UPDATE
After further testing, it only does this behavior when running under Apache on Windows Server. While running under the 'runserver' on XP it does not execute this behavior.
I am stumped.
Here is the code which runs successfully on XP...
f = open(thumb_path, 'r')
model.thumbnail = File(f)
model.save()
I have some code that fetches an image off the web and stores it in a model. The important bits are:
from django.core.files import File # you need this somewhere
import urllib
# The following actually resides in a method of my model
result = urllib.urlretrieve(image_url) # image_url is a URL to an image
# self.photo is the ImageField
self.photo.save(
os.path.basename(self.url),
File(open(result[0], 'rb'))
)
self.save()
That's a bit confusing because it's pulled out of my model and a bit out of context, but the important parts are:
The image pulled from the web is not stored in the upload_to folder, it is instead stored as a tempfile by urllib.urlretrieve() and later discarded.
The ImageField.save() method takes a filename (the os.path.basename bit) and a django.core.files.File object.
Let me know if you have questions or need clarification.
Edit: for the sake of clarity, here is the model (minus any required import statements):
class CachedImage(models.Model):
url = models.CharField(max_length=255, unique=True)
photo = models.ImageField(upload_to=photo_path, blank=True)
def cache(self):
"""Store image locally if we have a URL"""
if self.url and not self.photo:
result = urllib.urlretrieve(self.url)
self.photo.save(
os.path.basename(self.url),
File(open(result[0], 'rb'))
)
self.save()
Super easy if model hasn't been created yet:
First, copy your image file to the upload path (assumed = 'path/' in following snippet).
Second, use something like:
class Layout(models.Model):
image = models.ImageField('img', upload_to='path/')
layout = Layout()
layout.image = "path/image.png"
layout.save()
tested and working in django 1.4, it might work also for an existing model.
Just a little remark. tvon answer works but, if you're working on windows, you probably want to open() the file with 'rb'. Like this:
class CachedImage(models.Model):
url = models.CharField(max_length=255, unique=True)
photo = models.ImageField(upload_to=photo_path, blank=True)
def cache(self):
"""Store image locally if we have a URL"""
if self.url and not self.photo:
result = urllib.urlretrieve(self.url)
self.photo.save(
os.path.basename(self.url),
File(open(result[0], 'rb'))
)
self.save()
or you'll get your file truncated at the first 0x1A byte.
Ok, If all you need to do is associate the already existing image file path with the ImageField, then this solution may be helpfull:
from django.core.files.base import ContentFile
with open('/path/to/already/existing/file') as f:
data = f.read()
# obj.image is the ImageField
obj.image.save('imgfilename.jpg', ContentFile(data))
Well, if be earnest, the already existing image file will not be associated with the ImageField, but the copy of this file will be created in upload_to dir as 'imgfilename.jpg' and will be associated with the ImageField.
Here is a method that works well and allows you to convert the file to a certain format as well (to avoid "cannot write mode P as JPEG" error):
import urllib2
from django.core.files.base import ContentFile
from PIL import Image
from StringIO import StringIO
def download_image(name, image, url):
input_file = StringIO(urllib2.urlopen(url).read())
output_file = StringIO()
img = Image.open(input_file)
if img.mode != "RGB":
img = img.convert("RGB")
img.save(output_file, "JPEG")
image.save(name+".jpg", ContentFile(output_file.getvalue()), save=False)
where image is the django ImageField or your_model_instance.image
here is a usage example:
p = ProfilePhoto(user=user)
download_image(str(user.id), p.image, image_url)
p.save()
Hope this helps
What I did was to create my own storage that will just not save the file to the disk:
from django.core.files.storage import FileSystemStorage
class CustomStorage(FileSystemStorage):
def _open(self, name, mode='rb'):
return File(open(self.path(name), mode))
def _save(self, name, content):
# here, you should implement how the file is to be saved
# like on other machines or something, and return the name of the file.
# In our case, we just return the name, and disable any kind of save
return name
def get_available_name(self, name):
return name
Then, in my models, for my ImageField, I've used the new custom storage:
from custom_storage import CustomStorage
custom_store = CustomStorage()
class Image(models.Model):
thumb = models.ImageField(storage=custom_store, upload_to='/some/path')
A lot of these answers were outdated, and I spent many hours in frustration (I'm fairly new to Django & web dev in general). However, I found this excellent gist by #iambibhas: https://gist.github.com/iambibhas/5051911
import requests
from django.core.files import File
from django.core.files.temp import NamedTemporaryFile
def save_image_from_url(model, url):
r = requests.get(url)
img_temp = NamedTemporaryFile(delete=True)
img_temp.write(r.content)
img_temp.flush()
model.image.save("image.jpg", File(img_temp), save=True)
Another possible way to do that:
from django.core.files import File
with open('path_to_file', 'r') as f: # use 'rb' mode for python3
data = File(f)
model.image.save('filename', data, True)
If you want to just "set" the actual filename, without incurring the overhead of loading and re-saving the file (!!), or resorting to using a charfield (!!!), you might want to try something like this --
model_instance.myfile = model_instance.myfile.field.attr_class(model_instance, model_instance.myfile.field, 'my-filename.jpg')
This will light up your model_instance.myfile.url and all the rest of them just as if you'd actually uploaded the file.
Like #t-stone says, what we really want, is to be able to set instance.myfile.path = 'my-filename.jpg', but Django doesn't currently support that.
This is might not be the answer you are looking for. but you can use charfield to store the path of the file instead of ImageFile. In that way you can programmatically associate uploaded image to field without recreating the file.
With Django 3,
with a model such as this one:
class Item(models.Model):
name = models.CharField(max_length=255, unique=True)
photo= models.ImageField(upload_to='image_folder/', blank=True)
if the image has already been uploaded, we can directly do :
Item.objects.filter(...).update(photo='image_folder/sample_photo.png')
or
my_item = Item.objects.get(id=5)
my_item.photo='image_folder/sample_photo.png'
my_item.save()
You can try:
model.ImageField.path = os.path.join('/Upload', generated_image_path)
class tweet_photos(models.Model):
upload_path='absolute path'
image=models.ImageField(upload_to=upload_path)
image_url = models.URLField(null=True, blank=True)
def save(self, *args, **kwargs):
if self.image_url:
import urllib, os
from urlparse import urlparse
file_save_dir = self.upload_path
filename = urlparse(self.image_url).path.split('/')[-1]
urllib.urlretrieve(self.image_url, os.path.join(file_save_dir, filename))
self.image = os.path.join(file_save_dir, filename)
self.image_url = ''
super(tweet_photos, self).save()
class Pin(models.Model):
"""Pin Class"""
image_link = models.CharField(max_length=255, null=True, blank=True)
image = models.ImageField(upload_to='images/', blank=True)
title = models.CharField(max_length=255, null=True, blank=True)
source_name = models.CharField(max_length=255, null=True, blank=True)
source_link = models.CharField(max_length=255, null=True, blank=True)
description = models.TextField(null=True, blank=True)
tags = models.ForeignKey(Tag, blank=True, null=True)
def __unicode__(self):
"""Unicode class."""
return unicode(self.image_link)
def save(self, *args, **kwargs):
"""Store image locally if we have a URL"""
if self.image_link and not self.image:
result = urllib.urlretrieve(self.image_link)
self.image.save(os.path.basename(self.image_link), File(open(result[0], 'r')))
self.save()
super(Pin, self).save()
Working!
You can save image by using FileSystemStorage.
check the example below
def upload_pic(request):
if request.method == 'POST' and request.FILES['photo']:
photo = request.FILES['photo']
name = request.FILES['photo'].name
fs = FileSystemStorage()
##### you can update file saving location too by adding line below #####
fs.base_location = fs.base_location+'/company_coverphotos'
##################
filename = fs.save(name, photo)
uploaded_file_url = fs.url(filename)+'/company_coverphotos'
Profile.objects.filter(user=request.user).update(photo=photo)
class DemoImage(models.Model):
title = models.TextField(max_length=255, blank=False)
image = models.ImageField(blank=False, upload_to="images/DemoImages/")
import requests
import urllib.request
from django.core.files import File
url = "https://path/to/logo.jpg"
# Below 3 lines is to fake as browser agent
# as many sites block urllib class suspecting to be bots
opener = urllib.request.build_opener()
opener.addheaders = [("User-agent", "Mozilla/5.0")]
urllib.request.install_opener(opener)
# Issue command to actually download and create temp img file in memory
result = urllib.request.urlretrieve(url)
# DemoImage.objects.create(title="title", image=File(open(result[0], "rb")))
# ^^ This erroneously results in creating the file like
# images/DemoImages/path/to/temp/dir/logo_image_file
# as opposed to
# images/DemoImages/logo_image_file
# Solution to get the file in images/DemoImages/
reopen = open(result[0], "rb") # Returns a BufferedReader object of the temp image
django_file = File(reopen) # Create the file from the BufferedReader object
demoimg = DemoImage()
demoimg.title = "title"
demoimg.image.save("logo.png", django_file, save=True)
This approach also triggers file upload to cloudinary/S3 if so configured
So, if you have a model with an imagefield with an upload_to attribute set, such as:
class Avatar(models.Model):
image_file = models.ImageField(upload_to=user_directory_path_avatar)
then it is reasonably easy to change the image, at least in django 3.15.
In the view, when you process the image, you can obtain the image from:
self.request.FILES['avatar']
which is an instance of type InMemoryUploadedFile, as long as your html form has the enctype set and a field for avatar...
<form method="post" class="avatarform" id="avatarform" action="{% url avatar_update_view' %}" enctype="multipart/form-data">
{% csrf_token %}
<input id="avatarUpload" class="d-none" type="file" name="avatar">
</form>
Then, setting the new image in the view is as easy as the following (where profile is the profile model for the self.request.user)
profile.avatar.image_file.save(self.request.FILES['avatar'].name, self.request.FILES['avatar'])
There is no need to save the profile.avatar, the image_field already saves, and into the correct location because of the 'upload_to' callback function.
Your can use Django REST framework and python Requests library to Programmatically saving image to Django ImageField
Here is a Example:
import requests
def upload_image():
# PATH TO DJANGO REST API
url = "http://127.0.0.1:8080/api/gallery/"
# MODEL FIELDS DATA
data = {'first_name': "Rajiv", 'last_name': "Sharma"}
# UPLOAD FILES THROUGH REST API
photo = open('/path/to/photo', 'rb')
resume = open('/path/to/resume', 'rb')
files = {'photo': photo, 'resume': resume}
request = requests.post(url, data=data, files=files)
print(request.status_code, request.reason)
I save the image with uuid in django 2 python 3 because thats how django do it:
import uuid
from django.core.files import File
import urllib
httpUrl = "https://miimgeurl/image.jpg"
result = urllib.request.urlretrieve(httpUrl)
mymodel.imagefield.save(os.path.basename(str(uuid.uuid4())+".jpg"),File(open(result[0], 'rb')))
mymodel.save()
if you use admin.py you can solve the problem override (doc on django):
def save_model(self, request, obj, form, change):
obj.image_data = bytes(obj.image_name.read())
super().save_model(request, obj, form, change)
with models.py:
image_name = models.ImageField()
image_data = models.BinaryField()

Categories

Resources