Save file received from POST request in Python

I'm trying to implement an upload feature to the basic http.server Python module.
So far, I've created a new class named SimpleHTTPRequestHandlerWithUpload, which inherits from SimpleHTTPRequestHandler, and added an upload section to list_directory(). The next step is to create a do_POST() method, which handles the request and saves the file inside the current working directory. However, I have no idea how to do this. I looked at UniIsland's code on GitHub, but I can't understand what he did and the code is very old. I also read this question and tried to implement it in my code.
It kind of works, but the file is "littered" with headers. This does not pose a big problem for txt files, but it corrupts files of every other type.
I'd like to know how to remove the headers, save the uploaded file inside the current working directory with its original name and check if the upload was successful or not.
This is my code:
__version__ = '0.1'

import http.server
import html
import io
import os
import socket  # For gethostbyaddr()
import sys
import urllib.parse
import contextlib
from http import HTTPStatus


class SimpleHTTPRequestHandlerWithUpload(http.server.SimpleHTTPRequestHandler):

    server_version = 'SimpleHTTPWithUpload/' + __version__

    def do_POST(self):
        """Serve a POST request."""
        data = self.rfile.read(int(self.headers['content-length']))
        with open('file.txt', 'wb') as file:
            file.write(data)

        r = []
        enc = sys.getfilesystemencoding()

        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Upload Result Page</title>\n")
        r.append("<body>\n<h2>Upload Result Page</h2>\n")
        r.append("</body>\n</html>")

        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)

        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()

        if f:
            self.copyfile(f, self.wfile)
            f.close()

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error). In either case, the headers are sent, making the
        interface the same as for send_head().
        """
        try:
            list = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                'No permission to list directory')
            return None
        list.sort(key=lambda a: a.lower())
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            displaypath = urllib.parse.unquote(path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in list:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or # for symbolic links
            if os.path.isdir(fullname):
                displayname = name + '/'
                linkname = name + '/'
            if os.path.islink(fullname):
                displayname = name + '#'
            # Note: a link to a directory displays with # and links with /
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib.parse.quote(linkname, errors='surrogatepass'),
                        html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n')
        r.append('<form id="upload" enctype="multipart/form-data" method="post" action="#">\n'
                 '<input id="fileupload" name="file" type="file" />\n'
                 '<input type="submit" value="Submit" id="submit" />\n'
                 '</form>')
        r.append('\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header('Content-type', 'text/html; charset=%s' % enc)
        self.send_header('Content-Length', str(len(encoded)))
        self.end_headers()
        return f


if __name__ == '__main__':
    class DualStackServer(http.server.ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    http.server.test(
        HandlerClass=SimpleHTTPRequestHandlerWithUpload,
        ServerClass=DualStackServer
    )
If you want to test it, just run the script on your machine, open a web browser on a different machine and type in the address bar <IP_ADDRESS_1>:8000 where IP_ADDRESS_1 is the IP of the machine you're running the code on.
Please tell me if there's anything wrong with it other than the do_POST() method. I'm a new Python programmer and I'm trying to improve my software design skills in general. Thank you!
EDIT: I figured out how to remove the headers and save the file with its original name. However, the script hangs on data = self.rfile.readlines() until I close the browser tab and then works well. I don't know what to do. It seems I have to send some sort of EOF to notify readlines() that I'm finished sending the file but I have no clue how to do it. I also can't figure out how to check if the file has been uploaded successfully or not. Any help is appreciated!
Updated do_POST() method:
def do_POST(self):
    """Serve a POST request."""
    # Note: this method also needs `import re` added to the imports at the top of the file.
    data = self.rfile.readlines()

    filename = re.findall(r'Content-Disposition.*name="file"; filename="(.*)"', str(data[1]))
    if len(filename) == 1:
        filename = ''.join(filename)
    else:
        return

    data = data[4:-2]
    data = b''.join(data)

    with open(filename, 'wb') as file:
        file.write(data)

    r = []
    enc = sys.getfilesystemencoding()

    r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
             '"http://www.w3.org/TR/html4/strict.dtd">')
    r.append('<html>\n<title>Upload result page</title>\n')
    r.append('<body>\n<h2>Upload result page</h2>\n')
    r.append('</body>\n</html>')

    encoded = '\n'.join(r).encode(enc, 'surrogateescape')
    f = io.BytesIO()
    f.write(encoded)
    f.seek(0)

    self.send_response(HTTPStatus.OK)
    self.send_header('Content-type', 'text/html')
    self.send_header('Content-Length', str(len(encoded)))
    self.end_headers()

    if f:
        self.copyfile(f, self.wfile)
        f.close()

I managed to solve all of my problems. I posted my code on GitHub, for anyone interested.
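For anyone running into the same hang: readlines() blocks because the browser keeps the connection open after sending the body, so no EOF ever arrives until the tab is closed; reading exactly Content-Length bytes with self.rfile.read() avoids that. Below is a rough sketch of such a do_POST() for the class above (not necessarily what the linked GitHub code does). It assumes a single file field sent as multipart/form-data, uses a deliberately simplified parser (file data containing the boundary would break it), and needs import os and import re at the top of the file; cgi.FieldStorage or a dedicated multipart parser would be more robust.

def do_POST(self):
    """Serve a POST request (sketch: read Content-Length bytes, no readlines())."""
    length = int(self.headers['Content-Length'])
    body = self.rfile.read(length)           # returns as soon as the whole body has arrived

    # The multipart boundary is announced in the Content-Type header.
    boundary = self.headers['Content-Type'].split('boundary=')[1].encode()

    saved = False
    for part in body.split(b'--' + boundary):
        match = re.search(rb'Content-Disposition:.*?filename="(.*?)"', part)
        if not match:
            continue
        filename = os.path.basename(match.group(1).decode())
        # File data starts after the blank line that ends the part headers
        # and is followed by one trailing CRLF before the next boundary.
        file_data = part.split(b'\r\n\r\n', 1)[1]
        if file_data.endswith(b'\r\n'):
            file_data = file_data[:-2]
        with open(filename, 'wb') as f:
            f.write(file_data)
        saved = True

    message = b'Upload OK' if saved else b'Upload failed'
    self.send_response(HTTPStatus.OK if saved else HTTPStatus.BAD_REQUEST)
    self.send_header('Content-Type', 'text/plain')
    self.send_header('Content-Length', str(len(message)))
    self.end_headers()
    self.wfile.write(message)

Whether the upload succeeded can then be reported from whether a file part was actually found and written, as the status code above does.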

Related

Python - create txt file on the fly and send it via FTP?

I am currently creating a text file from a jinja2 template on the fly and having it downloaded by the user's browser. However, I want to add an option to send it somewhere via FTP (all the FTP details are predefined and won't change).
How do I create the file to be sent?
Thanks
code:
...
device_config.stream(
    STR = hostname,
    IP = subnet,
    BGPASNO = bgp_as,
    LOIP = lo1,
    DSLUSER = dsl_user,
    DSLPASS = dsl_pass,
    Date = install_date,
).dump(config_file)

content = config_file.getvalue()
content_type = 'text/plain'
content_disposition = 'attachment; filename=%s' % (file_name)

response = None
if type == 'FILE':
    response = HttpResponse(content, content_type=content_type)
    response['Content-Disposition'] = content_disposition
elif type == 'FTP':
    with tempfile.NamedTemporaryFile() as temp:
        temp.write(content)
        temp.seek(0)
        filename = temp.name
        session = ftplib.FTP('192.168.1.1','test','password')
        session.storbinary('STOR {0}'.format(file_name), temp)
        session.quit()
        temp.flush()

return response
EDIT
Needed to add temp.seek(0) before sending the file.
You can use the tempfile module to create a named temporary file.
import tempfile

with tempfile.NamedTemporaryFile() as temp:
    temp.write(content)
    temp.flush()
    temp.seek(0)  # rewind before uploading, otherwise storbinary reads nothing (see the EDIT above)
    filename = temp.name
    session.storbinary('STOR {0}'.format(file_name), temp)
Here is a working example using BytesIO from the io module. The code is tested and works.
import ftplib
import io

session = ftplib.FTP('192.168.1.1', 'USERNAME', 'PASSWORD')
# session.set_debuglevel(2)

buf = io.BytesIO()
# write bytes (b'...') to the buffer; passing str raises an error in Python 3.7
buf.write(b"test string")
buf.seek(0)

session.storbinary("STOR testfile.txt", buf)
session.quit()
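Applied to the original question, the same BytesIO trick means the rendered jinja2 text can be uploaded without touching the disk at all. A small sketch, where content and file_name stand in for the question's variables:

import ftplib
import io

content = 'rendered template text'   # in the question this would be config_file.getvalue()
file_name = 'device.cfg'             # hypothetical file name

session = ftplib.FTP('192.168.1.1', 'test', 'password')
# storbinary expects a binary file object, so encode the text and wrap it in BytesIO
session.storbinary('STOR {0}'.format(file_name), io.BytesIO(content.encode('utf-8')))
session.quit()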

Using Python webpage (with necessary files) offline

Could you help me with the script below, please?
I am trying to store a webpage with its files offline, so it can be opened without an internet connection, using Python. I can copy the source code of the target page into a file and save it as "example.html" on the local PC, but my code is not saving the page's files: whenever I open the saved HTML file, the browser loads the necessary files from the internet, and it takes too much time to load (very slow).
This is a part of the original code:
import urllib2, codecs

def download(site):
    response = urllib2.urlopen(site)
    html_input = response.read()
    return html_input

def derealitivise(site, html_input):
    active_html = html_input.replace('<head>', '<head> <base href='+site+'>')
    return active_html

def main(site, output_filename):
    #try:
    site_url = site.encode("utf-8")
    html_input = download(site_url)
    #active_html = derealitivise(site_url, active_html)
    header = "<head> <base href="+site_url+">"
    active_html = html_input.replace('<head>', header)
    #output_file = open (output_filename, "w")
    #output_file = codecs.open(output_filename, "wb", "utf-8-sig")
    #output_file.write(active_html.encode("utf-8"))
    #output_file.close()
    with open(output_filename, 'w') as fid:
        fid.write(active_html)
    return "OK"

Line breaks not occurring in for statement iterations

The following takes multiple file inputs from a form and writes them serially. While it does print to the page when each file succeeds, line breaks are not occurring between iterations. What would be the best way to fix this? I thought print statements added line breaks by default?
#!/usr/bin/python

import cgi, os
import shutil
import cgitb; cgitb.enable()  # for troubleshooting

form = cgi.FieldStorage()

print """\
Content-Type: text/html\n
<html><body>
"""

if 'file' in form:
    filefield = form['file']
    if not isinstance(filefield, list):
        filefield = [filefield]
    for fileitem in filefield:
        if fileitem.filename:
            fn = os.path.basename(fileitem.filename)
            # save file
            with open('/var/www/rsreese.com/files/' + fn, 'wb') as f:
                shutil.copyfileobj(fileitem.file, f)
            # line breaks are not occurring between iterations
            print 'File "' + fn + '" was uploaded successfully \n'
    message = 'All files uploaded'
else:
    message = 'No file was uploaded'

print """\
<p>%s</p>
</body></html>
""" % (message)
Python will print newlines just fine, but your browser won't show these.
Use <br/> tags instead, or wrap the whole output in <pre>/</pre> tags.
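For example, the print inside the upload loop of the question's script could emit an HTML line break instead of relying on the plain newline, a one-line tweak to the CGI code above:

# the browser renders <br/>, unlike the trailing "\n"
print('File "' + fn + '" was uploaded successfully<br/>')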

Download files from server in web2py

I am currently setting up a website where I get a file uploaded by the user, do some processing on it, and provide a link for the user to download the processed file. I presently want to provide a path to the file on my local system. I am new to web2py and am having trouble doing this.
Could someone please help me do this?
Regards
See this link for a hint: webpy: how to stream files, and maybe add some code like this:
BUF_SIZE = 262144

class download:
    def GET(self):
        file_name = # get from url
        file_path = os.path.join('/path to your file', file_name)
        f = None
        try:
            f = open(file_path, "rb")
            webpy.header('Content-Type','application/octet-stream')
            webpy.header('Content-disposition', 'attachment; filename=%s' % file_name)
            while True:
                c = f.read(BUF_SIZE)
                if c:
                    yield c
                else:
                    break
        except Exception, e:
            # throw 403 or 500 or just leave it
            pass
        finally:
            if f:
                f.close()
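Since the question is about web2py (the code above is web.py), here is a rough sketch of how the same download could look as a web2py controller action, assuming web2py's response.stream helper; the directory layout and names are only examples:

# controllers/default.py (illustrative)
# request, response and HTTP are provided by web2py's controller environment
import os

def download_processed():
    # e.g. /default/download_processed/report.txt
    filename = request.args(0) or ''
    filepath = os.path.join(request.folder, 'private', 'processed', filename)
    if not filename or not os.path.isfile(filepath):
        raise HTTP(404)
    # ask the browser to download the file instead of displaying it
    response.headers['Content-Disposition'] = 'attachment; filename="%s"' % filename
    return response.stream(open(filepath, 'rb'), chunk_size=64 * 1024)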

Download whole directories in Python SimpleHTTPServer

I really like how I can easily share files on a network using the SimpleHTTPServer, but I wish there was an option like "download entire directory". Is there an easy (one liner) way to implement this?
Thanks
I made that modification for you; I don't know if there are better ways to do it, but:
Just save the file (e.g. ThreadedHTTPServer.py) and run it as:
$ python /path/to/ThreadedHTTPServer.py PORT
The modification also works in a threaded way, so you won't have problems downloading and navigating at the same time. The code isn't organized, but:
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from SocketServer import ThreadingMixIn
import threading
import SimpleHTTPServer
import sys, os, zipfile

PORT = int(sys.argv[1])

def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers.

    Return value is either a file object (which has to be copied
    to the outputfile by the caller unless the command was HEAD,
    and must be closed by the caller under all circumstances), or
    None, in which case the caller has nothing further to do.
    """
    path = self.translate_path(self.path)
    f = None

    if self.path.endswith('?download'):
        tmp_file = "tmp.zip"
        self.path = self.path.replace("?download","")

        zip = zipfile.ZipFile(tmp_file, 'w')
        for root, dirs, files in os.walk(path):
            for file in files:
                if os.path.join(root, file) != os.path.join(root, tmp_file):
                    zip.write(os.path.join(root, file))
        zip.close()
        path = self.translate_path(tmp_file)

    elif os.path.isdir(path):
        if not self.path.endswith('/'):
            # redirect browser - doing basically what apache does
            self.send_response(301)
            self.send_header("Location", self.path + "/")
            self.end_headers()
            return None
        else:
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
    ctype = self.guess_type(path)
    try:
        # Always read in binary mode. Opening files in text mode may cause
        # newline translations, making the actual size of the content
        # transmitted *less* than the content-length!
        f = open(path, 'rb')
    except IOError:
        self.send_error(404, "File not found")
        return None
    self.send_response(200)
    self.send_header("Content-type", ctype)
    fs = os.fstat(f.fileno())
    self.send_header("Content-Length", str(fs[6]))
    self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
    self.end_headers()
    return f

def list_directory(self, path):
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    import cgi, urllib
    """Helper to produce a directory listing (absent index.html).

    Return value is either a file object, or None (indicating an
    error). In either case, the headers are sent, making the
    interface the same as for send_head().
    """
    try:
        list = os.listdir(path)
    except os.error:
        self.send_error(404, "No permission to list directory")
        return None
    list.sort(key=lambda a: a.lower())
    f = StringIO()
    displaypath = cgi.escape(urllib.unquote(self.path))
    f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
    f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
    f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
    f.write("<a href='%s'>%s</a>\n" % (self.path+"?download",'Download Directory Tree as Zip'))
    f.write("<hr>\n<ul>\n")
    for name in list:
        fullname = os.path.join(path, name)
        displayname = linkname = name
        # Append / for directories or # for symbolic links
        if os.path.isdir(fullname):
            displayname = name + "/"
            linkname = name + "/"
        if os.path.islink(fullname):
            displayname = name + "#"
        # Note: a link to a directory displays with # and links with /
        f.write('<li><a href="%s">%s</a>\n'
                % (urllib.quote(linkname), cgi.escape(displayname)))
    f.write("</ul>\n<hr>\n</body>\n</html>\n")
    length = f.tell()
    f.seek(0)
    self.send_response(200)
    encoding = sys.getfilesystemencoding()
    self.send_header("Content-type", "text/html; charset=%s" % encoding)
    self.send_header("Content-Length", str(length))
    self.end_headers()
    return f

Handler = SimpleHTTPServer.SimpleHTTPRequestHandler
Handler.send_head = send_head
Handler.list_directory = list_directory

class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""

if __name__ == '__main__':
    server = ThreadedHTTPServer(('0.0.0.0', PORT), Handler)
    print 'Starting server, use <Ctrl-C> to stop'
    server.serve_forever()
Look at the sources, e.g. online here. Right now, if you call the server with a URL that's a directory, its index.html file is served, or, missing that, the list_directory method is called. Presumably, you want instead to make a zip file with the directory's contents (recursively, I imagine), and serve that? Obviously there's no way to do it with a one-line change, since you want to replace what are now lines 68-80 (in method send_head) plus the whole of method list_directory, lines 98-137 -- that's already at least a change to over 50 lines;-).
If you're OK with a change of several dozen lines, not one, and the semantics I've described are what you want, you could of course build the required zipfile as a cStringIO.StringIO object with the ZipFile class, and populate it with an os.walk on the directory in question (assuming you want, recursively, to get all subdirectories as well). But it's most definitely not going to be a one-liner;-).
There is no one-liner that would do it. Also, what do you mean by "download whole dir": as tar or zip?
Anyway, you can follow these steps (a rough sketch follows after the list):
Derive a class from SimpleHTTPRequestHandler, or maybe just copy its code
Change the list_directory method to return a link to "download whole folder"
Change the copyfile method so that for your links you zip the whole dir and return it
You may cache the zip so that you do not zip the folder every time; instead check whether any file has been modified
Would be a fun exercise to do :)
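Along those lines (and the in-memory zip idea from the answer above it), here is a rough Python 3 sketch, assuming a "?download" query on a directory URL should return that directory as a zip; the class name and archive name are illustrative, and the download link itself would still have to be added to list_directory:

import http.server
import io
import os
import urllib.parse
import zipfile


class ZipDownloadHandler(http.server.SimpleHTTPRequestHandler):
    """Serves '<dir>?download' as a zip archive of that directory."""

    def do_GET(self):
        parsed = urllib.parse.urlsplit(self.path)
        if parsed.query == 'download':
            self.send_zip(self.translate_path(parsed.path))
        else:
            super().do_GET()

    def send_zip(self, directory):
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
            for root, _dirs, files in os.walk(directory):
                for name in files:
                    full = os.path.join(root, name)
                    # store paths relative to the requested directory
                    zf.write(full, os.path.relpath(full, directory))
        data = buf.getvalue()
        self.send_response(200)
        self.send_header('Content-Type', 'application/zip')
        self.send_header('Content-Disposition', 'attachment; filename="archive.zip"')
        self.send_header('Content-Length', str(len(data)))
        self.end_headers()
        self.wfile.write(data)


if __name__ == '__main__':
    http.server.test(HandlerClass=ZipDownloadHandler)

Caching the archive, as the last step above suggests, could be layered on top by keying the in-memory buffer on the directory's newest modification time.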
There is no simple way.
An alternative is to use the python script below to download the whole folder recursively. This works well for Python 3. Change the URL as needed.
import os
from pathlib import Path
from urllib.parse import urlparse, urljoin

import requests
from bs4 import BeautifulSoup

def get_links(content):
    soup = BeautifulSoup(content)
    for a in soup.findAll('a'):
        yield a.get('href')

def download(url):
    path = urlparse(url).path.lstrip('/')
    print(path)
    r = requests.get(url)
    if r.status_code != 200:
        raise Exception('status code is {} for {}'.format(r.status_code, url))
    content = r.text
    if path.endswith('/'):
        Path(path.rstrip('/')).mkdir(parents=True, exist_ok=True)
        for link in get_links(content):
            if not link.startswith('.'):  # skip hidden files such as .DS_Store
                download(urljoin(url, link))
    else:
        with open(path, 'w') as f:
            f.write(content)

if __name__ == '__main__':
    # the trailing / indicates a folder
    url = 'http://ed470d37.ngrok.io/a/bc/'
    download(url)
I like #mononoke's solution, but there are several problems with it:
it writes files in text mode
sometimes href and the link text are different, especially for non-ASCII paths
it does not download large files in chunks
I tried to fix these problems:
import os
from pathlib import Path
from urllib.parse import urlparse, urljoin

import requests
from bs4 import BeautifulSoup
import math

def get_links(content):
    soup = BeautifulSoup(content)
    for a in soup.findAll('a'):
        yield a.get('href'), a.get_text()

def download(url, path=None, overwrite=False):
    if path is None:
        path = urlparse(url).path.lstrip('/')
    if url.endswith('/'):
        r = requests.get(url)
        if r.status_code != 200:
            raise Exception('status code is {} for {}'.format(r.status_code, url))
        content = r.text
        Path(path.rstrip('/')).mkdir(parents=True, exist_ok=True)
        for link, name in get_links(content):
            if not link.startswith('.'):  # skip hidden files such as .DS_Store
                download(urljoin(url, link), os.path.join(path, name))
    else:
        if os.path.isfile(path):
            print("#existing", path)
            if not overwrite:
                return
        chunk_size = 1024*1024
        r = requests.get(url, stream=True)
        content_size = int(r.headers['content-length'])
        total = math.ceil(content_size / chunk_size)
        print("#", path)
        with open(path, 'wb') as f:
            c = 0
            st = 100
            for chunk in r.iter_content(chunk_size=chunk_size):
                c += 1
                if chunk:
                    f.write(chunk)
                ap = int(c*st/total) - int((c-1)*st/total)
                if ap > 0:
                    print("#" * ap, end="")
            print("\r ", " "*int(c*st/total), "\r", end="")

if __name__ == '__main__':
    # the trailing / indicates a folder
    url = 'http://ed470d37.ngrok.io/a/bc/'
    download(url, "/data/bc")
