Line breaks not occurring in for statement iterations - python

The following script takes multiple file inputs from a form and writes them to disk one after another. It does print a message to the page as each file succeeds, but no line breaks appear between the messages from each loop iteration. What would be the best way to fix this? I thought print statements added line breaks by default.
#!/usr/bin/python
import cgi, os
import shutil
import cgitb; cgitb.enable() # for troubleshooting
form = cgi.FieldStorage()
print """\
Content-Type: text/html\n
<html><body>
"""
if 'file' in form:
filefield = form['file']
if not isinstance(filefield, list):
filefield = [filefield]
for fileitem in filefield:
if fileitem.filename:
fn = os.path.basename(fileitem.filename)
# save file
with open('/var/www/rsreese.com/files/' + fn, 'wb') as f:
shutil.copyfileobj(fileitem.file, f)
# line breaks are not occuring between interations
print 'File "' + fn + '" was uploaded successfully \n'
message = 'All files uploaded'
else:
message = 'No file was uploaded'
print """\
<p>%s</p>
</body></html>
""" % (message)

Python will print newlines just fine, but your browser won't show them: when rendering HTML, whitespace (newlines included) is collapsed.
Use <br/> tags instead, or wrap the whole output in <pre>/</pre> tags.

Related

Save file received from POST request in Python

I'm trying to implement an upload feature to the basic http.server Python module.
So far, I've created a new class named SimpleHTTPRequestHandlerWithUpload which inherits from SimpleHTTPRequestHandler and added an upload section to list_directory(). The next step would be creating a do_POST() method, which handles the request and saves the file inside the current working directory. However, I have no idea how to do this. I looked at UniIsland's code on GitHub but I can't understand what he did and the code is very old. I also read this question and tried to implement it in my code.
It kind of works, but the file is "littered" with headers. This does not pose a big problem on txt files, but it corrupts all of the other file extensions.
I'd like to know how to remove the headers, save the uploaded file inside the current working directory with its original name and check if the upload was successful or not.
This is my code:
__version__ = '0.1'
import http.server
import html
import io
import os
import socket # For gethostbyaddr()
import sys
import urllib.parse
import contextlib
from http import HTTPStatus
class SimpleHTTPRequestHandlerWithUpload(http.server.SimpleHTTPRequestHandler):
    """Static-file request handler whose directory listings carry an upload form.

    ``list_directory`` renders the usual listing followed by a
    multipart/form-data form that posts back to the same URL, and
    ``do_POST`` stores the posted request body on disk.
    """

    server_version = 'SimpleHTTPWithUpload/' + __version__

    def do_POST(self):
        """Serve a POST request.

        The raw request body is written unchanged to ``file.txt`` and a
        small HTML result page is sent back.  Responds with 411 when the
        request has no Content-Length header and with 400 when the header
        is not a non-negative integer.
        """
        raw_length = self.headers['content-length']
        if raw_length is None:
            # Without a length there is no way to know how much to read.
            self.send_error(HTTPStatus.LENGTH_REQUIRED)
            return
        try:
            length = int(raw_length)
        except ValueError:
            length = -1
        if length < 0:
            self.send_error(HTTPStatus.BAD_REQUEST, 'Invalid Content-Length')
            return

        data = self.rfile.read(length)
        # NOTE: the body is stored exactly as received.  For the
        # multipart/form-data form produced by list_directory() this
        # includes the boundary lines and part headers, not only the
        # bytes of the uploaded file.
        with open('file.txt', 'wb') as file:
            file.write(data)

        r = []
        enc = sys.getfilesystemencoding()
        r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        r.append("<html>\n<title>Upload Result Page</title>\n")
        r.append("<body>\n<h2>Upload Result Page</h2>\n")
        r.append("</body>\n</html>")
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')

        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().
        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                'No permission to list directory')
            return None
        entries.sort(key=lambda a: a.lower())

        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            displaypath = urllib.parse.unquote(path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or # for symbolic links
            if os.path.isdir(fullname):
                displayname = name + '/'
                linkname = name + '/'
            if os.path.islink(fullname):
                displayname = name + '#'
                # Note: a link to a directory displays with # and links with /
            # Two values are formatted here, so the template needs two
            # placeholders: the quoted link target and the escaped label.
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib.parse.quote(linkname, errors='surrogatepass'),
                        html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n')
        r.append('<form id="upload" enctype="multipart/form-data" method="post" action="#">\n'
                 '<input id="fileupload" name="file" type="file" />\n'
                 '<input type="submit" value="Submit" id="submit" />\n'
                 '</form>')
        r.append('\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')

        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header('Content-type', 'text/html; charset=%s' % enc)
        self.send_header('Content-Length', str(len(encoded)))
        self.end_headers()
        return f
if __name__ == '__main__':

    class DualStackServer(http.server.ThreadingHTTPServer):
        """Threading server that clears IPV6_V6ONLY on its socket before binding."""

        def server_bind(self):
            # suppress exception when protocol is IPv4
            try:
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            except Exception:
                pass
            return super().server_bind()

    # Hand both classes to the stock http.server command-line entry point.
    http.server.test(
        HandlerClass=SimpleHTTPRequestHandlerWithUpload,
        ServerClass=DualStackServer,
    )
If you want to test it, just run the script on your machine, open a web browser on a different machine and type in the address bar <IP_ADDRESS_1>:8000 where IP_ADDRESS_1 is the IP of the machine you're running the code on.
Please, tell me if there's something wrong with it other than the do_POST() method. I'm a new Python programmer and I'm trying to improve my software design skills in general. Thank you!
EDIT: I figured out how to remove the headers and save the file with its original name. However, the script hangs on data = self.rfile.readlines() until I close the browser tab and then works well. I don't know what to do. It seems I have to send some sort of EOF to notify readlines() that I'm finished sending the file but I have no clue how to do it. I also can't figure out how to check if the file has been uploaded successfully or not. Any help is appreciated!
Updated do_POST() method:
def do_POST(self):
    """Serve a POST request.

    Reads the whole request body, takes the upload's file name from the
    Content-Disposition line, writes the payload lines to a file of that
    name and answers with a small HTML result page.
    """
    # NOTE(review): readlines() returns only at end-of-stream, so this call
    # blocks until the client closes the connection.
    lines = self.rfile.readlines()
    matches = re.findall(r'Content-Disposition.*name="file"; filename="(.*)"', str(lines[1]))
    if len(matches) != 1:
        return
    filename = ''.join(matches)

    # Keep everything except the first four and the last two lines
    # (presumably the part headers and the closing boundary - verify).
    payload = b''.join(lines[4:-2])
    with open(filename, 'wb') as file:
        file.write(payload)

    enc = sys.getfilesystemencoding()
    page = [
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
        '"http://www.w3.org/TR/html4/strict.dtd">',
        '<html>\n<title>Upload result page</title>\n',
        '<body>\n<h2>Upload result page</h2>\n',
        '</body>\n</html>',
    ]
    encoded = '\n'.join(page).encode(enc, 'surrogateescape')

    self.send_response(HTTPStatus.OK)
    self.send_header('Content-type', 'text/html')
    self.send_header('Content-Length', str(len(encoded)))
    self.end_headers()

    f = io.BytesIO(encoded)
    self.copyfile(f, self.wfile)
    f.close()
I managed to solve all of my problems. I posted my code on GitHub, for anyone interested.

Python MD5 hashing same content returns different hash

I am writing a Python program (because I am lazy) that checks a website for a job opening I have been told about and returns all the jobs listed on the company's web page.
Here is my code so far (yes, I know the code is janky; I am just trying to get it working):
import requests
from bs4 import BeautifulSoup
import sys
import os
import hashlib
# Python 2 idiom: reload sys so that setdefaultencoding can be called, then
# make UTF-8 the default string encoding.
reload(sys)
sys.setdefaultencoding('utf8')
# Fetch the page once at start-up; verify=False turns off TLS certificate
# verification for this request.
res = requests.get('WEBSITE URL', verify=False)
# Abort with an exception if the server answered with an error status.
res.raise_for_status()
# Local file holding the previously downloaded copy of the page.
filename = "JobWebsite.txt"
def StartUp():
    """Open the output file for reading and writing, creating it if absent.

    Returns:
        The file object for ``filename`` opened in 'r+' mode.

    Exits the process with status 1 when the file cannot be created or
    opened.
    """
    if not os.path.isfile(filename):
        try:
            # 'a' creates the file without truncating; close this handle
            # straight away so that only the 'r+' handle stays open.
            open(filename, 'a').close()
            jobfile = open(filename, 'r+')
        except IOError:
            print("[*] Error creating output file!")
            sys.exit(1)
        print("[*] Succesfully Created output file")
        return jobfile

    try:
        jobfile = open(filename, 'r+')
    except IOError:
        print("[*] Error opening output file!")
        sys.exit(1)
    print("[*] Succesfully Opened output file")
    return jobfile
def AnyChange(htmlFile):
    """Hash the cached page and the live page, then count jobs from one of them.

    When the two hashes compare equal the jobs are counted from the file
    contents; otherwise they are counted from the downloaded page and the
    file is emptied and rewritten with that page.
    """
    fileCont = htmlFile.read()
    FileHash = hasher(fileCont, "File Code Hashed")
    WebHash = hasher(res.text, "Webpage Code Hashed")
    # !!!!! Here is the Problem
    print ("[*] File hash is " + str(FileHash))
    print ("[*] Website hash is " + str(WebHash))

    if FileHash != WebHash:
        print("[*] Jobs being read from website!")
        num_of_jobs(res.text)
        deleteContent(htmlFile)
        writeWebContent(htmlFile, res.text)
        return

    print ("[*] Jobs being read from file!")
    num_of_jobs(fileCont)
def hasher(content, message):
    """Return an MD5 hash object for the UTF-8 encoding of ``content``.

    ``message`` is accepted but not used.
    """
    encoded = content.encode('utf-8')
    return hashlib.md5(encoded)
def num_of_jobs(htmlFile):
    """Parse the given HTML and print how many '.search-result-inner' elements it has."""
    soup = BeautifulSoup(htmlFile, "html.parser")
    job_count = len(soup.select('.search-result-inner'))
    print("[*] There are " + str(job_count) + " jobs available!")
def deleteContent(htmlFile):
    """Empty the given open file and leave its position at the start."""
    print("[*] Deleting Contents of local file! ")
    # Rewind first: truncate() cuts the file at the current position.
    htmlFile.seek(0)
    htmlFile.truncate()
def writeWebContent(htmlFile, content):
    """Write ``content``, encoded as UTF-8, to the open file ``htmlFile``.

    Args:
        htmlFile: writable file object positioned where the content
            should go.
        content: text of the page to store.

    The data is written through the handle that was passed in, rather
    than through a second handle on the same file that is never closed,
    and is flushed so that it reaches the file immediately.
    """
    print("[*] Writing Contents of website to file! ")
    htmlFile.write(content.encode('utf-8'))
    htmlFile.flush()
# Open (or create) the local copy, then compare it with the downloaded page.
jobfile = StartUp()
AnyChange(jobfile)
The problem I currently have is that I hash both the website's HTML and the file's HTML, but the two hashes never match. I am not sure why and can only guess that it has something to do with the contents being saved to a file. The hashes aren't too far apart, but the mismatch still causes the if statement to fail each time.
Breakpoint in Program with hashes
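The screenshot you have attached shows the memory locations of the two hash objects, FileHash and WebHash. They are two separate objects, so they will always be at different locations, and comparing the objects themselves never reports them as equal.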
What you really want to compare is the hexdigest() of the two hash objects. Change your if statement to:
# Compare the hex digest strings rather than the hash objects themselves.
if FileHash.hexdigest() == WebHash.hexdigest():
    print ("[*] Jobs being read from file!")
    num_of_jobs(fileCont)
Take a look at this other StackOverflow answer for some more how-to.

How to download a file in Python (Jinja2) on-click Export button?

I have a button export :
<button class="aptButton" formaction="/export/" type="submit">export</button>
and I have this in the /export/
index.cgi
#! /apollo/sbin/envroot $ENVROOT/bin/python
# -*- coding: utf-8 -*-
import cgitb
cgitb.enable()
import cgi
def main():
print "Content-Type: text/html"
print
form = cgi.FieldStorage()
results = helpers.getResults()
environment = helpers.get_environment()
print environment.get_template('export.html').render(
results = results)
main()
and I have this in my export.html
<!doctype html>
{% for id in results %}
{{ write_results_to_file(id) }}
{% endfor %}
I am trying to download the results as a tab-separated file, so I thought of writing them to a local file and then sending (downloading) that file, but I am not sure how to do the download part. I can't use Flask or Django, which have good libraries for this. Is there any other library I can use to download the results as a tab-delimited file to the user's desktop?
export.py
def write_results_to_file(result, local_filename="/home/testing.txt"):
    """Write one result to a file as a single tab-separated line.

    Args:
        result: iterable of strings; the items become the columns.
        local_filename: path of the file to write.  The file is opened
            in 'w' mode, so its previous contents are replaced on every
            call.
    """
    with open(local_filename, 'w') as f:
        f.write('\t'.join(result) + '\n')
If you're using good old-fashioned CGI to produce a tab-separated file,
all you need to do is print an appropriate header and then print the content on stdout, something like this:
def main():
form = cgi.FieldStorage()
results = helpers.getResults()
print "Content-Type: text/plain"
print "Content-Disposition: attachment; filename=testing.txt"
print
for result in results:
print '\t'.join(result) + '\n'
main()
The essential parts are the 2 lines that print the header,
followed by a blank line to separate from the content,
followed by the plain text content.
If you want to make this happen on the click of an Export button,
then you can, for example:
Make the Export button a link to another URL endpoint that will use the example script I put above
Or, use the same script, with a conditional statement on form parameters to decide to print the front page, or to print the content using the example script above
Let me know if you need further help.

save print output to .txt

I have a script that extracts all email addresses from a .txt document and prints them.
I would like to save them to list.txt instead and, if possible, delete duplicates,
but it gives this error:
Traceback (most recent call last):
File "mail.py", line 44, in <module>
notepad.write(email.read())
AttributeError: 'str' object has no attribute 'read'
Script:
from optparse import OptionParser
import os.path
import re
# Matches addresses written as user@host.tld as well as the spelled-out
# "user at host dot tld" form.  Group 1 is the whole address.
regex = re.compile((r"([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`"
                    r"{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|"
                    r"\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)"))


def file_to_str(filename):
    """Returns the contents of filename as a string."""
    with open(filename) as f:
        return f.read().lower() # Case is lowered to prevent regex mismatches.


def get_emails(s):
    """Returns an iterator of matched emails found in string s."""
    # Removing lines that start with '//' because the regular expression
    # mistakenly matches patterns like 'http://foo@bar.com' as '//foo@bar.com'.
    return (email[0] for email in re.findall(regex, s) if not email[0].startswith('//'))
if __name__ == '__main__':
parser = OptionParser(usage="Usage: python %prog [FILE]...")
# No options added yet. Add them here if you ever need them.
options, args = parser.parse_args()
if not args:
parser.print_usage()
exit(1)
for arg in args:
if os.path.isfile(arg):
for email in get_emails(file_to_str(arg)):
#print email
notepad = open("list.txt","wb")
notepad.write(email.read())
notepad.close()
else:
print '"{}" is not a file.'.format(arg)
parser.print_usage()
When I remove .read(), list.txt contains only one email address, whereas
print email shows a couple of hundred. If I refresh list.txt while the
extraction is running, the address changes, but the file only ever
contains one.
This is because you have open() and close() within the loop, i.e. the file is written anew for each email, so you end up with only the last address in it. Change the loop to:
# Open the output once, before the loop, so earlier addresses are kept.
notepad = open("list.txt", "wb")
for email in get_emails(file_to_str(arg)):
    #print email
    # One address per line; without a separator they would run together.
    notepad.write(email + "\n")
notepad.close()
or even better:
# The with block closes the file even if the loop raises.
with open("list.txt", "wb") as notepad:
    for email in get_emails(file_to_str(arg)):
        #print email
        # One address per line; without a separator they would run together.
        notepad.write(email + "\n")

Using Python and the Pillow Library and CGI for file upload

I've been having a hard time using Image and CGI together. Basically, I want to upload a GIF image and then display a thumbnail of it as the output. I'm getting a bunch of errors, and I'm unable to use from PIL import Pillow and cgi at the same time. Here is the code below. Your help is highly appreciated. I did hours and hours of research and can't figure it out. I'm getting this error: End of script output before headers: save_file.py
#!C:\Anaconda3\python.exe
# CGI script: accepts an uploaded file, stores it when its name ends in
# 'gif', builds a thumbnail with Pillow and prints an HTML status page.
from PIL import Image
import cgi, os
import cgitb; cgitb.enable() #cgitb enabled for bug tracking purposes

try: # Windows needs stdio set for binary mode.
    import msvcrt
    msvcrt.setmode (0, os.O_BINARY) # stdin = 0
    msvcrt.setmode (1, os.O_BINARY) # stdout = 1
except ImportError:
    pass

form = cgi.FieldStorage()

# A nested FieldStorage instance holds the file
fileitem = form['file']

# Test if the file was uploaded
if fileitem.filename:
    # strip leading path from file name to avoid directory traversal attacks
    fn = os.path.basename(fileitem.filename)
    ext = fn[-3:]
    condi = 'gif'
    if ext != condi:
        message = 'You can only upload a gif image. No file was uploaded'
    else:
        open('tmpmedia/' + fn, 'wb').write(fileitem.file.read())
        message = 'The file "' + fn + '" was uploaded successfully'
        selectfunction = "Please select a function from below"

# NOTE(review): fn and message are only assigned when a file name was
# submitted, yet both are used unconditionally below.
size = (128,128)
saved = "thumb.jpg"
# NOTE(review): the upload was stored as 'tmpmedia/' + fn, but this path
# ends in 'tmpmedia\\gif' + fn - confirm which location is intended.
infile = ('C:\\xampp\\htdocs\\aztec\\tmpmedia\\gif' + fn)
try:
    im = Image.open(infile)
except:
    # NOTE(review): this text is printed before the Content-Type header,
    # and im stays undefined for the two calls that follow.
    print("Unable to load image")
im.thumbnail(size)
im.save(saved)

print ("Content-Type: text/html\n")
print ("<html>")
print("<body>")
print(message)
print("</body>")
print("</html>")
I figured it out. There must be a try statement at the end after the print html, and then I would use from PIL import Image

Categories

Resources