Are there any straightforward ways to make a HTTP request and get at the raw, unparsed response (specifically the headers)?
Using the socket module directly:
import socket
CRLF = "\r\n"
request = [
"GET / HTTP/1.1",
"Host: www.example.com",
"Connection: Close",
"",
"",
]
# Connect to the server
s = socket.socket()
s.connect(('www.example.com', 80))
# Send an HTTP request
s.send(CRLF.join(request))
# Get the response (in several parts, if necessary)
response = ''
buffer = s.recv(4096)
while buffer:
response += buffer
buffer = s.recv(4096)
# HTTP headers will be separated from the body by an empty line
header_data, _, body = response.partition(CRLF + CRLF)
print header_data
HTTP/1.0 302 Found
Location: http://www.iana.org/domains/example/
Server: BigIP
Connection: Keep-Alive
Content-Length: 0
Related
Socket can't receive any data from the server, when there is a successful response, but with bad requests it can. Also server responds, just the socket can't receive data (checked in WireShark)
import socket
import ssl
HOST, PORT = 'example.com', 443
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
ssock = ssl.wrap_socket(sock)
ssock.connect((HOST, PORT))
raw_req = [f'GET / HTTP/1.1', 'Host: {HOST}', 'Connection: keep-alive']
req = '\n'.join(raw_req)
ssock.send(req.encode())
msg = ssock.recv(4096).decode()
print(msg)
ssock.close()
First, the HTTP GET expects a sequence of CR LF characters after each header line not just a single '\n' character and an extra CR LF after the last header line. Also, the join() adds the separator between each pair but not at the end so must append data with CR LF + CR LF to be a valid HTTP request.
Second, the 'Host: {HOST}' must be a f-string otherwise the "{HOST}" is not replaced.
import socket
import ssl
HOST, PORT = 'stackoverflow.com', 443
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
ssock = ssl.wrap_socket(sock)
ssock.connect((HOST, PORT))
raw_req = [f'GET / HTTP/1.1', f'Host: {HOST}', 'Connection: keep-alive']
req = ('\r\n'.join(raw_req) + "\r\n\r\n").encode()
print("Request:", req)
ssock.send(req)
msg = ssock.recv(4096).decode()
print("\nResponse:")
print(msg)
Output:
Request: b'GET / HTTP/1.1\r\nHost: stackoverflow.com\r\nConnection: keep-alive\r\n\r\n'
Response:
HTTP/1.1 200 OK
Connection: keep-alive
content-type: text/html; charset=utf-8
...
If the HTTP response is larger than 4096 bytes then you would need to call ssock.recv() in a loop until it returns a byte array of length 0.
I'm sending data in a TCP client in python and the tutorial I'm following is telling me to send this:
"GET / HTTP/1.1\r\nHost: google.com\r\n\r\n"
I've tried looking up information about the formatting here and I'm confused about what the GET is actually requesting or what data would be sent back by this request, and also what is the purpose of the carriage returns and newlines?
If want to write low-level HTTP GET in Python then you can create a TCP Socket and write the GET command optionally with header parameters then read the response.
The HTTP request starts with a Request-line (e.g. GET / HTTP/1.1 with a terminating CRLF or "\r\n"). The request line is followed by zero or more headers each ending with a CRLF. A final CRLF sequence marks the end of the request line and header part of the HTTP request followed by an optional message body. The request structure is defined in section 5 of the HTTP 1.1 spec
import socket
# host and port map to URL http://localhost:8000/
host = "localhost"
port = 8000
try:
sock = socket.socket()
sock.connect((host, port))
sock.sendall("GET / HTTP/1.1\r\nHost: google.com\r\n\r\n".encode())
# keep reading from socket until no more data in response
while True:
response = sock.recv(8096)
if len(response) == 0:
break
print(response)
except Exception as ex:
print("I/O Error:", ex)
The first line of the HTTP response is the status line including status code terminated with \r\n and followed by response headers.
HTTP/1.1 200 OK\r\n
Content-type: text/plain\r\n
Content-length: 14\r\n
\r\n
This is a test
You need to parse the status line and headers to determine how to decode the message body of the HTTP response.
Details of the HTTP response are in section 6 of the HTTP 1.1 Spec.
Alternatively, the requests module implements the HTTP spec in a simple API.
Example to make a HTTP GET using requests API.
import requests
url = 'http://localhost:8000/'
response = requests.get(url)
print("Status code:", response.status_code)
print("Content:", response.text)
I've got here a code that sends an HTTPS request.
My problem is handling redirection requests using the same socket connection.
I know that the requests module can handle this redirects very well but this code
is for a proxy server that I'm developing.
Any help would be appreciated. Thanks!
import socket, ssl
from ssl import SSLContext
HOST = "www.facebook.com"
PORT = 443
ContextoSSL = SSLContext(protocol=ssl.PROTOCOL_SSLv23)
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sslSocket = ContextoSSL.wrap_socket(sock, server_hostname=HOST)
sslSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sslSocket.connect((HOST, PORT))
sslSocket.do_handshake()
der = sslSocket.getpeercert(binary_form=True)
pem_data = ssl.DER_cert_to_PEM_cert(der)
#print(pem_data) # print certificate
'''1st request'''
headers = \
"GET / HTTP/1.1\r\n"\
"Host: www.facebook.com\r\n"\
"User-Agent: python-requests/2.22.0\r\n"\
"Accept-Encoding: gzip, deflate\r\nAccept: */*\r\n"\
"Connection: keep-alive\r\n\r\n"
print("\n\n" + headers)
sslSocket.send(headers.encode()) # send request
response = sslSocket.recv(9999)
print(response) # print receive response
'''2nd request''' # on this redirect with cookie set, response should be 200 OK
cookie, location = "", ""
for line in response.decode().splitlines():
if "Set-Cookie" in line:
cookie = line.replace("Set-Cookie: ", "").split(";")[0]
if "Location" in line:
location = line.replace("Location: ", "").split("/")[3]
print(cookie, location)
headers = \
f"GET /{location} HTTP/1.1\r\n"\
"Host: www.facebook.com\r\n"\
"User-Agent: python-requests/2.22.0\r\n"\
"Accept-Encoding: gzip, deflate\r\nAccept: */*\r\n"\
"Connection: keep-alive\r\n"\
f"Cookie: {cookie}\r\n\r\n"
print("\n\n" + headers)
sslSocket.send(headers.encode()) # send request
response = sslSocket.recv(9999)
print(response) # print received response
To handle a redirect you must first get the new location:
first properly read the response as specified in the HTTP standard, i.e. read the full body based on the length declared in the response
parse the response
check for a response code which indicates a redirect
in case of a redirect extract the new location from the Location field in the response header
Once you have the new location you can issue the new request for this location. If the new location is for the same domain and if both request and response indicated that the TCP connection can be reused you can try to issue the new request on the same TCP connection. But you need to handle the case that the server might close the connection anyway since this is explicitly allowed.
In all other cases you have to create a new TCP connection for the new request.
Note that showing you how you exactly can code this would be too broad. There is a reason HTTP libraries exist which you'd better use for this purpose instead of implementing all the complexity yourself.
I need to build a http server without using an HTTP library.
I have the server running and an html page beeing loaded but my <img src="..."/> tags are not beeing loaded, I recive the call but cannot preset the png/JPEG in the page.
httpServer.py
# Define socket host and port
SERVER_HOST = '0.0.0.0'
SERVER_PORT = 8000
# Create socket
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server_socket.bind((SERVER_HOST, SERVER_PORT))
server_socket.listen(1)
print('Listening on port %s ...' % SERVER_PORT)
while True:
# Wait for client connections
client_connection, client_address = server_socket.accept()
# Handle client request
request = client_connection.recv(1024).decode()
content = handle_request(request)
# Send HTTP response
if content:
response = 'HTTP/1.1 200 OK\n\n'
response += content
else:
response = 'HTTP/1.1 404 NOT FOUND\n\nFile Not Found'
client_connection.sendall(response.encode())
client_connection.close()
# Close socket
server_socket.close()
Function where handles the call
def handle_request(request):
http = HttpHandler.HTTPHandler
# Parse headers
print(request)
headers = request.split('\n')
get_content = headers[0].split()
accept = headers[6].split()
type_content = accept[1].split('/')
try:
# Filename
filename = get_content[1]
if get_content[0] == "GET":
content = http.get(None, get_content[1], type_content[0])
return content
except FileNotFoundError:
return None
class to handle the http verbs
class HTTPHandler:
def get(self, args, type):
if args == '/':
args = '/index.html'
fin = open('htdocs' + args)
if type != "image":
fin = open('htdocs/' + args)
if type == "image":
fin = open('htdocs/' + args, 'rb')
# Read file contents
content = fin.read()
fin.close()
return content
Realize that I´m trying to make an HTTP 1.1, if you see anything out of pattern fell free to say thanks in advance.
I don't know where you've learnt how HTTP works but I'm pretty sure that you did not study the actual standard which you should do when implementing a protocol. Some notes about your implementation:
Line ends should be \r\n not \n. This is true for both responses from the server as requests from the client.
You are assuming that the clients requests is never larger than 1024 bytes and that it can be read within a single recv. But, requests can have arbitrary length and there is no guarantee that you get all within a single recv (TCP is a streaming protocol and not a message protocol).
While it is kind of ok to simply close the TCP connection after the body it would be better to include the length of the body in the Content-length header or use chunked transfer encoding.
The type of the content should be given by using the Content-Type header, i.e. Content-type: text/html for HTML and Content-type: image/jpeg for JPEG images. Without this browser might guess correctly or wrongly what the type might be or depending on the context might also insist on a proper content-type header.
Apart from that, if you debug such problems it is helpful to find out what gets actually exchanged between client and server. It might be that you've checked this for yourself but you did not include such information into your question. Your only error description is "...I recive the call but cannot preset the png/JPEG in the page" and then a dump of your code.
httpServer.py
Ended up like:
while True:
# Wait for client connections
client_connection, client_address = server_socket.accept()
# Handle client request
request = client_connection.recv(10240).decode()
content = handle_request(request)
# Send HTTP response
if content:
if str(content).find("html") > 0:
client_connection.send('HTTP/1.1 200 OK\n\n'.encode())
client_connection.send(content.encode())
else:
client_connection.send('HTTP/1.1 200 OK\r\n'.encode())
client_connection.send("Content-Type: image/jpeg\r\n".encode())
client_connection.send("Accept-Ranges: bytes\r\n\r\n".encode())
client_connection.send(content)
else:
response = 'HTTP/1.1 404 NOT FOUND\r\nFile Not Found'
client_connection.close()
And the Get method like:
class HTTPHandler:
def get(self, args, type):
if args == '/':
args = '/index.html'
fin = open('htdocs' + args)
if type != "image":
fin = open('htdocs/' + args)
if type.find("html") == -1:
image_data = open('htdocs/' + args, 'rb')
bytes = image_data.read()
# Content-Type: image/jpeg, image/png \n\n
content = bytes
fin.close()
return content
# Read file contents
content = fin.read()
fin.close()
return content
I am trying to write a simple HTTP client program using raw sockets in Python 3. However, the server does not return a response despite having been sent a simple HTTP request. My question is why the server doesn't return a response.
Here is my code:
from socket import *
BUF_LEN = 8192 * 100000
info = getaddrinfo('google.com', 80, AF_INET)
addr = info[-1][-1]
print(addr)
client = socket(AF_INET, SOCK_STREAM)
client.connect(addr)
client.send(b"GET /index.html HTTP1.1\r\nHost: www.google.com\r\n")
print(client.recv(BUF_LEN).decode("utf-8")) # print nothing
You've missed a blank line at the end and mis-specified the HTTP version without a slash:
>>> client.send(b"GET /index.html HTTP1.1\r\nHost: www.google.com\r\n")
Should be:
>>> client.send(b"GET /index.html HTTP/1.1\r\nHost: www.google.com\r\n\r\n")
50
>>> client.recv(BUF_LEN).decode("utf-8")
u'HTTP/1.1 302 Found\r\nCache-Control: private\r\nContent-Type: text/html; charset=UTF-8\r\nLocation: http://www.google.co.uk/index.html?gfe_rd=cr&ei=fIR7WJ7QGejv8AeZzbWgCw\r\nContent-Length: 271\r\nDate: Sun, 15 Jan 2017 14:17:32 GMT\r\n\r\n<HTML><HEAD><meta http-equiv....
The blank line tells the server its the end of the headers, and since this is a GET request there's no payload and so it can then return the content.
Without the / in the HTTP/1.1 spec Google's servers will return an Error: 400 Bad Request response.