I am trying to use this code to create an HTTP proxy cache server. When I run the code it starts running and connects to the port and everything but when I try to connect from the browser, for example, it opens a port on 55555 if I type in localhost:52523/www.google.com it works fine but when I try other sites specifically HTTP, for example, localhost:52523/www.microcenter.com or just localhost:52523/google.com it will display localhost didn’t send any data.
ERR_EMPTY_RESPONSE and shows an exception in the console though it creates the cache file on my computer.
I would like to find out how to edit the code so that I can access any website just as I would normally do on the browser without using the proxy server. It should be able to work with www.microcenter.com
import socket
import sys
import urllib
from urlparse import urlparse
Serv_Sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # socket.socket
function creates a socket.
port = Serv_Sock.getsockname()[1]
# Server socket created, bound and starting to listen
Serv_Sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # socket.socket
function creates a socket.
Serv_Sock.bind(('',port))
Serv_Sock.listen(5)
port = Serv_Sock.getsockname()[1]
# Prepare a server socket
print ("starting server on port %s...,"%(port))
def caching_object(splitMessage, Cli_Sock):
#this method is responsible for caching
Req_Type = splitMessage[0]
Req_path = splitMessage[1]
Req_path = Req_path[1:]
print "Request is ", Req_Type, " to URL : ", Req_path
#Searching available cache if file exists
url = urlparse(Req_path)
file_to_use = "/" + Req_path
print file_to_use
try:
file = open(file_to_use[5:], "r")
data = file.readlines()
print "File Present in Cache\n"
#Proxy Server Will Send A Response Message
#Cli_Sock.send("HTTP/1.0 200 OK\r\n")
#Cli_Sock.send("Content-Type:text/html")
#Cli_Sock.send("\r\n")
#Proxy Server Will Send Data
for i in range(0, len(data)):
print (data[i])
Cli_Sock.send(data[i])
print "Reading file from cache\n"
except IOError:
print "File Doesn't Exists In Cache\n fetching file from server \n
creating cache"
serv_proxy = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host_name = Req_path
print "HOST NAME:", host_name
try:
serv_proxy.connect((url.host_name, 80))
print 'Socket connected to port 80 of the host'
fileobj = serv_proxy.makefile('r', 0)
fileobj.write("GET " + "http://" + Req_path + " HTTP/1.0\n\n")
# Read the response into buffer
buffer = fileobj.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket
# and the corresponding file in the cache
tmpFile = open(file_to_use, "wb")
for data in buffer:
tmpFile.write(data)
tcpCliSock.send(data)
except:
print 'Illegal Request'
Cli_Sock.close()
while True:
# Start receiving data from the client
print 'Initiating server... \n Accepting connection\n'
Cli_Sock, addr = Serv_Sock.accept() # Accept a connection from client
#print addr
print ' connection received from: ', addr
message = Cli_Sock.recv(1024) #Recieves data from Socket
splitMessage = message.split()
if len(splitMessage) <= 1:
continue
caching_object(splitMessage, Cli_Sock)
Your errors are not related to URI scheme (http or https) but to files and socket use.
When you are trying to open a file with:
file = open(file_to_use[1:], "r")
you are passing an illegal file path (http://ebay.com/ in your example).
As you are working with URIs, you could use a parser like urlparse, so you can handle better the schema, hostname, etc...
For example:
url = urlparse(Req_path)
file_to_use = url.hostname
file = open(file_to_use, "r")
and use only the hostname as a file name.
Another problem is with the use of sockets. Function connect should receive hostname, not hostname with schema which is what you are doing. Again, with the help of the parser:
serv_proxy.connect((url.hostname, 80))
Besides that, you do not call listen on a client (see examples), so you can remove that line.
Finally, again to create the new file, use the hostname:
tmpFile = open(file_to_use, "wb")
Related
As I wrote on title, I have already successfully connected the server and client.
But the client can't display the HTML file.
I checked file path and send function. But can't find any fault.
When running the code, the code runs normally until connectionSocket.close().
But browser can't display the HTML file, just blank.
So, I checked the details and I found that connectionSocket.send(outputdata[i].encode()) send values, 1 or 3.
I don't know the reason but I'm sure that that is the cause.
Please give me your insight.
from socket import *
serverSocket = socket(AF_INET, SOCK_STREAM)
# Prepare a sever socket
TCPPort = 8000
BufferSize = 1024
serverSocket.bind((host, TCPPort))
serverSocket.listen(1)
while True:
# Establish the connection
print('Ready to serve...')
(connectionSocket,addr) = serverSocket.accept()
print('connectionSocket is:',connectionSocket)
try:
message = connectionSocket.recv(BufferSize)
print('message is:',message)
#filename = message.split()[1]
#print('filename is:', filename)
f = open('\HTML.html','r',encoding='UTF-8')
outputdata = f.read()
# Send one HTTP header line into socket
connectionSocket.send('HTTP/1.1 200 OK\r\n'.encode('UTF-8'))
# Send the content of the requested file to the client
for i in range(0,len(outputdata)):
connectionSocket.send(outputdata[i].encode())
connectionSocket.close()
except IOError:
connectionSocket.send('HTTP/1.1 404 Not Found'.encode('UTF-8'))
connectionSocket.send("<html><head></head><body><h1>404 Not Found</h1></body></html> ".encode('UTF-8'))
# Close client socket
connectionSocket.close()
serverSocket.close()
You need to make your server to respond by the HTTP protocol. In HTTP there are 2 newlines between headers and body and you need to send both together:
from socket import *
serverSocket = socket(AF_INET, SOCK_STREAM)
# Prepare a sever socket
TCPPort = 8000
BufferSize = 1024
serverSocket.bind(('127.0.0.1', TCPPort))
serverSocket.listen(1)
while True:
# Establish the connection
print('Ready to serve...')
(connectionSocket, addr) = serverSocket.accept()
print('connectionSocket is:', connectionSocket)
try:
message = connectionSocket.recv(BufferSize)
print('message is:', message)
#filename = message.split()[1]
#print('filename is:', filename)
#f = open('\HTML.html','r',encoding='UTF-8')
outputdata = "<html><body>foo</body></html>"
# Send one HTTP header line into socket
response = 'HTTP/1.1 200 OK\nConnection: close\n\n' + outputdata
connectionSocket.send(response.decode())
# Send the content of the requested file to the client
connectionSocket.close()
except IOError:
connectionSocket.send('HTTP/1.1 404 Not Found'.encode('UTF-8'))
connectionSocket.send(
"<html><head></head><body><h1>404 Not Found</h1></body></html> ".
encode('UTF-8')
)
# Close client socket
connectionSocket.close()
serverSocket.close()
Test, using: curl -X GET http://localhost:8000
Out:
<html><body>foo</body></html>
I am trying to use this code to create an HTTP proxy cache server. When I run the code it starts running and connects to the port and everything but when I try to connect from the browser, for example, it opens a port on 55555 if I type in localhost:52523/www.google.com it works fine but when I try other sites specifically HTTP, for example, localhost:52523/www.microcenter.com or just localhost:52523/google.com it will display localhost didn’t send any data.
ERR_EMPTY_RESPONSE and shows an exception in the console though it creates the cache file on my computer.
I would like to find out how to edit the code so that I can access any website just as I would normally do on the browser without using the proxy server. It should be able to work with www.microcenter.com
import socket
import sys
import urllib
from urlparse import urlparse
Serv_Sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # socket.socket
function creates a socket.
port = Serv_Sock.getsockname()[1]
# Server socket created, bound and starting to listen
Serv_Sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # socket.socket
function creates a socket.
Serv_Sock.bind(('',port))
Serv_Sock.listen(5)
port = Serv_Sock.getsockname()[1]
# Prepare a server socket
print ("starting server on port %s...,"%(port))
def caching_object(splitMessage, Cli_Sock):
#this method is responsible for caching
Req_Type = splitMessage[0]
Req_path = splitMessage[1]
Req_path = Req_path[1:]
print "Request is ", Req_Type, " to URL : ", Req_path
#Searching available cache if file exists
url = urlparse(Req_path)
file_to_use = "/" + Req_path
print file_to_use
try:
file = open(file_to_use[5:], "r")
data = file.readlines()
print "File Present in Cache\n"
#Proxy Server Will Send A Response Message
#Cli_Sock.send("HTTP/1.0 200 OK\r\n")
#Cli_Sock.send("Content-Type:text/html")
#Cli_Sock.send("\r\n")
#Proxy Server Will Send Data
for i in range(0, len(data)):
print (data[i])
Cli_Sock.send(data[i])
print "Reading file from cache\n"
except IOError:
print "File Doesn't Exists In Cache\n fetching file from server \n
creating cache"
serv_proxy = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host_name = Req_path
print "HOST NAME:", host_name
try:
serv_proxy.connect((url.host_name, 80))
print 'Socket connected to port 80 of the host'
fileobj = serv_proxy.makefile('r', 0)
fileobj.write("GET " + "http://" + Req_path + " HTTP/1.0\n\n")
# Read the response into buffer
buffer = fileobj.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket
# and the corresponding file in the cache
tmpFile = open(file_to_use, "wb")
for data in buffer:
tmpFile.write(data)
tcpCliSock.send(data)
except:
print 'Illegal Request'
Cli_Sock.close()
while True:
# Start receiving data from the client
print 'Initiating server... \n Accepting connection\n'
Cli_Sock, addr = Serv_Sock.accept() # Accept a connection from client
#print addr
print ' connection received from: ', addr
message = Cli_Sock.recv(1024) #Recieves data from Socket
splitMessage = message.split()
if len(splitMessage) <= 1:
continue
caching_object(splitMessage, Cli_Sock)
There is a few errors in the code :-
The first is that a GET request does not expect the protocol to be passed in as part of the call, nor does it expect the host, instead the GET should be restricted to only the path + query string.
An additional HOST header should be added which specifies which host you are using (i.e www.google.com ) some web servers may be setup to ignore this and instead send you a default page, but results are intermittent.
You should have a peek at the HTTP RFC which gives some other headers that can be passed via HTTP.
You could also install something like Fiddler or Wireshark and monitor some sample HTTP calls and see how the payload is supposed to look.
I have to create a web server in Python. Below is the code I am working on. When i execute it, I initially get no error and it prints "Ready to serve.." , but after opening a browser and running http://10.1.10.187:50997/HelloWorld.html (HelloWorld is an html file in the same folder as my python code, while 10.1.10.187 is my IP address and 50997) is the server port), I get a TypeError saying 'a bytes like object is required and not str". please help me in resolving this and kindly let me know if any other modifications are required.
#Import socket module
from socket import *
#Create a TCP server socket
#(AF_INET is used for IPv4 protocols)
#(SOCK_STREAM is used for TCP)
# Assign a port number
serverPort = 50997
serverSocket = socket(AF_INET, SOCK_STREAM)
#serverSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#print ("hostname is: "), gethostname()
#print ("hostname is: "), socket.gethostname()
# Bind the socket to server address and server port
serverSocket.bind(("", serverPort))
# Listen to at most 1 connection at a time
serverSocket.listen(1)
# Server should be up and running and listening to the incoming connections
while True:
print ("Ready to serve...")
# Set up a new connection from the client
connectionSocket, addr = serverSocket.accept()
try:
# Receives the request message from the client
message = connectionSocket.recv(1024)
print ("Message is: "), message
filename = message.split()[1]
print ("File name is: "), filename
f = open(filename[1:])
outputdata = f.read()
connectionSocket.send("HTTP/1.1 200 OK\r\n\r\n")
for i in range(0, len(outputdata)):
connectionSocket.send(outputdata[i])
connectionSocket.send("\r\n")
# Close the client connection socket
connectionSocket.close()
except IOError:
# Send HTTP response message for file not found
connectionSocket.send("HTTP/1.1 404 Not Found\r\n\r\n")
connectionSocket.send("<html><head></head><body><h1>404 Not Found</h1></body></html>\r\n")
# Close the client connection socket
connectionSocket.close()
serverSocket.close()
The error I am exacly getting-
Ready to serve...
Message is:
File name is:
Traceback (most recent call last):
File "intro.py", line 56, in <module>
connectionSocket.send("HTTP/1.1 200 OK\r\n\r\n")
TypeError: a bytes-like object is required, not 'str'
You need to convert the string you are sending into bytes, using a text format. A good text format to use is UTF-8. You can implement this conversion like so:
bytes(string_to_convert, 'UTF-8')
or, in the context of your code:
connectionSocket.send(bytes("HTTP/1.1 404 Not Found\r\n\r\n","UTF-8"))
connectionSocket.send(bytes("<html><head></head><body><h1>404 Not Found</h1></body></html>\r\n","UTF-8"))`
I have a homework assignment which involves implementing a proxy cache server in Python for web pages. Here is my implementation of it
from socket import *
import sys
def main():
#Create a server socket, bind it to a port and start listening
tcpSerSock = socket(AF_INET, SOCK_STREAM) #Initializing socket
tcpSerSock.bind(("", 8030)) #Binding socket to port
tcpSerSock.listen(5) #Listening for page requests
while True:
#Start receiving data from the client
print 'Ready to serve...'
tcpCliSock, addr = tcpSerSock.accept()
print 'Received a connection from:', addr
message = tcpCliSock.recv(1024)
print message
#Extract the filename from the given message
filename = ""
try:
filename = message.split()[1].partition("/")[2].replace("/", "")
except:
continue
fileExist = False
try: #Check whether the file exists in the cache
f = open(filename, "r")
outputdata = f.readlines()
fileExist = True
#ProxyServer finds a cache hit and generates a response message
tcpCliSock.send("HTTP/1.0 200 OK\r\n")
tcpCliSock.send("Content-Type:text/html\r\n")
for data in outputdata:
tcpCliSock.send(data)
print 'Read from cache'
except IOError: #Error handling for file not found in cache
if fileExist == False:
c = socket(AF_INET, SOCK_STREAM) #Create a socket on the proxyserver
try:
srv = getaddrinfo(filename, 80)
c.connect((filename, 80)) #https://docs.python.org/2/library/socket.html
# Create a temporary file on this socket and ask port 80 for
# the file requested by the client
fileobj = c.makefile('r', 0)
fileobj.write("GET " + "http://" + filename + " HTTP/1.0\r\n")
# Read the response into buffer
buffr = fileobj.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket and the
# corresponding file in the cache
tmpFile = open(filename,"wb")
for data in buffr:
tmpFile.write(data)
tcpCliSock.send(data)
except:
print "Illegal request"
else: #File not found
print "404: File Not Found"
tcpCliSock.close() #Close the client and the server sockets
main()
I configured my browsers to use my proxy server like so
But my problem when I run it is that no matter what web page I try to access it returns a 404 error with the initial connection and then a connection reset error with subsequent connections. I have no idea why so any help would be greatly appreciated, thanks!
There are quite a number of issues with your code.
Your URL parser is quite cumbersome. Instead of the line
filename = message.split()[1].partition("/")[2].replace("/", "")
I would use
import re
parsed_url = re.match(r'GET\s+http://(([^/]+)(.*))\sHTTP/1.*$', message)
local_path = parsed_url.group(3)
host_name = parsed_url.group(2)
filename = parsed_url.group(1)
If you catch an exception there, you should probably throw an error because it is a request your proxy doesn't understand (e.g. a POST).
When you assemble your request to the destination server, you then use
fileobj.write("GET {object} HTTP/1.0\n".format(object=local_path))
fileobj.write("Host: {host}\n\n".format(host=host_name))
You should also include some of the header lines from the original request because they can make a major difference to the returned content.
Furthermore, you currently cache the entire response with all header lines, so you should not add your own when serving from cache.
What you have doesn't work, anyway, because there is no guarantee that you will get a 200 and text/html content. You should check the response code and only cache if you did indeed get a 200.
I have this code here for an HTTP Proxy server which works. How do I create another program called "Client"? The client will send HTTP GET requests to multiple web servers
via the proxy server. The client program connects to the proxy and sends HTTP GET requests for the following 3 websites: (www.google.com, www.yahoo.com, www.stackoverflow.com)
with an interval of 30 seconds.
-My overall question is how do i send HTTP GET requests to the proxy server from python, not my web browser?
OSX 10.10.3 Python 3.4
When i call this proxy in my terminal:
python 1869.py 2000
You can give any port number in place of 2000.
Output:
starting server ....
Initiating server...
Accepting connection
Then in my browser (im using the most update version of chrome) I type in:
localhost:2000/www.stackoverflow.com
And my terminal output is:
request is GET to URL : www.stackoverflow.com
/www.stackoverflow.com
File Present in Cache
HTTP/1.1 301 Moved Permanently
Content-Type: text/html; charset=UTF-8
Location: http://stackoverflow.com/
Date: Thu, 07 May 2015 17:45:40 GMT
Content-Length: 148
Connection: close
Age: 0
<head><title>Document Moved</title></head>
<body><h1>Object Moved</h1>This document may be found here</body>
Reading file from cache
Initiating server...
Accepting connection
Proxy code:
import socket
import sys
if len(sys.argv) <= 1:
print 'Usage: "python S.py port"\n[port : It is the port of the Proxy Server'
sys.exit(2)
# Server socket created, bound and starting to listen
Serv_Port = int(sys.argv[1]) # sys.argv[1] is the port number entered by the user
Serv_Sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # socket.socket function creates a socket.
# Prepare a server socket
print "starting server ...."
Serv_Sock.bind(('', Serv_Port))
Serv_Sock.listen(5)
def caching_object(splitMessage, Cli_Sock):
#this method is responsible for caching
Req_Type = splitMessage[0]
Req_path = splitMessage[1]
Req_path = Req_path[1:]
print "Request is ", Req_Type, " to URL : ", Req_path
#Searching available cache if file exists
file_to_use = "/" + Req_path
print file_to_use
try:
file = open(file_to_use[1:], "r")
data = file.readlines()
print "File Present in Cache\n"
#Proxy Server Will Send A Response Message
#Cli_Sock.send("HTTP/1.0 200 OK\r\n")
#Cli_Sock.send("Content-Type:text/html")
#Cli_Sock.send("\r\n")
#Proxy Server Will Send Data
for i in range(0, len(data)):
print (data[i])
Cli_Sock.send(data[i])
print "Reading file from cache\n"
except IOError:
print "File Doesn't Exists In Cache\n fetching file from server \n creating cache"
serv_proxy = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host_name = Req_path
print "HOST NAME:", host_name
try:
serv_proxy.connect((host_name, 80))
print 'Socket connected to port 80 of the host'
fileobj = serv_proxy.makefile('r', 0)
fileobj.write("GET " + "http://" + Req_path + " HTTP/1.0\n\n")
# Read the response into buffer
buffer = fileobj.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket
# and the corresponding file in the cache
tmpFile = open("./" + Req_path, "wb")
for i in range(0, len(buffer)):
tmpFile.write(buffer[i])
Cli_Sock.send(buffer[i])
except:
print 'Illegal Request'
Cli_Sock.close()
while True:
# Start receiving data from the client
print 'Initiating server... \n Accepting connection\n'
Cli_Sock, addr = Serv_Sock.accept() # Accept a connection from client
#print addr
print ' connection received from: ', addr
message = Cli_Sock.recv(1024) #Recieves data from Socket
splitMessage = message.split()
if len(splitMessage) <= 1:
continue
caching_object(splitMessage, Cli_Sock)
you can use httpie from linux' terminal:
http [get/post] http://host:port/link_name/
in your app you can use requests:
pip install requests
import requests
response = requests.get(url='url', proxies='proxies')
response.close()
print(response)