I have a script to get json and run it on my ubuntu server with a white ip
I accept json, but not completely, and after this acceptance, the script closes the connection and does not work
I think the problem is that I receive packets incorrectly, but why it does not accept the second packet and why it closes in an infinite loop is not clear to me due to my little experience
import os
import socket
from pathlib import Path
from dotenv import load_dotenv
import json
#Init .env
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)
#create webserver socket
def start_my_server():
socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
socket_server.bind(('ip', port))
socket_server.listen(32768)
print('Working...')
global data
global HDRS
while True:
client_socket, address = socket_server.accept()
data = client_socket.recv(32768).decode('utf-8')
# content = 'Well done'.encode('utf-8')
HDRS = 'HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n'
# content = load_page_from_get_request(data)
client_socket.send(HDRS.encode('utf-8'))
# a = client_socket.send(HDRS.encode('utf-8'))
# print(a, '+'*20)
client_socket.shutdown(socket.SHUT_WR)
load_page_from_get_request(32768)
# print('end')
# socket_server.close()
def load_page_from_get_request(request_data):
HDRS = 'HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n'
HDRS_404 = 'HTTP/1.1 404 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n'
try:
with open('data.json', 'w') as output_file:
json.dump(data, output_file)
return HDRS.encode('utf-8')
except EOFError:
return HDRS_404.encode('utf-8')
# try
if __name__ == '__main__':
start_my_server()
The end version of my script
import os
import socket
from pathlib import Path
from dotenv import load_dotenv
import json
import datetime
#Init .env
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)
ip = os.environ['ip']
port = os.environ['port']
HDRS = 'HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n'
HDRS_404 = 'HTTP/1.1 404 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n'
#create webserver socket
def start_my_server():
socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
socket_server.bind((ip, port))
socket_server.listen(32768)
print('Working...')
global data
while True:
client_socket, address = socket_server.accept()
print('loop', address)
data = client_socket.recv(0).decode('utf-8')
# content = load_page_from_get_request(data)
load_page_from_get_request(data)
client_socket.send(HDRS.encode('utf-8'))
client_socket.shutdown(socket.SHUT_WR)
# print('end')
# socket_server.close()
def load_page_from_get_request(request_data):
# HDRS = 'HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n'
# HDRS_404 = 'HTTP/1.1 404 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n'
with open(f'data.json{datetime.time}', 'wr') as output_file:
json.dump(data, output_file)
return HDRS.encode('utf-8')
# except EOFError:
# return HDRS_404.encode('utf-8')
# try
print('loop script')
if __name__ == '__main__':
start_my_server()
Related
I tried to make a web server on port 4460 but when i type "http://127.0.0.1:4460/" in the browser
address bar the browser says ERR_INVALID_HTTP_RESONSE.(Google Chrome).Browser is latest version.
The code did not raise any errors and did not send any bad_gateway requests.it did not access the .ico file.
Python ver:3.8.10
my code:
import socket
from socket import AF_INET,SOCK_STREAM
from threading import Lock
from pprint import pprint
from threadtools import threaded
from email.utils import format_datetime as fmd
import datetime
from deflate import gzip_compress
ms = (lambda x:x/1000)
socket.setdefaulttimeout(ms(700))
ol = Lock()
plok = Lock()
ENCODINGS = "utf-8 utf-16 cp936 latin-1".split()
response_header = b"""\
200 OK
Content-Type: text/html
Content-Length: $$CTXLN$$
Content-Encoding: gzip
Connection: close
Date: $$DATE$$
Keep-Alive: timeout=2, max=2
$$CTX$$"""
bad_gateway = b"""\
502 Bad Gateway
Content-type:text/html
Content-legth:0"""
def decode(x,verbose=False):
for enc in ENCODINGS:
flag = False
try:
return x.decode(enc)
except:
flag = True
finally:
print("Decoded in:"+enc) if(not flag)and verbose else None
return ""
def startswithany(a,lis):
for x in lis:
if a.startswith(x):
return True
return False
def is_newline(x):
return x in ("\r\n","\n")
def load_rsrc(acpt):
if "text/html" in acpt or "text/*" in acpt or "*/*" in acpt:
return open("response.html","rb").read()
elif "image/ico" in acpt or "image/*" in acpt:
return open("response.ico","rb").read()
else:
return b""
def handle_connection(cnct,addr):
global pending
with plok:
pending += 1
try:
if pending > 20:#Too many connections!!!
cnct.send(bad_gateway)
with ol:
print(f"----------------\nConnection from:{addr}\n")
has_ln = True
data = b""
ctx = ""
headers = {"Unknown-Lines":[]}
while True:
data = b""
while not decode(data).endswith("\n"):#\r\n ends with \n
try:
data += cnct.recv(1)
except:#timeout
has_ln = False
break
if not has_ln:
break
assert len(data)
data = decode(data).strip(" ")
assert not data.startswith(" ")
if is_newline(data):#empty line
continue
if startswithany(data,("GET","POST","PUT")):
headers["Request-Type"] = data.strip()
else:
dsp = data.split(":",1)
if len(dsp)!=2:
print(f"Unknown header:{data}")
headers["Unknown-Lines"].append(data)
else:
a,b = data.split(":",1)
b = b.strip()
headers[a] = b
with ol:
print(f"Headers:")
for k,v in headers.items():
print(f"{k}:{v}")
accept = headers.get("Accept","text/html")
accept = accept.split(",")
q = []
for i,x in enumerate(accept):
if ";q=" in x:
a,b = x.split(";q=")
b = float(b)
accept[i] = a
q.append(b)
rt = tuple(map(str.strip,headers.get("Request-Type","GET/HTTP/1.0").split("/")))
req = rt[0]#GET/POST/PUT
protocol = rt[1]#HTTP;NO SECURE SERVER FOR NOW
ver = rt[2]#version
assert ver in ("1.0","1.1")
now = datetime.datetime.now(datetime.timezone.utc)
datestr = fmd(now,True).encode()
ctx = load_rsrc(accept)
ln = str(len(ctx)+1).encode()
response = response_header.replace(b"$$CTXLN$$",ln)\
.replace(b"$$CTX$$",ctx)\
.replace(b"$$DATE$$",datestr)
response_cmpr = gzip_compress(response)
cnct.send(response_cmpr)
print("Sent:")
print(response.decode())
if headers.get("Connection","Keep-alive") == "Keep-alive":
import time
time.sleep(2)
finally:
cnct.close()
with plok:
pending -= 1
skt = socket.socket(AF_INET,SOCK_STREAM)
skt.bind(("",4460))
skt.listen(3)
skt.settimeout(None)
pending = 0
while True:
cn,ad = skt.accept()
handle_connection(cn,ad)
You are close to your goal. Making slight adjustments I got your snippet working. The main issue is in the HTTP response formatting, it should be defined as follow:
HTTP/1.1 200 OK <--- Missing HTTP/1.1 prefix
Content-Type: text/html
...
Keep-Alive: timeout=2, max=2
<--- Mind the extra newline here which is mandatory
$$CTX$$ <--- Browser will expect HTML here
I have adapted the MCVE your provided, please find below a working version for latest Edge and Firefox browsers.
import socket
from socket import AF_INET,SOCK_STREAM
from threading import Lock
from pprint import pprint
#from threadtools import threaded
from email.utils import format_datetime as fmd
import datetime
#from deflate import gzip_compress
ms = (lambda x:x/1000)
socket.setdefaulttimeout(ms(700))
ol = Lock()
plok = Lock()
ENCODINGS = "utf-8 utf-16 cp936 latin-1".split()
response_header = b"""\
HTTP/1.1 200 OK
Content-Type: text/html
Content-Length: $$CTXLN$$
Connection: close
Date: $$DATE$$
Keep-Alive: timeout=2, max=2
$$CTX$$"""
# Missing HTTP/1.1 Prefix
# The extra new line is required
bad_gateway = b"""\
HTTP/1.1 502 Bad Gateway
Content-type:text/html
Content-legth:0
"""
def decode(x,verbose=False):
for enc in ENCODINGS:
flag = False
try:
return x.decode(enc)
except:
flag = True
finally:
print("Decoded in:"+enc) if(not flag)and verbose else None
return ""
def startswithany(a,lis):
for x in lis:
if a.startswith(x):
return True
return False
def is_newline(x):
return x in ("\r\n","\n")
def load_rsrc(acpt):
if "text/html" in acpt or "text/*" in acpt or "*/*" in acpt:
#return open("response.html","rb").read()
return b"hello"
elif "image/ico" in acpt or "image/*" in acpt:
return b"icon"
else:
return b""
def handle_connection(cnct,addr):
global pending
with plok:
pending += 1
try:
if pending > 20:#Too many connections!!!
cnct.send(bad_gateway)
with ol:
print(f"----------------\nConnection from:{addr}\n")
has_ln = True
data = b""
ctx = ""
headers = {"Unknown-Lines":[]}
while True:
data = b""
while not decode(data).endswith("\n"):#\r\n ends with \n
try:
data += cnct.recv(1)
except:#timeout
has_ln = False
break
if not has_ln:
break
assert len(data)
data = decode(data).strip(" ")
assert not data.startswith(" ")
if is_newline(data):#empty line
continue
if startswithany(data,("GET","POST","PUT")):
headers["Request-Type"] = data.strip()
else:
dsp = data.split(":",1)
if len(dsp)!=2:
print(f"Unknown header:{data}")
headers["Unknown-Lines"].append(data)
else:
a,b = data.split(":",1)
b = b.strip()
headers[a] = b
with ol:
print(f"Headers:")
for k,v in headers.items():
print(f"{k}:{v}")
accept = headers.get("Accept","text/html")
accept = accept.split(",")
q = []
for i,x in enumerate(accept):
if ";q=" in x:
a,b = x.split(";q=")
b = float(b)
accept[i] = a
q.append(b)
rt = tuple(map(str.strip,headers.get("Request-Type","GET/HTTP/1.0").split("/")))
req = rt[0]#GET/POST/PUT
protocol = rt[1]#HTTP;NO SECURE SERVER FOR NOW
ver = rt[2]#version
assert ver in ("1.0","1.1")
now = datetime.datetime.now(datetime.timezone.utc)
datestr = fmd(now,True).encode()
ctx = load_rsrc(accept)
ln = str(len(ctx)+1).encode()
response = response_header.replace(b"$$CTXLN$$",ln)\
.replace(b"$$CTX$$",ctx)\
.replace(b"$$DATE$$",datestr)
#response_cmpr = gzip_compress(response)
cnct.send(response)
print("Sent:")
print(response.decode())
if headers.get("Connection","Keep-alive") == "Keep-alive":
import time
time.sleep(2)
finally:
cnct.close()
with plok:
pending -= 1
skt = socket.socket(AF_INET,SOCK_STREAM)
skt.bind(("",8080))
skt.listen(3)
skt.settimeout(None)
pending = 0
while True:
cn,ad = skt.accept()
handle_connection(cn,ad)
I create a simple web server using python socket programming. When I access it using a socket programmed client I get this response (which seems to be good):
HTTP/1.0 200 OK
Content-Length: 145
Content-Type: text/html
"""<!DOCTYPE html>
<html>
<body>
<h2>HTML Links</h2>
<p>Visit our HTML tutorial</p>
</body>
</html>"""
However, when I try to access 127.0.0.1:80 on the browser it says:
127.0.0.1 didn’t send any data. ERR_EMPTY_RESPONSE
Web Server Code:
import socket
import os
def get_content_type(filename):
index = filename.rfind('.')
extension = filename[index+1:len(filename)]
if(extension == 'txt' or extension == 'html'):
return 'Content-Type: text/html\n'
elif(extension == 'jpg'):
return 'Content Type: image/jpeg\n'
elif(extension == 'js'):
return 'Content Type: text/javascript; charset=UTF 8\n'
elif(extension == 'css'):
return 'Content Type: text/css\n'
pass
def check_client_request(client_request):
request_splitted = client_request.split()
if(len(request_splitted) != 3):
return False
if(request_splitted[0] != 'GET'):
return False
if(request_splitted[1].find('http://') != 0):
return False
if(request_splitted[1].count('/') < 3):
return False
if(request_splitted[2] != 'HTTP/1.1\\r\\n'):
return False
return True
def recieve_client_request(client_socket):
client_request = client_socket.recv(1024)
return client_request.decode('utf-8')
def handle_client_request(request):
try:
filename = request.split()[1].split('/')[3]
except:
return 'File not found'
if(filename == ''):
filename = 'index.html'
path = f'C:\\Users\\Eitan\\Desktop\\Python-Course\\SOCKETWEBSERVER\\{filename}'
print(path)
response = ''
if(os.path.isfile(path)):
try:
requested_file = open(path, 'r')
file_content = requested_file.read()
requested_file.close()
response = 'HTTP/1.0 200 OK\n'
content_length = len(file_content.encode('utf-8'))
response += f'Content-Length: {content_length}\n'
response += get_content_type(filename)
response += '\n'
response += f'"""{file_content}"""'
except:
response = 'HTTP/1.1 404 Not Found\n'
else:
response = 'HTTP/1.1 404 Not Found\n'
return response
def send_response(client_socket, response):
try:
client_socket.send(response.encode('utf-8'))
print('Response Sent')
except:
print('Couldnt send response.')
def main():
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(('0.0.0.0', 80))
server_socket.listen(1)
while True:
client_socket = server_socket.accept()[0]
client_request = recieve_client_request(client_socket)
if(check_client_request(client_request)):
response = handle_client_request(client_request)
send_response(client_socket, response)
client_socket.close()
else:
client_socket.close()
if(__name__ == '__main__'):
main()
Client Code:
import socket
def main():
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client_socket.connect(('127.0.0.1', 80))
request = input("Command: ").encode('utf-8')
client_socket.send(request)
response = client_socket.recv(1024)
print(response.decode('utf-8'))
if(__name__ == '__main__'):
main()
if(request_splitted[1].find('http://') != 0):
return False
You expect the browser to send a request like this
GET http://domain/page HTTP/1.1
...
But, a normal HTTP request does not include protocol and host but only the page, i.e. it looks like this
GET /page HTTP/1.1
...
Since you treat the valid request from the browser as invalid you close the connection and thus no response is sent to the browser.
Note that HTTP is not that simple as it might look. There is an actual standard for this which is quite long and which you are expected to follow when implementing a HTTP server or client.
I wrote this code to manually make a GET request using only python sockets. It worked perfectly fine back in 2016 when I wrote it but now I need it again and I keep getting the error code 400 bad request. I tried switching python version but it's still the same. I have been looking through Stackoverflow questions, asking more or less the same thing I do, but I just can't get it to work. I would appreciate if anyone could help me out. Here is my code, I removed all the IO and only posted the networking code.
URL_PATTERN = re.compile("^(.*://)?([A-Za-z0-9\-\.]+)(:[0-9]+)?(.*)$")
HEADER_END = re.compile("\r\n\r\n")
URL_DATA = re.match(URL_PATTERN, INPUT_URL)
PROTOCOL = URL_DATA.groups()[0][:-3]
HOSTNAME = URL_DATA.groups()[1]
PATHNAME = URL_DATA.groups()[3] if URL_DATA.groups()[3] != "" else "/"
PORT = 80 if PROTOCOL == "http" else 443
BUFFER_SIZE = 4096
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((HOSTNAME, PORT))
s.send("GET " + PATHNAME + " HTTP/1.1\r\nHost: " + HOSTNAME + "\r\nConnection: close\r\n\r\n")
resp = s.recv(BUFFER_SIZE)
HEADER_INDEX = re.search(HEADER_END, resp).start()
HTTP_RESPONSE_HEADER = resp[:HEADER_INDEX]
s.close()
When I run my program on the URL https://doc.rust-lang.org/book/2018-edition/foreword.html
The variables from my program has the values:
PORT: 443
PROTOCOL: https
HOSTNAME: doc.rust-lang.org
PATHNAME: /book/2018-edition/foreword.html
And then I get the 400 bad request code back. I don't understand what I'm doing wrong and would appreciate any help I can get.
I believe it's all about SSL. For reference you can check this question Python socket server handle HTTPS request.
I suggest you use:
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
and create a secure socket:
s_sock = context.wrap_socket(s, server_hostname=HOSTNAME)
s_sock.connect((HOSTNAME, PORT))
Additionally you might need to encode the message.
At the end your code could look like:
import re
import socket
import ssl
URL_PATTERN = re.compile("^(.*://)?([A-Za-z0-9\-\.]+)(:[0-9]+)?(.*)$")
HEADER_END = re.compile("\r\n\r\n")
INPUT_URL = "https://doc.rust-lang.org/book/2018-edition/foreword.html"
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
URL_DATA = re.match(URL_PATTERN, INPUT_URL)
PROTOCOL = URL_DATA.groups()[0][:-3]
HOSTNAME = URL_DATA.groups()[1]
PATHNAME = URL_DATA.groups()[3] if URL_DATA.groups()[3] != "" else "/"
PORT = 80 if PROTOCOL == "http" else 443
BUFFER_SIZE = 4096
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s_sock = context.wrap_socket(s, server_hostname=HOSTNAME)
s_sock.connect((HOSTNAME, PORT))
message = "GET " + PATHNAME + " HTTP/1.1\r\nHost: " + HOSTNAME + "\r\nConnection: close\r\n\r\n"
s_sock.send(message.encode('utf-8'))
resp = bytearray()
while True:
part = s_sock.recv(BUFFER_SIZE)
if not part:
break
resp += part
s_sock.close()
resp_string = str(resp, 'utf-8')
HEADER_INDEX = re.search(HEADER_END, resp_string).start()
HTTP_RESPONSE_HEADER = resp_string[:HEADER_INDEX]
I built a simple HTTP server based python, and I would like to print on the console the IP of every client ( in this line - print("IP address of the client "+IP)). Can you help me please do so?
I attached the full code of the server, Thanks!
The server is an HTTP server based Python, I used select and socket.
The reason for the need of the client IP address is to create a dictionary of IP of users. Thanks!
import select, socket, queue, os
DEFAULT_URL = r'/signup.html'
ERROR_404_URL = r'/errors/404.html'
ROOT_PATH = r'/web'
REDIRECTION_LIST = [r"/index.html", r"/index.htm", r"index.html"]
IP = "0.0.0.0"
PORT = 12345
IMAGE_TYPES = ["png", "jpg", "bmp", "gif", "jpeg"]
SERVER_STATUS = {"OK": 200, "Redirect": 302, "Not Found": 404}
BUFFER_LENGTH = 1024
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
# server.setblocking(0)
server.bind((IP, PORT))
server.listen(5)
inputs = [server]
outputs = [] #requests
message_queues = {}
users ={}
def get_file_data(file_path):
""" Get and return data from file in :param file_path"""
try:
file_handler = open(file_path, 'rb')
# read file content
response_content = file_handler.read()
file_handler.close()
return response_content
except Exception:
print ("Warning, file not found. Serving response code 404\n")
print("file path "+file_path)
print("Done")
return None
def get_file_info(client_request):
""" Get absolute response file path and response file type by parsing :param client_request"""
str_array = client_request.split(" ")
file_path_parameters = str_array[1]
strings = file_path_parameters.split("?")
if '?' in file_path_parameters:
get_req = strings[1]
get_request = get_req.split("=")
print("get_request "+get_request[0])
print("ip of the client "+IP) # HERE it should print the
#client IP
# print("string "+ strings[1])
file_path =strings[0]
print("file path " + file_path)
if file_path == r"/":
file_path = DEFAULT_URL
print("file path "+ file_path)
file_type = file_path.split(".")[1]
abs_file_path = ROOT_PATH + file_path # "/test.html"
print(abs_file_path)
return abs_file_path, file_type
def header(url, file_type):
######################################
# headers
######################################
headers = ""
http_version = "HTTP/1.1 200 OK"
content_length = str(os.path.getsize(url))
content_type = file_type
if content_type == "html":
headers = 'HTTP/1.1 200 OK' + '\nContent-Type: text/html; charset=utf-8 \nContent-Length: ' + str(content_length) + '\n\n'
elif content_type == "css":
headers = 'HTTP/1.1 200 OK\nContent-Type: text/css \nContent-Length: ' + str(content_length) + '\n\n'
elif content_type == "js":
headers = 'HTTP/1.1 200 OK' +'\nContent-Type: text/javascript; charset=utf-8 \nContent-Length: ' + str(content_length) + '\n\n'
elif file_type in IMAGE_TYPES:
headers = 'HTTP/1.1 200 OK\nContent-Type: image/xyz \nContent-Length: ' + content_length + '\n\n'
else:
headers = 'HTTP/1.1 200 OK\nContent-Type: ' + content_type + '\nContent-Length: ' + str(content_length) + '\n\n'
return headers.encode()
# further headers
# current_date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
# response_header += 'Date: ' + current_date + '\n'
# Important: "\n" twice - because html page body
# starts with first empty line
# response_header += 'Server: Allam-HTTP-Server\n\n'
# signal that the connection will be closed
# after completing the request
response_header += 'Connection: close\n\n'
print("response_header = ", response_header)
return response_header.encode() + file_data
def main():
""" Main loop for connect, read and write to/from sockets"""
while inputs:
readable, writable, exceptional = select.select(
inputs, outputs, inputs+outputs, 1)
for s in readable:
if s is server: #אם זה של הסרבר
new_client_socket, client_address = s.accept()
print("accepted")
# new_client_socket.setblocking(0)
inputs.append(new_client_socket)
message_queues[new_client_socket] = queue.Queue()
else:
data = s.recv(1024)
if data: #המחרוזת לא ריקה
message_queues[s].put(data)
if s not in outputs:
outputs.append(s)
else: # אם זה ריק כלומר להתנתק צריך להשמיד
if s in outputs:
outputs.remove(s)
inputs.remove(s)
s.close()
del message_queues[s]
for s in writable:
try:
next_msg = message_queues[s].get_nowait()
except queue.Empty:
outputs.remove(s)
else:
file_path, file_type = get_file_info(next_msg.decode())
file_data = get_file_data(file_path)
# file_size = len(file_data) # another way: num_of_bytes = os.stat(file_path).st_size
http_response = header(file_path, file_type)+file_data
s.send(http_response)
for s in exceptional:
inputs.remove(s)
if s in outputs:
outputs.remove(s)
s.close()
del message_queues[s]
if __name__ == '__main__':
main()
You get the client address from s.accept() -- it's a tuple of IP address (str) and port (int). Your code does not use this variable at all and cares only about the socket.
new_client_socket, client_address = s.accept()
You only pass the client request string to get_file_info, so it doesn't know anything about the client it is currently serving. Save the client address somewhere, maybe in a dict mapping sockets to such tuples?
More information: https://docs.python.org/3/library/socket.html#socket.socket.accept
I try to do simple async http client with asyncore:
This code works fine and output is (fast enought):
www.gmail.com : recv http code: 301
www.yandex.ru : recv http code: 200
www.python.org : recv http code: 200
www.google.ru : recv http code: 200
www.gravatar.com : recv http code: 302
www.com.com : recv http code: 302
www.yahoo.com : recv http code: 302
www.bom.com : recv http code: 301
But than i uncomment line with not exist host:
#c = AsyncHTTP('http://www.no-such-host.ru') #!this line breaks execution!
The execution breaks, code hangs for some time, output part of data and hangs with no last data output:
connection error: [Errno -5] No address associated with hostname
www.gmail.com : recv http code: 301
www.yandex.ru : recv http code: 200
www.yahoo.com : recv http code: 302
www.com.com : recv http code: 302
www.bom.com : recv http code: 301
www.gravatar.com : recv http code: 302
...
some hosts are lost here and long delay at start.
Why this happen and how to fix this?
# coding=utf-8
import asyncore
import string, socket
import StringIO
import mimetools, urlparse
class AsyncHTTP(asyncore.dispatcher):
# HTTP requestor
def __init__(self, uri):
asyncore.dispatcher.__init__(self)
self.uri = uri
# turn the uri into a valid request
scheme, host, path, params, query, fragment = urlparse.urlparse(uri)
assert scheme == "http", "only supports HTTP requests"
try:
host, port = string.split(host, ":", 1)
port = int(port)
except (TypeError, ValueError):
port = 80 # default port
if not path:
path = "/"
if params:
path = path + ";" + params
if query:
path = path + "?" + query
self.request = "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n" % (path, host)
self.host = host
self.port = port
self.status = None
self.header = None
self.http_code = None
self.data = ""
# get things going!
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
#self.connect((host, port))
#return
try:
self.connect((host, port))
except Exception,e:
self.close()
self.handle_connect_expt(e)
def handle_connect(self):
self.send(self.request)
def handle_expt(self):
print "handle_expt error!"
self.close()
def handle_error(self):
print "handle_error error!"
self.close()
def handle_connect_expt(self,expt):
print "connection error:",expt
def handle_code(self):
print self.host," : ","recv http code: ",self.http_code
def handle_read(self):
data = self.recv(2048)
#print data
if not self.header:
self.data = self.data + data
try:
i = string.index(self.data, "\r\n\r\n")
except ValueError:
return # continue
else:
# parse header
fp = StringIO.StringIO(self.data[:i+4])
# status line is "HTTP/version status message"
status = fp.readline()
self.status = string.split(status, " ", 2)
self.http_code = self.status[1]
self.handle_code()
# followed by a rfc822-style message header
self.header = mimetools.Message(fp)
# followed by a newline, and the payload (if any)
data = self.data[i+4:]
self.data = ""
#header recived
#self.close()
def handle_close(self):
self.close()
c = AsyncHTTP('http://www.python.org')
c = AsyncHTTP('http://www.yandex.ru')
c = AsyncHTTP('http://www.google.ru')
c = AsyncHTTP('http://www.gmail.com')
c = AsyncHTTP('http://www.gravatar.com')
c = AsyncHTTP('http://www.yahoo.com')
c = AsyncHTTP('http://www.com.com')
c = AsyncHTTP('http://www.bom.com')
#c = AsyncHTTP('http://www.no-such-host.ru') #!this line breaks execution!
asyncore.loop()
ps: My system ubuntu 11.10 + python 2.7.2
You invoke a blocking name-resolution when you do self.connect((host, port)). Combined with your local DNS configuration, this is why your program has a long delay at startup.
An alternative to asyncore and figuring out how to do non-blocking name resolution yourself, you might think about using Twisted. Twisted's TCP connection setup API (mainly reactor.connectTCP or one of the APIs built on top of it) does not block. So a naive use of it will remain properly asynchronous.