Python socket performance drops considerably when used in parallel - python

I use pretty much standard code to transfer a file from one node to another using Python/socket.
Num-of-threads/Performance (comparing to sftp/scp)
1: 2x
2: 1.2x
3: 1x
4: 1x
5: 1x
It takes 25 sec to transfer 2.7G file over 10Gb network using Python/socket.
If I use sftp/scp it takes 50 sec to transfer the same file.
2 threads complete the transfer in 47 sec using the Python/socket script.
If I use sftp/scp it takes 55 sec to transfer the same 2 files in parallel.
3 threads complete the transfer in 112 sec using the Python/socket script.
sftp/scp does the same job in 112 sec (3 files in parallel).
Client code:
#client.py
import socket
import sys
import datetime as dt
e=sys.exit
n1=dt.datetime.now()
#s = socket.socket()
s = socket.socket(socket.AF_INET, type=socket.SOCK_STREAM, proto=0)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
#s.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1)
host = 'testserver'
port = %s # Reserve a port for your service.
s.connect((host, port))
f = open('/tmp/testfile.gz','rb')
print 'Sending..',
l = f.read(1024*1024)
while (l):
print '.',
s.send(l)
l = f.read(1024*1024)
f.close()
print "Done Sending"
s.shutdown(socket.SHUT_WR)
s.close
n2=dt.datetime.now()
diff=(n2-n1)
print diff.seconds
e(0)
Server code:
#server.py
# Accepts TCP connections and writes whatever each client sends to a file
# under /tmp.
# NOTE(review): the indentation of this listing was lost, so where each loop
# and branch ends cannot be determined from the text below.
import socket # Import socket module
import sys, time
e=sys.exit
#s = socket.socket() # Create a socket object
s= socket.socket(socket.AF_INET, type=socket.SOCK_STREAM, proto=0)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
host = socket.gethostname() # Get local machine name
# `%s` is a placeholder, not valid Python — presumably substituted with the
# port number before the script is run; TODO confirm.
port = %s # Reserve a port for your service.
print port
s.bind((host, port)) # Bind to the port
# NOTE(review): the '%d' in the name is never formatted, so the literal path
# '/tmp/testfile_%d.png' is what gets opened.
f = open('/tmp/testfile_%d.png','wb')
s.listen(5) # Now wait for client connection.
i=0
while True:
c, addr = s.accept() # Establish connection with client.
c.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
c.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
#c.setNoDelay(True)
#c.setsockopt(socket.IPPROTO_TCP, socket.TCP_CORK, 1)
print 'Got connection from', addr
print "Receiving..."
# NOTE(review): `netcat` is not defined anywhere in this listing.
if netcat:
netcat.write('netcat from file writer')
i=0
# Read up to 1 MiB at a time until recv() returns an empty string.
l = c.recv(1024*1024)
while (l):
f.write(l)
l = c.recv(1024*1024)
i +=1
# `0 and ...` is always false, so this early-exit branch never runs.
if 0 and i>20:
f.close()
e(0)
f.close()
#c.send('Thank you for connecting')
c.close()
s.shutdown(socket.SHUT_WR)
s.close()
print "Done Receiving"
e(0)
e(0)
When I run it in 2 jobs in parallel (different ports/shells) performance drops 50%.
Alternatively when I use sftp in parallel:
time sftp user@server://tmp/testfile.gz.gz test0.gz&
time sftp user@server://tmp/testfile.gz.gz test1.gz&
time sftp user@server://tmp/testfile.gz.gz test2.gz&
elapsed time does not change for 2 or 3 parallel jobs.

Related

Why I have some packets lost in localhost?

I am trying to develop server and client programs in Python 2.7 that can switch between UDP and TCP, based on this echo program:
TCP vs. UDP socket latency benchmark
For now, I am just running both programs on localhost.
When I run it in TCP (is_UDP = False), the server program shows that no packets are lost (total_perdu = 0).
But if I run it in UDP (is_UDP = True), it reports some lost packets.
This is my code for the server :
# Server side of the packet-loss test: receives fixed-size packets over UDP or
# TCP (selected by is_UDP) and counts gaps in the sequence number stored in
# the first int32 of each packet.
# NOTE(review): the indentation of this listing was lost; the nesting of the
# loop and branches below cannot be determined from the text.
import socket
from numpy import *
server_address = ("127.0.0.1", 4444)
client_address = ("127.0.0.1", 4445)
bufferSize = 4096
# is_UDP = True
is_UDP = False
# Create a datagram socket
if is_UDP == True:
UDP_Server_Socket_in = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
UDP_Server_Socket_in.bind(server_address)
UDP_Server_Socket_out = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
UDP_Server_Socket_out.connect(client_address)
connection = UDP_Server_Socket_in
print("UDP server is running...")
else :
TCP_Server_Socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
TCP_Server_Socket.bind(server_address)
TCP_Server_Socket.listen(1)
connection, client_address = TCP_Server_Socket.accept()
print("TCP server is running...")
# t holds the sequence number of the previous packet; total_perdu is the
# running count of missing ("perdu" = lost) sequence numbers.
t = 0
total_perdu = 0
i = 0
while(True):
i += 1
# Receive packet from client
data_2= connection.recv(bufferSize)
# Interpret the bytes just received as an array of int32 values.
tab=fromstring(data_2,dtype="int32")
size=len(data_2)
# Keep reading until bufferSize bytes have been consumed in total.
# NOTE(review): data_2 is overwritten by each extra recv() and `tab` is not
# rebuilt, so only the first chunk is ever parsed.
while size<bufferSize:
data_2= connection.recv(bufferSize-size)
size+=len(data_2)
if data_2:
# Gap between this packet's sequence number and the previous one.
perdu=int(tab[0])-t-1
sperdu=""
if perdu>0:
total_perdu+=perdu
sperdu = "(%d)"%(perdu)
print "Receive data : %s %d %d %s" % (tab[0], len(tab), total_perdu,sperdu)
t=int(tab[0])
And this is my code for the client:
# Client side of the packet-loss test: sends total_packet numbered arrays to
# the server over UDP or TCP (selected by is_UDP) and prints timing figures.
# NOTE(review): the indentation of this listing was lost; which statements
# sit inside the send loop cannot be determined from the text.
import socket
from numpy import *
import time
server_address = ("127.0.0.1", 4444)
client_address = ("127.0.0.1", 4445)
# Packets variables
packet_size = 1024
total_packet = 1000
bufferSize = 4*packet_size
# Variables initialization
error = 0
total_throughput = 0
total_latene = 0
total_ratio = 0
total_stop_time_1 = 0
total_stop_time_3 = 0
# Creation of a packet
# NOTE(review): dtype `int` is the platform default integer — presumably
# 8 bytes on 64-bit Linux, which would make each send 8192 bytes rather than
# bufferSize; confirm against the int32 parsing in the server listing.
send_tab = zeros(packet_size, int)
for i in range(0, packet_size):
send_tab[i] = i
# NOTE(review): the reason for the "+8" elements is not visible here.
data_size = (send_tab.size+8)*send_tab.itemsize
print "Data size : %d" % data_size
print "Tab : %s \n" % send_tab
# is_UDP = True
is_UDP = False
# Create a socket at client side
if is_UDP == True:
UDP_Client_Socket_out = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
UDP_Client_Socket_out.connect(server_address)
UDP_Client_Socket_in = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
UDP_Client_Socket_in.bind(client_address)
connection = UDP_Client_Socket_out
print("UDP client is running...")
else:
TCP_Client_Socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
TCP_Client_Socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 0)
TCP_Client_Socket.connect(server_address)
connection = TCP_Client_Socket
print("TCP client is running...")
start_time_0 = time.clock()
for packet_number in range(0,total_packet):
# The first element carries the sequence number of the packet.
send_tab[0] = packet_number
# Send packet to server
start_time=time.clock()
sent = connection.send(send_tab)
if sent:
stop_time_1 = time.clock() - start_time
# Calculate throughput and ratio
throughput = data_size / (stop_time_1 * 1000000)
print "stop_time_1 \t%f" % stop_time_1
total_throughput += throughput
stop_time_3 = (time.clock() - start_time_0)
print "Results : \n"
print " Packet error : %d \n" % error
print " Thoughput: %f Mo/s \n " % (total_throughput/total_packet)
# NOTE(review): total_stop_time_1 is never updated after its initialisation
# to 0 above, so this line always reports 0.
print " total_stop_time_1 : %f s \n " % (total_stop_time_1/total_packet)
print " stop_time_3 : %f \n" % stop_time_3
So, I have 3 questions about it :
Is it normal to have some packets which are lost even if I do it as a local host?
If yes, why?
Will I have the same problem if I program it in C?
From your code it looks like you expect to receive the UDP packets in the same order you are sending them. I don't think you are losing packets, but rather the order in which they are received by the server is not the expected one (which is normal for UDP).
Also, UDP guarantees neither the ordering nor the delivery of packets, so your program should take this into account.
I would refactor the code and add the tab into a list, then sort it and check for the gaps (at the end of transmission).
Another way would be to send a reply from the server and make the check on the client (but this might increase the number if you deploy it on the Internet).

Non-blocking socket still continues to block on send()

I have a program interacting with two servers (all on localhost), which essentially forwards data from one server to the other.
I am trying to create a scenario where the server sending the initial data will close its connection to the middle-program because the server receiving the data has a very slow connection to the middle-program. The buffers of the middle-program should fill up, and it shouldn't be able to receive any data from the server sending data. As a result of this inactivity, the server sending data should time out and close the connection, resulting in an incomplete data transfer.
I am trying to do it in the following way:
Program at server sending the data
# Sending server: listens on port 9000 and streams test.txt to every client
# that connects, 1024 characters at a time.
import socket
interval_sec = 3
# NOTE(review): 0x10 is a hard-coded option number — presumably the platform's
# TCP keep-alive interval option; confirm for the target OS.
TCP_KEEPALIVE = 0x10
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
sock.setsockopt(socket.IPPROTO_TCP, TCP_KEEPALIVE, interval_sec)
sock.bind(('', 9000))
sock.listen(5)
while True:
conn, addr = sock.accept()
f = open('test.txt', 'r')
file_data = f.read(1024)
while (file_data):
# The return value of send() (bytes actually written) is not checked.
conn.send(file_data)
file_data = f.read(1024)
f.close()
# NOTE(review): conn is never closed in this listing.
print "sent"
Middle program forwarding the data
# Middle program: connects to the sending server (port 9000) and the
# receiving server (port 8001) and forwards data from the first to the second
# using select() on non-blocking sockets.
# NOTE(review): the indentation of this listing was lost; in particular it is
# not clear which `if`/`try` the final `else` belongs to.
import socket
import select
import time
import struct
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
recv_timeout = 2
# Receive timeout packed as two native unsigned longs (seconds, microseconds).
sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVTIMEO, struct.pack('LL', recv_timeout, 0))
total_data=[]
total_sent=0;
data=0;
sock.connect(('localhost', 9000))
sock2.connect(('localhost', 8001))
sock2.setblocking(0)
sock.setblocking(0)
j=0
while 1:
# Wait up to 2 seconds for sock to be readable or sock2 to be writable.
read, write, errors = select.select([sock], [sock2], [], 2)
if read:
# NOTE(review): this overwrites `data`, including any part of the previous
# chunk that has not been sent yet.
data=sock.recv(1024, socket.MSG_DONTWAIT)
total_data.append(data)
#time.sleep(10)
if write:
if data:
try:
data_sent=sock2.send(data, socket.MSG_DONTWAIT)
total_sent+=data_sent
# Keep only the bytes that send() did not accept.
data=data[data_sent:]
print data_sent
except socket.error, e:
# NOTE(review): `errno` is not imported in this listing, so this comparison
# would raise NameError when reached.
if e.errno != errno.EAGAIN:
raise e
else:
print "Not writable"
Program at server receiving the data
# Receiving server: accepts connections on port 8001 and stores whatever each
# client sends in 'received_file', reading 100 bytes at a time.
import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('', 8001))
sock.listen(5)
while True:
    conn, addr = sock.accept()
    with open('received_file', 'wb') as f:
        # Prime the loop with the first read; an empty result means the
        # peer has closed its side of the connection.
        chunk = conn.recv(100)
        print('receiving data...')
        while chunk:
            f.write(chunk)
            chunk = conn.recv(100)
            print('receiving data...')
        f.close()
    conn.close()
The problem I'm facing is that, the middle-program's sockets are blocking although they should be acting as non-blocking.
Are my programs fundamentally wrong, and I'm missing the whole point or do I need to make some minor tweaks?
Thanks :)

TCP vs. UDP socket latency benchmark

I have implemented a small benchmark for socket communication via TCP and UDP in Python. Surprisingly, TCP is almost exactly twice as fast as UDP.
To avoid routing effects, server and client are running on the same Unix machine, but on different threads.
Maybe the code is useful. Here is the server code:
import socket
import sys
host = 'localhost'
port = 8888
buffersize = 8
server_address = (host, port)
def start_UDP_server():
    """Run a UDP echo server on ``server_address``.

    Each datagram (up to ``buffersize`` bytes) is sent back unchanged to the
    address it came from. The loop ends when an empty datagram is received.
    """
    # The context manager closes the socket on every exit path, including
    # an exception raised by bind(), recvfrom() or sendto().
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as socket_UDP:
        socket_UDP.bind(server_address)
        print("UDP server is running...")
        while True:
            data, from_address = socket_UDP.recvfrom(buffersize)
            if not data:
                break
            socket_UDP.sendto(data, from_address)
def start_TCP_server():
    """Run a TCP echo server on ``server_address``.

    Clients are served one at a time: everything a client sends is written
    back to it until it closes the connection, then the next client is
    accepted. The function only returns by raising (e.g. KeyboardInterrupt).
    """
    # Both the listening socket and each client socket are closed by their
    # context managers, even when recv()/sendall() raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as socket_TCP:
        socket_TCP.bind(server_address)
        socket_TCP.listen(1)
        print("TCP server is running...")
        while True:
            client, client_address = socket_TCP.accept()
            with client:
                while True:
                    data = client.recv(buffersize)
                    if not data:
                        break
                    client.sendall(data)
So you can run either start_TCP_server() or start_UDP_server().
On client side the code is:
import socket
import sys
import time
host = 'localhost'
port = 8888
buffersize = 8
server_address = (host, port)
client_address = (host, port+1)
N = 1000000
def benchmark_UDP():
    """Measure the mean UDP round-trip time to the echo server.

    Sends ``N`` datagrams of ``buffersize`` bytes to ``server_address`` from a
    socket bound to ``client_address``, waits for each echo, and prints the
    average round-trip time in microseconds.
    """
    # The payload never changes, so build it once instead of per iteration.
    payload = b"a" * buffersize
    # The context manager releases the bound port even if the run is aborted.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as socket_UDP:
        socket_UDP.bind(client_address)
        print("Benchmark UDP...")
        duration = 0.0
        for _ in range(N):
            # perf_counter() is a monotonic, high-resolution clock meant for
            # timing short intervals; time.time() can be too coarse for this.
            start = time.perf_counter()
            socket_UDP.sendto(payload, server_address)
            data, from_address = socket_UDP.recvfrom(buffersize)
            duration += time.perf_counter() - start
            if data != payload:
                print("Error: Sent and received data are bot the same")
    print(duration*pow(10, 6)/N, "µs for UDP")
def benchmark_TCP():
    """Measure the mean TCP round-trip time to the echo server.

    Connects to ``server_address``, sends ``N`` messages of ``buffersize``
    bytes, waits for each echo, and prints the average round-trip time in
    microseconds.
    """
    # The payload never changes, so build it once instead of per iteration.
    payload = b"a" * buffersize
    # The context manager closes the connection even if the run is aborted.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as socket_TCP:
        socket_TCP.connect(server_address)
        print("Benchmark TCP...")
        duration = 0.0
        for _ in range(N):
            # perf_counter() is a monotonic, high-resolution clock meant for
            # timing short intervals; time.time() can be too coarse for this.
            start = time.perf_counter()
            socket_TCP.sendall(payload)
            data = socket_TCP.recv(buffersize)
            duration += time.perf_counter() - start
            if data != payload:
                print("Error: Sent and received data are bot the same")
        print(duration*pow(10, 6)/N, "µs for TCP")
Like for the server you can start the benchmark by benchmark_TCP() or benchmark_UDP().
The results are about 25 µs for TCP, and about 54 µs for UDP on Unix and even worse for Windows (about 30 µs for TCP and more than 200 µs for UDP). Why? I would expect a minimal advantage for UDP.
Your TCP socket is connected but your UDP socket is not. This means extra processing for every send/receive on the UDP socket. Call connect on each side for the UDP socket, just like you call connect/accept on the TCP socket.
Programs like iperf do this to measure accurately.

Server that echos the client number?

I'm trying to make a server that tells each client what number it is. For example, once you connect it should say something like "Welcome client #5" or something along those lines. Right now I'm just trying to write it so that it simply reads a line in and echoes it back. I'm stuck on getting it to show the client's number.
# Echo server: accepts one client at a time on port 37373, reads up to 1024
# bytes and sends them straight back.
import socket
import sys
host = ''
port = 37373
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((host,port))
s.listen(1)
while 1:
    # Keep the per-client socket in its own name: assigning it to `s` would
    # replace the listening socket, and the next accept() would fail.
    conn, address = s.accept()
    data = conn.recv(1024)
    if data:
        # sendall() keeps writing until the whole reply has been sent.
        conn.sendall(data)
    conn.close()
that is
# Server that tells each client its connection number: the first client to
# connect receives "1", the second "2", and so on.
import socket
import sys
Client_number = 0
host = ''
port = 37373
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((host,port))
s.listen(10) # number of queued connections
while 1:
    # Keep the per-client socket in its own name: assigning it to `s` would
    # replace the listening socket, and the next accept() would fail.
    conn, address = s.accept()
    Client_number += 1
    data = conn.recv(1024)
    if data:
        # Encode explicitly so the reply is bytes on both Python 2 and 3.
        conn.sendall(str(Client_number).encode('ascii'))
    conn.close()

Python server-client relationship over network problems

I wrote a program for my networking class that measures upload and download speeds by sending a file over a socket and timing the transfer, and I used Python. The problem I'm having is that the server and client can talk just fine when running on the same machine, but as soon as I put the server program on another machine on my network, no file transfer happens. They talk to each other (Client says "connected to server" and server says "connection from xxx.xxx.xxx.xxx") but the file transfer size and speed are shown as 0 and 0.
Here's the server code:
# Speed-test server: accepts one client, times a single recv(), writes the
# data to receivedfromclient.txt, prints size/rate figures, then sends a copy
# of the file back to the client.
import util
import socket
import os
import shutil
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host = ""
port = 12345
# "r+" opens an existing file for reading and writing without truncating it.
f = open("receivedfromclient.txt", "r+")
print "Waiting for clients..."
s.bind((host, port))
s.listen(5)
c, addr = s.accept()
print "Client connected:", addr
# NOTE(review): `time` is used here but is not imported in this listing.
start = time.clock()
# A single recv() returns at most 257024 bytes and may return fewer.
msg = c.recv(257024)
stop = time.clock()
duration = stop-start
f.write(str(msg))
# NOTE(review): the file has not been flushed or closed yet at this point, so
# the size on disk may not include the data just written.
b = os.path.getsize("receivedfromclient.txt")
print "File size = ", b, "bits"
print "Time to transfer from client = ", duration, " seconds"
bw = (b/duration)/1048576
print "The upload bit rate is ", bw, "Mpbs"
f.close()
shutil.copy("receivedfromclient.txt", "sendtoclient.txt")
# NOTE(review): `f` is the (now closed) file object from open() above; calling
# .open() on it is not how a new file is opened.
f.open("sendtoclient.txt")
c.send(f.read())
f.close()
c.close()
s.close()
and the client code is similar:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host = raw_input("Please enter host address: ")#socket.gethostname()
port = 12345
sendfile = raw_input("Please enter name of file to transfer: ")
f = open(sendfile,"rb")
g = open("receivedfromserver.txt","w")
print "Connecting to ", host, port
s.connect((host, port))
s.send(f.read())
and so on. Can anybody tell me what I'm doing wrong here?
Hmm - there are at least some problems:
The major one is, that IMHO it is not clear what you really want to do.
Here is your code with some remarks:
# import util <-- NOT NEEDED
import socket
import os
import shutil
import time # <-- Added
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host = ""
port = 12345
f = open("receivedfromclient.txt", "r+")
print "Waiting for clients..."
s.bind((host, port))
s.listen(5)
c, addr = s.accept() # <-- FORGOTTEN ()
print "Client connected:", addr
start = time.clock()
msg = c.recv(257024) # <-- You need some loop here to get the whole file
stop = time.clock()
duration = stop-start
f.write(str(msg))
b = os.path.getsize("receivedfromclient.txt") # <-- = instead of .
print "File size = ", b, "bits"
print "Time to transfer from client = ", duration, " seconds"
bw = (b/duration)/1048576
print "The upload bit rate is ", bw, "Mpbs"
f.close()
shutil.copy("receivedfromclient.txt", "sendtoclient.txt")
f.open("sendtoclient.txt")
c.send(f.read())
f.close()
c.close()
s.close()
One problem here is that start is in almost all cases equal to stop - so you get a Division By Zero error in (b/duration).
In the client part, at least an import socket is missing; the g is not needed at all.
Please explain further what you want to do.
If you want to transfer files, there are a lot of ways to do it (sftp, rsync, nc, ...).

Categories

Resources