I'm learning socket programming in Python, and I need to create a server that accepts several files from a client.
When I receive one of the files, I get this error:
ValueError: invalid literal for int() with base 2: '<!DOCTYPE html PUBLIC "-//W3C//D'
I do not understand why!
Thanks for your help!
client.py
for files in directory:
    try:
        print files
        filename = files
        size = len(filename)
        size = bin(size)[2:].zfill(16)
        s.send(size)
        s.send(filename)
        filename = os.path.join(path, filename)
        filesize = os.path.getsize(filename)
        filesize = bin(filesize)[2:].zfill(32)
        s.send(filesize)
        file_to_send = open(filename, 'rb')
        l = file_to_send.read()
        s.sendall(l)
        file_to_send.close()
        print 'File Sent'
    except socket.error, e:
        print "Error sending data: %s" % e
server.py
while True:
    size = clientsocket.recv(16)
    if not size:
        break
    size = int(size)
    filename = clientsocket.recv(size)
    filesize = clientsocket.recv(32)
    filesize = int(filesize, 2)
    file_to_write = open("/home/giorgio/Scrivania/SERVER/Download/" + 'new_' + filename, 'wb')
    num_files += 1
    chunksize = 1024
    while filesize > 0:
        if filesize < chunksize:
            chunksize = filesize
        data = clientsocket.recv(chunksize)
        file_to_write.write(data)
        filesize -= len(data)
    file_to_write.close()
    print 'File received successfully'
serversock.close()
I have added comments in your server code so you can see the possible fixes:

while True:
    size = clientsocket.recv(16)
    if not size:
        break
    size = int(size)
    filename = clientsocket.recv(size)
    filesize = clientsocket.recv(32)
    #filesize = int(filesize, 2)  # this is not required and is what causes the error, so remove it
    file_to_write = open("/home/giorgio/Scrivania/SERVER/Download/" + 'new_' + filename, 'wb')
    num_files += 1
    chunksize = 1024
    while filesize != "":  # updated to an empty-string comparison
        if filesize < chunksize:
            chunksize = filesize
        data = clientsocket.recv(chunksize)
        file_to_write.write(data)
        filesize = len(data)
    file_to_write.close()
    print 'File received successfully'
serversock.close()
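Separately, note that recv(n) is not guaranteed to return exactly n bytes, which is how HTML file content can end up in the field the server expects to hold a size. Below is a minimal sketch (Python 2, matching the code above) of a helper that loops until exactly n bytes have arrived; the name recv_exactly is my own, not part of the socket module:

def recv_exactly(sock, n):
    """Receive exactly n bytes from sock, or raise if the peer closes early."""
    chunks = []
    remaining = n
    while remaining > 0:
        data = sock.recv(remaining)
        if not data:
            raise RuntimeError('connection closed with %d bytes outstanding' % remaining)
        chunks.append(data)
        remaining -= len(data)
    return ''.join(chunks)

# Hypothetical use: read the 16-character size field in full
# size = int(recv_exactly(clientsocket, 16), 2)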
I am trying to split a large file into 50MB chunks and save them in other files. After running some read/write operations, some of my chunks were smaller than 50MB (43MB, 17MB and so on). I also wrote the same code in Java, and it has the same problem. What is wrong? My code follows below:
By the way, what can we do to speed up this code so it splits into chunks faster?
try:
    f = open(self.__filename, 'rb')
except (OSError, IOError), e:
    raise FileSplitterException, str(e)

bname = (os.path.split(self.__filename))[1]
fsize = os.path.getsize(self.__filename)
self.__chunksize = int(float(fsize) / float(self.__numchunks))
chunksz = self.__chunksize
total_bytes = 0

for x in range(self.__numchunks):
    chunkfilename = bname + '-' + str(x + 1) + self.__postfix
    if x == self.__numchunks - 1:
        chunksz = fsize - total_bytes
    try:
        print 'Writing file', chunkfilename
        data = f.read(chunksz)
        total_bytes += len(data)
        chunkf = file(chunkfilename, 'wb')
        chunkf.write(data)
        chunkf.close()
    except (OSError, IOError), e:
        print e
        continue
    except EOFError, e:
        print e
        break
The code in the question seems to be focused on producing a set number of chunks, rather than files of 50MB in size.
This code produces 50MB files.
import os

try:
    f = open('big.txt', 'rb')
except (OSError, IOError), e:
    raise FileSplitterException, str(e)

bname = (os.path.split('big.txt'))[1]
chunksz = 50 * 1000 * 1000  # metric MB - use 1024 * 1024 for binary MB (MiB)

counter = 0
while True:
    chunkfilename = bname + '-' + str(counter + 1) + '.foo'
    try:
        print 'Writing file', chunkfilename
        data = f.read(chunksz)
        if not data:
            # We have reached the end of the file; end the script.
            break
        chunkf = file(chunkfilename, 'wb')
        chunkf.write(data)
        chunkf.close()
    except (OSError, IOError), e:
        print e
        continue
    except EOFError, e:
        print e
        break
    counter += 1
Some aspects of the code are considered poor style in modern python - for example not using a context manager to open files - but I haven't changed these in case the OP is on an old python like 2.5.
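For reference, here is a minimal sketch of the context-manager form (Python 2.5 needs from __future__ import with_statement; 2.6 and later have it built in):

# the with-statement closes chunkf even if write() raises
with open(chunkfilename, 'wb') as chunkf:
    chunkf.write(data)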
Your question is unclear because you haven't included a Minimal, Complete, and Verifiable example, so I don't know exactly what's wrong with your code. However, after creating/simulating my guess as to the missing parts, I was able to come up with something that does exactly what you want, I think.
import os

class FileSplitterException(Exception): pass

class FileSplitter(object):
    def __init__(self, filename, chunksize):
        if not os.path.isfile(filename):
            raise FileSplitterException(
                "File: {!r} does not exist".format(filename))
        self._filename = filename
        self._postfix = 'chunk'
        self._chunksize = chunksize

    def split(self):
        bname = os.path.splitext(self._filename)[0]
        fsize = os.path.getsize(self._filename)
        chunks, partial = divmod(fsize, self._chunksize)
        if partial:
            chunks += 1
        with open(self._filename, 'rb') as infile:
            for i in range(chunks):
                chunk_filename = '{}-{}.{}'.format(bname, i, self._postfix)
                with open(chunk_filename, 'wb') as outfile:
                    data = infile.read(self._chunksize)
                    if data:
                        outfile.write(data)
                    else:
                        raise FileSplitterException('unexpected EOF encountered')

if __name__ == '__main__':
    import glob

    filename = 'big_file.txt'
    chunksize = 1 * 1024 * 1024  # 1 MiB
    print('splitting {} into {:,} sized chunks'.format(filename, chunksize))
    fs = FileSplitter(filename, chunksize)
    fs.split()
    print('chunk files written:')
    bname = os.path.splitext(filename)[0]
    for chunkname in sorted(glob.glob(bname + '-*.' + fs._postfix)):
        fsize = os.path.getsize(chunkname)
        print('  {}: size: {:,}'.format(chunkname, fsize))
I would like to write a program that takes a .dmg file that is 1.6 GB and splits it into 100 MB chunks.
I would also like to write another program that can later put everything back together, so that the file can be mounted and used.
I am very new to Python (and any type of programming language in general) and cannot find anything on here about this specific thing. Let me know if I am using incorrect terminology too so that I can learn how to search more effectively.
Thanks!
Try this example:
split.py
import sys, os

kilobytes = 1024
megabytes = kilobytes * 1000
chunksize = int(1.4 * megabytes)

def split(fromfile, todir, chunksize=chunksize):
    if not os.path.exists(todir):
        os.mkdir(todir)
    else:
        for fname in os.listdir(todir):
            os.remove(os.path.join(todir, fname))
    partnum = 0
    input = open(fromfile, 'rb')
    while 1:
        chunk = input.read(chunksize)
        if not chunk: break
        partnum = partnum + 1
        filename = os.path.join(todir, ('part%04d' % partnum))
        fileobj = open(filename, 'wb')
        fileobj.write(chunk)
        fileobj.close()
    input.close()
    assert partnum <= 9999
    return partnum

if __name__ == '__main__':
    try:
        parts = split('/Users/example/Desktop/SO/st/example.mp4',
                      '/Users/example/Desktop/SO/st/new',
                      2000000)  # 100000000 == 100 MB
    except:
        print('Error during split')
And for joining:
join.py
import os, sys

readsize = 1024

def join(fromdir, tofile):
    output = open(tofile, 'wb')
    parts = os.listdir(fromdir)
    parts.sort()
    for filename in parts:
        filepath = os.path.join(fromdir, filename)
        fileobj = open(filepath, 'rb')
        while 1:
            filebytes = fileobj.read(readsize)
            if not filebytes: break
            output.write(filebytes)
        fileobj.close()
    output.close()

if __name__ == '__main__':
    try:
        join('/Users/example/Desktop/SO/st/new', 'example_join.mp4')
    except:
        print('Error joining files:')
    else:
        print('Join complete!')
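For the 1.6 GB .dmg in the question, the same two functions should work unchanged; a sketch of how they might be called, with placeholder paths:

# split the image into 100 MB parts (100000000 bytes each)
parts = split('/Users/example/image.dmg', '/Users/example/dmg_parts', 100000000)
print('wrote %d parts' % parts)

# later, reassemble the parts before mounting the result
join('/Users/example/dmg_parts', 'image_rejoined.dmg')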
I wrote a simple file downloader in Python which has to try to download all the files from this website (those which are available). I tried a try block, but it didn't work. Please, if someone has some advice on how I could skip the bad URLs:
import urllib2

for x in range(1, 20000):
    url = "http://etutorium.ru//webinaryService//users/seminar#aktivcorp.com/archive/" + str(x) + "/seminar#aktivcorp.com" + str(x) + ".flv"
    file_name = None
    u = None
    f = None
    meta = None
    file_size = None
    file_size_dl = None
    block_sz = None
    status = None
    file_name = url.split('/')[-1]
    u = urllib2.urlopen(url)
    f = open(file_name, 'wb')
    meta = u.info()
    file_size = int(meta.getheaders("Content-Length")[0])
    print "Downloading: %s Bytes: %s" % (file_name, file_size)
    file_size_dl = 0
    block_sz = 8192
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        file_size_dl += len(buffer)
        f.write(buffer)
        status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
        status = status + chr(8) * (len(status) + 1)
        print status,
    f.close()
You should enclose your request in a try/except block. Note that urllib2.HTTPError is a subclass of urllib2.URLError, so the more specific HTTPError has to be caught first:

try:
    u = urllib2.urlopen(url)
except urllib2.HTTPError, e:
    # do something with the HTTP error, e.g. skip this URL
    pass
except urllib2.URLError, e:
    # do something else perhaps?
    pass
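Applied to the loop in the question, a minimal sketch of skipping the bad URLs (continue moves straight on to the next x):

for x in range(1, 20000):
    url = "http://etutorium.ru//webinaryService//users/seminar#aktivcorp.com/archive/" + str(x) + "/seminar#aktivcorp.com" + str(x) + ".flv"
    try:
        u = urllib2.urlopen(url)
    except (urllib2.HTTPError, urllib2.URLError), e:
        print "Skipping %s: %s" % (url, e)
        continue  # bad URL, move on to the next one
    # ... read from u and write the file as in the original loop ...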
import dropbox
client = dropbox.client.DropboxClient('<token>')
f = open('/ssd-scratch/abhishekb/try/1.mat', 'rb')
response = client.put_file('/data/1.mat', f)
I want to upload a big file to Dropbox. How can I check the progress? [Docs]
EDIT:
The uploader offset stays the same below somehow. What am I doing wrong?
import os, pdb, dropbox

size = 1194304
client = dropbox.client.DropboxClient(token)
path = 'D:/bci_code/datasets/1.mat'
tot_size = os.path.getsize(path)
bigFile = open(path, 'rb')
uploader = client.get_chunked_uploader(bigFile, size)
print "uploading: ", tot_size
while uploader.offset < tot_size:
    try:
        upload = uploader.upload_chunked()
        print uploader.offset
    except rest.ErrorResponse, e:
        print("something went wrong")
EDIT 2:
size = 1194304
tot_size = os.path.getsize(path)
bigFile = open(path, 'rb')
uploader = client.get_chunked_uploader(bigFile, tot_size)
print "uploading: ", tot_size
while uploader.offset < tot_size:
    try:
        upload = uploader.upload_chunked(chunk_size=size)
        print uploader.offset
    except rest.ErrorResponse, e:
        print("something went wrong")
upload_chunked, as the documentation notes:
Uploads data from this ChunkedUploader's file_obj in chunks, until an
error occurs. Throws an exception when an error occurs, and can be
called again to resume the upload.
So yes, it uploads the entire file (unless an error occurs) before returning.
If you want to upload a chunk at a time on your own, you should use upload_chunk and commit_chunked_upload.
Here's some working code that shows you how to upload a single chunk at a time and print progress in between chunks:
from io import BytesIO
import os
from dropbox.client import DropboxClient

client = DropboxClient(ACCESS_TOKEN)
path = 'test.data'
chunk_size = 1024 * 1024  # 1 MB

total_size = os.path.getsize(path)
upload_id = None
offset = 0
with open(path, 'rb') as f:
    while offset < total_size:
        offset, upload_id = client.upload_chunk(
            BytesIO(f.read(chunk_size)),
            offset=offset, upload_id=upload_id)
        print('Uploaded so far: {} bytes'.format(offset))

# Note the "auto/" on the next line, which is needed because
# this method doesn't attach the root by itself.
client.commit_chunked_upload('auto/test.data', upload_id)
print('Upload complete.')
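Since total_size is known up front, the loop can also report a percentage instead of raw bytes, which is closer to a progress indicator; a small variation on the print above:

# inside the while loop, after upload_chunk returns
print('Uploaded {:.1f}% ({} of {} bytes)'.format(
    100.0 * offset / total_size, offset, total_size))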
I am coding a download function in Python. The file size is >1GB. The server is Linux, and the HTTP server is Karrigell. The client is a browser, Firefox or IE. I have run into big trouble.
At first, I used sys.stdout to send the file content.
file = open(path, 'rb')
size = os.path.getsize(path)

RESPONSE['Pragma'] = 'public'
RESPONSE['Expires'] = '0'
RESPONSE['Cache-Control'] = 'must-revalidate, pre-check=0'
RESPONSE['Content-Disposition'] = 'attachment; filename="' + os.path.basename(path) + '"'
RESPONSE['Content-type'] = "application/octet-stream"
RESPONSE['Content-Transfer-Encoding'] = 'binary'
RESPONSE['Content-length'] = str(os.path.getsize(path))
sys.stdout.flush()

chunk_size = 10000
handle = open(path, "rb")
while True:
    buffer = handle.read(chunk_size)
    if buffer:
        STDOUT(buffer)
    else:
        break
sys.stdout.flush()
The problem is that the server runs out of memory! I know stdout writes the content to memory first, and the memory is then sent to the socket.
So I modified the function to send the content to the socket directly, using the py-sendfile module: http://code.google.com/p/py-sendfile/
file = open(path, 'rb')
size = os.path.getsize(path)
sock = REQUEST_HANDLER.sock
sock.sendall("""HTTP/1.1 200 OK\r\nPragma: no-cache\r\nExpires: 0\r\nCache-Control: no-cache, no-store\r\nContent-Disposition: attachment; filename="%s"\r\nContent-Type: application/octet-stream\r\nContent-Length: %u\r\nContent-Range: bytes 0-4096/%u\r\nLocation: "%s"\r\n\r\n""" % (os.path.basename(path), size, size, os.path.basename(path)))

offset = 0
nbytes = 4096
while 1:
    try:
        sent = sendfile.sendfile(sock.fileno(), file.fileno(), offset, nbytes)
    except OSError, err:
        if err.errno in (errno.EAGAIN, errno.EBUSY):  # retry
            continue
        raise
    else:
        if sent == 0:
            break  # done
        offset += sent
This time the server memory is OK, but the browser dies! The browser's memory rises quickly, and it is not freed until the socket has accepted the whole file content.
I don't know how to deal with these problems. I think the second idea is right: send the content to the socket directly. But why can't the browser free memory while accepting the data?
You should try to download the file in chunks. This is an example that works for me, using urllib2:
import os
import urllib2
import math

def downloadChunks(url):
    """Helper to download large files.

    The only arg is a url. The file will go to a temp directory,
    and will be downloaded in chunks, printing out how much is done.
    """
    baseFile = os.path.basename(url)

    # move the file to a more uniq path
    os.umask(0002)
    temp_path = "/tmp/"
    try:
        file = os.path.join(temp_path, baseFile)

        req = urllib2.urlopen(url)
        total_size = int(req.info().getheader('Content-Length').strip())
        downloaded = 0
        CHUNK = 256 * 10240
        with open(file, 'wb') as fp:
            while True:
                chunk = req.read(CHUNK)
                downloaded += len(chunk)
                # float() avoids Python 2 integer division always printing 0
                print math.floor((float(downloaded) / total_size) * 100)
                if not chunk:
                    break
                fp.write(chunk)
    except urllib2.HTTPError, e:
        print "HTTP Error:", e.code, url
        return False
    except urllib2.URLError, e:
        print "URL Error:", e.reason, url
        return False

    return file
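A minimal usage sketch; the URL is a placeholder:

path = downloadChunks('http://example.com/big_video.flv')
if path:
    print "saved to", path
else:
    print "download failed"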