I have Clickhouse version 20.8.3.18 and python3 installed on a vm stress testing Cache dictionaries. After a certain number of entries the query using clickhouse_driver, I'll get the error
Unexpected EOF while reading bytes
Is this an error due to the driver/python related or due to the cache being maxed on the system. For example this happens on a file size 203 columns and 10000 rows on a machine with 32Gb of RAM and 256Gb of SSD memory, a csv file of around 66Mb which seems quite small for such an error. The query I'm running is:
SELECT
dictGet('CacheDictionary', 'date', toUInt64(number)) AS date,
SUM(dictGet('CacheDictionary', 'filterColumn', toUInt64(number))) AS val,
AVG(dictGet('CacheDictionary', 'filterColumn', toUInt64(number))) AS avg
FROM numbers(1, 10000)
GROUP BY date
An example entry of the csv file is:
20000,2021-02-05,6867,0.5314826651111791,OA9SMRN54LC3MTDW,D6S8AYXZ3JVSHPCY,12UQV1JR87MT00EP,3WBT23MA2QN6URA7,YGKJR5577BP6S3AD,2T90WPW1REOZA0L9,JQG8Z6FXXIX2788M,OAOVV1YX3A6HKQV8,FISBMOAHEXHAAKEY,XAULW5F90T3VEMUL,RAAZ5TM5XL7GRC1F,B16JEGDHXUXFI2R9,DETSZ7BR45CRAIA7,Z2X53PAQYCSBHPU3,SRISC0ZLWXC2DP34,KO2M3044JX5JCB74,ML776REFIX3Z1L78,ND6PXBOR135SWFSB,ZF4K45N2AIGFAK0L,RFE3EHCKC5EPYE2V,NJKM5T8UUD5NRDPX,O57IQW0670LP00I9,F0EBZ3BXHPETCFSY,RUZ7VH2IM0DIZ4UC,08BP467WG7ROEHTJ,9LSTNLUA240T2K4D,5L4PIRKMK746QW5Q,2VX3SER8ULU93NZG,Z0MZ9C3TTPR6WFDV,KB32XWCR67AWGSIB,PDM8QJ34X4EOTVN1,P7TUVP8Q1YF9S746,YDFDBCG6S2EXYPNW,55RN0F4UMGF3ABQZ,RRF895J8LQSLI48U,54OQWCJODIEQLRQF,D5ZJPGAG7CCO4LWA,UQDWEXPI184UUJQD,3QF6QAS32ITRL8JH,FPQ324RO04LNVAMO,ZJ6QCWNQCBQOE7F5,6OWVEVWHNSZILC6E,GIUD29OIFF3LUCCX,VGBJHKW32BUNUSDH,908TDRODVZIIC5O8,UCIU38BXEREJMO4M,5LKJ23ER4CKUZ88J,A1GBKPPM10L8X5RM,BB3SAVWF3CNBDXHO,279MIC1OXTDS2PFP,J6UVFJE8RGFK4LDN,3CE12GT27GX0WVWU,PNNTRLDFVJQ0TCRK,MI7XOHWUQX3W938H,LKZPV4K0BA6OE3R0,YJMLI82UBLSZWP7U,JORNKD1MSVECXBRF,CO5KKJIL1FHEYA11,GXVXWDOI538WCLC0,OPODB2R2ITSX0E6J,3VE7SOJZL3DKIES7,5LPXB17GJ94S86HL,UQ0DZVUDMBD39LC3,KSSVOBUKMZC7T89M,P6YL0WW22NOM5A36,RA46SZF4ZLO5YWUM,TUTMJ34X4040USXX,09HPKJAD58P3FVMP,DM0NJVFYKR2653HH,HP869NM4Y2EBE3ND,RVKP40RPBOPB6RPQ,WI3QXYA5XIWJUFUK,770L6U5KAEPKKJC1,2H0XNUDM41QBAZWB,8AWJ2Y7RB9F2WTT0,Y6T3PIPLU3FCBZCU,CY8SCO15RNUWQU2B,DRC88XH21J9ADT6Z,MLZ2JN7F8MXVBHBI,2YSUVHRL4V0EVHXF,Y0U12EBQSEVE6W6X,A6RRJY191S0JOXJH,4F12P4K0SJ6EDKSD,THCRJ2ZEXGM1RUM4,PF0OUAULUNIW0W9X,EK1249WXC0C2KKY8,11WEDAAJL7BL4T4U,4K8OP1WXSN1MIXPF,8D0WNN1672A6WK07,5RLYH7K00ZSR1LL2,EKEXBG87U1X6UOLL,YWK3V1F7MTAF9T19,XZ8ZF0XO5V8TCBPS,A3RX8X8A8I11Z8X3,77P2Q5WRSTL4ERAI,00BGNPDYFSVG5F81,5KTUM76C42VTP4I7,TA933GZZN8OQ20QJ,612WNQ74RDHMBWX3,D41HNOBPX11GFYWO,OGR4A0EPCSS00XL6,QIOH165Y5JGKJMFC,TF2R9TFC5TJN2PER,TYNXWI46H7I83O77,JMD5DOEV4U628SDK,D7ECJH43FEC77UCJ,FKA9AT5J20QI3MQP,7QSU0I8VRRLUMD7R,6OJ1O2XI2QJXP6W2,UD2QVJXNUFRCAO43,GS3TZUW8U6Z8EWWQ,QD79GBSO6D6GCAZ1,GQ5TUY2FMJSNMTRK,OGOYL2PD64E2DOOQ,Q733OU5P7J7SAFS1,GBS7MV5QOMQ4E89N,SB8MIQ1P37HMQZBJ,Z6G96BM7FL4150H3,05PS81HW528971RM,6F3KFLYT0345GI43,G65CDWEORNH3OUCY,12F43L99AZ84PDWR,GQQVWMTMS471WAWD,F1DFWRJ1F9M9MUTT,1M734H07IQAW49Q3,OPSRG5J7370227XE,BIPNR22KFF71MKQN,PV7DWGCQF5551FKT,YPGQVGUP37MRJY2B,RILKP96QV69WBW2D,4RXDCJURAVCQEGLX,XGIPC0AK1K0I6KDP,HMSE306L5NAK62LC,YAZHMS2UHGMWIB44,RZCAVUM45YTNV23T,3B7K07XPRTE8OMW1,FTP48ED5DQ4K3DM8,WW419RRJ2WU1F15L,85FWD49J0ARSUGI9,4U4768ANPCJ46K5P,EJ24BNUA6OZMUDEL,6Z27W6BN36GO8QWU,5AMZ4UU819GSI454,KMNIEJ2V5PI83KGP,APT4CYG8M5FM0BSW,IME5VRP08W468DZE,6BT4W0ZAW6C7993L,DRD6Q4P8BZVDG37U,2R1OEWQFV5J597AF,CKS41A6PXKVYICAG,OQYZ9UOQRVS3LLTF,JA3PZSAXFCJVZVLB,J23BP73T6GNC0Z08,GWOJXMXDVHCRE51Y,I826DE6KEVQK2PFC,6FF5LWM61KCM4C9K,P16P80EIX2X87OZO,O5GEOEO72CDV4GAX,UMKFUKMV6U0L5PM5,U64YI4G53LR3SC6J,CLML8KPAL697KYYJ,LMH2W0STEJ5H2J2S,AL61EP61ZR3GOPN3,Z3AEUMZSX4MQJ6M6,IS5RFEWIJ8XHYNK0,TNE1BS4JYN280PIF,67IER2YS6N2XHEW1,63P3O4X42T2INRT4,XYV043108XRK7Y4S,RW0HN600K0GQXF4Y,BZ1ZE6IBB4B72A81,QHAINYDIZX7838YI,7FFCKG3XJSZ2DIHJ,DF6C1OMPC1ETFPDZ,1EJ3EW0TXKVBC88R,WX6HG8FD021VFZ2S,W4OB9NZRODSTM96M,6GDA3L5CLBPVTPWQ,1Y4U7BL9UHPBJVIX,Y31SUUZ0JF2AXZWO,PL2I18PA0SVXG85E,TEY1HC97QMZ5YXMI,T49EVLLM43AI4OG3,0SDNMLWY85Z7NENX,4446QKGO8UL6RERT,IMEAM22I51GT4ZHY,HUCLC93NIUG0C5R0,5VPBRUUVMBXP7HJY,XCOOPM3JU5VHQ94T,3LRZGAF451G9XDIN,Y6VIN1E31NYRLA2N,RAROO2EM5Q9NJRG9,NUQ2QJ9M6T5KRCHK,WQKKQK8UBB30GRWI,20SOMMKD08FYAENW,1G9K4UFWAI8Q7Z8K,XLG898A4MQXZHVYR,FPT67A7VDLVZEWYH,6DQ6417FF07FORXZ,10RUAPY5KGAYBZZD
I've posted part of the code trying to find the maximum number of cache items stored, along with the queries executed for each. In selectBenchmark the string correspond to the query above. The parameters for each are fairly self explanatory (the xmlFile is the dictionary created in /etc/lib/clickhouse-server).
def cacheMaxItems(csvRead, xmlFile, benchmarkType, columnStepSize, rowStepSize):
maxCache = []
os.system('rm -f ' + csvRead)
os.system('bash /root/restartCH.sh')
for j in range(1, 13):
outputCSV = '/root/results' + benchmarkType + '/cacheResults' + str(j*columnStepSize) + '.csv'
with open(outputCSV, 'w') as fp:
wr = csv.writer(fp)
wr.writerow([benchmarkType + ': Number of rows', 'Loading time', 'Mean', 'Variance', 'Skewness', 'Number of Columns: ' + str(j*columnStepSize)])
for i in range(1, 10000):
if i%5 == 0:
os.system('bash /root/restartCH.sh')
createCSV(10000, j*columnStepSize, csvRead)
try:
clickhouseDictionary(rowStepSize*i*j*columnStepSize, j*columnStepSize, xmlFile, csvRead, 'Cache')
if benchmarkType == 'Random':
results = selectBenchmark(i*rowStepSize, j*columnStepSize, 'Random', 'Cache')
elif benchmarkType == 'Consecutive':
results = selectBenchmark(i*rowStepSize, j*columnStepSize, 'Consecutive', 'Cache')
elif benchmarkType == 'CPU':
results = selectBenchmark(i*rowStepSize, j*columnStepSize, 'CPU', 'Cache')
results.insert(0, i*rowStepSize)
with open(outputCSV, 'a') as fp:
wr = csv.writer(fp)
wr.writerow(results)
print('Successfully loaded and queried cache of size ' + str(rowStepSize*i*j*columnStepSize) + '.')
except Exception as ex:
print(ex)
os.system('rm -f ' + csvRead)
os.system('bash /root/restartCH.sh')
maxCache.append([j*columnStepSize, (i-1)*rowStepSize])
print(maxCache)
break
return maxCache
def selectBenchmark(numberOfRows, numberOfColumns, benchmarkType, dictType):
client = Client('localhost', port=9000, database='system')
client.execute('SYSTEM RELOAD DICTIONARY ' + dictType + 'Dictionary')
loadingTime = client.last_query.elapsed
client.execute('SELECT dictGet(\'' + dictType + 'Dictionary\', \'random0\', toUInt64(1))', query_id=str(uuid.uuid4()))
loadingTime += client.last_query.elapsed
loop = True
counter = 0
j=0
while loop:
times = []
for i in range(0, 31):
query_id = str(uuid.uuid4())
string = stringGen(numberOfRows, numberOfColumns, benchmarkType, dictType)
client.execute(string, query_id = query_id)
times.append(client.last_query.elapsed)
if max(times) > loadingTime:
loadingTime = max(times)
stats = transformedMLE(times)
redactedTimes = [x for x in times if (stats[0]-3*np.sqrt(stats[1])) < x < (stats[0]+3*np.sqrt(stats[1]))]
if len(times) - len(redactedTimes) <= 3:
loop = False
elif j > 15:
print('High variance query')
loop = False
j+=1
result = transformedMLE(redactedTimes)
loadingTime = loadingTime - result[0]
result.insert(0, loadingTime)
client.disconnect()
return result
The restartCH.sh file is
service clickhouse-server forcerestart
as the cache overflow often blocks the restart command.
There is no output to the server error logs indicating that this is a problem with the python driver, perhaps reading the large amounts of data being returned. I also get the 'Killed' python output which also points towards cache issues, which is to be expected as I'm benchmarking cache dictionaries.
Unexpected EOF while reading bytes -- it's python driver error.
Check clickhouse-server.log for real error.
20.8.3.18 is out support , please upgrade to 20.8.12.2
I was running into a similar problem on Ubuntu when starting the server binary directly using "2>&1 /dev/null &" to suppress the output from stderr and stdout to /dev/null, Python driver was throwing the error but server would still be working when connecting via the clickhouse client binary command-line. Issue was resolved by tweaking the server startup script to just redirect stderr with " 2> /dev/null &" (referring to https://www.baeldung.com/linux/silencing-bash-output difference between using 2> and 2>&1).
I am not sure what I am doing wrong here but I am trying to open a file, trace1.flow, read the header information then throw the source IP and destination IP into dictionaries. This is done in Python running on a Fedora VM. I am getting the following error:
(secs, nsecs, booted, exporter, mySourceIP, myDestinationIP) = struct.unpack('IIIIII',myBuf)
struct.error: unpack requires a string argument of length 24
Here is my code:
import struct
import socket
#Dictionaries
uniqSource = {}
uniqDestination = {}
def int2quad(i):
z = struct.pack('!I', i)
return socket.inet_ntoa(z)
myFile = open('trace1.flow')
myBuf = myFile.read(8)
(magic, endian, version, headerLen) = struct.unpack('HBBI', myBuf)
print "Magic: ", hex(magic), "Endian: ", endian, "Version: ", version, "Header Length: ", headerLen
myFile.read(headerLen - 8)
try:
while(True):
myBuf = myFile.read(24)
(secs, nsecs, booted, exporter, mySourceIP, myDestinationIP) = struct.unpack('IIIIII',myBuf)
mySourceIP = int2quad(mySourceIP)
myDestinationIP = int2quad(myDestinationIP)
if mySourceIP not in uniqSource:
uniqSource[mySourceIP] = 1
else:
uniqSource[mySourceIP] += 1
if myDestinationIP not in uniqDestination:
uniqDestination[myDestinationIP] = 1
else:
uniqDestination[myDestinationIP] += 1
myFile.read(40)
except EOFError:
print "END OF FILE"
You seem to assume that file.read will raise EOFError on end of file, but this error is only raised by input() and raw_input(). file.read will simply return a string that's shorter than requested (possibly empty).
So you need to check the length after reading:
myBuf = myFile.read(24)
if len(myBuf) < 24:
break
Perhaps your have reached end-of-file. Check the length of myBuf:
len(myBuf)
It's probably less than 24 chars long. Also you don't need those extra parenthesis, and try to specify duplicated types using 'nI' like this:
secs, nsecs, booted, exporter, mySourceIP, myDestinationIP = struct.unpack('6I',myBuf)
I want to write a program that sends an e-mail to one or more specified recipients when a certain event occurs. For this I need the user to write the parameters for the mail server into a config. Possible values are for example: serveradress, ports, ssl(true/false) and a list of desired recipients.
Whats the user-friendliest/best-practice way to do this?
I could of course use a python file with the correct parameters and the user has to fill it out, but I wouldn't consider this user friendly. I also read about the 'config' module in python, but it seems to me that it's made for creating config files on its own, and not to have users fill the files out themselves.
Are you saying that the fact that the config file would need to be valid Python makes it unfriendly? It seems like having lines in a file like:
server = 'mail.domain.com'
port = 25
...etc would be intuitive enough while still being valid Python. If you don't want the user to have to know that they have to quote strings, though, you might go the YAML route. I use YAML pretty much exclusively for config files and find it very intuitive, and it would also be intuitive for an end user I think (though it requires a third-party module - PyYAML):
server: mail.domain.com
port: 25
Having pyyaml load it is simple:
>>> import yaml
>>> yaml.load("""a: 1
... b: foo
... """)
{'a': 1, 'b': 'foo'}
With a file it's easy too.
>>> with open('myconfig.yaml', 'r') as cfile:
... config = yaml.load(cfile)
...
config now contains all of the parameters.
I doesn't matter technically proficient your users are; you can count on them to screw up editing a text file. (They'll save it in the wrong place. They'll use MS Word to edit a text file. They'll make typos.) I suggest making a gui that validates the input and creates the configuration file in the correct format and location. A simple gui created in Tkinter would probably fit your needs.
I've been using ConfigParser. It's designed to read .ini style files that have:
[section]
option = value
It's quite easy to use and the documentation is pretty easy to read. Basically you just load the whole file into a ConfigParser object:
import ConfigParser
config = ConfigParser.ConfigParser()
config.read('configfile.txt')
Then you can make sure the users haven't messed anything up by checking the options. I do so with a list:
OPTIONS =
['section,option,defaultvalue',
.
.
.
]
for opt in OPTIONS:
section,option,defaultval = opt.split(',')
if not config.has_option(section,option):
print "Missing option %s in section %s" % (option,section)
Getting the values out is easy too.
val = config.get('section','option')
And I also wrote a function that creates a sample config file using that OPTIONS list.
new_config = ConfigParser.ConfigParser()
for opt in OPTIONS:
section,option,defaultval = opt.split(',')
if not new_config.has_section(section):
new_config.add_section(section)
new_config.set(section, option, defaultval)
with open("sample_configfile.txt", 'wb') as newconfigfile:
new_config.write(newconfigfile)
print "Generated file: sample_configfile.txt"
What are the drawbacks of such a solution:
ch = 'serveradress = %s\nport = %s\nssl = %s'
a = raw_input("Enter the server's address : ")
b = 'a'
bla = "\nEnter the port : "
while not all(x.isdigit() for x in b):
b = raw_input(bla)
bla = "Take care: you must enter digits exclusively\n"\
+" Re-enter the port (digits only) : "
c = ''
bla = "\nChoose the ssl option (t or f) : "
while c not in ('t','f'):
c = raw_input(bla)
bla = "Take care: you must type f or t exclusively\n"\
+" Re-choose the ssl option : "
with open('configfile.txt','w') as f:
f.write(ch % (a,b,c))
.
PS
I've read in the jonesy's post that the value in a config file may have to be quoted. If so, and you want the user not to have to write him/her-self the quotes, you simply add
a = a.join('""')
b = b.join('""')
c = c.join('""')
.
EDIT
ch = 'serveradress = %s\nport = %s\nssl = %s'
d = {0:('',
"Enter the server's address : "),
1:("Take care: you must enter digits exclusively",
"Enter the port : "),
2:("Take care: you must type f or t exclusively",
"Choose the ssl option (t or f) : ") }
def func(i,x):
if x is None:
return False
if i==0:
return True
elif i==1:
try:
ess = int(x)
return True
except:
return False
elif i==2:
if x in ('t','f'):
return True
else:
return False
li = len(d)*[None]
L = range(len(d))
while True:
for n in sorted(L):
bla = d[n][1]
val = None
while not func(n,val):
val = raw_input(bla)
bla = '\n '.join(d[n])
li[n] = val.join('""')
decision = ''
disp = "\n====== If you choose to process, =============="\
+"\n the content of the file will be :\n\n" \
+ ch % tuple(li) \
+ "\n==============================================="\
+ "\n\nDo you want to process (type y) or to correct (type c) : "
while decision not in ('y','c'):
decision = raw_input(disp)
disp = "Do you want to process (type y) or to correct (type c) ? : "
if decision=='y':
break
else:
diag = False
while not diag:
vi = '\nWhat lines do you want to correct ?\n'\
+'\n'.join(str(j)+' - '+line for j,line in enumerate((ch % tuple(li)).splitlines()))\
+'\nType numbers of lines belonging to range(0,'+str(len(d))+') separated by spaces) :\n'
to_modify = raw_input(vi)
try:
diag = all(int(entry) in xrange(len(d)) for entry in to_modify.split())
L = [int(entry) for entry in to_modify.split()]
except:
diag = False
with open('configfile.txt','w') as f:
f.write(ch % tuple(li))
print '-*- Recording of the config file : done -*-'