Related
I created ten threads to process item in a global list
but I do not why only the first worker do this work
and also, the main thread finished before sub-threads finished, even though I had used the thread.join().
here is the code, I think the problem may because I use while loop in myThread.run . but I do not know how to tell these threads keep working before the global list is empty.
# coding=utf-8
import threading
import numpy as np
dfs = ['units' + str(i).zfill(5) for i in range(250)]
units = dfs.copy()
k = [str(i).zfill(5) for i in range(500, 21800000)]
units.extend(k)
np.random.shuffle(units)
marker = []
def working_fun(df, unit):
global marker
if unit in df:
threadlock.acquire()
marker.append(int(unit[5:]))
class myThread(threading.Thread):
def __init__(self, name):
threading.Thread.__init__(self)
self.name = name
self.work_load = []
def run(self):
global dfs
print("start thread" + self.name)
while True:
threadlock.acquire()
if units != []:
unit = units.pop()
else:
unit = None
threadlock.release()
if unit is not None:
self.work_load.append(unit)
working_fun(dfs, unit)
else:
print('------', self.name, '--finish---', len(self.work_load), '--------')
break
threadlock = threading.RLock()
thds = []
for i in range(10):
thd = myThread(name='thd' + str(i))
thds.append(thd)
for thd in thds:
thd.start()
thd.join()
print('output:', marker)
Try it this way:
import numpy
import multiprocessing
# Same as before
dfs = ['units' + str(i).zfill(5) for i in range(250)]
units = dfs.copy()
k = [str(i).zfill(5) for i in range(500, 21800000)]
units.extend(k)
numpy.random.shuffle(units)
# Almost the same as before
def working_fun(inp):
df, unit = inp
if unit in df:
return int(unit[5:])
# This is needed for multiprocessing/threading
if __name__ == "__main__":
# Create a pool of workers (10 in this case)
with multiprocessing.Pool(10) as pool:
# Map some (global) iterable on the pool of workers
result = pool.map(working_fun, [(dfs, unit) for unit in units])
# Show the results (note that the function returns None if the unit is not in df)
print([r for r in result if r is not None])
Output:
$ python test.py
[1, 75, 139, 24, 101, 72, 156, 55, 58, 235, 14, 123, 177, 112, 168, 178, 173, 162, 104, 226, 230, 205, 69, 100, 246, 18, 117, 149, 37, 214, 206, 26, 136, 87, 144, 79, 50, 222, 7, 133, 36, 41, 30, 163, 103, 187, 6, 225, 15, 223, 234, 138, 126, 19, 64, 224, 39, 145, 130, 42, 11, 221, 128, 213, 204, 2, 45, 220, 242, 109, 59, 238, 232, 68, 152, 107, 148, 83, 197, 241, 118, 32, 90, 99, 22, 119, 0, 67, 48, 181, 71, 193, 95, 29, 113, 40, 134, 218, 141, 27, 121, 8, 207, 110, 60, 237, 47, 94, 73, 157, 184, 78, 159, 49, 202, 239, 124, 215, 127, 209, 62, 4, 52, 82, 74, 9, 199, 158, 188, 3, 61, 180, 57, 219, 245, 38, 16, 190, 12, 17, 175, 46, 196, 125, 194, 76, 129, 161, 81, 93, 137, 155, 174, 54, 35, 25, 115, 140, 216, 23, 21, 233, 77, 33, 92, 208, 120, 86, 165, 70, 135, 28, 91, 66, 85, 169, 203, 211, 114, 154, 122, 217, 247, 31, 147, 96, 142, 191, 10, 183, 80, 179, 189, 56, 105, 160, 228, 185, 132, 5, 53, 106, 13, 210, 182, 89, 192, 153, 170, 111, 65, 212, 186, 151, 200, 248, 229, 102, 240, 198, 176, 43, 131, 166, 236, 231, 116, 172, 146, 88, 44, 98, 227, 20, 34, 164, 108, 171, 244, 243, 195, 150, 249, 97, 167, 51, 201, 84, 63, 143]
I have some code that creates a list with numbers, from 1 to 407. What I want to do it to take the numbers of the "ultimate" and "super_rare" list out of the "common" list. How can I do that? This is the general code I have.
import random
def common(x):
list = []
for i in range(1,x+1):
list.append(i)
return (list)
cid = common(407)
ultimate = [404, 200, 212, 15, 329, 214, 406, 259, 126, 160, 343, 180, 169, 297, 226, 305, 250, 373, 142, 357, 181, 113, 149, 399, 287, 341, 37, 284, 41, 328, 400, 217, 253, 204, 290, 18, 174, 36, 310, 303, 6, 108, 47, 298, 130]
super_rare = [183, 349, 134, 69, 103, 342, 83, 380, 93, 56, 86, 95, 147, 161, 403, 197, 215, 312, 375, 359, 263, 221, 340, 102, 153, 234, 54, 7, 238, 193, 90, 367, 197, 397, 33, 366, 334, 222, 394, 371, 313, 83, 276, 35, 351, 83, 347, 170, 57, 201, 137, 188, 179, 170, 65, 107, 234, 48, 2, 85, 74, 221, 23, 171, 101, 377, 63, 248, 102, 272, 129, 276, 86, 88, 51, 197, 248, 202, 244, 153, 138, 101, 330, 68, 368, 292, 340, 315, 185, 219, 381, 89, 274, 175, 385, 19, 257, 313, 191, 211]
def new_list(cid, ultimate):
new_list = []
for i in range(len(cid)):
new_list.append(cid[i])
for i in range(len(ultimate)):
new_list.remove(ultimate[i])
return (new_list)
#print (new_list(cid, ultimate))
cid_mod0 = new_list(cid, ultimate)
cid_mod1 = new_list(cid_mod0, super_rare)
print (cid_mod0)
Most of the prints and whatnot are just tries to see if it's working.
I recommend using sets for this. You can check if an item is in a set in constant time. For example:
import random
def common(x):
return list(range(1, x + 1))
cid = common(407)
ultimate = { 404, 200, ... }
super_rare = { 183, 349, ... }
def list_difference(l, s):
return [ elem for elem in l if elem not in s ]
cid_mod0 = list_difference(cid, ultimate)
cid_mod1 = list_difference(cid_mod0, super_rare)
If you don't care about the order of your resulting list you can use a set for that as well for a bit more convenience:
import random
def common(x):
return list(range(1, x + 1))
cid = set(common(407))
ultimate = { 404, 200, ... }
super_rare = { 183, 349, ... }
cid_mod0 = cid - ultimate
cid_mod1 = cid_mod0 - super_rare
Use this loop to remove the elements out of common that are in the super_rare and ultimate lists:
for x, cnum in enumerate(cid):
if cnum in ultimate or cnum in super_rare:
del cid[x]
print(cid)
The loop assumes you have a list named cid that is already established.
If you want to keep the original order for cid, you could try to use OrderDict to convert cid as an ordered dict object, and then remove keys that you want, the code would be something like:
from random import choices, seed
from collections import OrderedDict
seed(123)
ultimate = [404, 200, 212, 15, 329, 214, 406, 259, 126, 160, 343, 180, 169, 297, 226, 305, 250, 373, 142, 357, 181, 113, 149, 399, 287, 341, 37, 284, 41, 328, 400, 217, 253, 204, 290, 18, 174, 36, 310, 303, 6, 108, 47, 298, 130]
super_rare = [183, 349, 134, 69, 103, 342, 83, 380, 93, 56, 86, 95, 147, 161, 403, 197, 215, 312, 375, 359, 263, 221, 340, 102, 153, 234, 54, 7, 238, 193, 90, 367, 197, 397, 33, 366, 334, 222, 394, 371, 313, 83, 276, 35, 351, 83, 347, 170, 57, 201, 137, 188, 179, 170, 65, 107, 234, 48, 2, 85, 74, 221, 23, 171, 101, 377, 63, 248, 102, 272, 129, 276, 86, 88, 51, 197, 248, 202, 244, 153, 138, 101, 330, 68, 368, 292, 340, 315, 185, 219, 381, 89, 274, 175, 385, 19, 257, 313, 191, 211]
cid = OrderedDict.fromkeys(choices(range(407), k=407))
_ = map(cid.pop, set(ultimate + super_rare))
result = cid.keys()
If you don't need the original order, you could try to convert cid as a dict, it's superfast to remove key from a hashmap, the code would be something like:
cid = dict.fromkeys(range(407))
_ = map(cid.pop, set(ultimate + super_rare))
result = cid.keys()
Apart from the dictionary method, you can also try to convert everything into a set variable like the following:
result = set(range(407)) - set(ultimate) - set(super_rare)
Hope it helps.
You can create your target list that includes all the numbers from the target range, but without those numbers from ultimate and super_rare list, by list comprehension:
my_filtered_list = [i for i in range(1, 408) if i not in ultimate and i not in super_rare]
print(my_filtered_list)
Make a union set of both sets of numbers you want to exclude.
>>> su = set(ultimate) | set(super_rare)
Then filter the input list based on whether the value is not present in the set.
>>> list(filter(lambda i: i not in su, cid))
[1, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 16, 17, 20, 21, 22, 24, 25, 26, 27, 28,
29, 30, 31, 32, 34, 38, 39, 40, 42, 43, 44, 45, 46, 49, 50, 52, 53, 55, 58, 59,
60, 61, 62, 64, 66, 67, 70, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 84, 87,
91, 92, 94, 96, 97, 98, 99, 100, 104, 105, 106, 109, 110, 111, 112, 114, 115,
116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 127, 128, 131, 132, 133, 135,
136, 139, 140, 141, 143, 144, 145, 146, 148, 150, 151, 152, 154, 155, 156, 157,
158, 159, 162, 163, 164, 165, 166, 167, 168, 172, 173, 176, 177, 178, 182, 184,
186, 187, 189, 190, 192, 194, 195, 196, 198, 199, 203, 205, 206, 207, 208, 209,
210, 213, 216, 218, 220, 223, 224, 225, 227, 228, 229, 230, 231, 232, 233, 235,
236, 237, 239, 240, 241, 242, 243, 245, 246, 247, 249, 251, 252, 254, 255, 256,
258, 260, 261, 262, 264, 265, 266, 267, 268, 269, 270, 271, 273, 275, 277, 278,
279, 280, 281, 282, 283, 285, 286, 288, 289, 291, 293, 294, 295, 296, 299, 300,
301, 302, 304, 306, 307, 308, 309, 311, 314, 316, 317, 318, 319, 320, 321, 322,
323, 324, 325, 326, 327, 331, 332, 333, 335, 336, 337, 338, 339, 344, 345, 346,
348, 350, 352, 353, 354, 355, 356, 358, 360, 361, 362, 363, 364, 365, 369, 370,
372, 374, 376, 378, 379, 382, 383, 384, 386, 387, 388, 389, 390, 391, 392, 393,
395, 396, 398, 401, 402, 405, 407]
If you don't want to use filter, just use a list comprehension.
>>> [v for v in cid if v not in su]
You could also do the whole thing with sets like
>>> list(set(cid) - (set(ultimate) | set(super_rare)))
Others have suggested this already (I take no credit). I'm not sure how guaranteed it is to come back in the right order. Seems to be ok on my py2 and py3, but doing the last step as a list will give you the order absolutely guaranteed (not as an implementation detail) and wont need converting back to a list as a final step.
If you want to see the changes in the original list, you can just assign back to the original variable.
cid = [v for v in cid if v not in su]
This is assigning a different list to the same variable though, so other holders of references to that list won't see the changes. You can call id(cid) before and after the assignment to see that its a different list.
If you wanted to assign back to the exact same list instance you can use
cid[:] = [v for v in cid if v not in su]
and the id will remain the same.
I am using OpenCV 2.4.9 Python knnMatch where the query descriptors come directly from detectAndCompute and are formatted correctly, but the train descriptors will come from a list I made in a different program.
When I get the descriptors from my other program, they look like:
[array([ 14, 21, 234, 147, 215, 115, 190, 215, 94, 231, 31, 34, 200,
124, 127, 104, 255, 123, 179, 147, 180, 240, 61, 226, 111, 95,
159, 131, 151, 127, 253, 231], dtype=uint8), array([162, 150, 101, 219, 117, 151, 173, 113, 93, 29, 81, 23, 232,
13, 60, 133, 221, 2, 147, 165, 242, 188, 120, 221, 39, 26,
154, 194, 87, 140, 245, 252], dtype=uint8)]
That would be 2 descriptors.
How can I format these so I do not get the "OpenCV Error: Unsupported format or combination of formats" error when matching these descriptors with those coming straight out of detectAndCompute? I have tried using np.asarray(list, np.float32) to no avail. If I do:
[[d for d in des] for des in list] with list as the train descriptors then the two lists will LOOK the same but I get the same error!
list = [[d for d in des] for des in list]
list = np.asarray(list, np.uint8)
for d in list:
for x in d:
x = x.astype(np.uint8)
I have written a small Cython tool for in-place sorting of structures exposing the buffer protocol in Python. It's a work in progress; please forgive any mistakes. This is just for me to learn.
In my set of unit tests, I am working on testing the in-place sort across many different kinds of buffer-exposing data structures, each with many types of underlying data contained in them. I can verify it is working as expected for most cases, but the case of bytearray is very peculiar.
If you take it for granted that my imported module b in the code below is just performing a straightforward heap sort in Cython, in-place on the bytearray, then the following code sample shows the issue:
In [42]: a #NumPy array
Out[42]: array([ 9, 148, 115, 208, 243, 197], dtype=uint8)
In [43]: byt = bytearray(a)
In [44]: byt
Out[44]: bytearray(b'\t\x94s\xd0\xf3\xc5')
In [45]: list(byt)
Out[45]: [9, 148, 115, 208, 243, 197]
In [46]: byt1 = copy.deepcopy(byt)
In [47]: b.heap_sort(byt1)
In [48]: list(byt1)
Out[48]: [148, 197, 208, 243, 9, 115]
In [49]: list(bytearray(sorted(byt)))
Out[49]: [9, 115, 148, 197, 208, 243]
What you can see is that when using sorted, the values are iterated and treated like Python integers for the purpose of sorting, then placed back into a new bytearray.
But the in-place sort, in line 47-48 shows that the bytes are being interpreted as signed integers, and are sorted by their 2's complement value, putting number >= 128, since they are negative, towards the left.
I can confirm it by running over the whole range 0-255:
In [50]: byt = bytearray(range(0,256))
In [51]: b.heap_sort(byt)
In [52]: list(byt)
Out[52]:
[128,
129,
130,
131,
132,
133,
134,
135,
136,
137,
138,
139,
140,
141,
142,
143,
144,
145,
146,
147,
148,
149,
150,
151,
152,
153,
154,
155,
156,
157,
158,
159,
160,
161,
162,
163,
164,
165,
166,
167,
168,
169,
170,
171,
172,
173,
174,
175,
176,
177,
178,
179,
180,
181,
182,
183,
184,
185,
186,
187,
188,
189,
190,
191,
192,
193,
194,
195,
196,
197,
198,
199,
200,
201,
202,
203,
204,
205,
206,
207,
208,
209,
210,
211,
212,
213,
214,
215,
216,
217,
218,
219,
220,
221,
222,
223,
224,
225,
226,
227,
228,
229,
230,
231,
232,
233,
234,
235,
236,
237,
238,
239,
240,
241,
242,
243,
244,
245,
246,
247,
248,
249,
250,
251,
252,
253,
254,
255,
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
101,
102,
103,
104,
105,
106,
107,
108,
109,
110,
111,
112,
113,
114,
115,
116,
117,
118,
119,
120,
121,
122,
123,
124,
125,
126,
127]
I know this is difficult to reproduce. You can build the linked package with Cython if you want, and then import src.buffersort as b to get the same sort functions I am using.
I've tried reading through the source code for bytearray in Objects/bytearrayobject.c, but I see some references to long and a few calls to PyInt_FromLong ...
This makes me suspect that the underlying C-level data of a bytearray is represented as a signed integer in C, but the conversion to Python int from raw bytes means it is unsigned between 0 and 255 in Python. I can only assume this is true ... though I don't see why Python should interpret the C long as unsigned, unless that is merely a convention for bytearray that I didn't see in the code. But if so, why wouldn't an unsigned integer be used on the C side as well, if the bytes are always treated by Python as unsigned?
If true, what should be considered the "right" result of the in-place sort? Since they are "just bytes" either interpretation is valid, I guess, but in Python spirit I think their should be one way which is considered the standard.
To match output of sorted, will it be sufficient on the C side to cast values to unsigned long when dealing with bytearray?
Does Python bytearray use signed integers in the C representation?
It uses chars. Whether those are signed depends on the compiler. You can see this in Include/bytearrayobject.h. Here's the 2.7 version:
/* Object layout */
typedef struct {
PyObject_VAR_HEAD
/* XXX(nnorwitz): should ob_exports be Py_ssize_t? */
int ob_exports; /* how many buffer exports */
Py_ssize_t ob_alloc; /* How many bytes allocated */
char *ob_bytes;
} PyByteArrayObject;
and here's the 3.5 version:
typedef struct {
PyObject_VAR_HEAD
Py_ssize_t ob_alloc; /* How many bytes allocated in ob_bytes */
char *ob_bytes; /* Physical backing buffer */
char *ob_start; /* Logical start inside ob_bytes */
/* XXX(nnorwitz): should ob_exports be Py_ssize_t? */
int ob_exports; /* How many buffer exports */
} PyByteArrayObject;
If true, what should be considered the "right" result of the in-place sort?
A Python bytearray represents a sequence of integers in the range 0 <= elem < 256, regardless of whether the compiler considers chars to be signed. You should probably sort it as a sequence of integers in the range 0 <= elem < 256, rather than as a sequence of signed chars.
To match output of sorted, will it be sufficient on the C side to cast values to unsigned long when dealing with bytearray?
I don't know enough about Cython to say what the correct code change would be.
I have a problem with this algorithm:
function crc16 = crc16eval(D)
% CRC16EVAL CRC-CCITT check with the polynomial: x^16+x^12+x^5+1
D = uint16(D);
crchi = 255;
crclo = 255;
t = '00102030405060708191a1b1c1d1e1f112023222524272629383b3a3d3c3f3e32434041464744454a5b58595e5f5c5d53626160676665646b7a79787f7e7d7c74858687808182838c9d9e9f98999a9b95a4a7a6a1a0a3a2adbcbfbeb9b8bbbab6c7c4c5c2c3c0c1cedfdcdddadbd8d9d7e6e5e4e3e2e1e0effefdfcfbfaf9f8f9181b1a1d1c1f1e110003020504070608393a3b3c3d3e3f30212223242526272b5a59585f5e5d5c53424140474645444a7b78797e7f7c7d72636061666764656d9c9f9e99989b9a95848786818083828cbdbebfb8b9babbb4a5a6a7a0a1a2a3afdedddcdbdad9d8d7c6c5c4c3c2c1c0cefffcfdfafbf8f9f6e7e4e5e2e3e0e1e';
crc16htab = hex2dec(reshape(t,2,length(t)/2)');
t = '0021426384a5c6e708294a6b8cadceef31107352b594f7d639187b5abd9cffde62432001e6c7a4856a4b2809eecfac8d53721130d7f695b45b7a1938dffe9dbcc4e586a740610223cced8eaf48690a2bf5d4b79671503312fddcbf9e79583b1aa687e4c522036041ae8feccd2a0b684997b6d5f4133251709fbeddfc1b3a597888a9caeb0c2d4e6f80a1c2e304254667b998fbda3d1c7f5eb190f3d235147756eacba8896e4f2c0de2c3a08166472405dbfa99b85f7e1d3cd3f291b0577615344c6d0e2fc8e98aab44650627c0e182a37d5c3f1ef9d8bb9a75543716f1d0b3922e0f6c4daa8be8c926076445a283e0c11f3e5d7c9bbad9f81736557493b2d1f0';
crc16ltab = hex2dec(reshape(t,2,length(t)/2)');
for k = 1:length(D)
ix = double(bitxor(crchi,D(k)))+1;
crchi = bitxor(crclo,crc16htab(ix));
crclo = crc16ltab(ix);
end
crc16 = crchi*256+crclo;
end
I need translate that code to Python, and I have done the next:
def crc16eval(D):
crchi = 255
crclo = 255
t = '00102030405060708191a1b1c1d1e1f112023222524272629383b3a3d3c3f3e32434041464744454a5b58595e5f5c5d53626160676665646b7a79787f7e7d7c74858687808182838c9d9e9f98999a9b95a4a7a6a1a0a3a2adbcbfbeb9b8bbbab6c7c4c5c2c3c0c1cedfdcdddadbd8d9d7e6e5e4e3e2e1e0effefdfcfbfaf9f8f9181b1a1d1c1f1e110003020504070608393a3b3c3d3e3f30212223242526272b5a59585f5e5d5c53424140474645444a7b78797e7f7c7d72636061666764656d9c9f9e99989b9a95848786818083828cbdbebfb8b9babbb4a5a6a7a0a1a2a3afdedddcdbdad9d8d7c6c5c4c3c2c1c0cefffcfdfafbf8f9f6e7e4e5e2e3e0e1e'
# crc16htab = hex2dec(reshape(t,2,length(t)/2)');
tarray = [int(n, 16) for n in t] # Recorro el string t, y por cada caracter creo un nuevo entero en el array
crc16htab = reshape(tarray, (2, (len(t)/2) )).transpose()
#print crc16htab
t = '0021426384a5c6e708294a6b8cadceef31107352b594f7d639187b5abd9cffde62432001e6c7a4856a4b2809eecfac8d53721130d7f695b45b7a1938dffe9dbcc4e586a740610223cced8eaf48690a2bf5d4b79671503312fddcbf9e79583b1aa687e4c522036041ae8feccd2a0b684997b6d5f4133251709fbeddfc1b3a597888a9caeb0c2d4e6f80a1c2e304254667b998fbda3d1c7f5eb190f3d235147756eacba8896e4f2c0de2c3a08166472405dbfa99b85f7e1d3cd3f291b0577615344c6d0e2fc8e98aab44650627c0e182a37d5c3f1ef9d8bb9a75543716f1d0b3922e0f6c4daa8be8c926076445a283e0c11f3e5d7c9bbad9f81736557493b2d1f0'
# crc16ltab = hex2dec(reshape(t,2,length(t)/2)');
tarray = [int(n, 16) for n in t] # Recorro el string t, y por cada caracter creo un nuevo entero en el array
crc16ltab = reshape(tarray, (2, (len(t)/2))).transpose()
#print crc16ltab
for k in range(len(D)):
ix = crchi ^ D[k]
crchi = crclo ^ crc16htab[ix]
crclo = crc16ltab[ix]
return crchi*256+crclo
My problem is the next:
When I execute de Python code, It's take a loooong time to calculate de xor, I think the the problem is that
crclo = crc16ltab[ix]
is a matrix and that's take a long time to calculate. Which is the problem?
The pseudo-code of this algorithm is the next:
The algorithm for the CRC-CCITT is below described. Note that all operations are on bytes.
A = new byte
B = temp byte
CRCHI = High byte (most significant) of the 16-bit CRC
CRCLO = Low byte (least significant) of the 16-bit CRC
START:
FOR A = FIRST_BYTE TO LAST_BYTE IN BLOCK DO:
A = A XOR CRCHI
CRCHI = A
SHIFT A RIGHT FOUR TIMES (ZERO FILL)
A = A XOR CRCHI {IJKLMNOP}
CRCHI = CRCLO { swap CRCHI, CRCLO }
CRCLO = A
ROTATE A LEFT 4 TIMES {MNOPIJKL}
B=A { temp save }
ROTATE A LEFT ONCE {NOPIJKLM}
A = A AND $1F {000IJLLM}
CRCHI = A XOR CRCHI
A = B AND $F0 {MNOP0000}
CRCHI = A XOR CRCHI { CRCHI complete }
ROTATE B LEFT ONCE {NOP0000M}
B = B AND $ E0 {NOP00000}
CRCLO = B XOR CRCLO { CRCLO complete }
DOEND;
FINISH.
My question is: Why my python code take long time to execute? What is wrong? The problem I think is in
for k in range(len(D)):
ix = crchi ^ D[k]
crchi = crclo ^ crc16htab[ix]
crclo = crc16ltab[ix]
Thanks a lot!
I recommend a read of Ross Williams, A painless guide to CRC algorigthms which will teach you everything you ever wanted to know about CRC's and how to calculate them quickly.
Here is a conversion of the CCITT CRC algorithm used in the linux kernel. It may or may not be the same as what you are calculating as (if you read the above) you'll realise that there are quite a lot of knobs to twiddle when calculating CRCs.
# This mysterious table is just the CRC of each possible byte. It can be
# computed using the standard bit-at-a-time methods. The polynomial can
# be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
# Add the implicit x^16, and you have the standard CRC-CCITT.
# From linux kernel lib/crc-ccitt.c
_crc_table = (
0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
)
def update_crc(data, crc, table=_crc_table):
"""
Add a byte to the crc calculation
"""
return (crc >> 8) ^ table[(crc ^ data) & 0xff]
def calculate_crc(data, crc=0xFFFF, table=_crc_table):
"""
Calculates the CRC for the data string passed in
"""
for c in data:
crc = update_crc(ord(c), crc, table)
return crc
print "%04X" % calculate_crc("Hello")
If you need to calculate CRC16 only (just result, not code), you could use PyCRC or CRC-16.
This was the final solution, too late to post here maybe... But I think that maybe It's useful for someone.
# CRC16EVAL CRC-CCITT check with the polynomial: x^16+x^12+x^5+1
def crc16eval(D):
crchi = 255
crclo = 255
crc16htab = [0, 16, 32, 48, 64, 80, 96, 112, 129, 145, 161, 177, 193, 209, 225, 241, 18, 2, 50, 34, 82, 66, 114, 98, 147, 131, 179, 163, 211, 195, 243, 227, 36, 52, 4, 20, 100, 116, 68, 84, 165, 181, 133, 149, 229, 245, 197, 213, 54, 38, 22, 6, 118, 102, 86, 70, 183, 167, 151, 135, 247, 231, 215, 199, 72, 88, 104, 120, 8, 24, 40, 56, 201, 217, 233, 249, 137, 153, 169, 185, 90, 74, 122, 106, 26, 10, 58, 42, 219, 203, 251, 235, 155, 139, 187, 171, 108, 124, 76, 92, 44, 60, 12, 28, 237, 253, 205, 221, 173, 189, 141, 157, 126, 110, 94, 78, 62, 46, 30, 14, 255, 239, 223, 207, 191, 175, 159, 143, 145, 129, 177, 161, 209, 193, 241, 225, 16, 0, 48, 32, 80, 64, 112, 96, 131, 147, 163, 179, 195, 211, 227, 243, 2, 18, 34, 50, 66, 82, 98, 114, 181, 165, 149, 133, 245, 229, 213, 197, 52, 36, 20, 4, 116, 100, 84, 68, 167, 183, 135, 151, 231, 247, 199, 215, 38, 54, 6, 22, 102, 118, 70, 86, 217, 201, 249, 233, 153, 137, 185, 169, 88, 72, 120, 104, 24, 8, 56, 40, 203, 219, 235, 251, 139, 155, 171, 187, 74, 90, 106, 122, 10, 26, 42, 58, 253, 237, 221, 205, 189, 173, 157, 141, 124, 108, 92, 76, 60, 44, 28, 12, 239, 255, 207, 223, 175, 191, 143, 159, 110, 126, 78, 94, 46, 62, 14, 30]
crc16ltab = [0, 33, 66, 99, 132, 165, 198, 231, 8, 41, 74, 107, 140, 173, 206, 239, 49, 16, 115, 82, 181, 148, 247, 214, 57, 24, 123, 90, 189, 156, 255, 222, 98, 67, 32, 1, 230, 199, 164, 133, 106, 75, 40, 9, 238, 207, 172, 141, 83, 114, 17, 48, 215, 246, 149, 180, 91, 122, 25, 56, 223, 254, 157, 188, 196, 229, 134, 167, 64, 97, 2, 35, 204, 237, 142, 175, 72, 105, 10, 43, 245, 212, 183, 150, 113, 80, 51, 18, 253, 220, 191, 158, 121, 88, 59, 26, 166, 135, 228, 197, 34, 3, 96, 65, 174, 143, 236, 205, 42, 11, 104, 73, 151, 182, 213, 244, 19, 50, 81, 112, 159, 190, 221, 252, 27, 58, 89, 120, 136, 169, 202, 235, 12, 45, 78, 111, 128, 161, 194, 227, 4, 37, 70, 103, 185, 152, 251, 218, 61, 28, 127, 94, 177, 144, 243, 210, 53, 20, 119, 86, 234, 203, 168, 137, 110, 79, 44, 13, 226, 195, 160, 129, 102, 71, 36, 5, 219, 250, 153, 184, 95, 126, 29, 60, 211, 242, 145, 176, 87, 118, 21, 52, 76, 109, 14, 47, 200, 233, 138, 171, 68, 101, 6, 39, 192, 225, 130, 163, 125, 92, 63, 30, 249, 216, 187, 154, 117, 84, 55, 22, 241, 208, 179, 146, 46, 15, 108, 77, 170, 139, 232, 201, 38, 7, 100, 69, 162, 131, 224, 193, 31, 62, 93, 124, 155, 186, 217, 248, 23, 54, 85, 116, 147, 178, 209, 240]
for k in range(len(D)):
ix = crchi ^ D[k]
crchi = crclo ^ crc16htab[ix]
crclo = crc16ltab[ix]
return crchi*256+crclo
Antonio.