Related
I am working with cross_validate using several scoring methods. The results variable is a list of dicts. I am trying to access the F1-scores for the different models using results['test_f1_score'], but I am getting a TypeError: list indices must be integers or slices, not tuple. What is the best way to get the F1 scores from the list?
Here is the list generated from the results variable:
[{'fit_time': array([3.25437379, 3.31360006, 4.55807948, 4.13171291, 4.19844866,
5.00323677, 4.02107215, 4.18458176, 4.82133555, 4.25998092,
3.76690507, 3.97073889, 4.82378697, 4.32887745, 4.66021585,
4.54414296, 5.0830574 , 4.85126257, 5.40511918, 5.04520845,
5.11681008, 4.93635201, 4.74664378, 5.08661175, 4.32240748,
4.10375977, 4.31480002, 4.04965734, 3.78688979, 3.88911653]),
'score_time': array([0.14319539, 0.12677526, 0.10101032, 0.09385777, 0.06942415,
0.11760497, 0.06548786, 0.05601525, 0.07890821, 0.07988477,
0.06272411, 0.07337427, 0.07825518, 0.04193068, 0.04330111,
0.04155302, 0.05201483, 0.0699327 , 0.05248165, 0.04978824,
0.05156302, 0.05214596, 0.07210374, 0.06283927, 0.04857302,
0.05679178, 0.04323983, 0.04890585, 0.04407573, 0.05073118]),
'test_f1_score': array([0.03381643, 0.06428571, 0.01939058, 0.02870813, 0.05673759,
0.05128205, 0.06306306, 0.01066667, 0.08275862, 0.0180791 ,
0.03755869, 0.04013378, 0.04255319, 0.07619048, 0.04494382,
0.08181818, 0.02181818, 0.02171291, 0.03367003, 0.04195804,
0.01532567, 0.05687204, 0.0591716 , 0.05825243, 0.07659574,
0.04848485, 0.01724138, 0.02247191, 0.01233046, 0.01920439]),
'test_f05_measure': array([0.02168525, 0.04174397, 0.01229796, 0.01840491, 0.03683241,
0.03355705, 0.04137116, 0.00676133, 0.05576208, 0.01143511,
0.02386635, 0.02599653, 0.02873563, 0.05012531, 0.02985075,
0.05369928, 0.01390176, 0.01374465, 0.02181501, 0.02722323,
0.00967586, 0.03740648, 0.03816794, 0.03836317, 0.05011136,
0.0312989 , 0.01089918, 0.01447178, 0.00780762, 0.01217815]),
'test_sensitivity': array([0.5 , 0.64285714, 0.5 , 0.42857143, 0.57142857,
0.42857143, 0.5 , 0.28571429, 0.42857143, 0.57142857,
0.85714286, 0.42857143, 0.21428571, 0.57142857, 0.28571429,
0.64285714, 0.42857143, 0.64285714, 0.35714286, 0.42857143,
0.57142857, 0.42857143, 0.71428571, 0.42857143, 0.64285714,
0.57142857, 0.57142857, 0.28571429, 0.35714286, 0.5 ]),
'test_specificity': array([0.96783961, 0.9789689 , 0.94263502, 0.96743044, 0.97872166,
0.98248629, 0.98355021, 0.9400933 , 0.98977003, 0.92937229,
0.94983633, 0.97716858, 0.9898527 , 0.98461538, 0.98690564,
0.98387757, 0.95662493, 0.93403716, 0.97724855, 0.97823062,
0.91636661, 0.98436989, 0.97430442, 0.98477905, 0.98264997,
0.97479335, 0.92585318, 0.97233816, 0.93518291, 0.94205745])},
{'fit_time': array([266.90850329, 276.56025481, 286.16302919, 277.08430457,
286.8862102 , 288.08904719, 288.04218554, 272.11777163,
275.47355413, 272.80465865, 277.46604395, 264.88494945,
276.60775948, 279.0381875 , 277.55336189, 279.63098645,
259.44465542, 258.57707739, 262.15785336, 263.71264052,
270.20684457, 269.78548741, 269.23725843, 265.4925046 ,
221.50381541, 220.98780727, 224.46779513, 212.47151136,
219.48664856, 215.95075083]),
'score_time': array([10.18801832, 8.01537871, 3.33346009, 7.78975368, 2.98581243,
2.70479202, 2.86861777, 8.02667737, 9.41659141, 10.13697696,
6.73046923, 9.69650173, 4.27995682, 3.4434793 , 3.63535523,
4.40075231, 10.22509551, 10.44048429, 9.14734888, 7.9576447 ,
3.13996601, 4.10830784, 2.80078673, 2.92972565, 6.48999166,
6.47848701, 3.41430521, 6.733845 , 1.51497555, 2.96808743]),
'test_f1_score': array([0.63636364, 0.92307692, 0.96296296, 0.75 , 0.92857143,
0.83333333, 0.92307692, 0.83333333, 0.92307692, 0.92857143,
0.75 , 0.72727273, 0.8 , 0.96296296, 0.88 ,
0.88888889, 0.92307692, 0.92307692, 0.88888889, 0.88 ,
0.83333333, 0.88888889, 0.96551724, 0.88 , 0.96296296,
0.88 , 0.92307692, 0.75 , 0.83333333, 0.66666667]),
'test_f05_measure': array([0.76086957, 0.96774194, 0.98484848, 0.83333333, 0.92857143,
0.92592593, 0.96774194, 0.92592593, 0.96774194, 0.92857143,
0.83333333, 0.86956522, 0.86206897, 0.98484848, 0.94827586,
0.90909091, 0.96774194, 0.96774194, 0.90909091, 0.94827586,
0.92592593, 0.90909091, 0.94594595, 0.94827586, 0.98484848,
0.94827586, 0.96774194, 0.83333333, 0.92592593, 0.83333333]),
'test_sensitivity': array([0.5 , 0.85714286, 0.92857143, 0.64285714, 0.92857143,
0.71428571, 0.85714286, 0.71428571, 0.85714286, 0.92857143,
0.64285714, 0.57142857, 0.71428571, 0.92857143, 0.78571429,
0.85714286, 0.85714286, 0.85714286, 0.85714286, 0.78571429,
0.71428571, 0.85714286, 1. , 0.78571429, 0.92857143,
0.78571429, 0.85714286, 0.64285714, 0.71428571, 0.5 ]),
'test_specificity': array([0.99991817, 1. , 1. , 0.99991817, 0.99991816,
1. , 1. , 1. , 1. , 0.99991816,
0.99991817, 1. , 0.99991817, 1. , 1. ,
0.99991816, 1. , 1. , 0.99991816, 1. ,
1. , 0.99991817, 0.99991817, 1. , 1. ,
1. , 1. , 0.99991816, 1. , 1. ])},
{'fit_time': array([174.20313787, 198.07899594, 184.28722906, 178.60294867,
199.02107191, 179.50637341, 174.12752724, 178.98297739,
154.63025308, 167.98617458, 178.08862185, 168.87556529,
154.76291323, 163.30928159, 142.50800228, 165.50385952,
168.07484317, 161.5811913 , 157.46719265, 153.52208304,
171.98308134, 158.28413796, 141.83335066, 168.22751117,
143.11924505, 133.29637599, 141.15710378, 150.20137 ,
149.12544584, 130.72285938]),
'score_time': array([10.3494947 , 18.17751551, 7.69723105, 14.2464962 , 19.54308391,
7.40629816, 13.88110065, 14.76199102, 11.11700416, 9.4580555 ,
12.19720626, 11.57381821, 15.72216558, 10.20245957, 10.69855881,
13.65211844, 14.3795464 , 16.48296714, 10.02136064, 11.43304801,
15.07670116, 12.36216903, 9.41213584, 11.64626813, 8.78664351,
11.56165099, 8.66782999, 7.65322256, 8.58487797, 8.68105125]),
'test_f1_score': array([0.00107335, 0.00068248, 0.00094757, 0.00183083, 0.00252127,
0.00114827, 0.00064725, 0.00101868, 0.00189095, 0.00243717,
0.00251467, 0.00230814, 0.00161264, 0.00114833, 0.00164609,
0.00261584, 0.00213311, 0.00060588, 0.00067877, 0.00191205,
0.00125274, 0.00163043, 0.00184945, 0.00174004, 0.00291333,
0.00147438, 0.00357782, 0.00063331, 0.00130506, 0.00108421]),
'test_f05_measure': array([0.0006721 , 0.00042731, 0.00059321, 0.00114647, 0.00157841,
0.00071875, 0.00040591, 0.00063781, 0.00118381, 0.00152567,
0.00157398, 0.00144697, 0.00100961, 0.00071915, 0.00103029,
0.00163812, 0.00133618, 0.00037988, 0.00042499, 0.00119904,
0.00078425, 0.00102194, 0.00115816, 0.00108951, 0.00182432,
0.00092328, 0.00224316, 0.00039714, 0.00081706, 0.00067892]),
'test_sensitivity': array([0.21428571, 0.14285714, 0.21428571, 0.35714286, 0.57142857,
0.28571429, 0.07142857, 0.21428571, 0.42857143, 0.57142857,
0.64285714, 0.28571429, 0.35714286, 0.21428571, 0.42857143,
0.5 , 0.35714286, 0.07142857, 0.14285714, 0.21428571,
0.28571429, 0.21428571, 0.35714286, 0.35714286, 0.57142857,
0.28571429, 0.42857143, 0.07142857, 0.28571429, 0.21428571]),
'test_specificity': array([0.54394435, 0.52168576, 0.48322422, 0.55458265, 0.48244537,
0.43129552, 0.74834274, 0.5193551 , 0.48228169, 0.46452247,
0.41612111, 0.71783961, 0.49410802, 0.57381342, 0.40502496,
0.56371225, 0.61789017, 0.73107456, 0.51902774, 0.74457812,
0.47888707, 0.7002455 , 0.55908347, 0.53126023, 0.55233653,
0.55741059, 0.72714625, 0.74277764, 0.4997954 , 0.54849006])}]
You have to iterate over your list:
def get_values(res, k):
    """Collect the value stored under key ``k`` from every dict in ``res``.

    Args:
        res: Iterable of dicts (e.g. the list of per-model result dicts).
        k: Key to look up in each dict (e.g. ``'test_f1_score'``).

    Returns:
        A list with ``v[k]`` for each dict ``v`` in ``res`` that contains
        ``k``, in the original order. Dicts without the key are skipped.
    """
    out = []
    for v in res:
        # Skip dicts that do not contain the requested key.
        if k in v:
            out.append(v[k])
    return out
out = get_values(results, 'test_f1_score')
Or single line:
out = [v['test_f1_score'] for v in results if 'test_f1_score' in v]
Use a list comprehension to go over the list items and get each dictionary's 'test_f1_score':
1. Go over the results: for item in results
2. Check that the current result has 'test_f1_score' as a key: if 'test_f1_score' in item.keys()
2.1. If it does, add its value to the list: item['test_f1_score']
f1_scores = [item['test_f1_score'] for item in results if 'test_f1_score' in item.keys()]
Output:
[array([0.03381643, 0.06428571, 0.01939058, 0.02870813, 0.05673759,
0.05128205, 0.06306306, 0.01066667, 0.08275862, 0.0180791 ,
0.03755869, 0.04013378, 0.04255319, 0.07619048, 0.04494382,
0.08181818, 0.02181818, 0.02171291, 0.03367003, 0.04195804,
0.01532567, 0.05687204, 0.0591716 , 0.05825243, 0.07659574,
0.04848485, 0.01724138, 0.02247191, 0.01233046, 0.01920439]), array([0.63636364, 0.92307692, 0.96296296, 0.75 , 0.92857143,
0.83333333, 0.92307692, 0.83333333, 0.92307692, 0.92857143,
0.75 , 0.72727273, 0.8 , 0.96296296, 0.88 ,
0.88888889, 0.92307692, 0.92307692, 0.88888889, 0.88 ,
0.83333333, 0.88888889, 0.96551724, 0.88 , 0.96296296,
0.88 , 0.92307692, 0.75 , 0.83333333, 0.66666667]), array([0.00107335, 0.00068248, 0.00094757, 0.00183083, 0.00252127,
0.00114827, 0.00064725, 0.00101868, 0.00189095, 0.00243717,
0.00251467, 0.00230814, 0.00161264, 0.00114833, 0.00164609,
0.00261584, 0.00213311, 0.00060588, 0.00067877, 0.00191205,
0.00125274, 0.00163043, 0.00184945, 0.00174004, 0.00291333,
0.00147438, 0.00357782, 0.00063331, 0.00130506, 0.00108421])]
I have a dict of numpy arrays :
{'data1': array([[0.16461831, 0.82400555],
[0.02958593, 0.483629 ],
[0.51268564, 0.07030046],
[0.17027816, 0.35304705]]),
'data2': array([[0.8292598 , 0.78136548],
[0.30389913, 0.69250432],
[0.66608852, 0.42237639],
[0.72678807, 0.40486951]]),
'data3': array([[0.45614633, 0.96677904],
[0.87066105, 0.75826116],
[0.39431988, 0.73041888],
[0.65685809, 0.65498308]])}
Expected output :
[([0.16461831, 0.82400555], [0.8292598 , 0.78136548], [0.45614633, 0.96677904]),
([0.02958593, 0.483629 ], [0.30389913, 0.69250432], [0.87066105, 0.75826116]),
([0.51268564, 0.07030046], [0.66608852, 0.42237639], [0.39431988, 0.73041888]),
([0.17027816, 0.35304705], [0.72678807, 0.40486951], [0.65685809, 0.65498308])]
But when I am trying with zip :
list(zip(data.values()))
Getting this output:
[(array([[0.16461831, 0.82400555],
[0.02958593, 0.483629 ],
[0.51268564, 0.07030046],
[0.17027816, 0.35304705]]),),
(array([[0.8292598 , 0.78136548],
[0.30389913, 0.69250432],
[0.66608852, 0.42237639],
[0.72678807, 0.40486951]]),),
(array([[0.45614633, 0.96677904],
[0.87066105, 0.75826116],
[0.39431988, 0.73041888],
[0.65685809, 0.65498308]]),)]
How to zip list of numpy arrays?
Use
# `*` unpacks the dict's arrays into separate arguments, so zip pairs up their rows.
list(zip(*data.values()))
Output:
[(array([0.16461831, 0.82400555]),
array([0.8292598 , 0.78136548]),
array([0.45614633, 0.96677904])),
(array([0.02958593, 0.483629 ]),
array([0.30389913, 0.69250432]),
array([0.87066105, 0.75826116])),
(array([0.51268564, 0.07030046]),
array([0.66608852, 0.42237639]),
array([0.39431988, 0.73041888])),
(array([0.17027816, 0.35304705]),
array([0.72678807, 0.40486951]),
array([0.65685809, 0.65498308]))]
If a 3D array works for you, you can just stack on the 2nd axis (axis=1):
# np.stack needs a real sequence (list/tuple); a dict view is rejected by recent NumPy.
np.stack(list(data.values()), axis=1)
#[[[0.16461831 0.82400555]
# [0.8292598 0.78136548]
# [0.45614633 0.96677904]]
# [[0.02958593 0.483629 ]
# [0.30389913 0.69250432]
# [0.87066105 0.75826116]]
# [[0.51268564 0.07030046]
# [0.66608852 0.42237639]
# [0.39431988 0.73041888]]
# [[0.17027816 0.35304705]
# [0.72678807 0.40486951]
# [0.65685809 0.65498308]]]
The following code will create your output:
tmp = [data[d].tolist() for d in data]
tmp = list(zip(*tmp))
output:
[([0.16461831, 0.82400555], [0.8292598, 0.78136548], [0.45614633, 0.96677904]), ([0.02958593, 0.483629], [0.30389913, 0.69250432], [0.87066105, 0.75826116]), ([0.51268564, 0.07030046], [0.66608852, 0.42237639], [0.39431988, 0.73041888]), ([0.17027816, 0.35304705], [0.72678807, 0.40486951], [0.65685809, 0.65498308])]
See the Python tutorial section on unpacking argument lists for an explanation of the * syntax.
Another way:
# Wrap the dict view in list(): np.stack requires a sequence, not a dict_values object.
[tuple(x) for x in np.stack(list(data.values()), axis=1).tolist()]
[([0.16461831, 0.82400555], [0.8292598, 0.78136548], [0.45614633, 0.96677904]),
([0.02958593, 0.483629], [0.30389913, 0.69250432], [0.87066105, 0.75826116]),
([0.51268564, 0.07030046], [0.66608852, 0.42237639], [0.39431988, 0.73041888]),
([0.17027816, 0.35304705], [0.72678807, 0.40486951], [0.65685809, 0.65498308])]
When feeding a batch to the session, ValueError: setting an array element with a sequence. occurs at _, err = sess.run([train, loss], feed_dict={image: epoch_x, label: epoch_y})
Here data is 2d matrix with first column as images and second column as one hot labels.
code fragment:
for epoch in range(epochs):
epoch_loss = 0
# data = np.random.shuffle(data)
data = shuffle(data) #sklearn.utils.shuffle
ptr = 0
for iter in range(int(n_examples/batch_size)): #batch_size = 10
epoch_data = data[ptr : ptr + batch_size] # to get the batch
epoch_x = epoch_data[:, 0] # first col - images
epoch_y = epoch_data[:, 1] # second col - labels
ptr += batch_size
_, err = sess.run([train, loss], feed_dict={image: epoch_x, label: epoch_y}) # error occurs here.
epoch_loss += err
print("Epoch: ", epoch, " - Completed out of: ", epochs, " - Loss: ", epoch_loss)
Here epoch_x is a batch of coloured images:
[array([[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.68235294, 0.6745098 , 0.67058824],
[0.68627451, 0.68235294, 0.66666667],
[0.69019608, 0.68627451, 0.67058824]],
[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.68235294, 0.6745098 , 0.67058824],
[0.67843137, 0.67058824, 0.66666667],
[0.68235294, 0.67843137, 0.6627451 ]],
[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.6745098 , 0.66666667, 0.6627451 ],
[0.68235294, 0.6745098 , 0.67058824],
[0.68235294, 0.67843137, 0.6627451 ]],
...,
[[0.39607843, 0.30980392, 0.61568627],
[0.4 , 0.30588235, 0.61176471],
[0.39215686, 0.29411765, 0.60784314],
...,
[0.24705882, 0.18431373, 0.49019608],
[0.23921569, 0.17254902, 0.49019608],
[0.23137255, 0.15294118, 0.47843137]],
[[0.39215686, 0.29019608, 0.61568627],
[0.38039216, 0.27843137, 0.60392157],
[0.38431373, 0.28235294, 0.61176471],
...,
[0.28235294, 0.21960784, 0.5254902 ],
[0.27058824, 0.19607843, 0.51372549],
[0.2745098 , 0.2 , 0.51764706]],
[[0.39215686, 0.28235294, 0.63137255],
[0.39607843, 0.28627451, 0.63529412],
[0.39215686, 0.28235294, 0.63529412],
...,
[0.29411765, 0.22745098, 0.54509804],
[0.3254902 , 0.25098039, 0.56078431],
[0.30588235, 0.23137255, 0.54117647]]])
array([[[0.85098039, 0.89803922, 0.91372549],
[0.85490196, 0.90196078, 0.91764706],
[0.85490196, 0.90196078, 0.91764706],
...,
[0.09411765, 0.10588235, 0.09803922],
[0.10196078, 0.11372549, 0.10588235],
[0.11372549, 0.1254902 , 0.11764706]],
[[0.85882353, 0.90588235, 0.92156863],
[0.85882353, 0.90588235, 0.92156863],
[0.8627451 , 0.90980392, 0.9254902 ],
...,
[0.10980392, 0.11764706, 0.11764706],
[0.11764706, 0.1254902 , 0.1254902 ],
[0.11372549, 0.12156863, 0.12156863]],
[[0.85882353, 0.90588235, 0.92156863],
[0.85882353, 0.90588235, 0.92156863],
[0.8627451 , 0.90980392, 0.9254902 ],
...,
[0.14901961, 0.14509804, 0.15294118],
[0.15294118, 0.14901961, 0.16470588],
[0.14117647, 0.1372549 , 0.15294118]],
...,
[[0.55294118, 0.52941176, 0.47843137],
[0.56470588, 0.54117647, 0.49019608],
[0.58823529, 0.55686275, 0.50588235],
...,
[0.12941176, 0.09803922, 0.10196078],
[0.12941176, 0.09803922, 0.10196078],
[0.1254902 , 0.09411765, 0.09803922]],
[[0.60392157, 0.57647059, 0.51764706],
[0.58823529, 0.56078431, 0.50196078],
[0.60784314, 0.58039216, 0.52156863],
...,
[0.13333333, 0.10196078, 0.10588235],
[0.13333333, 0.10196078, 0.10588235],
[0.13333333, 0.10196078, 0.10588235]],
[[0.60784314, 0.58039216, 0.52156863],
[0.60392157, 0.57647059, 0.50980392],
[0.60392157, 0.57647059, 0.50980392],
...,
[0.12941176, 0.09803922, 0.10196078],
[0.12941176, 0.09803922, 0.10196078],
[0.1254902 , 0.09411765, 0.09803922]]])
array([[[0.68627451, 0.69411765, 0.67058824],
[0.68627451, 0.69411765, 0.67058824],
[0.68627451, 0.69411765, 0.67058824],
...,
[0.6627451 , 0.67058824, 0.6745098 ],
[0.68627451, 0.69411765, 0.69803922],
[0.68627451, 0.69411765, 0.69803922]],
[[0.68627451, 0.69411765, 0.67058824],
[0.68627451, 0.69411765, 0.67058824],
[0.68627451, 0.69411765, 0.67058824],
...,
[0.67058824, 0.67843137, 0.67843137],
[0.67058824, 0.67843137, 0.67843137],
[0.69019608, 0.69803922, 0.70196078]],
[[0.68627451, 0.69411765, 0.67058824],
[0.68627451, 0.69411765, 0.67058824],
[0.68627451, 0.69411765, 0.67058824],
...,
[0.69019608, 0.70196078, 0.69411765],
[0.69019608, 0.69803922, 0.69803922],
[0.69411765, 0.70196078, 0.70196078]],
...,
[[0.18431373, 0.1254902 , 0.41568627],
[0.19607843, 0.1372549 , 0.42745098],
[0.18431373, 0.1254902 , 0.41568627],
...,
[0.03529412, 0.04313725, 0.17647059],
[0.05098039, 0.05490196, 0.20392157],
[0.08235294, 0.08235294, 0.23921569]],
[[0.20392157, 0.14509804, 0.43529412],
[0.23137255, 0.17647059, 0.45882353],
[0.21960784, 0.15294118, 0.43921569],
...,
[0.04705882, 0.05098039, 0.18431373],
[0.08235294, 0.07843137, 0.23529412],
[0.12156863, 0.11372549, 0.27843137]],
[[0.23137255, 0.17647059, 0.45882353],
[0.23921569, 0.18431373, 0.46666667],
[0.19607843, 0.12941176, 0.41568627],
...,
[0.0745098 , 0.0745098 , 0.21568627],
[0.1254902 , 0.12156863, 0.27843137],
[0.14509804, 0.13333333, 0.30980392]]])
array([[[0.8627451 , 0.80392157, 0.41176471],
[0.8627451 , 0.80392157, 0.41176471],
[0.8627451 , 0.80392157, 0.41176471],
...,
[0.77647059, 0.70980392, 0.32941176],
[0.80784314, 0.72156863, 0.32156863],
[0.82352941, 0.7372549 , 0.3254902 ]],
[[0.85490196, 0.79607843, 0.40392157],
[0.85490196, 0.79607843, 0.40392157],
[0.85490196, 0.79607843, 0.40392157],
...,
[0.77647059, 0.70588235, 0.33333333],
[0.79215686, 0.71372549, 0.31764706],
[0.82352941, 0.7372549 , 0.3254902 ]],
[[0.85490196, 0.79607843, 0.40392157],
[0.85490196, 0.79607843, 0.40392157],
[0.85490196, 0.79607843, 0.40392157],
...,
[0.76862745, 0.70196078, 0.3372549 ],
[0.78431373, 0.70196078, 0.31372549],
[0.82745098, 0.7372549 , 0.3372549 ]],
...,
[[0.68627451, 0.61960784, 0.05098039],
[0.68235294, 0.61568627, 0.04705882],
[0.68235294, 0.61568627, 0.04705882],
...,
[0.30588235, 0.31372549, 0.35294118],
[0.40392157, 0.43529412, 0.48627451],
[0.49411765, 0.5254902 , 0.59215686]],
[[0.68627451, 0.61960784, 0.05098039],
[0.68627451, 0.61960784, 0.05098039],
[0.68235294, 0.61568627, 0.04705882],
...,
[0.33333333, 0.34901961, 0.39215686],
[0.42352941, 0.45490196, 0.52156863],
[0.51764706, 0.56470588, 0.63529412]],
[[0.68627451, 0.61960784, 0.05098039],
[0.68627451, 0.61960784, 0.05098039],
[0.68235294, 0.61568627, 0.04705882],
...,
[0.39607843, 0.41960784, 0.4627451 ],
[0.45490196, 0.49411765, 0.56078431],
[0.54509804, 0.58823529, 0.66666667]]])
array([[[0.71372549, 0.71764706, 0.70980392],
[0.71372549, 0.71764706, 0.70980392],
[0.71372549, 0.71764706, 0.70980392],
...,
[0.36078431, 0.35686275, 0.34117647],
[0.41960784, 0.41960784, 0.39607843],
[0.46666667, 0.46666667, 0.44313725]],
[[0.70980392, 0.71372549, 0.70588235],
[0.71372549, 0.71764706, 0.70980392],
[0.70980392, 0.71372549, 0.70588235],
...,
[0.42745098, 0.42745098, 0.40392157],
[0.48235294, 0.48235294, 0.45882353],
[0.51764706, 0.51764706, 0.49411765]],
[[0.70980392, 0.71372549, 0.70588235],
[0.70980392, 0.71372549, 0.70588235],
[0.70980392, 0.71372549, 0.70588235],
...,
[0.45882353, 0.46666667, 0.44313725],
[0.51764706, 0.5254902 , 0.50196078],
[0.53333333, 0.54117647, 0.51764706]],
...,
[[0.44313725, 0.34509804, 0.69803922],
[0.44313725, 0.34509804, 0.69803922],
[0.43921569, 0.34117647, 0.69411765],
...,
[0.48235294, 0.36078431, 0.74901961],
[0.48235294, 0.36078431, 0.74901961],
[0.49019608, 0.36862745, 0.75686275]],
[[0.44313725, 0.34117647, 0.69411765],
[0.43921569, 0.3372549 , 0.69019608],
[0.43529412, 0.33333333, 0.68627451],
...,
[0.48235294, 0.36078431, 0.74901961],
[0.47843137, 0.35686275, 0.74509804],
[0.47843137, 0.35686275, 0.74509804]],
[[0.43137255, 0.32941176, 0.68235294],
[0.42745098, 0.3254902 , 0.67843137],
[0.42745098, 0.31372549, 0.67843137],
...,
[0.48627451, 0.36470588, 0.75294118],
[0.48627451, 0.36470588, 0.75294118],
[0.4745098 , 0.34901961, 0.74509804]]])
array([[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.69803922, 0.69019608, 0.68627451],
[0.70588235, 0.69803922, 0.69411765],
[0.70980392, 0.70196078, 0.69803922]],
[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.69803922, 0.69019608, 0.68627451],
[0.70196078, 0.69411765, 0.69019608],
[0.70588235, 0.69803922, 0.69411765]],
[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.69803922, 0.69019608, 0.68627451],
[0.69803922, 0.69019608, 0.68627451],
[0.69803922, 0.69019608, 0.68627451]],
...,
[[0.06666667, 0.04313725, 0.22745098],
[0.09019608, 0.06666667, 0.25098039],
[0.0745098 , 0.05098039, 0.24313725],
...,
[0.39607843, 0.28627451, 0.63529412],
[0.39215686, 0.29019608, 0.61960784],
[0.38823529, 0.29411765, 0.6 ]],
[[0.04705882, 0.03529412, 0.21176471],
[0.0627451 , 0.04313725, 0.21960784],
[0.07058824, 0.05098039, 0.22745098],
...,
[0.39215686, 0.28235294, 0.63529412],
[0.38431373, 0.27843137, 0.61568627],
[0.38431373, 0.28235294, 0.61176471]],
[[0.03529412, 0.02745098, 0.19215686],
[0.03921569, 0.03137255, 0.19607843],
[0.06666667, 0.04705882, 0.22352941],
...,
[0.36078431, 0.25098039, 0.60392157],
[0.36470588, 0.25490196, 0.60784314],
[0.39215686, 0.28235294, 0.63529412]]])
array([[[0.75294118, 0.75294118, 0.75294118],
[0.74509804, 0.74509804, 0.74509804],
[0.74509804, 0.74509804, 0.74509804],
...,
[0.80784314, 0.82745098, 0.82352941],
[0.80392157, 0.82352941, 0.81960784],
[0.81960784, 0.83921569, 0.83529412]],
[[0.74901961, 0.74901961, 0.74901961],
[0.74509804, 0.74509804, 0.74509804],
[0.74901961, 0.74901961, 0.74901961],
...,
[0.82745098, 0.84705882, 0.84313725],
[0.82352941, 0.84313725, 0.83921569],
[0.81960784, 0.83921569, 0.83529412]],
[[0.76078431, 0.76862745, 0.76862745],
[0.76862745, 0.76862745, 0.76862745],
[0.76862745, 0.76862745, 0.76862745],
...,
[0.82352941, 0.84313725, 0.83921569],
[0.81960784, 0.83921569, 0.83529412],
[0.82352941, 0.84313725, 0.83921569]],
...,
[[0.47058824, 0.45098039, 0.29803922],
[0.47058824, 0.45098039, 0.29803922],
[0.48235294, 0.45882353, 0.29803922],
...,
[0.48235294, 0.47058824, 0.41176471],
[0.44705882, 0.43529412, 0.37647059],
[0.4627451 , 0.45098039, 0.39215686]],
[[0.44705882, 0.43137255, 0.27058824],
[0.45098039, 0.43529412, 0.2745098 ],
[0.4627451 , 0.43921569, 0.27843137],
...,
[0.4627451 , 0.45490196, 0.38431373],
[0.45098039, 0.44313725, 0.37254902],
[0.46666667, 0.45882353, 0.38823529]],
[[0.45882353, 0.44313725, 0.28235294],
[0.42745098, 0.41176471, 0.25098039],
[0.42352941, 0.4 , 0.23921569],
...,
[0.45098039, 0.44313725, 0.37254902],
[0.46666667, 0.45882353, 0.38823529],
[0.45098039, 0.44313725, 0.37254902]]])
array([[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.69411765, 0.69803922, 0.68235294],
[0.70196078, 0.70588235, 0.69019608],
[0.69019608, 0.69411765, 0.67843137]],
[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.67843137, 0.68235294, 0.66666667],
[0.69411765, 0.69803922, 0.68235294],
[0.69803922, 0.70196078, 0.68627451]],
[[0.72156863, 0.7254902 , 0.70980392],
[0.72156863, 0.7254902 , 0.70980392],
[0.72156863, 0.7254902 , 0.70980392],
...,
[0.66666667, 0.67058824, 0.6627451 ],
[0.68235294, 0.68627451, 0.67058824],
[0.69803922, 0.70196078, 0.68627451]],
...,
[[0.20392157, 0.14117647, 0.39215686],
[0.19607843, 0.12941176, 0.39215686],
[0.09803922, 0.02745098, 0.29803922],
...,
[0.27843137, 0.21960784, 0.50980392],
[0.29411765, 0.21960784, 0.5372549 ],
[0.29411765, 0.21568627, 0.54117647]],
[[0.31764706, 0.23921569, 0.53333333],
[0.30588235, 0.22745098, 0.52941176],
[0.31764706, 0.23529412, 0.54509804],
...,
[0.28627451, 0.22352941, 0.52941176],
[0.30980392, 0.23137255, 0.55686275],
[0.30980392, 0.22745098, 0.56078431]],
[[0.37647059, 0.28235294, 0.62745098],
[0.37254902, 0.27843137, 0.62352941],
[0.35686275, 0.25882353, 0.61176471],
...,
[0.30588235, 0.23921569, 0.55686275],
[0.32156863, 0.23921569, 0.57254902],
[0.34117647, 0.25490196, 0.6 ]]])
array([[[0.80784314, 0.84313725, 0.85490196],
[0.78431373, 0.83137255, 0.83921569],
[0.79215686, 0.83529412, 0.85098039],
...,
[0.57647059, 0.59607843, 0.59215686],
[0.60784314, 0.63137255, 0.62745098],
[0.65882353, 0.68235294, 0.67843137]],
[[0.80784314, 0.84313725, 0.85490196],
[0.8 , 0.83529412, 0.84705882],
[0.8 , 0.84313725, 0.85882353],
...,
[0.52156863, 0.54117647, 0.5372549 ],
[0.61176471, 0.63137255, 0.62745098],
[0.62352941, 0.64705882, 0.64313725]],
[[0.80784314, 0.84313725, 0.85490196],
[0.80784314, 0.84313725, 0.85882353],
[0.80392157, 0.83921569, 0.85490196],
...,
[0.5254902 , 0.53333333, 0.53333333],
[0.54509804, 0.56470588, 0.56078431],
[0.63529412, 0.65490196, 0.65098039]],
...,
[[0.20784314, 0.15294118, 0.16862745],
[0.22352941, 0.16862745, 0.18431373],
[0.21176471, 0.16078431, 0.16862745],
...,
[0.76470588, 0.76470588, 0.76470588],
[0.76862745, 0.76862745, 0.76862745],
[0.76078431, 0.76862745, 0.76862745]],
[[0.21568627, 0.16078431, 0.17647059],
[0.22352941, 0.16862745, 0.18431373],
[0.21568627, 0.16470588, 0.17254902],
...,
[0.75686275, 0.76470588, 0.76470588],
[0.76078431, 0.76862745, 0.76862745],
[0.76470588, 0.77254902, 0.77254902]],
[[0.20392157, 0.14901961, 0.16470588],
[0.21176471, 0.15686275, 0.17254902],
[0.22745098, 0.17647059, 0.18431373],
...,
[0.76078431, 0.76862745, 0.76862745],
[0.76470588, 0.77254902, 0.77254902],
[0.76862745, 0.77647059, 0.77647059]]])
array([[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.69019608, 0.68235294, 0.67843137],
[0.69803922, 0.69019608, 0.68627451],
[0.70196078, 0.69411765, 0.69019608]],
[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.69019608, 0.69411765, 0.68627451],
[0.69019608, 0.69411765, 0.68627451],
[0.70196078, 0.69411765, 0.69019608]],
[[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
[0.7254902 , 0.72941176, 0.71372549],
...,
[0.69411765, 0.69803922, 0.69019608],
[0.69411765, 0.69803922, 0.69019608],
[0.69411765, 0.69803922, 0.69019608]],
...,
[[0.40392157, 0.29411765, 0.64705882],
[0.40392157, 0.30588235, 0.65098039],
[0.40784314, 0.30980392, 0.64705882],
...,
[0.06666667, 0.03529412, 0.27058824],
[0.09411765, 0.0627451 , 0.30196078],
[0.21568627, 0.18039216, 0.42745098]],
[[0.42745098, 0.32156863, 0.65882353],
[0.39607843, 0.29019608, 0.62745098],
[0.42745098, 0.3254902 , 0.65490196],
...,
[0.10980392, 0.0745098 , 0.32156863],
[0.20392157, 0.16078431, 0.43529412],
[0.3254902 , 0.27843137, 0.56078431]],
[[0.39215686, 0.27843137, 0.61176471],
[0.40392157, 0.30196078, 0.63137255],
[0.39215686, 0.29019608, 0.61568627],
...,
[0.18823529, 0.14901961, 0.40784314],
[0.29803922, 0.25098039, 0.53333333],
[0.31764706, 0.2627451 , 0.56862745]]])]
and epoch_y is batch of labels with 3 classes:
[list([0, 1, 0]) list([0, 0, 1]) list([0, 1, 0]) list([1, 0, 0])
list([0, 1, 0]) list([0, 1, 0]) list([0, 0, 1]) list([0, 1, 0])
list([0, 0, 1]) list([0, 1, 0])]
When feeding these batches of image and label I get ValueError: setting an array element with a sequence.
Okay I solved it. The reason this happened was because I pre-processed my data in another file and then saved it to a pickle format. My labels should be [] and not list().
I had loaded the data from pickle format to the model file and then converted it to a numpy array using data = np.asarray(data). I don't know what it did to cause the problem, but removing this solved the problem. My epoch_y now comes in [] and not list().
I want to get the data from this website http://wiki.stat.ucla.edu/socr/index.php/SOCR_Data_Dinov_020108_HeightsWeights using Beautiful Soup and requests. Here is my code:
import requests
from bs4 import BeautifulSoup
response = requests.get("http://wiki.stat.ucla.edu/socr/index.php/SOCR_Data_Dinov_020108_HeightsWeights")
soup = BeautifulSoup(response.text, "html.parser")
list_table_data = soup.find(class_="wikitable").contents
list_tr_data = list_table_data[1::2]
print(list_tr_data)
When you print list_tr_data, the output becomes:
[<tr>
<th>Index</th><th>Height(Inches)</th><th>Weight(Pounds)
</th></tr>, <tr>
<td>1</td><td>65.78</td><td>112.99
</td></tr>, <tr>
<td>2</td><td>71.52</td><td>136.49
</td></tr>, <tr>
<td>3</td><td>69.40</td><td>153.03
</td></tr>,...., <tr>
<td>200</td><td>71.39</td><td>127.88
</td></tr>]
I want the Height(Inches) data in a list called list_height_data, but when I try to access it using this code:
list_height_data = []
for row in list_tr_data:
list_height_data.append(row.find_all("tr"))
print(list_height_data)
this produces a list of empty lists:
[[], [], [], [], [], [], [], [], [], [], ... []]
What should I do to get the Height(Inches) data? Printing list_height_data and len(list_height_data) should give:
[65.78, 71.52, 69.40, ..., 71.39]
200
You need to iterate over the td tags:
import requests
from bs4 import BeautifulSoup as soup
# Download the page and parse it.
page = requests.get('http://wiki.stat.ucla.edu/socr/index.php/SOCR_Data_Dinov_020108_HeightsWeights')
d = soup(page.text, 'html.parser')
# Convert every <td> of every <tr> in the wikitable to a float.
# The header row has only <th> cells, so it yields an empty list.
table = d.find('table', {'class':'wikitable'})
rows = [[float(cell.text.replace('\n', '')) for cell in tr.find_all('td')] for tr in table.find_all('tr')]
# Drop the (empty) header row; keep the data rows.
_, *results = rows
# Column 1 of each data row is Height(Inches).
height = [row[1] for row in results]
Output:
[65.78, 71.52, 69.4, 68.22, 67.79, 68.7, 69.8, 70.01, 67.9, 66.78, 66.49, 67.62, 68.3, 67.12, 68.28, 71.09, 66.46, 68.65, 71.23, 67.13, 67.83, 68.88, 63.48, 68.42, 67.63, 67.21, 70.84, 67.49, 66.53, 65.44, 69.52, 65.81, 67.82, 70.6, 71.8, 69.21, 66.8, 67.66, 67.81, 64.05, 68.57, 65.18, 69.66, 67.97, 65.98, 68.67, 66.88, 67.7, 69.82, 69.09, 69.91, 67.33, 70.27, 69.1, 65.38, 70.18, 70.41, 66.54, 66.36, 67.54, 66.5, 69.0, 68.3, 67.01, 70.81, 68.22, 69.06, 67.73, 67.22, 67.37, 65.27, 70.84, 69.92, 64.29, 68.25, 66.36, 68.36, 65.48, 69.72, 67.73, 68.64, 66.78, 70.05, 66.28, 69.2, 69.13, 67.36, 70.09, 70.18, 68.23, 68.13, 70.24, 71.49, 69.2, 70.06, 70.56, 66.29, 63.43, 66.77, 68.89, 64.87, 67.09, 68.35, 65.61, 67.76, 68.02, 67.66, 66.31, 69.44, 63.84, 67.72, 70.05, 70.19, 65.95, 70.01, 68.61, 68.81, 69.76, 65.46, 68.83, 65.8, 67.21, 69.42, 68.94, 67.94, 65.63, 66.5, 67.93, 68.89, 70.24, 68.27, 71.23, 69.1, 64.4, 71.1, 68.22, 65.92, 67.44, 73.9, 69.98, 69.52, 65.18, 68.01, 68.34, 65.18, 68.26, 68.57, 64.5, 68.71, 68.89, 69.54, 67.4, 66.48, 66.01, 72.44, 64.13, 70.98, 67.5, 72.02, 65.31, 67.08, 64.39, 69.37, 68.38, 65.31, 67.14, 68.39, 66.29, 67.19, 65.99, 69.43, 67.97, 67.76, 65.28, 73.83, 66.81, 66.89, 65.74, 65.98, 66.58, 67.11, 65.87, 66.78, 68.74, 66.23, 65.96, 68.58, 66.59, 66.97, 68.08, 70.19, 65.52, 67.46, 67.41, 69.66, 65.8, 66.11, 68.24, 68.02, 71.39]
I have a text file composed of different dictionaries and it looks like this:
{"destination.fqdn": "194-65-57-128.ctt.pt", "feed.provider": "MyFeed", "source.abuse_contact": "coisas#foo.com", "raw": "bWFsd2FyZSwyMTAuMjguNTYuMSxodHRwOi8vd3d3LmN0dC5wdCAsMTk0LTY1LTU3LTEyOC5jdHQucHQsY29pc2FzQGZvby5jb20sMTk0LjIzOS4xNjcuNSx3d3cudmVyeWJhZC5jb20gLHZlcnkudmVyeWJhZC5jb20sLCwsMjAxMC0wMi0xOFQwMDowMDowMCswMDowMA0K", "feed.name": "FileCollector", "destination.geolocation.latitude": 32.2109, "destination.geolocation.cc": "CN", "source.geolocation.longitude": 12.069, "event_description.text": "ctt", "source.ip": "194.239.167.5", "source.geolocation.city": "Frederikssund", "destination.geolocation.city": "Zhenjiang", "destination.url": "http://www.ctt.pt", "classification.taxonomy": "malicious code", "source.url": "http://www.verybad.com", "source.fqdn": "very.verybad.com", "feed.url": "file://localhost/opt/intelmq/teste_ip_url_fqdn.csv", "feed.accuracy": 100.0, "time.observation": "2017-07-18T13:15:48+00:00", "destination.geolocation.longitude": 119.4551, "source.geolocation.latitude": 55.8396, "classification.type": "malware", "destination.ip": "210.28.56.1", "time.source": "2010-02-18T00:00:00+00:00", "source.geolocation.cc": "DK"}
{"destination.url": "http://www2.ctt.pt", "classification.taxonomy": "malicious code", "source.url": "http://www.telecom.pt", "feed.provider": "MyFeed", "time.observation": "2017-07-18T13:15:48+00:00", "destination.fqdn": "ctt-pt.mail.protection.outlook.com", "source.abuse_contact": "coisas7#foo.com", "source.geolocation.cc": "TN", "feed.url": "file://localhost/opt/intelmq/teste_ip_url_fqdn.csv", "raw": "YyZjLDI1MS4xNTQuNjUuOSxodHRwOi8vd3d3Mi5jdHQucHQsY3R0LXB0Lm1haWwucHJvdGVjdGlvbi5vdXRsb29rLmNvbSxjb2lzYXM3QGZvby5jb20sMTk3LjEzLjEwNS44LHd3dy50ZWxlY29tLnB0LCwsLCwyMDEwLTAyLTE4VDAwOjAwOjAwKzAwOjAwDQo=", "feed.name": "FileCollector", "classification.type": "c&c", "source.geolocation.latitude": 34.0, "source.geolocation.longitude": 9.0, "destination.ip": "251.154.65.9", "event_description.text": "ctt", "source.ip": "197.13.105.8", "time.source": "2010-02-18T00:00:00+00:00", "feed.accuracy": 100.0}
Each line is a dictionary and some dictionaries have more keys than others, and I would like to convert the text file to a csv file.
I have the following code:
import json
import csv
import ast
# Attempt to convert a text file holding one dictionary per line (txt_file)
# into a CSV file (csv_file). This is the version that raises the ValueError
# shown in the traceback that follows it.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below is an assumption -- confirm against the original script.
def json_to_csv(txt_file, csv_file):
# Accumulates one entry per input line.
lista = []
# The input is opened in binary mode and read completely into memory.
with open(txt_file, 'rb') as fin:
lines = fin.readlines()
for line in lines:
# The line is parsed into dict_line ...
dict_line = ast.literal_eval(line)
# ... but the unparsed `line` is what gets appended; dict_line is never used
# again in this function, so lista ends up holding raw text, not dictionaries.
lista.append(line)
# Round-trip lista through a JSON string and back.
list_json = json.dumps(lista)
read_json = json.loads(list_json)
# Column names handed to csv.DictWriter as its fieldnames.
header =["feed.accuracy","feed.url","source.geolocation.longitude","event_description.text","raw","destination.geolocation.city","source.ip","classification.taxonomy",
"time.observation","destination.geolocation.latitude","destination.ip","source.asn","feed.name","source.geolocation.latitude","time.source","feed.provider",
"destination.geolocation.longitude","destination.geolocation.cc","destination.asn","source.abuse_contact","source.geolocation.cc","classification.type"]
# The output is opened in binary read/write mode ('wb+').
with open(csv_file, 'wb+') as f:
dict_writer = csv.DictWriter(f, header)
dict_writer.writeheader()
# This is the call that fails in the traceback: the rows passed in are the
# entries of read_json, and the reported "fields" are single characters.
dict_writer.writerows(read_json)
First I read the text file, then I convert its content into JSON, and then I try to write the converted data into the CSV file; however, it's returning the following error:
Traceback (most recent call last):
File "<pyshell#38>", line 1, in <module>
json_to_csv('ctt.txt','ctt.csv')
File "C:/Users/Marisa/Documents/json_to_csv.py", line 26, in json_to_csv
dict_writer.writerows(read_json)
File "C:\Python27\lib\csv.py", line 157, in writerows
rows.append(self._dict_to_list(rowdict))
File "C:\Python27\lib\csv.py", line 148, in _dict_to_list
+ ", ".join([repr(x) for x in wrong_fields]))
ValueError: dict contains fields not in fieldnames: u'{', u'"', u'f', u'e', u'e', u'd', u'.', u'a', u'c', u'c', u'u', u'r', u'a', u'c', u'y', u'"', u':', u' ', u'1', u'0', u'0', u'.', u'0', u',', u' ', u'"', u'c', u'l', u'a', u's', u's', u'i', u'f', u'i', u'c', u'a', u't', u'i', u'o', u'n', u'.', u't', u'a', u'x',...
You're making it a little more complicated than it needs to be, and you're missing some of the fields in your own example data above. We can get rid of the ast dependency and the back & forth JSON processing, add in the missing fields, and the following will work with the sample data you've provided:
import json
import csv
def json_to_csv(txt_file, csv_file, header=None):
    """Convert a text file with one JSON object per line into a CSV file.

    Args:
        txt_file: Path of the input file. Every non-blank line is expected to
            contain a single JSON object.
        csv_file: Path of the CSV file to write (overwritten if it exists).
        header: Optional sequence of column names, in output order. When
            omitted, the built-in field list below is used.

    Raises:
        ValueError: Raised by csv.DictWriter when a record contains a key
            that is not listed in the header.

    Blank lines are skipped. Lines that are not valid JSON, or that decode to
    something other than an object, are reported with print() and skipped, so
    one bad line does not abort the whole conversion.
    """
    if header is None:
        # Built fresh on every call so callers can never mutate a shared default.
        header = [
            "feed.accuracy", "feed.url", "source.geolocation.longitude",
            "event_description.text", "raw", "destination.geolocation.city",
            "source.ip", "classification.taxonomy", "time.observation",
            "destination.geolocation.latitude", "destination.ip", "source.asn",
            "feed.name", "source.geolocation.latitude", "time.source",
            "feed.provider", "destination.geolocation.longitude",
            "destination.geolocation.cc", "destination.asn",
            "source.abuse_contact", "source.geolocation.cc", "classification.type",
            'destination.fqdn', 'source.fqdn', 'source.geolocation.city',
            'source.url', 'destination.url'
        ]

    lista = []
    with open(txt_file, 'r') as in_file:
        # Iterate the file object directly instead of loading every line first.
        for line_no, line in enumerate(in_file, 1):
            if not line.strip():
                # An empty line is not an error, just nothing to convert.
                continue
            try:
                dict_line = json.loads(line)
            except ValueError as err:
                # json.JSONDecodeError is a ValueError subclass.
                print("line %d: %s" % (line_no, err))
                continue
            if not isinstance(dict_line, dict):
                # DictWriter can only write mappings; a bare list or number
                # would otherwise crash later inside writerows().
                print("line %d: expected a JSON object, got %s"
                      % (line_no, type(dict_line).__name__))
                continue
            lista.append(dict_line)

    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by an empty row on platforms that write \r\n.
    with open(csv_file, 'w', newline='') as out_file:
        dict_writer = csv.DictWriter(out_file, header)
        dict_writer.writeheader()
        dict_writer.writerows(lista)
Note that if your real data has more fields that aren't included in your sample, you'll need to add those, too.
Note too that if your input data were a proper JSON array like:
[{"destination.fqdn": "194-65-57-128.ctt.pt", "feed.provider": "MyFeed", "source.abuse_contact": "coisas#foo.com", "raw": "bWFsd2FyZSwyMTAuMjguNTYuMSxodHRwOi8vd3d3LmN0dC5wdCAsMTk0LTY1LTU3LTEyOC5jdHQucHQsY29pc2FzQGZvby5jb20sMTk0LjIzOS4xNjcuNSx3d3cudmVyeWJhZC5jb20gLHZlcnkudmVyeWJhZC5jb20sLCwsMjAxMC0wMi0xOFQwMDowMDowMCswMDowMA0K", "feed.name": "FileCollector", "destination.geolocation.latitude": 32.2109, "destination.geolocation.cc": "CN", "source.geolocation.longitude": 12.069, "event_description.text": "ctt", "source.ip": "194.239.167.5", "source.geolocation.city": "Frederikssund", "destination.geolocation.city": "Zhenjiang", "destination.url": "http://www.ctt.pt", "classification.taxonomy": "malicious code", "source.url": "http://www.verybad.com", "source.fqdn": "very.verybad.com", "feed.url": "file://localhost/opt/intelmq/teste_ip_url_fqdn.csv", "feed.accuracy": 100.0, "time.observation": "2017-07-18T13:15:48+00:00", "destination.geolocation.longitude": 119.4551, "source.geolocation.latitude": 55.8396, "classification.type": "malware", "destination.ip": "210.28.56.1", "time.source": "2010-02-18T00:00:00+00:00", "source.geolocation.cc": "DK"},
{"destination.url": "http://www2.ctt.pt", "classification.taxonomy": "malicious code", "source.url": "http://www.telecom.pt", "feed.provider": "MyFeed", "time.observation": "2017-07-18T13:15:48+00:00", "destination.fqdn": "ctt-pt.mail.protection.outlook.com", "source.abuse_contact": "coisas7#foo.com", "source.geolocation.cc": "TN", "feed.url": "file://localhost/opt/intelmq/teste_ip_url_fqdn.csv", "raw": "YyZjLDI1MS4xNTQuNjUuOSxodHRwOi8vd3d3Mi5jdHQucHQsY3R0LXB0Lm1haWwucHJvdGVjdGlvbi5vdXRsb29rLmNvbSxjb2lzYXM3QGZvby5jb20sMTk3LjEzLjEwNS44LHd3dy50ZWxlY29tLnB0LCwsLCwyMDEwLTAyLTE4VDAwOjAwOjAwKzAwOjAwDQo=", "feed.name": "FileCollector", "classification.type": "c&c", "source.geolocation.latitude": 34.0, "source.geolocation.longitude": 9.0, "destination.ip": "251.154.65.9", "event_description.text": "ctt", "source.ip": "197.13.105.8", "time.source": "2010-02-18T00:00:00+00:00", "feed.accuracy": 100.0}]
then the solution simplifies even further: the whole initial `with open` block (the one that builds `lista` line by line) becomes just:
# The file holds a single JSON array, so one json.load() call parses it all;
# no per-line loop is needed.
with open(txt_file, 'r') as json_file:
    lista = json.load(json_file)