CSV files and dictionaries - Python

I'm trying to create a CSV file by iterating over other CSV files. The output CSV file is in a slightly different format to the input ones.
My question is, being quite new to Python, how would I go about doing this?
My input is something like this:
1.csv
"Street", "Number", "Occupants"
"Test Road", "7", "4"
"Test Street", "5", "1"
"Test Avenue, "2", "6"
2.csv
"Street", "Number", "Occupants"
"Test Road", "12", "2"
"Test Street", "11", "3"
"Test Avenue, "9", "2"
3.csv
"Street", "Number", "Occupants"
"Test Road", "34", "2"
"Test Street", "22", "3"
"Test Lane", "19", "2"
expected_output.csv
"", "Street", "1", "2", "3"
"Number", "Test Road", "7", "12", "34"
"", "Test Street", "5", "11", "22"
"", "Test Avenue", "2", "9", "N/A"
"", "Test Lane", "N/A", "N/A", "19"
"Occupants", "Test Road", "4", "2", "2"
"", "Test Street", "1", "3", "3"
"", "Test Avenue", "6", "2", "N/A"
"", "Test Lane", "N/A", "N/A", "2"
So across the top of the output I have Number/Occupants, Street, and then the number of the input CSV file (1.csv, 2.csv, 3.csv, etc.).
If a particular street is not in an input file, then the output should display N/A for the Number or Occupants.
I'm not sure what the best/fastest approach is: concatenating all the CSV files together first, maybe into a dictionary, and then doing one complex loop, or having several loops to create the output.

Based on our discussion in chat, here's a full program including some test files. You will only need to delete the labeled lines to use your own files.
#user configurable variables:
number_of_files = 4
#delete the following lines to use your own files.
f1 = open('1.csv','w')
f1.write('''"Street", "Number", "Occupants"
"Test Road", "7", "4"
"Test Street", "5", "1"
"Test Avenue", "2", "6"''')
f1.close()
f2 = open('2.csv','w')
f2.write('''"Street", "Number", "Occupants"
"Test Road", "12", "2"
"Test Street", "11", "3"
"Test Avenue", "9", "2"''')
f2.close()
f3 = open('3.csv','w')
f3.write('''"Street", "Number", "Occupants"
"Test Road", "34", "2"
"Test Street", "22", "3"
"Test Lane", "19", "2"''')
f3.close()
f4 = open('4.csv','w')
f4.write('''"Street", "Number", "Occupants"
"Test Road", "4", "7"
"Test Street", "1243", "6"
"Test Lane", "17", "1"''')
f4.close()
#stop deleting
#the rest 'should' take care of itself.
file_set = []
for y in range(number_of_files):
    # read each numbered CSV; strip the quotes and the space after each comma
    text = open(str(y + 1) + '.csv', 'r').read()
    rows = [x.split(',') for x in text.replace('"', '').replace(', ', ',').split('\n')]
    file_set.append(rows)
num_files = len(file_set)
data_dict = {}
for block, file in enumerate(file_set):
    for line in file:
        if line[0] not in data_dict:
            # one ['N/A', 'N/A'] placeholder per input file
            data_dict[line[0]] = [['N/A', 'N/A'] for _ in range(num_files)]
        data_dict[line[0]][block] = line[1:]
print(data_dict)
f0 = open('output.csv', 'w')
f0.write(',Street')
for x in range(num_files):
    f0.write(',' + str(x + 1))
f0.write('\n')
temp = 0
for key in data_dict:
    if key != 'Street':
        if temp == 0:
            f0.write('Number,' + key)
            temp = 1
        else:
            f0.write(',' + key)
        for x in range(num_files):
            f0.write(',' + data_dict[key][x][0])
        f0.write('\n')
temp = 0
for key in data_dict:
    if key != 'Street':
        if temp == 0:
            f0.write('Occupants,' + key)
            temp = 1
        else:
            f0.write(',' + key)
        for x in range(num_files):
            f0.write(',' + data_dict[key][x][1])
        f0.write('\n')
f0.close()
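As a side note, the stdlib csv module handles the quoting for you; a minimal sketch of the same read step (same 1.csv, 2.csv, ... naming assumed) could replace the string-mangling above:
import csv

file_set = []
for y in range(number_of_files):
    with open(str(y + 1) + '.csv', newline='') as f:
        # skipinitialspace swallows the space after each comma in '"a", "b"'
        reader = csv.reader(f, skipinitialspace=True)
        file_set.append([row for row in reader if row])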
Enjoy, and have a great day.

Related

How to write to MS Word from a template?

I have an MS Word template, sample.docx, like this:
{{id1}} some text {{name1}} text again {{password1}}
{{id2}} some text {{name2}} text again {{password2}}
{{id3}} some text {{name3}} text again {{password3}}
and this list of dicts in my code:
list_data = [
    {"id": "1", "name": "cat", "password": "123"},
    {"id": "2", "name": "john", "password": "321"},
    {"id": "3", "name": "mike", "password": "555"},
    {"id": "1", "name": "who is this", "password": "342"},
    {"id": "2", "name": "some", "password": "67332"},
    {"id": "3", "name": "horse", "password": "qwerty"},
    {"id": "1", "name": "sone n", "password": "some pass n"},
    {"id": "2", "name": "some n", "password": "some pass n"},
    {"id": "3", "name": "some n", "password": "some pass n"},
]
Code:
from docxtpl import DocxTemplate

context = {}
doc = DocxTemplate("sample.docx")
for i in range(len(list_data)):
    for data in list_data:
        if i % 3 == 0:
            context['id' + data['id']] = data['id']
            context['name' + data['id']] = data['name']
            context['password' + data['id']] = data['password']
    doc.render(context)
    doc.save(f"{i}_output.docx")
This code gives the following result:
0_output.docx:
1 some text who is this text again 342
2 some text some text again 67332
3 some text horse text again qwerty
and 3_output.docx has the same content as 0_output.docx.
How can I get this result instead?
0_output.docx:
1 some cat who is this text again 123
2 some john some text again 321
3 some mike some text again 555
3_output.docx:
1 some text who is this text again 342
2 some text some text again 67332
3 some text horse text again qwerty
etc ....
Try this (Python 3.x):
from docxtpl import DocxTemplate

list_data = [
    {"id": "1", "name": "cat", "password": "123"},
    {"id": "2", "name": "john", "password": "321"},
    {"id": "3", "name": "mike", "password": "555"},
    {"id": "1", "name": "who is this", "password": "342"},
    {"id": "2", "name": "some", "password": "67332"},
    {"id": "3", "name": "horse", "password": "qwerty"},
    {"id": "1", "name": "sone n", "password": "some pass n"},
    {"id": "2", "name": "some n", "password": "some pass n"},
    {"id": "3", "name": "some n", "password": "some pass n"},
]
cols = ['id', 'name', 'password']
for i in range(len(list_data)):
    if i % 3 == 0:
        doc = DocxTemplate("sample.docx")
        context = {}
        for col in cols:
            context[f'{col}1'] = list_data[i][col]
            context[f'{col}2'] = list_data[i + 1][col]
            context[f'{col}3'] = list_data[i + 2][col]
        doc.render(context)
        doc.save(f"{i}_output.docx")
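The fix is that a fresh DocxTemplate and context are built for each group of three, so every render starts from an unrendered template (a rendered doc object won't substitute placeholders a second time). If you prefer, the loop can also step through the data in chunks directly; a small equivalent sketch, assuming len(list_data) is a multiple of 3:
from docxtpl import DocxTemplate

for i in range(0, len(list_data), 3):
    doc = DocxTemplate("sample.docx")   # fresh, unrendered template per document
    context = {}
    for n, rec in enumerate(list_data[i:i + 3], start=1):
        for col in ('id', 'name', 'password'):
            context[f'{col}{n}'] = rec[col]
    doc.render(context)
    doc.save(f"{i}_output.docx")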

Parsing JSON in Python and Converting to Excel

I am attempting to parse this JSON, convert it to Excel, and print out only certain columns using Python, but I keep getting a TypeError: string indices must be integers. The JSON file repeats what is inside "d" over and over again.
JSON:
{
  "d": [
    {
      "__type": "New Cust",
      "Description": "TV Purchase",
      "End": "/Date(1624962600000)/",
      "ID": 1,
      "Resources": [
        {
          "Available": true,
          "Key": "12345",
          "Text": "John Doe",
          "Type": "SalesProvider",
          "CalendarID": "1234"
        }
      ],
      "Subject": "Meeting with Sam",
      "Visible": true,
      "AppStatus": "5",
      "StartTime": "06/01/2021 10:30:00",
      "AppID": "1",
      "ServiceID": "7",
      "NotesCount": "0",
      "CustomerID": "1",
      "AppType": "NNR",
      "IsEditedThis": "False",
      "BusinessPackageID": "0",
      "PopupNotesCount": "0",
      "EventType": "1",
      "SPName": "John Doe",
      "SPID": "12345",
      "EventCapacity": "0",
      "IsPaid": "False",
      "IsWorkShop": "False",
      "FormStatus": "0",
      "PaidType": "1",
      "AppComment": "",
      "CustName": "Sam Will",
      "ResourceID": "",
      "CssClass": "rsCategoryBlue",
      "ServiceName": "TV Sale",
      "NoOfAttendees": null,
      "StreamingNoOfAttendees": null,
      "StreamingStatus": "0",
      "StreamingEventCapacity": "",
      "Photo": "",
      "PersonalOffType": null,
      "ResourceName": null,
      "IsShowCheckIn": false,
      "PaymentStatus": 0
    },
    {
      "__type": "New Cust",
      "Description": "Receiver Purchase",
      "End": "/Date(1624962600000)/",
      "ID": 1,
      "Resources": [
        {
          "Available": true,
          "Key": "12345",
          "Text": "John Doe",
          "Type": "SalesProvider",
          "CalendarID": "1234"
        }
      ],
      "Subject": "Meeting with Bill",
      "Visible": true,
      "AppStatus": "5",
      "StartTime": "07/02/2021 9:30:00",
      "AppID": "1",
      "ServiceID": "7",
      "NotesCount": "0",
      "CustomerID": "1",
      "AppType": "NNR",
      "IsEditedThis": "False",
      "BusinessPackageID": "0",
      "PopupNotesCount": "0",
      "EventType": "1",
      "SPName": "John Doe",
      "SPID": "12345",
      "EventCapacity": "0",
      "IsPaid": "False",
      "IsWorkShop": "False",
      "FormStatus": "0",
      "PaidType": "1",
      "AppComment": "",
      "CustName": "Bill Tom",
      "ResourceID": "",
      "CssClass": "rsCategoryBlue",
      "ServiceName": "Audio Sale",
      "NoOfAttendees": null,
      "StreamingNoOfAttendees": null,
      "StreamingStatus": "0",
      "StreamingEventCapacity": "",
      "Photo": "",
      "PersonalOffType": null,
      "ResourceName": null,
      "IsShowCheckIn": false,
      "PaymentStatus": 0
    }
  ]
}
Python Code:
import json
import pandas as pd

f = open('JSON.txt', 'r')
data = json.loads(f.read())
l = []
for profile in data['d']:
    l.append(profile["Subject"]["StartTime"]["IsPaid"]["CustName"]["ServiceName"])
df1 = pd.DataFrame(l)
print(df1)
df1.to_excel('df1.xlsx')
I do not need the "Resources": [] info; I just need certain parameters outside it in the JSON object. I am having difficulty parsing the JSON, so any help would be appreciated.
You can use a combination of the json standard library and pd.json_normalize:
import json
import pandas as pd

# json.loads('JSON.txt') would try to parse the literal string 'JSON.txt';
# open the file and parse its contents instead
with open('JSON.txt') as f:
    parsed_json = json.load(f)
df = pd.json_normalize(parsed_json['d'], record_path='Resources')
print(df)
Available Key Text Type CalendarID
0 True 12345 John Doe SalesProvider 1234
1 True 12345 John Doe SalesProvider 1234
Then pass it to Excel:
df.to_excel(..., index=False)
Going back to your issue: it seems like you're trying to grab a bunch of fields by chaining indexes, when in reality each index after the first is applied to a single value (a plain string).
print(data['d'][0]['Subject'])
'Meeting with Sam'
A string has no nested items, so you'll naturally get an error. The error TypeError: string indices must be integers is telling you that you can only index this object with integers, e.g.:
#reversing the string.
print(data['d'][0]['Subject'][::-1])
maS htiw gniteeM
or
#first two characters of the string.
print(data['d'][0]['Subject'][:2])
Me
If you want to grab only a subset of columns from the top level, you could do:
cols = ['Subject', 'StartTime', 'IsPaid', 'CustName', 'ServiceName']
df = pd.json_normalize(parsed_json['d'])[cols]
print(df)
Subject StartTime IsPaid CustName ServiceName
0 Meeting with Sam 06/01/2021 10:30:00 False Sam Will TV Sale
1 Meeting with Bill 07/02/2021 9:30:00 False Bill Tom Audio Sale
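Putting it together, a minimal end-to-end sketch (assuming the input really lives in JSON.txt and df1.xlsx is the desired output name):
import json
import pandas as pd

cols = ['Subject', 'StartTime', 'IsPaid', 'CustName', 'ServiceName']

with open('JSON.txt') as f:
    parsed_json = json.load(f)

# flatten the records under "d" and keep only the wanted columns
df = pd.json_normalize(parsed_json['d'])[cols]
df.to_excel('df1.xlsx', index=False)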

JSON File Merging With Primary Keys in Python

My code was working as of yesterday, but today I ran the script and started getting this error saying I can't use strings to index my JSON data anymore. As I understand it, I am already iterating through my dictionary entries, and it is valid JSON, so I should be able to access information in it like a dictionary with string keys instead of just integer indices:
S:\Scripts\ZACH\DB MERGE>python jsonMerge.py
Beginning Merge...
Traceback (most recent call last):
  File "jsonMerge.py", line 42, in <module>
    if intResult['SCH_NAME'] == extResult['SCH_NAME'] and intResult['SCH_CITY'] == extResult['SCH_CITY'] :
TypeError: list indices must be integers or slices, not str
I'm merging 2 validated JSON files like this:
[{"SCH_ID": "13554", "SCH_NAME": "100 Mile House Elementary", "SCH_ADDR": "Box 460, 145 North Birch", "SCH_CITY": "100 Mile House", "SCH_PROV": "BC", "SCH_PCODE": "V0K 2E0", "SCH_PHONE": "(250)395-2258", "SCH_FAX": "(250)395-3621", "SCH_DIST": "1027", "SCH_TYPE": "E", "SCH_P_REP": "FB", "SCH_G_REP": "", "SCH_P_COM": "LOC", "SCH_G_COM": "", "SCH_REBT": "10", "SCH_REBT2": "0", "SCH_P_CID": "23", "SCH_G_CID": "0", "SCH_P_CCD": "SR", "SCH_G_CCD": "", "DATE1": "", "DATE2": "", "PLAN1": "20G", "PLAN2": "2GR", "LNOPST": "FALSE"},{"SCH_ID": "16101", "SCH_NAME": "1 Step Ahead Preschool", "SCH_ADDR": "1340 Kingfisher Ave.", "SCH_CITY": "Kitimat", "SCH_PROV": "BC", "SCH_PCODE": "V8C 1G6", "SCH_PHONE": "(250)632-2288", "SCH_FAX": "", "SCH_DIST": "", "SCH_TYPE": "E", "SCH_P_REP": "FB", "SCH_G_REP": "", "SCH_P_COM": "P", "SCH_G_COM": "", "SCH_REBT": "0", "SCH_REBT2": "0", "SCH_P_CID": "23", "SCH_G_CID": "0", "SCH_P_CCD": "SR", "SCH_G_CCD": "", "DATE1": "", "DATE2": "", "PLAN1": "200", "PLAN2": "0", "LNOPST": "FALSE"},{"SCH_ID": "16736", "SCH_NAME": "1st Step Montessori", "SCH_ADDR": "8884 Russell Drive", "SCH_CITY": "Delta", "SCH_PROV": "BC", "SCH_PCODE": "V4C 4P8", "SCH_PHONE": "(604)417-3290", "SCH_FAX": "", "SCH_DIST": "1037", "SCH_TYPE": "E", "SCH_P_REP": "GF", "SCH_G_REP": "", "SCH_P_COM": "MWS", "SCH_G_COM": "", "SCH_REBT": "10", "SCH_REBT2": "0", "SCH_P_CID": "18", "SCH_G_CID": "0", "SCH_P_CCD": "SB", "SCH_G_CCD": "", "DATE1": "", "DATE2": "", "PLAN1": "20G", "PLAN2": "0", "LNOPST": "FALSE"},{"SCH_ID": "1959", "SCH_NAME": "150 Mile Elementary", "SCH_ADDR": "Box 259, 3081 Hwy. 97", "SCH_CITY": "150 Mile House", "SCH_PROV": "BC", "SCH_PCODE": "V0K 2G0", "SCH_PHONE": "(250)296-3356", "SCH_FAX": "(250)296-3291", "SCH_DIST": "1027", "SCH_TYPE": "E", "SCH_P_REP": "FB", "SCH_G_REP": "", "SCH_P_COM": "MWS", "SCH_G_COM": "", "SCH_REBT": "10", "SCH_REBT2": "0", "SCH_P_CID": "23", "SCH_G_CID": "0", "SCH_P_CCD": "SR", "SCH_G_CCD": "", "DATE1": "9/12/2018", "DATE2": "10/30/2018", "PLAN1": "2GS", "PLAN2": "2GR", "LNOPST": "FALSE"}]
and:
[{"District Number": "82", "School Code": "8297024", "SCH_NAME": "Na Aksa Gyilak'yoo", "SCH_ADDR": "PO Box 544 STN Main", "SCH_CITY": "Terrace", "SCH_PROV": "BC", "SCH_PCODE": "V8G 4B5", "Principal Title": "Mrs", "Principal First name": "Colleen", "Principal Last Name": "Austin", "School Type": "Standard School", "Grade Range": "K-12", "School Category": "Independent School", "Funding Group(s)": "2", "NLC: Early Learning": "no", "NLC: Afterschool": "no", "NLC: Cont. Ed.": "no", "NLC: Seniors": "no", "NLC: Comm. Sport": "no", "NLC: Comm. Use": "no", "NLC: Integr. Svcs.": "no", "SCH_PHONE": "(250)615-2844", "SCH_FAX": "(250)615-2833", "Email": "kalumteacher#gmail.com", "Enrolment Total": "80", "Enrolment As Of": "September 30 2018", "KH Enrolment": "1", "KF Enrolment": "11", "HS Registration": "0", "SU Enrolment": "0", "EU Enrolment": "0", "Grade 1 Enrolment": "2", "Grade 2 Enrolment": "8", "Grade 3 Enrolment": "4", "Grade 4 Enrolment": "5", "Grade 5 Enrolment": "4", "Grade 6 Enrolment": "8", "Grade 7 Enrolment": "4", "Grade 8 Enrolment": "9", "Grade 9 Enrolment": "5", "Grade 10 Enrolment": "11", "Grade 11 Enrolment": "3", "Grade 12 Enrolment": "5"}]
using SCH_NAME and SCH_CITY as the primary keys:
with open(extFile, 'r') as extF:
    #Iterate through every entry
    for extLine in extF:
        hasMatched = False
        #load line
        extResult = json.loads(extLine)
        #print ("Checking: " + intResult['SCH_NAME'] + '\n')
        #Set context as the external result to start us off
        #contextLine = extResult
        with open(intFile, 'r') as intF:
            #Iterate through every entry
            for intLine in intF:
                #Load line
                intResult = json.loads(intLine)
                #print ("Matching: " + extResult['SCH_NAME'] + '\n')
                #Check if rows match
                if intResult['SCH_NAME'] == extResult['SCH_NAME'] and intResult['SCH_CITY'] == extResult['SCH_CITY']:
                    #We have a match
                    hasMatched = True
Can anyone help shine a light on what might be going wrong here?
Looking at the error and the two JSON files, they're coming in as lists instead of dicts. You may want to figure out why this happened, but you can do a quick workaround like so:
intResult = json.loads(intLine)[0]
Note: you may need to do the same for extResult.
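A word on why this happens: each file is a single JSON array holding every record, so json.loads on the one line returns the whole list, and the [0] workaround only ever sees the first record. A sketch that parses each file once and iterates over the lists instead (same matching logic, assuming the files stay single-array JSON):
import json

with open(extFile) as extF:
    ext_records = json.load(extF)   # list of dicts, one per school
with open(intFile) as intF:
    int_records = json.load(intF)

for extResult in ext_records:
    hasMatched = False
    for intResult in int_records:
        if (intResult['SCH_NAME'] == extResult['SCH_NAME']
                and intResult['SCH_CITY'] == extResult['SCH_CITY']):
            hasMatched = True   # we have a match
If the files are large, building a dict keyed by (SCH_NAME, SCH_CITY) from one file first would turn the inner loop into a single O(1) lookup.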

How to delete one extra brace from a JSON file in Python?

I have an extra closing brace } at the end of a big JSON file, and I need to remove it using Python:
{
  "layers": {
    "frame": {
      "frame.interface_id": "0",
      "frame.encap_type": "127",
      "frame.time": "Oct 10, 2017 18:05:51.620568000 Central European Daylight Time",
      "frame.offset_shift": "0.000000000",
      "frame.time_epoch": "1507651551.620568000",
      "frame.time_delta": "0.324011000",
      "frame.time_delta_displayed": "0.324011000",
      "frame.time_relative": "29.248970000",
      "frame.number": "38",
      "frame.len": "64",
      "frame.cap_len": "64",
      "frame.marked": "0",
      "frame.ignored": "0",
      "frame.protocols": "wpan:6lowpan:ipv6:ipv6.hopopts:udp:data",
      "frame.coloring_rule.name": "UDP",
      "frame.coloring_rule.string": "udp"
    },
    "wpan": {
      "wpan.frame_length": "66",
      "wpan.fcf": "0x0000dc41",
      "wpan.fcf_tree": {
        "wpan.frame_type": "0x00000001",
        "wpan.security": "0",
        "wpan.pending": "0",
        "wpan.ack_request": "0",
        "wpan.pan_id_compression": "1",
        "wpan.seqno_suppression": "0",
        "wpan.ie_present": "0",
        "wpan.dst_addr_mode": "0x00000003",
        "wpan.version": "1",
        "wpan.src_addr_mode": "0x00000003"
      },
      "wpan.seq_no": "8",
      "wpan.dst_pan": "0x0000abcd",
      "wpan.dst64": "00:21:2f:3c:c6:b5:00:01",
      "wpan.src64": "00:21:2f:3c:c6:b5:00:7e",
      "wpan.fcs_ok": "1"
    },
    "6lowpan": {
      "IPHC Header": {
        "6lowpan.pattern": "0x00000003",
        "6lowpan.iphc.tf": "0x00000003",
        "6lowpan.iphc.nh": "0",
        "6lowpan.iphc.hlim": "0x00000002",
        "6lowpan.iphc.cid": "1",
        "6lowpan.iphc.sac": "1",
        "6lowpan.iphc.sam": "0x00000003",
        "6lowpan.iphc.m": "0",
        "6lowpan.iphc.dac": "1",
        "6lowpan.iphc.dam": "0x00000003",
        "6lowpan.iphc.sci": "0x00000000",
        "6lowpan.iphc.dci": "0x00000000"
      },
      "6lowpan.next": "0x00000000",
      "6lowpan.src": "::221:2f3c:c6b5:7e",
      "6lowpan.dst": "::221:2f3c:c6b5:1"
    },
    "ipv6": {
      "ipv6.version": "6",
      "ip.version": "6",
      "ipv6.tclass": "0x00000000",
      "ipv6.tclass_tree": {
        "ipv6.tclass.dscp": "0",
        "ipv6.tclass.ecn": "0"
      },
      "ipv6.flow": "0x00000000",
      "ipv6.plen": "39",
      "ipv6.nxt": "0",
      "ipv6.hlim": "64",
      "ipv6.src": "::221:2f3c:c6b5:7e",
      "ipv6.addr": "::221:2f3c:c6b5:7e",
      "ipv6.src_host": "::221:2f3c:c6b5:7e",
      "ipv6.host": "::221:2f3c:c6b5:7e",
      "ipv6.dst": "::221:2f3c:c6b5:1",
      "ipv6.addr": "::221:2f3c:c6b5:1",
      "ipv6.dst_host": "::221:2f3c:c6b5:1",
      "ipv6.host": "::221:2f3c:c6b5:1",
      "Source GeoIP: Unknown": "",
      "Destination GeoIP: Unknown": "",
      "ipv6.hopopts": {
        "ipv6.hopopts.nxt": "17",
        "ipv6.hopopts.len": "0",
        "ipv6.hopopts.len_oct": "8",
        "ipv6.opt": {
          "ipv6.opt.type": "99",
          "ipv6.opt.type_tree": {
            "ipv6.opt.type.action": "1",
            "ipv6.opt.type.change": "1",
            "ipv6.opt.type.rest": "0x00000003"
          },
          "ipv6.opt.length": "4",
          "ipv6.opt.rpl.flag": "0x00000000",
          "ipv6.opt.rpl.flag_tree": {
            "ipv6.opt.rpl.flag.o": "0",
            "ipv6.opt.rpl.flag.r": "0",
            "ipv6.opt.rpl.flag.f": "0",
            "ipv6.opt.rpl.flag.rsv": "0x00000000"
          },
          "ipv6.opt.rpl.instance_id": "0x0000001e",
          "ipv6.opt.rpl.sender_rank": "0x00000200"
        }
      }
    },
    "udp": {
      "udp.srcport": "30002",
      "udp.dstport": "3000",
      "udp.port": "30002",
      "udp.port": "3000",
      "udp.length": "31",
      "udp.checksum": "0x00007ca5",
      "udp.checksum.status": "2",
      "udp.stream": "17"
    },
    "data": {
      "data.data": "2f:14:02:15:20:ed:1a:05:02:40:29:5c:ab:41:cc:23:c7:42:10:d8:eb:41:45",
      "data.len": "23"
    }
  }
}
}
,
How could I remove it? I would be very grateful for any help.
First things first: having one extra closing brace means this is not valid JSON, so the best thing to do would be to cure the problem at the source. If this comes verbatim from some API, contact the tech staff; if it comes from your own code, fix it where the extra brace is introduced.
That being said, assuming your JSON is stored as a string data, removing the last closing brace is as simple as:
data = data.strip().rstrip("}")
If this is part of an automated process and you only sometimes have this extraneous brace, you can test before cleaning up:
if data.count("}") > data.count("{"):
    data = data.strip().rstrip("}")
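One caveat about the file shown above: it also ends with a stray comma after the extra brace, which rstrip("}") alone won't get past. A small sketch that trims both and then checks the result actually parses (assuming data holds the whole file as a string):
import json

data = data.strip().rstrip(",").rstrip()   # drop a trailing comma, if any
while data.count("}") > data.count("{"):   # drop extra closing braces,
    data = data[:-1].rstrip()              # assuming they sit at the very end

try:
    parsed = json.loads(data)              # verify the cleaned text is valid JSON
except json.JSONDecodeError as err:
    print("Still not valid JSON:", err)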

Best way to speed up the process of creating dictionaries when dealing with a large amount of netflow data in a .json file in Python?

I'm currently working on sorting netflow data in a JSON file based on end time. I'm placing all of this data into dictionaries whose keys are the end time, but only the hour and minute, so that multiple data values fall under one time. However, this is taking a bit long: no more than a few seconds, but that's still too long. What's a good way to improve the big O of this? What I'm doing right now is going through the file line by line, extracting the end times, and creating an empty dictionary (where the values are empty sets) whose keys are the hour/minute of the end time. Then I go through the dictionary and, for each key, add the lines whose end time matches that key to its set.
Edit: Here is a sample of the JSON data; the following is one line of it. The files I'm working with are close to 300,000 lines.
{
  "#timestamp": "2015-05-18T19:26:08.000Z",
  "netflow": {
    "version": "9",
    "flow_seq_num": "188185",
    "flowset_id": "257",
    "last_switched": "2015-05-15T14:28:02.999Z",
    "first_switched": "2015-05-15T14:27:38.999Z",
    "in_bytes": "71",
    "in_pkts": "1",
    "input_snmp": "5",
    "output_snmp": "4",
    "ipv4_src_addr": "192.1.44.133",
    "ipv4_dst_addr": "10.10.1.4",
    "protocol": "6",
    "src_tos": "0",
    "dst_tos": "2",
    "l4_src_port": "12373",
    "l4_dst_port": "80",
    "flow_sampler_id": "0",
    "ipv4_next_hop": "10.10.1.5",
    "dst_mask": "2",
    "src_mask": "31",
    "tcp_flags": "6",
    "direction": "0"
  },
  "#version": "1",
  "host": "192.168.19.202",
  "src_host_name": "",
  "dst_host_name": "",
  "app_name": "",
  "tcp_flags_str": "",
  "dscp": "",
  "highval": "",
  "src_blacklisted": "0",
  "dst_blacklisted": "0",
  "invalid_ToS": "0",
  "bytes_per_packet": 71,
  "tcp_nominal_payload": "0",
  "malformed_ip": "0",
  "empty_tcp": "0",
  "short_tcp_handshake": "0",
  "icmp_malformed_packets": "0",
  "snort_attack_flow": "0",
  "empty_udp": "0",
  "short_udp": "0",
  "short_tcp_rstack": "0",
  "short_tcp_pansf": "0",
  "short_tcp_synack": "0",
  "short_tcp_synrst": "0",
  "short_tcp_finack": "0",
  "short_tcp_pna": "0",
  "non_unicast_src": "0",
  "multicast": "0",
  "broadcast": "0",
  "network": "0",
  "tcp_urg": "0",
  "land_attack": "0",
  "short_tcp_ack": "0",
  "tcp_synfin": "0",
  "tcp_fin": "0",
  "malformed_tcp": "1",
  "tcp_xmas": "0",
  "udp_echo_req": "0",
  "tcp_null": "0",
  "tcp_syn": "0",
  "malformed_udp": "0",
  "tcp_rst": "0",
  "icmp_request": "0",
  "icmp_response": "0",
  "icmp_port_unreachable": "0",
  "icmp_host_unreachable": "0",
  "icmp_unreachable_for_Tos": "0",
  "icmp_network_unreachable": "0",
  "icmp_redirects": "0",
  "icmp_time_exceeded_flows": "0",
  "icmp_parameter_problem_flows": "0",
  "icmp_trace_route": "0",
  "icmp_datagram": "0",
  "udp_echo_chargen_broadcast": "0",
  "udp_chargen_echo_broadcast": "0",
  "icmp_src_quench": "0",
  "icmp_proto_unreachable": "0",
  "udp_echo_broadcast": "0",
  "udp_echo_rsp": "0"
}
As for code I have tried: right now I'm just converting these lines into dictionaries with json.loads to access the different values I'm sorting by. What kind of data structure is best for organizing this kind of thing? I'm using a dictionary for now, but is there a better one?
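For what it's worth, the grouping described above can be done in a single pass with collections.defaultdict, so each line is handled exactly once instead of the file being traversed twice. A sketch, assuming one JSON object per line, a hypothetical netflow.json filename, and that netflow.last_switched is the end time:
import json
from collections import defaultdict

groups = defaultdict(list)   # "HH:MM" end time -> lines that ended then

with open('netflow.json') as f:   # hypothetical filename
    for line in f:
        record = json.loads(line)
        # "2015-05-15T14:28:02.999Z" -> "14:28"
        end_hhmm = record['netflow']['last_switched'][11:16]
        groups[end_hhmm].append(line)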
