Parsing text file after certain word

Parsing text file after certain word - python

I would like to parse the following file and get the values after the "ID" and "Label":
{"data" : [{
"id" : "3743",
"fgColor" : "#000000",
"Comment" : [ "GLIO" ],
"Group" : "0",
"Shape" : "roundrectangle",
"GraphicsName" : "TITLE:Glioma",
"Matching_Attribute" : [ "TITLE:Glioma" ],
"Entry_id" : "78",
"Label" : "TITLE:Glioma",
"EntrezIDs" : "05214, ",
"shared_name" : "path:hsa05214",
"Type" : "map",
"kegg_x" : "86.0",
"kegg_y" : "58.0",
"bgColor" : "#FFFFFF",
"name" : "path:hsa05214",
"SUID" : 3743,
"Height" : "25",
"Width" : "92",
"Link" : "http://www.kegg.jp/dbget-bin/www_bget?hsa05214",
"selected" : false
}]}
I'm using the following code, but nothing is being written to the specified file:
import re
cyjs = open("/users/skylake/desktop/cyjs-example.txt", "r")
jsonfile = open("/users/skylake/desktop/jsonfile.txt", "w")
for line in cyjs:
if line.startswith('"id"'):
print(line)
jsonfile.write(line)
jsonfile.close()

There is a tool better suited to this problem.
This is a JSON file, which can be parsed with the built-in json module:
In [1]: import json
In [2]: with open("data.txt", "r") as f:
...: data = json.load(f)
In [3]: obj = data["data"][0]
In [4]: obj["id"]
Out[4]: u'3743'
In [5]: obj["Label"]
Out[5]: u'TITLE:Glioma'

Related

Exception when trying to parse large JSON file using ijson

I am trying to parse a large JSON file (16GB) using ijson but I always get the following error :
Exception has occurred: IncompleteJSONError
lexical error: invalid char in json text.
venue" : { "type" : NumberInt(0) }, "yea
(right here) ------^
File "C:\pyth\dblp_parser.py", line 14, in <module>
for record in ijson.items(f, 'item', use_float=True):
My code is as follows:
with open("dblpv13.json", "rb") as f:
for record in ijson.items(f, 'records.item', use_float=True):
paper_id = record["_id"] #_id is only for test
paper_id_tab.append(paper_id)
A part of my json file is as follows:
{
"_id" : "53e99784b7602d9701f3f636",
"title" : "Flatlined",
"authors" : [
{
"_id" : "53f58b15dabfaece00f8046d",
"name" : "Peter J. Denning",
"org" : "ACM Education Board",
"gid" : "5b86c72de1cd8e14a3c2b772",
"oid" : "544bd99545ce266baef0668a",
"orgid" : "5f71b2811c455f439fe3c58a"
}
],
"venue" : {
"_id" : "555036f57cea80f954169e28",
"raw" : "Commun. ACM",
"raw_zh" : null,
"publisher" : null,
"type" : NumberInt(0)
},
"year" : NumberInt(2002),
"keywords" : [
"linear scale",
"false dichotomy"
],
"n_citation" : NumberInt(7),
"page_start" : "15",
"page_end" : "19",
"lang" : "en",
"volume" : "45",
"issue" : "6",
"issn" : "",
"isbn" : "",
"doi" : "10.1145/508448.508463",
"pdf" : "",
"url" : [
"http://doi.acm.org/10.1145/508448.508463"
],
"abstract" : "Our propensity to create linear scales between opposing alternatives creates false dichotomies that hamper our thinking and limit our action."
},
I tried to read the records item by item, but I always get the same error. I'm completely blocked.
Please, can anybody help me?

The same problem happened to me with this dataset: ijson can't handle it, because NumberInt(...) is not valid JSON. I worked around the problem by creating a cleaned copy of the dataset and then parsing that copy with ijson. The approach is quite simple: read the original dataset line by line, remove "NumberInt(" and ")", and write the result to a new JSON file. The code is given below.
f=open('dblpv13_clean.json')
with open('dblpv13.json','r',errors='ignore') as myFile:
for line in myFile:
line=line.replace("NumberInt(","").replace(")","")
f.write(line)
f.close()
Now you can parse the new dataset with ijson as follows.
with open('dblpv13_clean.json', "r",errors='ignore') as f:
for i, element in enumerate(ijson.items(f, "item")):
do something....

how to read nested lists information from a Json file using Python

Here is a part of my JSON file, and I want to read "information" under "runs" -> "results" -> "properties".
I am trying the following:
with open(inputFile, "r") as readFile:
data = json.load(readFile)
print(type(data))
print("Run data type is: ",type(data['runs']))
#print("properties data type is: ", type(data['runs']['properties']))
# error: print("results data type is: ", type(data['runs']['properties']))TypeError: list indices must be integers or slices, not str
for info in data['runs']:
res = info.get('results',{})
#res = info.get('results', {}).get('properties', None)
#Error: AttributeError: 'list' object has no attribute 'get'
#inf = info.get('properties')
print(res)
None of the parts that I have commented out work, and I have also added the error messages they produce.
How can I read "information" in a loop?
{
"$schema" : "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.4.json",
"version" : "2.1.0",
"runs" : [ {
"tool" : { ...},
"artifacts" : [ ...],
"results" : [ {
"ruleId" : "DECL_MISMATCH",
"ruleIndex" : 0,
"message" : {
"text" : "XXXXX"
},
"level" : "error",
"baselineState" : "unchanged",
"rank" : 100,
"kind" : "fail",
"properties" : {
"tags" : [ "databaseId", "metaFamily", "family", "group", "information", "severity", "status", "comment", "justified", "assignedTo", "ticketKey", "color" ],
"databaseId" : 54496,
"metaFamily" : "Defect",
"family" : "Defect",
"group" : "Programming",
"information" : "Impact: High",
"severity" : "Unset",
"status" : "Unreviewed",
"comment" : "",
"justified" : false,
"color" : "RED"
},
"locations" : [ {
"physicalLocation" : {
"artifactLocation" : {
"index" : 0
}
},
"logicalLocations" : [ {
"fullyQualifiedName" : "File Scope",
"kind" : "function"
} ]
} ]
} ]
} ]
}

Because the key "properties" is inside a list, you have to specify an index to reach it. In the JSON you've posted, the index can be 0, so the code should probably look like this:
with open(inputFile, "r") as readFile:
data = json.load(readFile)
print(type(data))
print("Run data type is: ",type(data['runs']))
#print("properties data type is: ", type(data['runs']['properties']))
# error: print("results data type is: ", type(data['runs']['properties']))TypeError: list indices must be integers or slices, not str
for info in data['runs']:
# res = info.get('results',{})
res = info.get('results', {})[0].get('properties', None)
#inf = info.get('properties')
print(res)

for run in data['runs']:
for result in run['results']:
properties = result['properties']
print("information = {}".format(properties['information']))

Reading nested dictionary-like txt file into a Pandas dataframe

I'm fairly new to Python and haven't had much success with the following.
I have a txt file with data formatted as follows:
{
"$type" : "TableInstance",
"$version" : 1,
"Instance" : "InstanceName",
"ColumnAliases" : [ "", "", ],
"ColumnNames" : [ "keyName", "dateName"],
"ColumnData" : [ {
"type" : "ColumnData1",
"Strings" : [key1, key2],]
}, {
"type" : "ColumnData2",
"Strings" : [date1, date2]}]
}
I would like to read it into a DataFrame so that it is formatted as:
[ keyName dateName
key1 date1
key2 date1 ]
Is there a simple way to do this?

does this work for you?
dict = {
"$type" : "TableInstance",
"$version" : 1,
"Instance" : "InstanceName",
"ColumnAliases" : [ "", "", ],
"ColumnNames" : [ "keyName", "dateName"],
"ColumnData" : [ {
"type" : "ColumnData1",
"Strings" : ['key1', 'key2']
}, {
"type" : "ColumnData2",
"Strings" : ['date1', 'date2']}]
}
df = pd.DataFrame({dict['ColumnNames'][0]:dict['ColumnData'][0]['Strings'], dict['ColumnNames'][1]:dict['ColumnData'][1]['Strings']})

It looks like you stored a serialized Python object in the file. If so, you can deserialize the object with the help of pickle and then parse it according to your requirements.
import pickle
import pandas as pd
filePath = 'test.txt'
obj = pd.read_pickle(filePath)
#obj = pickle.load(open(filePath, "rb"))
df = pd.DataFrame({obj['ColumnNames'][0]:obj['ColumnData'][0]['Strings'], obj['ColumnNames'][1]:obj['ColumnData'][1]['Strings']})

How to extract data from json into a string

I am not able to extract the "Data" value "12639735;7490484;3469776;9164745;650;0"
from this file using Python.
In PHP it's a piece of cake for me, but I cannot master it in Python.
Other answers on Stack Exchange didn't give me the answer.
Here is the contents of the file test.json:
{
"ActTime" : 1494535483,
"ServerTime" : "2017-05-11 22:44:43",
"Sunrise" : "05:44",
"Sunset" : "21:14",
"result" : [
{
"AddjMulti" : 1.0,
"AddjMulti2" : 1.0,
"AddjValue" : 0.0,
"AddjValue2" : 0.0,
"BatteryLevel" : 255,
"Counter" : "20130.221",
"CounterDeliv" : "12634.521",
"CounterDelivToday" : "0.607 kWh",
"CounterToday" : "1.623 kWh",
"CustomImage" : 0,
"Data" : "12639735;7490484;3469776;9164745;650;0",
"Description" : "",
"Favorite" : 1,
"HardwareID" : 3,
"HardwareName" : "Slimme Meter",
"HardwareType" : "P1 Smart Meter USB",
"HardwareTypeVal" : 4,
"HaveTimeout" : false,
"ID" : "1",
"LastUpdate" : "2017-05-11 22:44:39",
"Name" : "Elektriciteitsmeter",
"Notifications" : "false",
"PlanID" : "0",
"PlanIDs" : [ 0 ],
"Protected" : false,
"ShowNotifications" : true,
"SignalLevel" : "-",
"SubType" : "Energy",
"SwitchTypeVal" : 0,
"Timers" : "false",
"Type" : "P1 Smart Meter",
"TypeImg" : "counter",
"Unit" : 1,
"Usage" : "650 Watt",
"UsageDeliv" : "0 Watt",
"Used" : 1,
"XOffset" : "0",
"YOffset" : "0",
"idx" : "1"
}
],
"status" : "OK",
"title" : "Devices"
}

This should work
import json
with open('test.json') as f:
contents = json.load(f)
print(contents['result'][0]['Data'])
Similar questions have been asked before: Parsing values from a JSON file using Python?

Got it.
url = "http://192.168.2.1:8080/json.htm?type=devices&rid=1"
response = urllib.urlopen(url)
str = json.loads(response.read())
for i in str["result"]:
datastring = i["Data"]
elementstring = i["Data"].split(';')
counter = 0
for j in elementstring:
if counter == 4:
usage = j
counter += 1
delivery = get_num(i["UsageDeliv"])

Python - get results of one item based on another in list

[
{
"account" : "",
"address" : "D8xWhR8LqSdSLTxRWwouQ3EiSnvcjLmdo6",
"category" : "send",
"amount" : -1000.00000000,
"fee" : -0.00000001,
"confirmations" : 716,
"blockhash" : "4569322b4c8c98fba3ef4c7bda91b53b4ee82d268eae2ff7658bc0d3753c00ff",
"blockindex" : 2,
"blocktime" : 1394242415,
"txid" : "45b629a779e6e0bf6d160c37833a27f1f2cc1bfa34632d166cccae83e69eb6fe",
"time" : 1394242259,
"timereceived" : 1394242259
},
{
"account" : "",
"address" : "DCPFe1fs7qScDFvoTexYqo95LmnJJkjmu7",
"category" : "receive",
"amount" : 0.13370000,
"confirmations" : 717,
"blockhash" : "e9024e177b42ca23fed741fb90c39836de5f9c722a93157e50df2e3f2c318d77",
"blockindex" : 26,
"blocktime" : 1394242412,
"txid" : "ce41b4c35b09ae582436b8138d62375840c32bd9ea0360457bd9f589012d2da3",
"time" : 1394242315,
"timereceived" : 1394242315
},
{
"account" : "",
"address" : "DCPFe1fs7qScDFvoTexYqo95LmnJJkjmu7",
"category" : "receive",
"amount" : 0.00100000,
"confirmations" : 692,
"blockhash" : "17eb2ef40b8bcb2ceb3d7f07d6545f03fc9bf41c8d28f759becd84a31e65e123",
"blockindex" : 14,
"blocktime" : 1394243788,
"txid" : "2b099fd0ce6239c5c3c69e2ba70669c3069858908e42b8ca970bf213e555d715",
"time" : 1394243669,
"timereceived" : 1394243669
},
{
"account" : "",
"address" : "DCPFe1fs7qScDFvoTexYqo95LmnJJkjmu7",
"category" : "send",
"amount" : -0.00100000,
"fee" : -2.00000000,
"confirmations" : 692,
"blockhash" : "17eb2ef40b8bcb2ceb3d7f07d6545f03fc9bf41c8d28f759becd84a31e65e123",
"blockindex" : 14,
"blocktime" : 1394243788,
"txid" : "2b099fd0ce6239c5c3c69e2ba70669c3069858908e42b8ca970bf213e555d715",
"time" : 1394243669,
"timereceived" : 1394243669
}
]
That ^^ is my data.
How can I see the "txid" entries where "category" == "receive"? I also wish to write them line by line to a file called "list.txt".
I'm not interested at all in where "category" == "send"
Thanks :-)
EDIT:
This is my code
with (open('text.json') as f:
data = json.load(f)
my_list = json.load(open("text.json"))
result = sum(item["category"] == "receive" for item in my_list)
i = 0
res = ""
while i < result:
res = data[i]['txid']
if data[i]['category'] == "receive":
with open ("list.txt", "a") as myfile:
myfile.write(res + "\n")
i += 1

import json
object_list = json.load(open('text.json'))
receive_txids = [(x['txid'] + '\n') for x in object_list if x['category'] == 'receive']
output_file = open("list.txt", "a")
output_file.writelines(receive_txids)
Edit, here's a longer but clearer (untested) version:
import json
with open('text.json') as json_file:
object_list = json.load(json_file)
receive_txids = []
for object in object_list:
if object['category'] == 'receive':
receive_txids.append(object['txid'])
with open("list.txt", "a") as output_file:
for txid in receive_txids:
output_file.write(txid + '\n')
Edit: Why am I even building up a list and then doing nothing much with it?
With no intermediate list, the terse version:
import json
object_list = json.load(open('text.json'))
output_file = open("list.txt", "a")
output_file.writelines([(x['txid'] + '\n') for x in object_list if x['category'] == 'receive'])
and the tidier, clearer version:
import json
with open('text.json') as json_file:
object_list = json.load(json_file)
with open("list.txt", "a") as output_file:
for object in object_list:
if object['category'] == 'receive':
output_file.writeline(object['txid'] + '\n')

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Parsing text file after certain word - python

Related

Exception when trying to parse large JSON file using ijson

how to read nested lists information from a Json file using Python

Reading nested dictionary-like txt file into a Pandas dataframe

How to extract data from json into a string

Python - get results of one item based on another in list

Categories

Resources