I have a log file which looks like this :
>>> 2017-08-02 08:51:45 +0200 [INFO] from com.sun.metro.assembler in application-siaServiceImplPort-context-362552 - MASM0007: No application metro.xml configuration file found.
>>> 2017-08-02 08:53:06 +0200 [INFO] from application in application-akka.actor.default-dispatcher-362046 - LOG_EVENT: {
"event" : "sxxxxxdd",
"ts" : "2017-xx
"svc" : "dxx.tlc-1",
"rexxxt" : {
"ts" : "2017-xxxx2:00",
"xx" : "73478c0f-dc70-46b7-a388-d12f7b8aa91e",
"xxxx" : "/xxx/xxx",
"xxx" : "POST",
"user_agent" : "xxx/6.2.1 xxxx/7.38.0 xxx/7.0xx16-1~xxx+8.1",
"user_id" : 39,
"xxx_ip" : "xxxx.1",
"xxxx" : "xxxxx",
"xx" : "xx",
"app_id" : "d4da4385a8204be2949ed62323231443",
"axxe" : "POxxkout"
},
"operation" : {
"scxe" : "checkout",
"rxxxlt" : {
"xxxus" : 2x0
}
},
"xx" : {
"xxx_id" : "CHTO06MLKXP9N",
"xxx_attributes" : {
"xx" : "2017xx6+02:00",
"date_xxxxx" : "2xx7-08xx53:06+02:00",
"xus" : "WAxING",
"dexxion" : "numx0",
"chaxxmount" : 2,
"chaxx_start" : "20x8xx+02:00",
"charge_max_count" : 1,
"merchant" : {
"xxx" : "xxxx",
"xxx" : "xxxxxxx",
"xx" : "xx-x xxxxxl.",
"logo" : "httxxxff0/258xxxjpeg",
"account_type" : "B"
},
"xx_xxx" : "xxxx",
"xxxx_xxx_url" : "https://xxx.xxx.xxx-pay.xx/xxx",
"xxx" : "xxxx",
"xxx" : "xx://dp.xx/uxx10/xxxx"
}
},
"cxx" : "xxxx"
}
The file has more lines, of course: single-line entries like the first example alternate with entries whose JSON spans multiple lines.
What I want to achieve is a script that reads the file and, whenever it reaches an entry that contains JSON, collapses that JSON onto a single line.
so it will be like:
>>> 2017-08-02 08:51:45 +0200 [INFO] from com.sun.metro.assembler in application-siaServiceImplPort-context-362552 - MASM0007: No application metro.xml configuration file found.
>>> 2017-08-02 08:53:06 +0200 [INFO] from application in application-akka.actor.default-dispatcher-362046 - LOG_EVENT: {the json here in 1 line}
I've tried, using python, the following :
infile = "/hoxxxx/application.log"
important = []
keep_phrases = "LOG_EVENT"
with open(infile) as f:
f = f.readlines()
for line in f:
if keep_phrases in line:
print(line)
important.append(line)
This returns the matching line, but of course it doesn't know where the JSON ends...
Any help?
Thanks
you could try something with regex and json
regex:
import re

with open(infile) as f:
    text = f.read()

# In the sample log every entry starts with ">>>", so a newline followed by any
# other character is treated as part of a multi-line JSON payload; dropping
# that newline folds the payload onto its entry's line.
print(re.sub(r'\n([^>])', r'\1', text))
output:
>>> 2017-08-02 08:51:45 +0200 [INFO] from com.sun.metro.assembler in application-siaServiceImplPort-context-362552 - MASM0007: No application metro.xml configuration file found.
>>> 2017-08-02 08:53:06 +0200 [INFO] from application in application-akka.actor.default-dispatcher-362046 - LOG_EVENT: {"event" : "sxxxxxdd","ts" : "2017-xx"svc" : "dxx.tlc-1","rexxxt" : {"ts" : "2017-xxxx2:00","xx" : "73478c0f-dc70-46b7-a388-d12f7b8aa91e","xxxx" : "/xxx/xxx","xxx" : "POST","user_agent" : "xxx/6.2.1 xxxx/7.38.0 xxx/7.0xx16-1~xxx+8.1","user_id" : 39,"xxx_ip" : "xxxx.1","xxxx" : "xxxxx","xx" : "xx","app_id" : "d4da4385a8204be2949ed62323231443","axxe" : "POxxkout"},"operation" : {"scxe" : "checkout","rxxxlt" : {"xxxus" : 2x0}},"xx" : {"xxx_id" : "CHTO06MLKXP9N","xxx_attributes" : {"xx" : "2017xx6+02:00","date_xxxxx" : "2xx7-08xx53:06+02:00","xus" : "WAxING","dexxion" : "numx0","chaxxmount" : 2,"chaxx_start" : "20x8xx+02:00","charge_max_count" : 1,"merchant" : {"xxx" : "xxxx","xxx" : "xxxxxxx","xx" : "xx-x xxxxxl.","logo" : "httxxxff0/258xxxjpeg","account_type" : "B"},"xx_xxx" : "xxxx","xxxx_xxx_url" : "https://xxx.xxx.xxx-pay.xx/xxx","xxx" : "xxxx","xxx" : "xx://dp.xx/uxx10/xxxx"}},"cxx" : "xxxx"}
if you want the jsons as python objects you can also do this:
import json
text2 = re.sub(r'\n([^>])', r'\1', text)
js = [json.loads(x) for x in re.findall(r'{.*}', text2)]
Related
I am upgrading from Python 2 to Python 3. This code works in Python 2 but not in Python 3: when I run it in Python 3, the output is not valid JSON, and literal \n and b' sequences are added to it.
I believe my Python 3 code is writing the JSON file incorrectly.
Code to extract json from web url:
def WebService_As_Source(Source_Id):
dst_path = SOURCECONFIG.GLOBAL_WorkPath
bdate = SOURCECONFIG.GLOBAL_DATE
print ("Extracting from Web Service...\t\t" + str(datetime.datetime.now()))
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
uid = 'stack' #save lan your userid ######################################################## enter UserId
pwd = 'overflow'#save your lan password ######################################################## enter Password
top_level_url = SOURCECONFIG.WebServices_URL(Source_Id)
password_mgr.add_password(None, top_level_url, uid, pwd)
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
# create "opener" (OpenerDirector instance)
opener = urllib.request.build_opener(handler)
# use the opener to fetch a URL
opener.open(top_level_url)
# Now all calls to urllib2.urlopen use our opener.
urllib.request.install_opener(opener)
req = urllib.request.Request(top_level_url)
response = urllib.request.urlopen(req)
data = response.readlines()
otf = open(os.path.join(dst_path + Source_Id+".json"), "w+") # open text file
rowcount = 0
for line in data:
rowcount = rowcount + 1
otf.write(str(line))
otf.close()
print(Source_Id +" json extracted.\t\t"+ str(datetime.datetime.now()))
Sample of my actual Json file python 3 produces:
b'[ {\n'b' "filterFlag" : "",\n'b' "lookup" : "",\n'b' "rule" : "",\n'b' "prefix" : "",\n'b' "validBDRAppName" : "",\n'b' "vendor" : {\n'b' "bookId" : "40302539",\n'b' "bookName" : "NYC",\n'b' "bookStatus" : "ACTIVE",\n'b' "commProductType" : "",\n'b' "businessDate" : "2019-08-05",\n'b' "endOfDay" : null,\n'b' "excludeFromAggregation" : "FALSE",\n'b' "geoLocation" : "",\n'b' "isHoliday" : "",\n'b' "isOSFIBook" : false,\n'b' "legalEntity" : "",\n'b' "location" : "",\n'b' "logicalDate" : "",\n'b' "regulatoryType" : "Trading",\n'b' "reportingLineBookName" : "NYC",\n'b' "reportingLinePathName" : "super/user",\n'b' "riskFilterType" : "USA",\n'b' "statusId" : "",\n'b' "transit" : "",\n'b' "l8n" : ""\n'b' },\n'b' "bdr" : {\n'b' "bookId" : "7447",\n'b' "bookName" : "NY",\n'b' "bookTransit" : "92218",\n'b' "bookStatus" : "ACTIVE",\n'b' "owner" : "",\n'b' "empId" : "",\n'b' "purpose" : "Trading",\n'b' "appName" : "STRATEGY",\n'b' "appCode" : "STRATEGY",\n'b' "transitDesc" : "TOR",\n'b' "appCategory" : "Front Office",\n'b' "bookAppId" : "49512",\n'b' "bookAppName" : "NY",\n'b' "deskName" : "USA",\n'b' "product" : "",\n'b' "asOfDate" : "2019-08-05",\n'b' "legalEntity" : "CANADA",\n'b' "bookAppSecondaryName" : "NY",\n'b' "strategy" : "NY",\n'b' "lhu" : "FCC3",\n'b' "masterBookName" : "NY"\n'b' }\n'b'}, {\n'b' "filterFlag" : "",\n'b' "lookup" : "",\n'b' "rule" : "",\n'b' "prefix" : "",\n'b' "validBDRAppName" : "",\n'b' "vendor" : {\n'b' "bookId" : "40296540",\n'b' "bookName" : "LDN",\n'b' "bookStatus" : "ACTIVE",\n'b' "commProductType" : "",\n'b' "businessDate" : "2019-08-05",\n'b' "endOfDay" : null,\n'b' "excludeFromAggregation" : "FALSE",\n'b' "geoLocation" : "",\n'b' "isHoliday" : "",\n'b' "isOSFIBook" : false,\n'b' "legalEntity" : "",\n'b' "location" : "",\n'b' "logicalDate" : "",\n'b' "regulatoryType" : "Trading",\n'b' "reportingLineBookName" : "LDN",\n'b' "reportingLinePathName" : "stack/overflow",\n'b' "riskFilterType" : "NONE",\n'b' "statusId" : "",\n'b' "transit" : 
"",\n'b' "l8n" : ""\n'b' },\n'b'
Sample of the JSON file that Python 2 produces:
I ran my code to grab the JSON from the URL in Python 2, and it gives me the data in actual JSON format, without adding the b''s and \n's.
[ {
"filterFlag" : "",
"lookup" : "",
"rule" : "",
"prefix" : "",
"validBDRAppName" : "",
"vendor" : {
"bookId" : "40302539",
"bookName" : "NYC",
"bookStatus" : "ACTIVE",
"commProductType" : "",
"businessDate" : "2019-08-06",
"endOfDay" : null,
"excludeFromAggregation" : "FALSE",
"geoLocation" : "",
"isHoliday" : "",
"isOSFIBook" : false,
"legalEntity" : "",
"location" : "",
"logicalDate" : "",
"regulatoryType" : "Trading",
"reportingLineBookName" : "NYC",
"reportingLinePathName" : "super/user",
"riskFilterType" : "USA",
"statusId" : "",
"transit" : "",
"l8n" : ""
},
"bdr" : {
"bookId" : "7447",
"bookName" : "NY",
"bookTransit" : "92218",
"bookStatus" : "ACTIVE",
"owner" : "",
"empId" : "",
"purpose" : "Trading",
"appName" : "STRATEGY",
"appCode" : "STRATEGY",
"transitDesc" : "TOR",
"appCategory" : "Front Office",
"bookAppId" : "49512",
"bookAppName" : "NY",
"deskName" : "USA",
"product" : "",
"asOfDate" : "2019-08-06",
"legalEntity" : "CANADA",
"bookAppSecondaryName" : "NY",
"strategy" : "NY",
"lhu" : "FCC3",
"masterBookName" : "NY"
}
}, {
"filterFlag" : "",
"lookup" : "",
"rule" : "",
"prefix" : "",
"validBDRAppName" : "",
"vendor" : {
"bookId" : "40296540",
"bookName" : "LDN",
"bookStatus" : "ACTIVE",
"commProductType" : "",
"businessDate" : "2019-08-06",
"endOfDay" : null,
"excludeFromAggregation" : "FALSE",
"geoLocation" : "",
"isHoliday" : "",
"isOSFIBook" : false,
"legalEntity" : "",
"location" : "",
"logicalDate" : "",
"regulatoryType" : "Trading",
"reportingLineBookName" : "LDN",
"reportingLinePathName" : "stack/overflow",
"riskFilterType" : "NONE",
"statusId" : "",
"transit" : "",
"l8n" : ""
can anyone help with this?
The problem is that response.readlines() (where response = urllib.request.urlopen(url)) returns a list of bytes. In python 2 bytes and str are the same thing, but on python 3 this is no longer true. So when you did
otf.write(str(line))
the str() call was a no-op on python 2, but on python 3 you called str on a bytes object. This is never what you want to do:
>>> import urllib
... resp = urllib.request.urlopen('https://stackoverflow.com')
... dat = resp.readlines()
... first_line = dat[0]
... print(type(first_line))
... print(repr(first_line))
... print(repr(str(first_line)))
<class 'bytes'>
b'<!DOCTYPE html>\r\n'
"b'<!DOCTYPE html>\\r\\n'"
As you can see, the first line is a bytes object, and str(first_line) is a string that literally starts with a b and some single quotes.
Instead what you have to do is decode your bytes according to its corresponding encoding. I'm not very familiar with web things so I don't know what the best way is to correctly guess the encoding used by the website you're making requests to, but I do know that the third-party requests library can give you a usually correctly decoded json directly from the response.
If with urllib you have to do the decoding manually you need something like
otf.write(line.decode('utf8'))
In Python 2, str was a byte string (text had a separate unicode type), so bytes and str were interchangeable there. Python 3 keeps bytes and str as distinct types, which is what the b'' prefix denotes.
This line
data = response.readlines()
could be
# Fall back to UTF-8 when the server sends no charset; keepends=True keeps the
# line endings, matching what readlines() returned.
data = response.read().decode(response.headers.get_content_charset() or 'utf-8').splitlines(keepends=True)
which should figure out the proper encoding, as per this answer
I am wondering what I am doing wrong when trying to print the data of name of the following code in python.
import urllib.request, json
with urllib.request.urlopen("<THIS IS A URL IN THE ORIGINAL SCRIPT>") as url:
data = json.loads(url.read().decode())
print (data['Departure']['Product']['name'])
print (data['Departure']['Stops']['Stop'][0]['depTime'])
And this is the api I am fetching the data from:
{
"Departure" : [ {
"Product" : {
"name" : "Länstrafik - Buss 201",
"num" : "201",
"catCode" : "7",
"catOutS" : "BLT",
"catOutL" : "Länstrafik - Buss",
"operatorCode" : "254",
"operator" : "JLT",
"operatorUrl" : "http://www.jlt.se"
},
"Stops" : {
"Stop" : [ {
"name" : "Gislaved Lundåkerskolan",
"id" : "740040260",
"extId" : "740040260",
"routeIdx" : 12,
"lon" : 13.530096,
"lat" : 57.298178,
"depTime" : "20:55:00",
"depDate" : "2019-03-05"
}
data["Departure"] is a list, and you are indexing into it like it's a dictionary.
You wrote the dictionary sample confusingly. Here's how I think it looks:
d = {
"Departure" : [ {
"Product" : {
"name" : "Länstrafik - Buss 201",
"num" : "201",
"catCode" : "7",
"catOutS" : "BLT",
"catOutL" : "Länstrafik - Buss",
"operatorCode" : "254",
"operator" : "JLT",
"operatorUrl" : "http://www.jlt.se"
},
"Stops" : {
"Stop" : [ {
"name" : "Gislaved Lundåkerskolan",
"id" : "740040260",
"extId" : "740040260",
"routeIdx" : 12,
"lon" : 13.530096,
"lat" : 57.298178,
"depTime" : "20:55:00",
"depDate" : "2019-03-05"
}]}}]}
And here's how you can print depTime
print(d["Departure"][0]["Stops"]["Stop"][0]["depTime"])
The important part you missed is d["Departure"][0] because d["Departure"] is a list.
As Kyle said in the previous answer, data["Departure"] is a list, but you're trying to use it as a dictionary. There are 2 possible solutions.
Change data["Departure"]["Stops"]["Stop"] etc. to data["Departure"][0]["Stops"]["Stop"] etc.
Change the JSON file to make departure into a dictionary, which would allow you to keep your original code. This would make the final JSON snippet look like this:
"Departure" : {
"Product" : {
"name" : "Länstrafik - Buss 201",
"num" : "201",
"catCode" : "7",
"catOutS" : "BLT",
"catOutL" : "Länstrafik - Buss",
"operatorCode" : "254",
"operator" : "JLT",
"operatorUrl" : "http://www.jlt.se"
},
"Stops" : {
"name" : "Gislaved Lundåkerskolan",
"id" : "740040260",
"extId" : "740040260",
"routeIdx" : 12,
"lon" : 13.530096,
"lat" : 57.298178,
"depTime" : "20:55:00",
"depDate" : "2019-03-05"
}
}
I am not able to extract the "Data" "12639735;7490484;3469776;9164745;650;0"
from this file using python:
In php it's piece of cake for me but I cannot master it in python.
Other answers from Stackexchange didn't give me the answer.
Here is the contents of the file test.json:
{
"ActTime" : 1494535483,
"ServerTime" : "2017-05-11 22:44:43",
"Sunrise" : "05:44",
"Sunset" : "21:14",
"result" : [
{
"AddjMulti" : 1.0,
"AddjMulti2" : 1.0,
"AddjValue" : 0.0,
"AddjValue2" : 0.0,
"BatteryLevel" : 255,
"Counter" : "20130.221",
"CounterDeliv" : "12634.521",
"CounterDelivToday" : "0.607 kWh",
"CounterToday" : "1.623 kWh",
"CustomImage" : 0,
"Data" : "12639735;7490484;3469776;9164745;650;0",
"Description" : "",
"Favorite" : 1,
"HardwareID" : 3,
"HardwareName" : "Slimme Meter",
"HardwareType" : "P1 Smart Meter USB",
"HardwareTypeVal" : 4,
"HaveTimeout" : false,
"ID" : "1",
"LastUpdate" : "2017-05-11 22:44:39",
"Name" : "Elektriciteitsmeter",
"Notifications" : "false",
"PlanID" : "0",
"PlanIDs" : [ 0 ],
"Protected" : false,
"ShowNotifications" : true,
"SignalLevel" : "-",
"SubType" : "Energy",
"SwitchTypeVal" : 0,
"Timers" : "false",
"Type" : "P1 Smart Meter",
"TypeImg" : "counter",
"Unit" : 1,
"Usage" : "650 Watt",
"UsageDeliv" : "0 Watt",
"Used" : 1,
"XOffset" : "0",
"YOffset" : "0",
"idx" : "1"
}
],
"status" : "OK",
"title" : "Devices"
}
This should work
import json
with open('test.json') as f:
contents = json.load(f)
print(contents['result'][0]['Data'])
Similar questions have been asked before: Parsing values from a JSON file using Python?
Got it.
url = "http://192.168.2.1:8080/json.htm?type=devices&rid=1"
response = urllib.urlopen(url)
str = json.loads(response.read())
for i in str["result"]:
datastring = i["Data"]
elementstring = i["Data"].split(';')
counter = 0
for j in elementstring:
if counter == 4:
usage = j
counter += 1
delivery = get_num(i["UsageDeliv"])
I would like to parse the following file and get the values after the "ID" and "Label":
{"data" : [{
"id" : "3743",
"fgColor" : "#000000",
"Comment" : [ "GLIO" ],
"Group" : "0",
"Shape" : "roundrectangle",
"GraphicsName" : "TITLE:Glioma",
"Matching_Attribute" : [ "TITLE:Glioma" ],
"Entry_id" : "78",
"Label" : "TITLE:Glioma",
"EntrezIDs" : "05214, ",
"shared_name" : "path:hsa05214",
"Type" : "map",
"kegg_x" : "86.0",
"kegg_y" : "58.0",
"bgColor" : "#FFFFFF",
"name" : "path:hsa05214",
"SUID" : 3743,
"Height" : "25",
"Width" : "92",
"Link" : "http://www.kegg.jp/dbget-bin/www_bget?hsa05214",
"selected" : false
}]}
I'm using the following code, but nothing is being written to the specified file:
import re
cyjs = open("/users/skylake/desktop/cyjs-example.txt", "r")
jsonfile = open("/users/skylake/desktop/jsonfile.txt", "w")
for line in cyjs:
if line.startswith('"id"'):
print(line)
jsonfile.write(line)
jsonfile.close()
There is a tool better suited to this problem.
This is a JSON file, which can be parsed with the built-in json module:
In [1]: import json
In [2]: with open("data.txt", "r") as f:
...: data = json.load(f)
In [3]: obj = data["data"][0]
In [4]: obj["id"]
Out[4]: u'3743'
In [5]: obj["Label"]
Out[5]: u'TITLE:Glioma'
[
{
"account" : "",
"address" : "D8xWhR8LqSdSLTxRWwouQ3EiSnvcjLmdo6",
"category" : "send",
"amount" : -1000.00000000,
"fee" : -0.00000001,
"confirmations" : 716,
"blockhash" : "4569322b4c8c98fba3ef4c7bda91b53b4ee82d268eae2ff7658bc0d3753c00ff",
"blockindex" : 2,
"blocktime" : 1394242415,
"txid" : "45b629a779e6e0bf6d160c37833a27f1f2cc1bfa34632d166cccae83e69eb6fe",
"time" : 1394242259,
"timereceived" : 1394242259
},
{
"account" : "",
"address" : "DCPFe1fs7qScDFvoTexYqo95LmnJJkjmu7",
"category" : "receive",
"amount" : 0.13370000,
"confirmations" : 717,
"blockhash" : "e9024e177b42ca23fed741fb90c39836de5f9c722a93157e50df2e3f2c318d77",
"blockindex" : 26,
"blocktime" : 1394242412,
"txid" : "ce41b4c35b09ae582436b8138d62375840c32bd9ea0360457bd9f589012d2da3",
"time" : 1394242315,
"timereceived" : 1394242315
},
{
"account" : "",
"address" : "DCPFe1fs7qScDFvoTexYqo95LmnJJkjmu7",
"category" : "receive",
"amount" : 0.00100000,
"confirmations" : 692,
"blockhash" : "17eb2ef40b8bcb2ceb3d7f07d6545f03fc9bf41c8d28f759becd84a31e65e123",
"blockindex" : 14,
"blocktime" : 1394243788,
"txid" : "2b099fd0ce6239c5c3c69e2ba70669c3069858908e42b8ca970bf213e555d715",
"time" : 1394243669,
"timereceived" : 1394243669
},
{
"account" : "",
"address" : "DCPFe1fs7qScDFvoTexYqo95LmnJJkjmu7",
"category" : "send",
"amount" : -0.00100000,
"fee" : -2.00000000,
"confirmations" : 692,
"blockhash" : "17eb2ef40b8bcb2ceb3d7f07d6545f03fc9bf41c8d28f759becd84a31e65e123",
"blockindex" : 14,
"blocktime" : 1394243788,
"txid" : "2b099fd0ce6239c5c3c69e2ba70669c3069858908e42b8ca970bf213e555d715",
"time" : 1394243669,
"timereceived" : 1394243669
}
]
That ^^ is my data.
How can I list the "txid" values of the entries where "category" == "receive"? I also want to write them, one per line, to a file called "list.txt".
I'm not interested at all in the entries where "category" == "send".
Thanks :-)
EDIT:
This is my code
with (open('text.json') as f:
data = json.load(f)
my_list = json.load(open("text.json"))
result = sum(item["category"] == "receive" for item in my_list)
i = 0
res = ""
while i < result:
res = data[i]['txid']
if data[i]['category'] == "receive":
with open ("list.txt", "a") as myfile:
myfile.write(res + "\n")
i += 1
import json

with open('text.json') as json_file:
    object_list = json.load(json_file)

# One newline-terminated txid per "receive" transaction.
receive_txids = [(x['txid'] + '\n') for x in object_list if x['category'] == 'receive']

# The with-block closes (and therefore flushes) list.txt once the lines are written.
with open("list.txt", "a") as output_file:
    output_file.writelines(receive_txids)
Edit, here's a longer but clearer (untested) version:
import json
with open('text.json') as json_file:
object_list = json.load(json_file)
receive_txids = []
for object in object_list:
if object['category'] == 'receive':
receive_txids.append(object['txid'])
with open("list.txt", "a") as output_file:
for txid in receive_txids:
output_file.write(txid + '\n')
Edit: Why am I even building up a list and then doing nothing much with it?
With no intermediate list, the terse version:
import json

# Both files are closed on exit from the with-block; the generator expression
# feeds writelines() directly, so no intermediate list is built.
with open('text.json') as json_file, open("list.txt", "a") as output_file:
    output_file.writelines(
        x['txid'] + '\n' for x in json.load(json_file) if x['category'] == 'receive'
    )
and the tidier, clearer version:
import json

with open('text.json') as json_file:
    object_list = json.load(json_file)

with open("list.txt", "a") as output_file:
    for entry in object_list:
        # Only "receive" transactions are wanted; all other entries are skipped.
        if entry['category'] == 'receive':
            # File objects provide write() and writelines(), but no writeline().
            output_file.write(entry['txid'] + '\n')