Search a JSON file for a specific key using Python

JSON file: https://1drv.ms/w/s!AizscpxS0QM4hJl99vVfUMvEjgXV3Q
I can extract TECH-XXX with this script:
#!/usr/bin/python
import sys
import json

sys.stdout = open('output.txt', 'wt')

datapath = sys.argv[1]
data = json.load(open(datapath))
for issue in data['issues']:
    if len(issue['fields']['subtasks']) == 0:
        print(issue['key'])
For every issue without subtasks (TECH-729, TECH-731) I want to extract TECH from
project": {
"avatarUrls": {
"16x16": "https://jira.corp.company.com/secure/projectavatar?size=xsmall&pid=10001&avatarId=10201",
"24x24": "https://jira.corp.company.com/secure/projectavatar?size=small&pid=10001&avatarId=10201",
"32x32": "https://jira.corp.company.com/secure/projectavatar?size=medium&pid=10001&avatarId=10201",
"48x48": "https://jira.corp.company.com/secure/projectavatar?pid=10001&avatarId=10201"
},
"id": "10001",
"key": "TECH",
"name": "Technology",
"self": "https://jira.corp.company.com/rest/api/2/project/10001"
},
and customfield_10107.id. I tried print(issue['customfield_10107']['id']) and got:
./tasks1.py 1.json
Traceback (most recent call last):
  File "./tasks1.py", line 11, in <module>
    print(issue['customfield_10107']['id'])
KeyError: 'customfield_10107'

The key field exists directly under issue, while customfield_10107 exists in issue['fields']:
for issue in response["issues"]:
    # For issues without subtasks
    if len(issue['fields']['subtasks']) == 0:
        # Print custom field id
        if 'customfield_10107' in issue['fields']:
            custom_field = issue['fields']['customfield_10107']
            print(custom_field['id'])
        # Print key
        if 'key' in issue:
            print(issue['key'])
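The project key (TECH) can be pulled out the same way; here is a minimal sketch, assuming the project object sits under issue['fields'] as in the standard Jira REST response:
for issue in response["issues"]:
    if len(issue['fields']['subtasks']) == 0:
        # 'project' is a sibling of 'subtasks' under 'fields' in the Jira REST API
        project = issue['fields'].get('project', {})
        print(project.get('key'))  # e.g. "TECH"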


JSON: count unique values

I have this JSON file:
[
  {
    "#timestamp": "",
    "userID": "",
    "destinationUserName": "",
    "message": ": 12,050",
    "name": "Purge Client Events"
  },
  {
    "#timestamp": "",
    "userID": "",
    "destinationUserName": "",
    "message": "",
    "name": ""
  },
  {
    "#timestamp": "",
    "userID": "",
    "destinationUserName": "",
    "message": "",
    "name": ""
  },
  {
    "#timestamp": "",
    "userID": "",
    "name": "",
    "sourceUserName": "",
    "deviceAction": ""
  }
]
I am looking for a way to loop over the whole file, count the unique values for userID, and print that count.
I found different solutions, but none of them worked for me and I am completely stuck.
So far this is my code; it is just a formatter that converts the file into JSON format.
After that I tried to check the length of the file and loop over it appending unique elements.
import json

to_queue = []

def structure_json():
    with open("file.json", "r+") as f:
        old = f.read()
        f.seek(0)  # rewind
        # save to the old string after replace
        new = old.replace('}{', '},{')
        f.write(new)
        tmps = '[' + str(new) + ']'
        json_string = json.loads(tmps)
        for key in json_string:
            to_queue.append(key)
        f.close
    with open('update_2.json', 'w') as file:
        json.dump(json_string, file, indent=2)
        size = len(file["UserID"])
        uniqueNames = []
        for i in range(0, size, 1):
            if file["UserID"] not in uniqueNames:
                uniqueNames.append(file["UserID"])
        print(uniqueNames)

structure_json()
print(to_queue)
But I get the following error:
Traceback (most recent call last):
  File "format.py", line 24, in <module>
    structure_json()
  File "format.py", line 17, in structure_json
    size=len(file["UserID"])
TypeError: '_io.TextIOWrapper' object is not subscriptable
Any help will be much appreciated, and if you need any more info just let me know.
Open the file and load the content. Then you can iterate over the list of dicts and create a set of all values for the key userID. Note: any dict missing the key will yield None, which affects the count (+1).
import json

with open('your_file.json') as f:
    data = json.load(f)

users = set(item.get('userID') for item in data)
print(len(users))
print(users)
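If you would rather skip records that lack the key entirely, or also want to see how often each user appears, a small variation works (a sketch, assuming the same list-of-dicts layout):
import json
from collections import Counter

with open('your_file.json') as f:
    data = json.load(f)

# Keep only records that actually carry a userID, then count per user
counts = Counter(item['userID'] for item in data if 'userID' in item)
print(len(counts))  # number of unique userIDs
print(counts)       # occurrences per userID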

Getting TypeError: string indices must be integers. Not Sure Why

I'm trying to run code that creates an empty list, iterates the events from the JSON, and appends the user id of any user who did the homepage event to that list. I'm trying to use conditional parsing to only pick out events whose 'name' key has the value 'Visited home page' from events.json.
My code is as follows:
import json

# step 1 -- read events file into string
with open('events.json') as events_data:
    # step 2 -- convert string to json object (json.load(string))
    events_data = json.load(events_data)

# print(events_data['events'][0]['name'])
# print(events_data.keys())
# with open("data/events.json") as events_file:

# step 3 -- create empty list; iterate events list and append userid of any
# user who did homepage event into the empty list; that gives us the set of
# user ids who did the homepage event
homeuserid = []
for i in events_data['events']:
    homeuserid[i] = i ['name'] ['Visited home page']
print(homeuserid)
However, when I go to run the code I get the following error, and I am unsure why:
Traceback (most recent call last):
  File "run.py", line 34, in <module>
    homeuserid[i] = i ['name'] ['Visited home page']
TypeError: string indices must be integers
Sample of JSON (events.json):
{
  "events": [
    {
      "name": "Added item to cart",
      "timestamp": 1422119921,
      "user_id": "5a5598f2-f7db-420e-9b8e-52a9ad694bc1"
    },
    {
      "name": "Visited product page",
      "timestamp": 1409554014,
      "user_id": "4683c9b6-3c8b-4215-a401-a9bbfde833ee"
    },
    {
      "name": "Visited about page",
      "timestamp": 1430938313,
      "user_id": "26a1593b-b17d-4389-aa93-4a1c9c0e9c67"
    },
    {
      "name": "Added item to cart",
      "timestamp": 1427447392,
      "user_id": "e71f2ee8-09ce-412b-92e1-3c6c0a90dda8"
    },
Here's the proper way to do it; there's no reason to process the file twice. Note that the resulting list will be empty if there are no 'Visited home page' events.
import json

filepath = 'events.json'
with open(filepath) as events_file:
    events_data = json.load(events_file)

homeuserids = []
for event in events_data['events']:
    if event['name'] == 'Visited home page':
        homeuserids.append(event['user_id'])
print(homeuserids)
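The same filter also reads naturally as a list comprehension, equivalent to the loop above:
homeuserids = [event['user_id']
               for event in events_data['events']
               if event['name'] == 'Visited home page']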

BigQuery external tables with Python

How can I create external tables (federated data sources) in BigQuery using Python (google-cloud-bigquery)?
I know you can use bq commands like this, but that is not how I want to do it:
bq mk --external_table_definition=path/to/json tablename
bq update tablename path/to/schemafile
with external_table_definition as:
{
  "autodetect": true,
  "maxBadRecords": 9999999,
  "csvOptions": {
    "skipLeadingRows": 1
  },
  "sourceFormat": "CSV",
  "sourceUris": [
    "gs://bucketname/file_*.csv"
  ]
}
and a schemafile like this:
[
  {
    "mode": "NULLABLE",
    "name": "mycolumn1",
    "type": "INTEGER"
  },
  {
    "mode": "NULLABLE",
    "name": "mycolumn2",
    "type": "STRING"
  },
  {
    "mode": "NULLABLE",
    "name": "mycolumn3",
    "type": "STRING"
  }
]
Thank you for your help!
Lars
table_id = 'table1'
table = bigquery.Table(dataset_ref.table(table_id), schema=schema)
external_config = bigquery.ExternalConfig('CSV')
external_config = {
    "autodetect": true,
    "options": {
        "skip_leading_rows": 1
    },
    "source_uris": [
        "gs://bucketname/file_*.csv"
    ]
}
table.external_data_configuration = external_config
table = client.create_table(table)
The schema format is:
schema = [
    bigquery.SchemaField(name='mycolumn1', field_type='INTEGER', is_nullable=True),
    bigquery.SchemaField(name='mycolumn2', field_type='STRING', is_nullable=True),
    bigquery.SchemaField(name='mycolumn3', field_type='STRING', is_nullable=True),
]
I know this is well after the question has been asked and answered, but the accepted answer above does not work. I attempted to do the same thing you are describing, and additionally tried to use the same approach to update an existing external table to which some new columns had been added. This is the correct snippet to use, assuming you have that JSON file stored somewhere like /tmp/schema.json:
[
  {
    "mode": "NULLABLE",
    "name": "mycolumn1",
    "type": "INTEGER"
  },
  {
    "mode": "NULLABLE",
    "name": "mycolumn2",
    "type": "STRING"
  },
  {
    "mode": "NULLABLE",
    "name": "mycolumn3",
    "type": "STRING"
  }
]
You should simply need to have the following if you already have the API representation of the options you want to add to the external table.
from google.cloud import bigquery

client = bigquery.Client()

# dataset must exist first
dataset_name = 'some_dataset'
dataset_ref = client.dataset(dataset_name)
table_name = 'tablename'

# Or wherever your json schema lives
schema = client.schema_from_json('/tmp/schema.json')

external_table_options = {
    "autodetect": True,
    "maxBadRecords": 9999999,
    "csvOptions": {
        "skipLeadingRows": 1
    },
    "sourceFormat": "CSV",
    "sourceUris": [
        "gs://bucketname/file_*.csv"
    ]
}
external_config = bigquery.ExternalConfig.from_api_repr(external_table_options)

table = bigquery.Table(dataset_ref.table(table_name), schema=schema)
table.external_data_configuration = external_config

client.create_table(
    table,
    # Now you can create the table safely with this option
    # so that it does not fail if the table already exists
    exists_ok=True
)

# And if you seek to update the table's schema and/or its
# external options through the same script then use
client.update_table(
    table,
    # As a side note, this portion of the code had me confounded for hours.
    # I could not for the life of me figure out that "fields" did not point
    # to the table's columns, but pointed to the `google.cloud.bigquery.Table`
    # object's attributes. IMHO, the naming of this parameter is horrible
    # given "fields" are already a thing (i.e. `SchemaField`s).
    fields=['schema', 'external_data_configuration']
)
In addition to setting the external table configuration using the API representation, you can set all of the same attributes directly on the bigquery.ExternalConfig object itself. This is another approach for just the external_config portion of the code above:
external_config = bigquery.ExternalConfig('CSV')
external_config.autodetect = True
external_config.max_bad_records = 9999999
external_config.options.skip_leading_rows = 1
external_config.source_uris = ["gs://bucketname/file_*.csv"]
I must again, however, raise some frustration with the Google documentation. The bigquery.ExternalConfig.options attribute claims that it can be set with a dictionary:
>>> from google.cloud import bigquery
>>> help(bigquery.ExternalConfig.options)
Help on property:
Optional[Dict[str, Any]]: Source-specific options.
but that is completely false. As you can see above, the Python object attribute names and the API representation names of those same attributes are slightly different. Either way you try it, though, if you have a dict of the source-specific options (e.g. CSVOptions, GoogleSheetsOptions, BigTableOptions, etc.) and attempt to pass that dict as the options attribute, it laughs in your face and says mean things like this:
>>> from google.cloud import bigquery
>>> external_config = bigquery.ExternalConfig('CSV')
>>> options = {'skip_leading_rows': 1}
>>> external_config.options = options
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: cant set attribute
>>> options = {'skipLeadingRows': 1}
>>> external_config.options = options
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: cant set attribute
>>> options = {'CSVOptions': {'skip_leading_rows': 1}}
>>> external_config.options = options
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: cant set attribute
>>> options = {'CSVOptions': {'skipLeadingRows': 1}}
>>> external_config.options = options
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: cant set attribute
The workaround was iterating over the options dict and using the __setattr__() method on the options object, which worked well for me. Pick your favorite approach from above. I have tested all of this code and will be using it for some time.
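A minimal sketch of that workaround, assuming the keys of the dict match the attribute names on the source-specific options object (CSVOptions here); any option name beyond skip_leading_rows is illustrative:
from google.cloud import bigquery

external_config = bigquery.ExternalConfig('CSV')

# Keys must be CSVOptions attribute names, not API-representation names
csv_options = {'skip_leading_rows': 1, 'allow_jagged_rows': True}
for name, value in csv_options.items():
    setattr(external_config.options, name, value)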

Aggregate not returning results, with user authentication in MongoDB

The issue I'm having is that I can't make the aggregate search work. I have been able to run it in a MongoDB instance without any users; however, in the MongoDB instance with users (even when authentication is not turned on) I get this weird error message (see below). What's weirder is that I have no problem with other functions, for example find().count(), which as you can see below returns 32 (the number of results in the database).
Code in the MongoDB shell to create the user:
use admin
db.createUser({
    user: "VibrationDataCollector",
    pwd: '45.',
    roles: ["readWriteAnyDatabase",
            "dbAdminAnyDatabase",
            "clusterAdmin"]
})
Code in Python to conduct the search:
client = MongoClient('mongodb://xx.x.x.xx:xxxxx/')
db = client['VibrationDataDB']
db.authenticate('VibrationDataCollector', '45.', source='admin')
coll = db.VibrationData

hello = coll.find().count()
print 'count=', hello

LastWrite_Time = coll.aggregate([
    {"$unwind": "$Records"},
    {"$redact": {
        "$cond": [
            {"$anyElementTrue": {
                "$map": {
                    "input": "$Records.Properties",
                    "as": "result",
                    "in": {"$and": [
                        {"$eq": ["$$result.Property.Name", "LastWrite_User"]},
                        {"$eq": ["$$result.value", "42"]}
                    ]}
                }
            }},
            "$$KEEP",
            "$$PRUNE"
        ]
    }},
    {"$unwind": "$Records.Properties"},
    {"$match": {"Records.Properties.Property.Name": 'LastWrite_Time'}},
    {"$project": {"_id": 0, "value": "$Records.Properties.value"}}
])

list1 = LastWrite_Time['result']
for T in list1:
    print T['value']
Result:
count= 32
Traceback (most recent call last):
  File "C:/Python_scripts/SimulationCEI.py", line 64, in <module>
    list1 = LastWrite_Time['result']
TypeError: 'CommandCursor' object has no attribute '__getitem__'
UPDATE: using next()
count= 32
Traceback (most recent call last):
  File "C:/Python_scripts/SimulationCEI.py", line 64, in <module>
    list1 = LastWrite_Time['result']
TypeError: 'CommandCursor' object has no attribute '__getitem__'
>>> next()
Traceback (most recent call last):
  File "<pyshell#4>", line 1, in <module>
    next()
TypeError: next expected at least 1 arguments, got 0
>>>
You want:
for T in LastWrite_Time:
    print T['value']
"aggregate" returns a cursor that must be iterated. I'm certain that authentication is not related to the error you see.

How do you parse this JSON in Python?

The JSON is below:
{"result":[
{
"spawn_point_id":"89",
"encounter_id":"1421",
"expiration_timestamp_ms":"1470105387836",
"latitude":38.22,
"longitude": -91.27
},
{
"distance_in_meters":10,
"encounter_id":"9677"
},
{
"distance_in_meters":10,
"encounter_id":"1421"
},
{
"spawn_point_id":"11",
"encounter_id":"2142",
"expiration_timestamp_ms":"1470105387444",
"latitude":38.00,
"longitude": -91.00
}
]}
and I want the output to look like:
spawn 89 at lat 38.22 long -91.27
spawn 11 at lat 38.00 long -91.00
I used json.loads and it actually makes the JSON look funky.
Code so far below:
c = json.loads(r.content)
for d in c['result']:
    if d['latitude'] is not None:
        print(str(d['latitude']))
It seems to kind of work, but then I get this error:
Traceback (most recent call last):
  File "fast0.py", line 11, in <module>
    if d['latitude'] is not None:
KeyError: 'latitude'
You are looking for a key that does not exist. Try:
c = json.loads(r.content)
for d in c['result']:
    if 'latitude' in d:
        print(str(d['latitude']))
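To produce the exact output the question asks for ("spawn 89 at lat 38.22 long -91.27"), the same membership test extends naturally; a sketch assuming entries with a latitude also carry spawn_point_id and longitude, as in the sample above:
c = json.loads(r.content)
for d in c['result']:
    if 'latitude' in d:
        print('spawn {} at lat {} long {}'.format(
            d['spawn_point_id'], d['latitude'], d['longitude']))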
