how to add fields in google cloud fire store using python - python

i have two Json data data 1 and data 2 and i want to merge it in fire store document when i try to merge two dicts only half part is showing .
data = {
u'name': u'Los Angeles',
u'state': u'CA',
u'country': u'USA'
}
data2= {
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
}
db.collection(u'cities').document(u'LA').set(data)
city_ref = db.collection(u'cities').document(u'LA')
city_ref.set({
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
}, merge=True)
#only this part is showing
{
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
}
#when i, doing this
data = {
u'name': u'Los Angeles',
u'state': u'CA',
u'country': u'USA',
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'}
#only this much part is showing in my field
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
is there any way to merge this two in python

You can only have one value per field name. Merge is useful when you want to add additional fields to the same document, for example:
data = {
u'name': u'Los Angeles',
u'state': u'CA',
u'country': u'USA'
}
db.collection(u'cities').document(u'LA').set(data)
city_ref = db.collection(u'cities').document(u'LA')
city_ref.set({
u'name2': u'MIAMI',
u'state2': u'CA',
u'country2': u'USA'
}, merge=True)
# Result
data = {
u'country': u'USA',
u'country2': u'USA',
u'name': u'Los Angeles',
u'name2': u'MIAMI',
u'state': u'CA',
u'state2': u'CA',}

You can use this code( it will add the new fields if they don't exist and update them if they exist already).
doc.reference.update({
u'newField1': newValue1
u'newField2': newValue2
})

Related

I am trying to get answer in json format but it simply returns a String.What am i doing wrong?

I am trying to get answer in json format but it simply returns a String.What am i doing wrong?
from flask import Flask, request, Response
import mysql.connector
import json
app = Flask(__name__)
def createConnection():
connection = mysql.connector.connect(user="akshay", password="akshay", host="localhost", database="BOOKS")
return connection
#app.route("/contact")
def getContacts():
print(request)
connection = createConnection()
cursor = connection.cursor()
cursor.execute("SELECT ID, NAME, AUTHOR, SUBJECT, PRICE FROM BOOKS;")
contacts = []
for (ID, NAME, AUTHOR, SUBJECT, PRICE) in cursor:
contacts.append({
'ID': ID,
'NAME': NAME,
"AUTHOR": AUTHOR,
"SUBJECT": SUBJECT,
"PRICE": PRICE
})
cursor.close()
connection.close()
response = Response()
response.content_type ='application/json'
response.data = json.loads(json.dumps(contacts))
return response
app.run('0.0.0.0', 4000)
And this is Output::
[{u'PRICE': 9999.999, u'AUTHOR': u'AKSHAY', u'NAME': u'AKSHAY', u'ID':
4, u'SUBJECT': u'BIG_DATA'}, {u'PRICE': 123.456, u'AUTHOR': u'Yashwant
Kanetkar', u'NAME': u'Exploring C', u'ID': 1001, u'SUBJECT': u'C
Programming'}, {u'PRICE': 371.019, u'AUTHOR': u'Yashwant Kanetkar',
u'NAME': u'Pointers in C', u'ID': 1002, u'SUBJECT': u'C Programming'},
{u'PRICE': 334.215, u'AUTHOR': u'E Balaguruswami', u'NAME': u'ANSI C
Programming', u'ID': 1003, u'SUBJECT': u'C Programming'}, {u'PRICE':
140.121, u'AUTHOR': u'Dennis Ritchie', u'NAME': u'ANSI C Programming', u'ID': 1004, u'SUBJECT': u'C Programming'}, {u'PRICE': 417.764,
u'AUTHOR': u'Herbert Schildt', u'NAME': u'C++ Complete Reference',
u'ID': 2001, u'SUBJECT': u'C++ Programming'}, {u'PRICE': 620.665,
u'AUTHOR': u'Stanley Lippman', u'NAME': u'C++ Primer', u'ID': 2002,
u'SUBJECT': u'C++ Programming'}, {u'PRICE': 987.213, u'AUTHOR':
u'Bjarne Stroustrup', u'NAME': u'C++ Programming Language', u'ID':
2003, u'SUBJECT': u'C++ Programming'}, {u'PRICE': 525.121, u'AUTHOR':
u'Herbert Schildt', u'NAME': u'Java Complete Reference', u'ID': 3001,
u'SUBJECT': u'Java Programming'}, {u'PRICE': 575.651, u'AUTHOR': u'Cay
Horstmann', u'NAME': u'Core Java Volume I', u'ID': 3002, u'SUBJECT':
u'Java Programming'}, {u'PRICE': 458.238, u'AUTHOR': u'James Gosling',
u'NAME': u'Java Programming Language', u'ID': 3003, u'SUBJECT': u'Java
Programming'}, {u'PRICE': 567.391, u'AUTHOR': u'Peter Galvin',
u'NAME': u'Operatig System Concepts', u'ID': 4001, u'SUBJECT':
u'Operating Systems'}, {u'PRICE': 421.938, u'AUTHOR': u'Mauris J
Bach', u'NAME': u'Design of UNIX Operating System', u'ID': 4002,
u'SUBJECT': u'Operating Systems'}, {u'PRICE': 352.822, u'AUTHOR':
u'Uresh Vahalia', u'NAME': u'UNIX Internals', u'ID': 4003, u'SUBJECT':
u'Operating Systems'}, {u'PRICE': 872.652, u'AUTHOR': u'Ayn Rand',
u'NAME': u'Atlas Shrugged', u'ID': 8001, u'SUBJECT': u'Novell'},
{u'PRICE': 9999.999, u'AUTHOR': u'AKSHAY', u'NAME': u'AKSHAY', u'ID':
9001, u'SUBJECT': u'BIG_DATA'}]
There is nothing particularly wrong with your code, just remove the call to json.loads() so that the JSON encoded data is used as the response data.
Calling json.loads() will recreate and pass a Python data structure (list of dictionaries in this case) to the response, which will not serialise as valid JSON. In Python 2 it will serialise using the Python repr() format as produced by str() which will not always produce a JSON compatible string (the u'' string prefix for example is not valid JSON). In Python 3 is will probably cause an exception because it won't be able to convert to a bytes object.
You could improve the code by using flask.json.jsonify() which will return a Response object with the correct content type header and a JSON body:
from flask.json import jsonify
#app.route("/contact")
def getContacts():
# create contacts...
return jsonify(contacts)
Ok ,finally got the proper output::
/ 20171119101107
// http://localhost:4000/contact
[
{
"SUBJECT": "BIG_DATA",
"PRICE": 9999.999,
"AUTHOR": "AKSHAY",
"ID": 4,
"NAME": "AKSHAY"
},
{
"SUBJECT": "C Programming",
"PRICE": 123.456,
"AUTHOR": "Yashwant Kanetkar",
"ID": 1001,
"NAME": "Exploring C"
},
{
"SUBJECT": "C Programming",
"PRICE": 371.019,
"AUTHOR": "Yashwant Kanetkar",
"ID": 1002,
"NAME": "Pointers in C"
},
I removed json.loads() and installed json viewer plugin.Thank you very much for help.

SugarCRM call records REST API

I am trying to get the call records from my SugarCRM account using the REST API and I am using Python.
There I want to obtain all the attendees but all I get is the user to whom the call is assigned.
u'assigned_user_id': u'xxxxxxxx',
The response I've received is,
{u'created_by_link': {u'id': u'1', u'full_name': u'adminx', u'_acl': {u'fields': {u'last_login': {u'write': u'no', u'create': u'no'}, u'pwd_last_changed': {u'write': u'no', u'create': u'no'}}, u'_hash': u'xxxx', u'delete': u'no'}}, u'dri_workflow_task_template_link': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'', u'id': u''},
u'customer_journey_points': 10,
u'dri_subworkflow_id': u'',
u'recurrence_id': u'',
u'created_by_name': u'adminx',
u'date_end': u'2018-05-02T09:45:00+00:00',
u'dri_subworkflow_template_id': u'',
u'parent_type': u'Accounts',
u'contact_id': u'xxxx',
u'_acl': {u'fields': {}},
u'duration_minutes': 30,
u'tag': [],
u'assigned_user_name': u'xxxx',
u'repeat_ordinal': u'',
u'repeat_count': None,
u'contact_name': u'xxxx',
u'repeat_interval': 1, u'id': u'xxxx', > - u'parent_name': u'ABC',
u'customer_journey_parent_activity_id': u'',
u'date_entered': u'2017-07-17T12:49:23+00:00',
u'outlook_id': u'',
u'team_name': [{u'name_2': u'', u'selected': False, u'primary': True, u'id': u'1', u'name': u'Global'}, {u'name_2': u'', u'selected': False, u'primary': False, u'id': u'West', u'name': u'West'}],
u'contacts': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'xxx', u'id': u'xxx'},
u'dri_workflow_task_template_id': u'',
u'customer_journey_score': None,
u'date_start': u'2018-05-02T09:15:00+00:00',
u'reminder_checked': u'',
u'dri_workflow_sort_order': u'1',
u'created_by': u'1',
u'parent_id': u'xxxx',
u'dri_subworkflow_template_link': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'', u'id': u''},
u'dri_subworkflow_name': u'',
u'dri_subworkflow_link': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'', u'id': u''},
u'modified_by_name': u'adminx',
u'repeat_selector': u'',
u'email_reminder_sent': False,
u'dri_workflow_template_id': u'',
u'status': u'Not Held',
u'direction': u'Outbound',
u'accept_status_users': u'',
u'repeat_dow': u'',
u'description': u'',
u'parent': {u'type': u'Accounts', u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'XYZ Funding Inc', u'id': u'xxxx'},
u'repeat_unit': u'',
u'deleted': False,
u'is_customer_journey_parent_activity': False,
u'customer_journey_parent_activity_type': u'',
u'locked_fields': [],
u'email_reminder_time': -1,
u'following': False,
u'assigned_user_link': {u'id': u'xxxx', u'full_name': u'xxxx', u'_acl': {u'fields': [], u'_hash': u'xxxx'}},
u'repeat_type': u'',
u'assigned_user_id': u'seed_sally_id',
u'team_count_link': {u'team_count': u'', u'id': u'1', u'_acl': {u'fields': [], u'_hash': u'xxxx'}},
u'dri_workflow_task_template_name': u'',
u'modified_user_link': {u'id': u'1', u'full_name': u'adminx', u'_acl': {u'fields': {u'last_login': {u'write': u'no', u'create': u'no'}, u'pwd_last_changed': {u'write': u'no', u'create': u'no'}}, u'_hash': u'xxx', u'delete': u'no'}},
u'email_reminder_checked': u'',
u'_module': u'Calls',
u'modified_user_id': u'1',
u'repeat_until': u'',
u'name': u'test',
u'date_modified': u'2017-07-17T12:49:23+00:00',
u'accept_status': u'',
u'reminder_time': -1,
u'customer_journey_progress': 0,
u'dri_workflow_template_name': u'',
u'my_favorite': False,
u'dri_subworkflow_template_name': u'',
u'dri_workflow_template_link': {u'_acl': {u'fields': [], u'_hash': u'xxx'}, u'name': u'', u'id': u''}, > - u'set_accept_links': u'',
u'repeat_days': u'',
u'is_customer_journey_activity': False,
u'repeat_parent_id': u'',
u'team_count': u'',
u'duration_hours': 0,
u'recurring_source': u''},
Strangely, the object which contains the list of "Guests" is not present in the standard GET request i.e.
https://{INSTANCE}/rest/v10/Calls/{RECORD_ID}
After doing some troubleshooting, and looking at the model in the web application itself, I found that the "Guests" field in the GUI ties back to a model property called "invitees".
Running a web request specifically referencing this field provides you with an array of records linked to the Call ID. So try running a GET request to this endpoint:
https://{INSTANCE}/rest/v10/Calls/{RECORD_ID}?fields=invitees
This should provide you with JSON akin to the below:
{
"id": "ec041f60-72b1-11e7-89f0-00163ef1f82f",
"date_modified": "2017-08-08T12:26:47+00:00",
"invitees": {
"records": [
{
"id": "cf378211-2b38-4fe5-949b-a53040717f04",
"date_modified": "2017-08-01T16:12:48+00:00",
"_acl": {
"fields": {}
},
"_module": "Users",
"_link": "users"
},
{
"id": "fe1740e6-3fa4-11e7-8fef-00163ef1f82f",
"date_modified": "2017-05-23T10:45:52+00:00",
"_acl": {
"fields": {}
},
"_module": "Contacts",
"_link": "contacts"
},
{
"id": "dcc526fc-72b1-11e7-a6dd-00163ef1f82f",
"date_modified": "2017-07-27T09:57:21+00:00",
"_acl": {
"fields": {}
},
"_module": "Leads",
"_link": "leads"
},
{
"id": "89f8a6d1-7df0-0e0b-3568-58a5bb6ecf34",
"date_modified": "2017-04-06T10:36:16+00:00",
"_acl": {
"fields": {}
},
"_module": "Leads",
"_link": "leads"
}
],
"next_offset": {
"contacts": -1,
"leads": -1,
"users": -1
}
},
"_acl": {
"fields": {}
},
"contact_name": "test",
"_module": "Calls"
}

Accessing value boto3 dictionary response

I'm having an error trying to get a value out of data returned from boto3. I'm able to print the entire response (see below) but can't figure out what I need to do to get the NetworkInterfaceId out of response.
I'm running this in Python 2.7.5 because that's what the instances that need to run it have by default. I'm new to python so hope I'm missing some simple, thanks for your help!
Error
TypeError: list indices must be integers, not str
Code
#!/usr/bin/python
import boto3
ec2 = boto3.client('ec2')
response = ec2.describe_route_tables(
RouteTableIds=[
"rtb-4a1efc23",
],
Filters=[
{
'Name': 'route.destination-cidr-block',
'Values': [
"172.29.0.0/16",
]
},
]
)
#print(response)
print(response["RouteTables"][0]["Routes"]["NetworkInterfaceId"])
Response
{'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': 'a8e7ba60-7599-450a-a708-8d90e429d59e', 'HTTPHeaders': {'transfer-encoding': 'chunked', 'vary': 'Accept-Encoding', 'server': 'AmazonEC2', 'content-type': 'text/xml;charset=UTF-8', 'date': 'Wed, 08 Feb 2017 11:51:47 GMT'}}, u'RouteTables': [{u'Associations': [{u'SubnetId': 'subnet-d7040aaf', u'RouteTableAssociationId': 'rtbassoc-867a94ef', u'Main': False, u'RouteTableId': 'rtb-4a1efc23'}, {u'SubnetId': 'subnet-e0fcd3aa', u'RouteTableAssociationId': 'rtbassoc-9f7a94f6', u'Main': False, u'RouteTableId': 'rtb-4a1efc23'}], u'RouteTableId': 'rtb-4a1efc23', u'VpcId': 'vpc-0d00e264', u'PropagatingVgws': [{u'GatewayId': 'vgw-fcf479cc'}], u'Tags': [{u'Value': 'pub', u'Key': 'Name'}], u'Routes': [{u'GatewayId': 'local', u'DestinationCidrBlock': '172.28.0.0/16', u'State': 'active', u'Origin': 'CreateRouteTable'}, {u'Origin': 'CreateRoute', u'DestinationCidrBlock': '172.29.0.0/16', u'InstanceId': 'i-0b84e502d9dc49443', u'NetworkInterfaceId': 'eni-08f55373', u'State': 'active', u'InstanceOwnerId': '444456106883'}, {u'Origin': 'CreateRoute', u'DestinationCidrBlock': '172.31.0.0/16', u'InstanceId': 'i-0b84e502d9dc49443', u'NetworkInterfaceId': 'eni-08f55373', u'State': 'active', u'InstanceOwnerId': '444456106883'}, {u'GatewayId': 'igw-7b03e012', u'DestinationCidrBlock': '0.0.0.0/0', u'State': 'active', u'Origin': 'CreateRoute'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.114.112.192/27', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.114.210.160/27', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.138.172.32/27', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.138.172.96/27', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.114.105.128/26', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.115.80.0/26', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.115.131.0/26', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.138.17.128/26', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.138.83.64/26', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.138.180.128/26', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}, {u'GatewayId': 'vgw-fcf479cc', u'DestinationCidrBlock': '10.192.0.0/16', u'State': 'active', u'Origin': 'EnableVgwRoutePropagation'}]}]}
From the documentation, you can find that Routes is a list. If you want to fetch the NetworkInterfaceId, you should loop through Routes.
for route in response["RouteTables"][0]['Routes']:
if 'NetworkInterfaceId' in route:
print route['NetworkInterfaceId']
Note that NetworkInterfaceId may or may not be present in the response. I figured that out from the response you have pasted here.

Joining Keywords in a list NYT

For each article that a keyword list is returned for. We want to join all the words using the key --> values into a list, as shown below. I would like to strip out the ‘u’ from the list, before I do the append. Then we want to compare how many common words in both list and return the result.
Example lists returned from dic['keywords']:
Article one returns:
[
{
u'value': u'Dunford, Joseph F Jr',
u'name': u'persons',
u'rank': u'1'
},
{
u'value': u'Afghanistan',
u'name': u'glocations',
u'rank': u'1'
},
{
u'value': u'Afghan National Police',
u'name': u'organizations',
u'rank': u'1'
},
{
u'value': u'Afghanistan War (2001- )',
u'name': u'subject',
u'rank': u'1'
},
{
u'value': u'Defense and Military Forces',
u'name': u'subject',
u'rank': u'2'
}
]
Article two returns:
[
{
u'value': u'Gall, Carlotta',
u'name': u'persons',
u'rank': u'1'
},
{
u'value': u'Gannon, Kathy',
u'name': u'persons',
u'rank': u'2'
},
{
u'value': u'Niedringhaus, Anja (1965-2014)',
u'name': u'persons',
u'rank': u'3'
},
{
u'value': u'Kabul (Afghanistan)',
u'name': u'glocations',
u'rank': u'2'
},
{
u'value': u'Afghanistan',
u'name': u'glocations',
u'rank': u'1'
},
{
u'value': u'Afghan National Police',
u'name': u'organizations',
u'rank': u'1'
},
{
u'value': u'Afghanistan War (2001- )',
u'name': u'subject',
u'rank': u'1'
}
]
Desired Output:
List1 = ['Dunford, Joseph F Jr',’ Afghanistan’, ‘Afghan National Police’, ‘: Afghanistan War (2001- )’, ‘Defense and Military Forces’]
List2 = [‘Gall, Carlotta'’,’ u'Gannon, Kathy',’ Niedringhaus, Anja (1965-2014)’,’Afghanistan’]
Keywords in common: 2
My Code is as follows:
from flask import Flask, render_template, request, session, g, redirect, url_for
from nytimesarticle import articleAPI
api = articleAPI('X')
articles = api.search( q = 'Afghan War',
fq = {'headline':'', 'source':['Reuters','AP', 'The New York Times']},
begin_date = 20111231 )
def parse_articles(articles):
'''
This function takes in a response to the NYT api and parses
the articles into a list of dictionaries
'''
news = []
for i in articles['response']['docs']:
dic = {}
dic['id'] = i['_id']
if i['abstract'] is not None:
dic['abstract'] = i['abstract'].encode("utf8")
dic['headline'] = i['headline']['main'].encode("utf8")
dic['desk'] = i['news_desk']
dic['date'] = i['pub_date'][0:10] # cutting time of day.
dic['section'] = i['section_name']
dic['keywords'] = i['keywords']
print dic['keywords']
if i['snippet'] is not None:
dic['snippet'] = i['snippet'].encode("utf8")
dic['source'] = i['source']
dic['type'] = i['type_of_material']
dic['url'] = i['web_url']
dic['word_count'] = i['word_count']
# locations
locations = []
for x in range(0,len(i['keywords'])):
if 'glocations' in i['keywords'][x]['name']:
locations.append(i['keywords'][x]['value'])
dic['locations'] = locations
# subject
subjects = []
for x in range(0,len(i['keywords'])):
if 'subject' in i['keywords'][x]['name']:
subjects.append(i['keywords'][x]['value'])
dic['subjects'] = subjects
news.append(dic)
return(news)
print(parse_articles(articles))
You can use list comprehension to build lists from the given dict.
d = [{u'value': u'Dunford, Joseph F Jr', u'name': u'persons', u'rank': u'1'}, {u'value': u'Afghanistan', u'name': u'glocations', u'rank': u'1'}, {u'value': u'Afghan National Police', u'name': u'organizations', u'rank': u'1'}, {u'value': u'Afghanistan War (2001- )', u'name': u'subject', u'rank': u'1'}, {u'value': u'Defense and Military Forces', u'name': u'subject', u'rank': u'2'}]
print [v['value'] for v in d] # prints [u'Dunford, Joseph F Jr', u'Afghanistan', u'Afghan National Police', u'Afghanistan War (2001- )', u'Defense and Military Forces']

extracting hashtags out of Twitter trending topics data with Python Tweepy

I'm having a following problem:
using the Twitter API and tweepy module, I want to monitor the trending topics and extract hashtags out of the data.
This code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import tweepy, json
CONSUMER_KEY = 'key'
CONSUMER_SECRET = 'secret'
ACCESS_KEY = 'key'
ACCESS_SECRET = 'secret'
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
trends1 = api.trends_place(1)
print trends1
gives me data about globally trending topics that is structured like this:
[{u'created_at': u'2014-04-16T12:13:15Z', u'trends': [{u'url': u'http://twitter.com/search?q=%22South+Korea%22', u'query': u'%22South+Korea%22', u'name': u'South Korea', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%23FETUSONEDIRECTIONDAY', u'query': u'%23FETUSONEDIRECTIONDAY', u'name': u'#FETUSONEDIRECTIONDAY', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%23PrayForSouthKorea', u'query': u'%23PrayForSouthKorea', u'name': u'#PrayForSouthKorea', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%23GaraGaraRP', u'query': u'%23GaraGaraRP', u'name': u'#GaraGaraRP', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%23%D8%A5%D8%B3%D9%85_%D8%A3%D9%85%D9%8A_%D8%A8%D8%AC%D9%88%D8%A7%D9%84%D9%8A', u'query': u'%23%D8%A5%D8%B3%D9%85_%D8%A3%D9%85%D9%8A_%D8%A8%D8%AC%D9%88%D8%A7%D9%84%D9%8A', u'name': u'#\u0625\u0633\u0645_\u0623\u0645\u064a_\u0628\u062c\u0648\u0627\u0644\u064a', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%23Kad%C4%B1nlarKamyon%C5%9Eof%C3%B6r%C3%BCOlursa', u'query': u'%23Kad%C4%B1nlarKamyon%C5%9Eof%C3%B6r%C3%BCOlursa', u'name': u'#Kad\u0131nlarKamyon\u015eof\xf6r\xfcOlursa', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%22Dear+My+BestFriend%22', u'query': u'%22Dear+My+BestFriend%22', u'name': u'Dear My BestFriend', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%22%D0%A1%D0%B0%D0%BC%D0%BE%D0%BE%D0%B1%D0%BE%D1%80%D0%BE%D0%BD%D0%B0+100%22', u'query': u'%22%D0%A1%D0%B0%D0%BC%D0%BE%D0%BE%D0%B1%D0%BE%D1%80%D0%BE%D0%BD%D0%B0+100%22', u'name': u'\u0421\u0430\u043c\u043e\u043e\u0431\u043e\u0440\u043e\u043d\u0430 100', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=%22If+I+Stay%22', u'query': u'%22If+I+Stay%22', u'name': u'If I Stay', u'promoted_content': None}, {u'url': u'http://twitter.com/search?q=Gabashvili', u'query': u'Gabashvili', u'name': u'Gabashvili', u'promoted_content': None}], u'as_of': u'2014-04-16T12:20:29Z', u'locations': [{u'woeid': 1, u'name': u'Worldwide'}]}]
Is this a python list, containing several dictionaries? How can I extract hashtags out of that data and save them into new variables?
I'm new to python so please explain your choices.
Thanks!
In your example you have a single entry in your list, consisting of nested dicts with key value 'trends' each value is a another dict, the one you are interested in is 'name' and in particular if it starts with '#':
In [180]:
[x for x in temp[0]['trends'] if x['name'].find('#') ==0]
Out[180]:
[{'name': '#FETUSONEDIRECTIONDAY',
'promoted_content': None,
'query': '%23FETUSONEDIRECTIONDAY',
'url': 'http://twitter.com/search?q=%23FETUSONEDIRECTIONDAY'},
{'name': '#PrayForSouthKorea',
'promoted_content': None,
'query': '%23PrayForSouthKorea',
'url': 'http://twitter.com/search?q=%23PrayForSouthKorea'},
{'name': '#GaraGaraRP',
'promoted_content': None,
'query': '%23GaraGaraRP',
'url': 'http://twitter.com/search?q=%23GaraGaraRP'},
{'name': '#إسم_أمي_بجوالي',
'promoted_content': None,
'query': '%23%D8%A5%D8%B3%D9%85_%D8%A3%D9%85%D9%8A_%D8%A8%D8%AC%D9%88%D8%A7%D9%84%D9%8A',
'url': 'http://twitter.com/search?q=%23%D8%A5%D8%B3%D9%85_%D8%A3%D9%85%D9%8A_%D8%A8%D8%AC%D9%88%D8%A7%D9%84%D9%8A'},
{'name': '#KadınlarKamyonŞoförüOlursa',
'promoted_content': None,
'query': '%23Kad%C4%B1nlarKamyon%C5%9Eof%C3%B6r%C3%BCOlursa',
'url': 'http://twitter.com/search?q=%23Kad%C4%B1nlarKamyon%C5%9Eof%C3%B6r%C3%BCOlursa'}]
EDIT
To get just the hastags:
In [181]:
[x['name'] for x in temp[0]['trends'] if x['name'].find('#') ==0]
Out[181]:
['#FETUSONEDIRECTIONDAY',
'#PrayForSouthKorea',
'#GaraGaraRP',
'#إسم_أمي_بجوالي',
'#KadınlarKamyonŞoförüOlursa']
You can use startswith instead of find:
[x['name'] for x in temp[0]['trends'] if x['name'].startswith('#')]
Your data is a list containing one dictionary. One of the keys in this dictionary is called trends. The value for this key is a list of dictionaries. Each of these dictionaries contains a key called name, which holds a string containing a hashtag. Here's an example of accessing your data:
hashtags = []
trends = data[0]['trends']
for trend in trends:
name = trend['name']
if name.startswith('#'):
hashtags.append(name)
This can be compacted to:
hashtags = [trend['name'] for trend in data[0]['trends'] if trend['name'].startswith('#')]
First three lines of output:
>>> for hashtag in hashtags:
print(hashtag)
#FETUSONEDIRECTIONDAY
#PrayForSouthKorea
#GaraGaraRP

Categories

Resources