Python Tweepy no response on Stream - python

Hello, I am trying to listen to a tweet stream using Python with the Tweepy library.
I use Python 2.7.11 and installed Tweepy using pip. When I run the following code I get no response and no error. Can you tell me what the problem is and how I can fix it:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
import json
#EDITED 13:25
from tweepy.auth import API
# Twitter Credentials
ckey = 'Consumer Key (API Key)'
csecret = 'Consumer Secret (API Secret)'
atoken = 'Access Token'
asecret = 'Access Token Secret'
class listener(StreamListener):
    """Stream listener that prints the id of every message whose language is Dutch."""

    def on_data(self, data):
        """Handle one raw message delivered by the stream.

        ``data`` is JSON text; it is decoded and, when its ``lang`` field
        equals ``"nl"``, the message ``id`` is printed.

        Returns True in every case, so a single malformed message never
        yields a falsy result (NOTE(review): tweepy is understood to stop
        the stream only when a callback returns False -- confirm against
        the installed version).
        """
        try:
            tweet = json.loads(data)
            # Messages are not guaranteed to carry a "lang" key, so use
            # .get() instead of indexing to avoid a spurious KeyError.
            if tweet.get("lang") == "nl":
                print(tweet["id"])
        except Exception as e:
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit still terminate the script.
            print('failed on_data, ' + str(e))
            # Back off briefly before handling the next message.
            time.sleep(5)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
#EDITED 13:25
print api.verify_credentials()
# twitterStream.filter( track=lstZoekwaarde, languages="nl" )
twitterStream.filter(track='christmas', languages="nl")
CONSOLE: api.verify_credentials()
User(follow_request_sent=False, has_extended_profile=False, profile_use_background_image=True, _json={u'follow_request_sent': False, u'has_extended_profile': False, u'profile_use_background_image': True, u'default_profile_image': False, u'id': 169505005, u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme1/bg.png', u'verified': False, u'translator_type': u'none', u'profile_text_color': u'333333', u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/1425063736/image_normal.jpg', u'profile_sidebar_fill_color': u'DDEEF6', u'entities': {u'description': {u'urls': []}}, u'followers_count': 7, u'profile_sidebar_border_color': u'C0DEED', u'id_str': u'169505005', u'profile_background_color': u'C0DEED', u'listed_count': 0, u'status': {u'contributors': None, u'truncated': False, u'text': u'aan het werk bij Alfam', u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 541894460343582720, u'favorite_count': 1, u'source': u'Twitter for Android', u'retweeted': False, u'coordinates': {u'type': u'Point', u'coordinates': [5.207323, 52.0616799]}, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': []}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 0, u'id_str': u'541894460343582720', u'favorited': False, u'geo': {u'type': u'Point', u'coordinates': [52.0616799, 5.207323]}, u'in_reply_to_user_id_str': None, u'lang': u'nl', u'created_at': u'Mon Dec 08 09:58:01 +0000 2014', u'in_reply_to_status_id_str': None, u'place': {u'full_name': u'Bunnik, Nederland', u'url': u'https://api.twitter.com/1.1/geo/id/ef77325fbde0f5ad.json', u'country': u'The Netherlands', u'place_type': u'city', u'bounding_box': {u'type': u'Polygon', u'coordinates': [[[5.1532516, 51.9976555], [5.2803233, 51.9976555], [5.2803233, 52.0801935], [5.1532516, 52.0801935]]]}, u'contained_within': [], u'country_code': u'NL', u'attributes': {}, u'id': u'ef77325fbde0f5ad', u'name': u'Bunnik'}}, 
u'is_translation_enabled': False, u'utc_offset': None, u'statuses_count': 186, u'description': u'', u'friends_count': 7, u'location': u'', u'profile_link_color': u'1DA1F2', u'profile_image_url': u'http://pbs.twimg.com/profile_images/1425063736/image_normal.jpg', u'following': False, u'geo_enabled': True, u'profile_background_image_url': u'http://abs.twimg.com/images/themes/theme1/bg.png', u'screen_name': u'ehoeven', u'lang': u'en', u'profile_background_tile': False, u'favourites_count': 1, u'name': u'Erik Hoeven', u'notifications': False, u'url': None, u'created_at': u'Thu Jul 22 14:12:09 +0000 2010', u'contributors_enabled': False, u'time_zone': None, u'protected': False, u'default_profile': True, u'is_translator': False}, time_zone=None, id=169505005, description=u'', _api=<tweepy.api.API object at 0x7efdf2d5a510>, verified=False, profile_text_color=u'333333', profile_image_url_https=u'https://pbs.twimg.com/profile_images/1425063736/image_normal.jpg', profile_sidebar_fill_color=u'DDEEF6', is_translator=False, geo_enabled=True, entities={u'description': {u'urls': []}}, followers_count=7, protected=False, id_str=u'169505005', default_profile_image=False, listed_count=0, status=Status(contributors=None, truncated=False, text=u'aan het werk bij Alfam', is_quote_status=False, in_reply_to_status_id=None, id=541894460343582720, favorite_count=1, _api=<tweepy.api.API object at 0x7efdf2d5a510>, source=u'Twitter for Android', _json={u'contributors': None, u'truncated': False, u'text': u'aan het werk bij Alfam', u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 541894460343582720, u'favorite_count': 1, u'source': u'Twitter for Android', u'retweeted': False, u'coordinates': {u'type': u'Point', u'coordinates': [5.207323, 52.0616799]}, u'entities': {u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': []}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 0, u'id_str': u'541894460343582720', u'favorited': False, 
u'geo': {u'type': u'Point', u'coordinates': [52.0616799, 5.207323]}, u'in_reply_to_user_id_str': None, u'lang': u'nl', u'created_at': u'Mon Dec 08 09:58:01 +0000 2014', u'in_reply_to_status_id_str': None, u'place': {u'full_name': u'Bunnik, Nederland', u'url': u'https://api.twitter.com/1.1/geo/id/ef77325fbde0f5ad.json', u'country': u'The Netherlands', u'place_type': u'city', u'bounding_box': {u'type': u'Polygon', u'coordinates': [[[5.1532516, 51.9976555], [5.2803233, 51.9976555], [5.2803233, 52.0801935], [5.1532516, 52.0801935]]]}, u'contained_within': [], u'country_code': u'NL', u'attributes': {}, u'id': u'ef77325fbde0f5ad', u'name': u'Bunnik'}}, coordinates={u'type': u'Point', u'coordinates': [5.207323, 52.0616799]}, entities={u'symbols': [], u'user_mentions': [], u'hashtags': [], u'urls': []}, in_reply_to_screen_name=None, id_str=u'541894460343582720', retweet_count=0, in_reply_to_user_id=None, favorited=False, source_url=u'http://twitter.com/download/android', geo={u'type': u'Point', u'coordinates': [52.0616799, 5.207323]}, in_reply_to_user_id_str=None, lang=u'nl', created_at=datetime.datetime(2014, 12, 8, 9, 58, 1), in_reply_to_status_id_str=None, place=Place(_api=<tweepy.api.API object at 0x7efdf2d5a510>, country_code=u'NL', url=u'https://api.twitter.com/1.1/geo/id/ef77325fbde0f5ad.json', country=u'The Netherlands', place_type=u'city', bounding_box=BoundingBox(_api=<tweepy.api.API object at 0x7efdf2d5a510>, type=u'Polygon', coordinates=[[[5.1532516, 51.9976555], [5.2803233, 51.9976555], [5.2803233, 52.0801935], [5.1532516, 52.0801935]]]), contained_within=[], full_name=u'Bunnik, Nederland', attributes={}, id=u'ef77325fbde0f5ad', name=u'Bunnik'), retweeted=False), lang=u'en', utc_offset=None, statuses_count=186, profile_background_color=u'C0DEED', friends_count=7, profile_link_color=u'1DA1F2', profile_image_url=u'http://pbs.twimg.com/profile_images/1425063736/image_normal.jpg', notifications=False, default_profile=True, 
profile_background_image_url_https=u'https://abs.twimg.com/images/themes/theme1/bg.png', profile_background_image_url=u'http://abs.twimg.com/images/themes/theme1/bg.png', name=u'Erik Hoeven', is_translation_enabled=False, profile_background_tile=False, favourites_count=1, screen_name=u'ehoeven', url=None, created_at=datetime.datetime(2010, 7, 22, 14, 12, 9), contributors_enabled=False, location=u'', profile_sidebar_border_color=u'C0DEED', translator_type=u'none', following=False)

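Try this syntax (track and languages should be lists of strings; Tweepy joins these arguments with commas, so a bare string is treated as a sequence of single characters):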
twitterStream.filter(track=['christmas'], languages=['nl'])

Related

How do I flat a Nested Json file in Python?

I have a nested JSON file with arrays.
I want to flatten it so that there are no nested JSON objects.
For example:
Code for Json:
https://jsonblob.com/4b255e51-7e9f-11e8-b89c-37203846213e
This Json has sub-Json and also array that contains Json.
The source is:
Output should be:
If there are arrays that contain a single JSON object, they can be ignored. But if they also have sub-JSON objects, they should be treated as above. Basically, from my point of view, each array is like a separate JSON file.
I know that flattening the JSON can be done as:
from pandas.io.json import json_normalize
json_normalize(sample_object)
But this won't work with arrays.
Any idea how to make this work?
EDIT:
This is how arrays should be handled:
source:
Output:
This means the first JSON object in the array stays as is ({0}, {1}, etc.), but the sub-JSON objects are flattened. There are no columns like attributes_0_value! Basically, convert it to an array with a single JSON object. No nesting (unless there is another array).
Try using this:
import pandas as pd
import json
response = {u'total': 1245, u'limit': 2, u'results': [{u'customer': {u'lastName': u'rtyrtyrt', u'userAccountId': None, u'id': 637, u'firstName': u'rtyrtyrty', u'email': u'ddfgdfg#dfsdfgdfg.ggg'}, u'shippingAddress': {u'city': u'rtyrtyrtyrty', u'vatNumber': None, u'firstName': u'rtyrtyrty', u'companyName': None, u'country': {u'defaultCulture': {u'languageName': u'English', u'code': u'en-GB', u'id': 2, u'name': u'English'}, u'onlineStoreActive': True, u'currency': {u'symbol': u'\xa3', u'code': u'GBP', u'id': 2, u'currencyCulture': u'en-GB', u'numericCode': 826}, u'locale': None, u'isO2LetterCode': u'GB', u'vatPercentage': 20.0, u'continent': u'Europe ', u'isoNumericCode': u'826', u'invariantName': u'UNITED KINGDOM', u'id': 2, u'isO3LetterCode': u'GBR'}, u'stateProvince': None, u'lastName': u'rtyrtyrt', u'zipCode': u'5464556', u'email': u'ddfgdfg#dfsdfgdfg.ggg', u'addressLine2': None, u'addressLine1': u'tyrtyrty', u'phoneNumber': u'45644443456456546', u'addressName': None, u'id': 861}, u'orderDateUtc': u'0001-01-01 00:00', u'shoppingCardId': 0, u'paymentType': {u'code': u'SafeCharge', u'invariantName': u'Credit Card', u'id': 50}, u'orderNumber': u'0100000845', u'giftMessage': u'', u'storeId': 1, u'shippingService': {u'deletedOn': None, u'code': u'ROYALSTD', u'courier': None, u'updatedOn': u'2018-01-24 09:23', u'locale': None, u'createdOn': u'2018-01-24 09:23', u'storeId': 1, u'sortOrder': 1, u'invariantName': u'Royal Mail Standard', u'id': 1}, u'referenceOrderNumber': u'', u'totals': {u'shippingChargesNet': 3.95, u'orderLevelDiscount': 0.0, u'grandTotal': 8.95, u'vatPercentage': 20.0, u'shippingChargesDiscount': 0.0, u'shippingCharges': 3.95, u'units': 1, u'salesTaxPerc': 0.0, u'subTotal': 5.0, u'salesTax': 1.4916666666666667}, u'currency': {u'symbol': u'\xa3', u'code': u'GBP', u'id': 2, u'currencyCulture': u'en-GB', u'numericCode': 826}, u'status': {u'invariantName': u'Waiting PackingList', u'id': 4, u'name': None}, u'billingAddress': {u'city': u'rtyrtyrtyrty', 
u'vatNumber': None, u'firstName': u'rtyrtyrty', u'companyName': None, u'country': {u'defaultCulture': {u'languageName': u'English', u'code': u'en-GB', u'id': 2, u'name': u'English'}, u'onlineStoreActive': True, u'currency': {u'symbol': u'\xa3', u'code': u'GBP', u'id': 2, u'currencyCulture': u'en-GB', u'numericCode': 826}, u'locale': None, u'isO2LetterCode': u'GB', u'vatPercentage': 20.0, u'continent': u'Europe ', u'isoNumericCode': u'826', u'invariantName': u'UNITED KINGDOM', u'id': 2, u'isO3LetterCode': u'GBR'}, u'stateProvince': None, u'lastName': u'rtyrtyrt', u'zipCode': u'5464556', u'email': u'ddfgdfg#dfsdfgdfg.ggg', u'addressLine2': None, u'addressLine1': u'tyrtyrty', u'phoneNumber': u'456456456546', u'addressName': None, u'id': 861}, u'items': [{u'orderId': 844, u'discountEach': 0.0, u'cancellationId': 0, u'orderedQty': 1, u'giftMessage': u'', u'orderLevelDiscountEach': 0.0, u'historicalCategories': [], u'giftFrom': u'', u'netShippingChargesEach': 3.95, u'promotionItemIds': [], u'variantId': 11282, u'attributes': [{u'value': u'', u'key': u'ProductSeason'}], u'priceEach': 5.0, u'isGift': False, u'id': 939, u'giftTo': u''}], u'attributes': [{u'value': u'2', u'key': u'CustomerCultureId'}, {u'value': u'185.13.248.67', u'key': u'IpAddress'}, {u'value': u'UA', u'key': u'IpCountryCode'}, {u'value': u'OLS', u'key': u'OrderSource'}, {u'value': u'111790', u'key': u'SafeCharge_AuthCode'}, {u'value': u'UQBzAGQAaAB3ADgAMgB0AE4AagBHADUAegBpAHMAIwA7AC4ANgA3AFEAXwBMAGAAKwAqAHIAVgBGAEcAKQBFAD0ASQA8AC4ATgA0AD8ANQA+AFAAMwA=', u'key': u'SafeCharge_Token'}, {u'value': u'1512424599', u'key': u'SafeCharge_TransactionId'}, {u'value': u'1', u'key': u'StoreId'}], u'isGift': False, u'id': 844}, {u'customer': {u'lastName': u'dfgdfg', u'userAccountId': None, u'id': 638, u'firstName': u'dfgdfg', u'email': u'hfghfgh#dfdfg.fdg'}, u'shippingAddress': {u'city': u'fghfghhf', u'vatNumber': None, u'firstName': u'dfgdfg', u'companyName': None, u'country': {u'defaultCulture': {u'languageName': 
u'English', u'code': u'en-GB', u'id': 2, u'name': u'English'}, u'onlineStoreActive': True, u'currency': {u'symbol': u'\xa3', u'code': u'GBP', u'id': 2, u'currencyCulture': u'en-GB', u'numericCode': 826}, u'locale': None, u'isO2LetterCode': u'GB', u'vatPercentage': 20.0, u'continent': u'Europe ', u'isoNumericCode': u'826', u'invariantName': u'UNITED KINGDOM', u'id': 2, u'isO3LetterCode': u'GBR'}, u'stateProvince': None, u'lastName': u'dfgdfg', u'zipCode': u'4564566', u'email': u'hfghfgh#dfdfg.fdg', u'addressLine2': None, u'addressLine1': u'fghfghfgh', u'phoneNumber': u'567567567', u'addressName': None, u'id': 862}, u'orderDateUtc': u'0001-01-01 00:00', u'shoppingCardId': 0, u'paymentType': {u'code': u'SafeCharge', u'invariantName': u'Credit Card', u'id': 50}, u'orderNumber': u'0100000846', u'giftMessage': u'', u'storeId': 1, u'shippingService': {u'deletedOn': None, u'code': u'ROYALSTD', u'courier': None, u'updatedOn': u'2018-01-24 09:23', u'locale': None, u'createdOn': u'2018-01-24 09:23', u'storeId': 1, u'sortOrder': 1, u'invariantName': u'Royal Mail Standard', u'id': 1}, u'referenceOrderNumber': u'', u'totals': {u'shippingChargesNet': 3.95, u'orderLevelDiscount': 0.0, u'grandTotal': 8.95, u'vatPercentage': 20.0, u'shippingChargesDiscount': 0.0, u'shippingCharges': 3.95, u'units': 1, u'salesTaxPerc': 0.0, u'subTotal': 5.0, u'salesTax': 1.4916666666666667}, u'currency': {u'symbol': u'\xa3', u'code': u'GBP', u'id': 2, u'currencyCulture': u'en-GB', u'numericCode': 826}, u'status': {u'invariantName': u'Shipped', u'id': 6, u'name': None}, u'billingAddress': {u'city': u'fghfghhf', u'vatNumber': None, u'firstName': u'dfgdfg', u'companyName': None, u'country': {u'defaultCulture': {u'languageName': u'English', u'code': u'en-GB', u'id': 2, u'name': u'English'}, u'onlineStoreActive': True, u'currency': {u'symbol': u'\xa3', u'code': u'GBP', u'id': 2, u'currencyCulture': u'en-GB', u'numericCode': 826}, u'locale': None, u'isO2LetterCode': u'GB', u'vatPercentage': 20.0, 
u'continent': u'Europe ', u'isoNumericCode': u'826', u'invariantName': u'UNITED KINGDOM', u'id': 2, u'isO3LetterCode': u'GBR'}, u'stateProvince': None, u'lastName': u'dfgdfg', u'zipCode': u'4563334566', u'email': u'hfghfgh#dfdfg.fdg', u'addressLine2': None, u'addressLine1': u'fghfghfgh', u'phoneNumber': u'567567567', u'addressName': None, u'id': 862}, u'items': [{u'orderId': 845, u'discountEach': 0.0, u'cancellationId': 0, u'orderedQty': 1, u'giftMessage': u'', u'orderLevelDiscountEach': 0.0, u'historicalCategories': [], u'giftFrom': u'', u'netShippingChargesEach': 3.95, u'promotionItemIds': [], u'variantId': 11282, u'attributes': [{u'value': u'', u'key': u'ProductSeason'}], u'priceEach': 5.0, u'isGift': False, u'id': 940, u'giftTo': u''}], u'attributes': [{u'value': u'2', u'key': u'CustomerCultureId'}, {u'value': u'115.11.118.67', u'key': u'IpAddress'}, {u'value': u'UA', u'key': u'IpCountryCode'}, {u'value': u'OLS', u'key': u'OrderSource'}, {u'value': u'111335', u'key': u'SafeCharge_AuthCode'}, {u'value': u'UQA1AEYASgBVAEgAcgBvAE8AWAAlAFMAaABcAGAAMwA0AG4ATABiAHAAcQBoAEkAawB6AHMANQBXAEgAUQApACQATwBpAEQAUABAAGcAKwBcADQAMwA=', u'key': u'SafeCharge_Token'}, {u'value': u'1512424624', u'key': u'SafeCharge_TransactionId'}], u'isGift': False, u'id': 845}], u'offset': 0}
sample_object = pd.DataFrame(response)['results'].to_dict()
def flatten_json(y):
    """Flatten nested dictionaries into a single-level dictionary.

    Keys of nested dictionaries are joined with ``'_'`` (for example
    ``{'a': {'b': 1}}`` becomes ``{'a_b': 1}``). Every non-dictionary
    value, including lists, is stored unchanged, so dictionaries that sit
    inside a list are NOT flattened.

    Args:
        y: The (possibly nested) dictionary to flatten. A non-dictionary
            value is stored under the empty-string key.

    Returns:
        A new flat dictionary; ``y`` itself is not modified. Empty nested
        dictionaries contribute no keys.
    """
    out = {}

    def flatten(x, name=''):
        # isinstance() rather than an exact type check so that dict
        # subclasses (e.g. OrderedDict) are flattened as well.
        if isinstance(x, dict):
            for key in x:
                # %-formatting accepts non-string keys and, on Python 2,
                # keeps unicode keys as unicode instead of raising.
                flatten(x[key], name + '%s_' % (key,))
        else:
            # Drop the trailing '_' separator that the recursion appended.
            out[name[:-1]] = x

    flatten(y)
    return out
# Flatten every record on its own, keeping the key it had in sample_object.
flat = dict(
    (idx, flatten_json(record)) for idx, record in sample_object.items()
)

# Write all flattened records to disk as one JSON document.
with open('flat.json', 'w') as jsonfile:
    json.dump(flat, jsonfile)
I personally use this procedure:
First, store the JSON data as a string (or load it from a URL or file).
Then use the nested_to_record() method from pandas:
import json
from pandas.io.json.normalize import nested_to_record
json_dic = json.loads(json_str)
flat = nested_to_record(json_dic, sep='_')
for key in flat:
print key, flat[key]
Output:

Twitter API search returns truncated tweets

I'm trying to build a python program to get tweets based around a certain keyword. However, while I can successfully retrieve tweets, they come back truncated.
How can I get the full text of a tweet?
CODE: (Sample output below code)
(python-twitter module)
import twitter
api = twitter.Api(consumer_key=CONSUMER_KEY,
consumer_secret=CONSUMER_SECRET,
access_token_key=ACCESS_TOKEN,
access_token_secret=ACCESS_SECRET)
results = api.GetSearch(term="car", since="2018-04-11", until="2018-04-12", count=5)
for twt in results:
tempTweet = (str(twt))
tweet = json.loads(tempTweet)
for key in tweet:
print(str(key) + ": " + str(tweet[key]))
print("#############################################")
SAMPLE OUTPUT:
created_at: Wed Apr 11 20:55:25 +0000 2018
favorite_count: 1573
hashtags: []
id: 984173096566341632
id_str: 984173096566341632
lang: en
retweet_count: 1480
source: TweetDeck
**text**: Caution: Disturbing video. Car speeds through red light, striking pedestrian during vigil Wednesday for cyclist kil… **SHORTENEDURLHERE**
truncated: True
urls: [{'expanded_url':'https://twitter.com/i/web/status/984173096566341632', 'url':**SHORTENEDURLHERE**}]
user: {'created_at': 'Wed Nov 14 17:43:42 +0000 2007', 'description': 'KTLA has been keeping Southern California informed since 1947. \n\nHave great video, photos or story tips? Share with us using #ktla.', 'favourites_count': 1078, 'followers_count': 717397, 'friends_count': 769, 'geo_enabled': True, 'id': 10252962, 'id_str': '10252962', 'lang': 'en', 'listed_count': 3885, 'location': 'Los Angeles, CA', 'name': 'KTLA', 'profile_background_color': '040718', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/507323957578436608/olqcU4MS.jpeg', 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/507323957578436608/olqcU4MS.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/10252962/1369959990', 'profile_image_url': 'http://pbs.twimg.com/profile_images/809849913240481792/YQ0aT9hv_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/809849913240481792/YQ0aT9hv_normal.jpg', 'profile_link_color': '24009C', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': '95E8EC', 'profile_text_color': '3C3940', 'profile_use_background_image': True, 'screen_name': 'KTLA', 'statuses_count': 144937, 'time_zone': 'Pacific Time (US & Canada)', 'url': '**SHORTENEDURLHERE**', 'utc_offset': -25200, 'verified': True}
user_mentions: []
#############################################
You just need to pass tweet_mode='extended' while initialising Api.
import twitter
api = twitter.Api(consumer_key=CONSUMER_KEY,
consumer_secret=CONSUMER_SECRET,
access_token_key=ACCESS_TOKEN,
access_token_secret=ACCESS_SECRET,
tweet_mode='extended')
results = api.GetSearch(term="car", since="2018-04-11", until="2018-04-12", count=5)
for twt in results:
tempTweet = (str(twt))
tweet = json.loads(tempTweet)
print(tweet)
This will print,
{u'lang': u'en', u'full_text': u'Have you ever been in so much trouble that you\u2019ve narrowed your options down to a. Winning the lottery b. Wrapping your car around a telephone pole and c. Giving the creepy neighborhood millionaire the date he keeps pestering for at Cheescake Factory? \nPffffffttt. Me either. <twitter link>', u'media': [{u'expanded_url': u'https://twitter.com/_jkate/status/984219542061703168/photo/1', u'sizes': {u'large': {u'h': 1280, u'w': 719, u'resize': u'fit'}, u'small': {u'h': 680, u'w': 382, u'resize': u'fit'}, u'medium': {u'h': 1200, u'w': 674, u'resize': u'fit'}, u'thumb': {u'h': 150, u'w': 150, u'resize': u'crop'}}, u'url': u'<twitter link>', u'media_url_https': u'https://pbs.twimg.com/media/Daimc02VQAAulWe.jpg', u'display_url': u'pic.twitter.com/ZfCeeZN4g0', u'type': u'photo', u'id': 984219532733530112, u'media_url': u'http://pbs.twimg.com/media/Daimc02VQAAulWe.jpg'}], u'created_at': u'Wed Apr 11 23:59:59 +0000 2018', u'hashtags': [], u'user_mentions': [], u'source': u'Twitter for iPhone', u'id_str': u'984219542061703168', u'urls': [], u'retweet_count': 2, u'id': 984219542061703168, u'favorite_count': 83, u'user': {u'profile_use_background_image': True, u'id': 492519212, u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/985737787306422273/aJykLNLj_normal.jpg', u'profile_sidebar_fill_color': u'F3F3F3', u'profile_text_color': u'333333', u'followers_count': 13069, u'location': u'United States', u'profile_background_color': u'EBEBEB', u'id_str': u'492519212', u'utc_offset': -21600, u'statuses_count': 1543, u'description': u'Illegitimate love child of digital marketing and \u2615\ufe0f. 
Instagram: <twitter link>', u'friends_count': 11470, u'profile_link_color': u'990000', u'profile_image_url': u'http://pbs.twimg.com/profile_images/985737787306422273/aJykLNLj_normal.jpg', u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme7/bg.gif', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/492519212/1509938057', u'profile_background_image_url': u'http://abs.twimg.com/images/themes/theme7/bg.gif', u'screen_name': u'_jkate', u'lang': u'en', u'favourites_count': 8060, u'name': u'\U0001f319J Kate \U0001f4ab', u'created_at': u'Tue Feb 14 20:23:54 +0000 2012', u'time_zone': u'Mountain Time (US & Canada)', u'profile_sidebar_border_color': u'DFDFDF', u'listed_count': 55}}

Python - Parsing Json file and getting multiple values from dictionaries in list

I have a .json file and I successfully parsed it, as you can see below. What I want is to get the Id of each entry in ['Users'] and the ['Photos'] ['Url'] values for the related Id.
My .json output
{u'Success': True,
u'Total': 172159,
u'Users': [{u'AboutMe': u'U\xe7mak i\xe7in ku\u015f olmak gerekmiyor, k\xfc\xe7\xfck sevin\xe7ler olsun yeter.',
u'Age': 34,
u'Education': None,
u'EyeColor': u'Mavi',
u'Gender': 2,
u'HairColor': u'A\xe7\u0131k kahve',
u'Height': 183,
u'Id': u'19185978',
u'IsHot': False,
u'IsOnline': True,
u'Job': u'Serbest meslek',
u'JobId': None,
u'LastActivityDate': u'2018-03-07T03:43:50.53855Z',
u'Location': u'\u0130zmir - Merkez',
u'LookingFor': None,
u'MaritalStatus': u'Single',
u'MaritalStatusId': None,
u'Photo': None,
u'Photos': [{u'CreateDate': u'0001-01-01T00:00:00',
u'Id': None,
u'PhotoName': None,
u'State': None,
u'Url': u'https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/a2f/a2fe1237-e0e1-4456-bd7b-b1d55bc8f00e.jpg.jpg'},
{u'CreateDate': u'0001-01-01T00:00:00',
u'Id': None,
u'PhotoName': None,
u'State': None,
u'Url': u'https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/87f/87fba6a5-8555-4b53-968b-678f832fd28f.jpg.jpg'},
{u'CreateDate': u'0001-01-01T00:00:00',
u'Id': None,
u'PhotoName': None,
u'State': None,
u'Url': u'https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/18d/18d3d6bc-97ec-49c3-80d4-57d4e58d020f.jpg.jpg'},
{u'CreateDate': u'0001-01-01T00:00:00',
u'Id': None,
u'PhotoName': None,
u'State': None,
u'Url': u'https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/eba/eba25a06-4168-49cf-b0cb-e501d0efb965.jpg.jpg'}],
u'RelationshipType': u'E-Posta Arkada\u015fl\u0131\u011f\u0131, , , ',
u'StatusMessage': u'Siz istiyorsunuz ki her \u015fey benim istedi\u011fim gibi olsun, herkes pe\u015fimden ko\u015fsun. Ama her zaman \xf6yle olmuyor.',
u'TownName': None,
u'Username': u'45ahmet35',
u'Weight': 85,
u'Zodiac': u'Ko\xe7',
u'ZodiacId': None},
{u'AboutMe': None,
u'Age': 42,
u'Education': None,
u'EyeColor': u'Kahverengi',
u'Gender': 2,
u'HairColor': u'K\u0131rla\u015fm\u0131\u015f',
u'Height': 175,
u'Id': u'19274893',
u'IsHot': False,
u'IsOnline': True,
u'Job': u'',
u'JobId': None,
u'LastActivityDate': u'2018-03-07T03:43:24.555Z',
u'Location': u'\u0130zmir - Alia\u011fa',
u'LookingFor': None,
u'MaritalStatus': u'Single',
u'MaritalStatusId': None,
u'Photo': None,
u'Photos': [{u'CreateDate': u'0001-01-01T00:00:00',
u'Id': None,
u'PhotoName': None,
u'State': None,
u'Url': u'https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/bf0/bf0fbad6-076a-4924-b496-5385044c08bc.jpg.jpg'},
{u'CreateDate': u'0001-01-01T00:00:00',
u'Id': None,
u'PhotoName': None,
u'State': None,
u'Url': u'https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/998/99893a8e-5342-441f-bd83-82fa20bdb27a.jpg.jpg'},
],
u'RelationshipType': u'',
u'StatusMessage': u'Yok ',
u'TownName': None,
u'Username': u'kaya3510',
u'Weight': 80,
u'Zodiac': u'Ko\xe7',
u'ZodiacId': None},
And My Python Code
import json
json_obj = json.load(open("13.json"))
for i in json_obj['Users']:
print i['Id']
print i['Photos']['Url']
And the Error I get.
19185978
Traceback (most recent call last):
File "/root/Desktop/siberAlem/parser.py", line 7, in
print i['Photos']['Url']
TypeError: list indices must be integers, not str
Thanks in advance.
This should help (i['Photos'] is a list of dictionaries, so iterate over it instead of indexing it with a string):
for i in json_obj['Users']:
print i["Id"]
for j in i["Photos"]:
print j["Url"]
Output:
19185978
https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/a2f/a2fe1237-e0e1-4456-bd7b-b1d55bc8f00e.jpg.jpg
https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/87f/87fba6a5-8555-4b53-968b-678f832fd28f.jpg.jpg
https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/18d/18d3d6bc-97ec-49c3-80d4-57d4e58d020f.jpg.jpg
https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/eba/eba25a06-4168-49cf-b0cb-e501d0efb965.jpg.jpg
19274893
https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/bf0/bf0fbad6-076a-4924-b496-5385044c08bc.jpg.jpg
https://diymyqt2ncnnc.cloudfront.net/s3/traktorumnetphotos/998/99893a8e-5342-441f-bd83-82fa20bdb27a.jpg.jpg

How to make grep using Python

Please help me.
I have the following API command result:
[{u'task': {u'url': u'http://192.168.1.1/job/f1111111111/', u'color': u'aborted', u'name': u'f1111111111'}, u'stuck': False, u'url': u'queue/item/37/', u'inQueueSince': 1397554800875L, u'actions': [{u'causes': [{u'userName': u'admin', u'userId': u'admin', u'shortDescription': u'Started by user admin'}]}], u'why': u'Waiting for next available executor on NODE_1', u'buildable': True, u'params': u'', u'buildableStartMilliseconds': 1397554800878L, u'id': 37, u'pending': False, u'blocked': False}, {u'task': {u'url': u'http://192.168.1.1/job/1234/', u'color': u'aborted', u'name': u'1234'}, u'stuck': False, u'url': u'queue/item/36/', u'inQueueSince': 1397554797741L, u'actions': [{u'causes': [{u'userName': u'admin', u'userId': u'admin', u'shortDescription': u'Started by user admin'}]}], u'why': u'Waiting for next available executor on NODE_1', u'buildable': True, u'params': u'', u'buildableStartMilliseconds': 1397554797744L, u'id': 36, u'pending': False, u'blocked': False}]
How can I do a grep-like search in Python to get the following output:
u'name': u'f1111111111'
u'name': u'1234'
Normally you use grep (or a regular expression) only as a last resort to get structure out of a string literal. In your case you already have a structured result, so you should be able to iterate over it (which is also much faster than parsing).
for row in result: #iterate over the result list
# do something with the row
print row["task"]["name"] #access particular key
Ideally, you would convert the result into a custom dictionary-like structure where you can access the result properties directly, e.g. row.task.name. How to get there is nicely explained here
You'd better loop through your list like this:
>>> s=[{u'task': {u'url': u'http://192.168.1.1/job/f1111111111/', u'color': u'aborted', u'name': u'f1111111111'}, u'stuck': False, u'url': u'queue/item/37/', u'inQueueSince': 1397554800875L, u'actions': [{u'causes': [{u'userName': u'admin', u'userId': u'admin', u'shortDescription': u'Started by user admin'}]}], u'why': u'Waiting for next available executor on NODE_1', u'buildable': True, u'params': u'', u'buildableStartMilliseconds': 1397554800878L, u'id': 37, u'pending': False, u'blocked': False}, {u'task': {u'url': u'http://192.168.1.1/job/1234/', u'color': u'aborted', u'name': u'1234'}, u'stuck': False, u'url': u'queue/item/36/', u'inQueueSince': 1397554797741L, u'actions': [{u'causes': [{u'userName': u'admin', u'userId': u'admin', u'shortDescription': u'Started by user admin'}]}], u'why': u'Waiting for next available executor on NODE_1', u'buildable': True, u'params': u'', u'buildableStartMilliseconds': 1397554797744L, u'id': 36, u'pending': False, u'blocked': False}]
>>> for i in s:
... print i['task']['name']
...
f1111111111
1234
You can store the information in a list:
>>> l=[]
>>> for i in s:
... l.append(i['task']['name'])
...
>>>
>>> print l
[u'f1111111111', u'1234']

Why do I get a pymongo.cursor.Cursor when trying to query my mongodb db via pymongo?

I have consumed a bunch of tweets in a mongodb database. I would like to query these tweets using pymongo. For example, I would like to query for screen_name. However, when I try to do this, python does not return a tweet but a message about pymongo.cursor.Cursor. Here is my code:
import sys
import pymongo
from pymongo import Connection
connection = Connection()
db = connection.test
tweets = db.tweets
list(tweets.find())[:1]
I get a JSON, which looks like this:
{u'_id': ObjectId('51c8878fadb68a0b96c6ebf1'),
u'contributors': None,
u'coordinates': {u'coordinates': [-75.24692983, 43.06183036],
u'type': u'Point'},
u'created_at': u'Mon Jun 24 17:53:19 +0000 2013',
u'entities': {u'hashtags': [],
u'symbols': [],
u'urls': [],
u'user_mentions': []},
u'favorite_count': 0,
u'favorited': False,
u'filter_level': u'medium',
u'geo': {u'coordinates': [43.06183036, -75.24692983], u'type': u'Point'},
u'id': 349223725943623680L,
u'id_str': u'349223725943623680',
u'in_reply_to_screen_name': None,
u'in_reply_to_status_id': None,
u'in_reply_to_status_id_str': None,
u'in_reply_to_user_id': None,
u'in_reply_to_user_id_str': None,
u'lang': u'en',
u'place': {u'attributes': {},
u'bounding_box': {u'coordinates': [[[-79.76259, 40.477399],
[-79.76259, 45.015865],
[-71.777491, 45.015865],
[-71.777491, 40.477399]]],
u'type': u'Polygon'},
u'country': u'United States',
u'country_code': u'US',
u'full_name': u'New York, US',
u'id': u'94965b2c45386f87',
u'name': u'New York',
u'place_type': u'admin',
u'url': u'http://api.twitter.com/1/geo/id/94965b2c45386f87.json'},
u'retweet_count': 0,
u'retweeted': False,
u'source': u'Twitter for iPhone',
u'text': u'Currently having a heat stroke',
u'truncated': False,
u'user': {u'contributors_enabled': False,
u'created_at': u'Fri Oct 28 02:04:05 +0000 2011',
u'default_profile': False,
u'default_profile_image': False,
u'description': u'young and so mischievious',
u'favourites_count': 1798,
u'follow_request_sent': None,
u'followers_count': 368,
u'following': None,
u'friends_count': 335,
u'geo_enabled': True,
u'id': 399801173,
u'id_str': u'399801173',
u'is_translator': False,
u'lang': u'en',
u'listed_count': 0,
u'location': u'Upstate New York',
u'name': u'Joe Catanzarita',
u'notifications': None,
u'profile_background_color': u'D6640D',
u'profile_background_image_url': u'http://a0.twimg.com/profile_background_images/702001815/f87508e73bbfab8c8c85ebe10b29fcf6.png',
u'profile_background_image_url_https': u'https://si0.twimg.com/profile_background_images/702001815/f87508e73bbfab8c8c85ebe10b29fcf6.png',
u'profile_background_tile': True,
u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/399801173/1367200323',
u'profile_image_url': u'http://a0.twimg.com/profile_images/378800000012256721/d8b5f801fb331de6ead4aed42dc77a46_normal.jpeg',
u'profile_image_url_https': u'https://si0.twimg.com/profile_images/378800000012256721/d8b5f801fb331de6ead4aed42dc77a46_normal.jpeg' ,
u'profile_link_color': u'140DE0',
u'profile_sidebar_border_color': u'FFFFFF',
u'profile_sidebar_fill_color': u'E0F5A6',
u'profile_text_color': u'120212',
u'profile_use_background_image': True,
u'protected': False,
u'screen_name': u'JoeCatanzarita',
u'statuses_count': 6402,
u'time_zone': u'Quito',
u'url': None,
u'utc_offset': -18000,
u'verified': False}}
However, when I try to query for this screen_name, I get:
tweets.find({"screen_name": "JoeCatanzarita"})
<pymongo.cursor.Cursor at 0x52c02f0>
And when I then try to count the number of tweets which have "screen_name": "name", I get:
tweets.find({"screen_name": "name"}).count()
0
Any idea what I am doing wrong/how I can get pymongo to return the tweets I am looking for?
Thanks!
PyMongo's find() method returns a Cursor. To actually execute the query on the server and retrieve results, iterate the cursor with list or a for loop:
for doc in tweets.find({'screen_name': 'name'}):
print(doc)
# Or:
docs = list(tweets.find({'screen_name': 'name'}))
If tweets.find({"screen_name": "name"}).count() returns 0, it means no documents match your query.
Edit: now that you've posted an example document, I see you want to query like:
list(tweets.find({'user.screen_name': 'name'}))
... since the screen_name field is embedded in the user sub-document.
I think the problem is that "screen_name" is inside a sub-document if you can provide the document structure I may be able to help you.
Ok now I see what's your problem:
If you look carefully at your document, you will notice that "screen_name" is inside the subdocument user, so if you want to access it, all you have to do is the following:
tweets.find({"user.screen_name": "JoeCatanzarita"}) #for example.
Whenever you are in a situation where the element you are trying to find is inside a subdocument like in this situation or inside an array always use this syntax.
I had this same problem with a collection.find() call.
I checked the type of the object and it is python dict. so I took the dict and iterated through it even though there was only one item and she's working like a charm.
myResult = db.find({}, {<!-- blah blah blah for the fields you want -->}).sort({"_id":1}).limit(1)
for item in myResult:
print item
I know this was ages ago but I spent some time surfing this and couldn't find an easy explanation.
Hope this helps.

Categories

Resources