IndentationError with Python - python

I am currently trying to stream tweets for a project using Python, Elasticsearch and Kibana.
While running my Python script I get an IndentationError and I don't understand why. Can anyone help me with this problem?
Thanks in advance.
My Python script :
import json
import tweepy
import textblob
import elasticsearch
from tweepy import OAuthHandler, Stream
from tweepy.streaming import StreamListener
from textblob import TextBlob
from elasticsearch import Elasticsearch
consumer_key = '...'
consumer_secret = '...'
access_token = '...'
access_token_secret = '...'
elastic_search = Elasticsearch()
class MyStreamListener(StreamListener):
    """Stream listener that scores each tweet's sentiment and indexes it.

    Each tweet is analysed with TextBlob, labelled "negative", "neutral" or
    "positive" from its polarity, and stored in the ``sentiment`` index via
    the module-level ``elastic_search`` client.
    """

    def on_data(self, data):
        """Handle one raw JSON message from the stream.

        Args:
            data: The raw JSON string delivered by the stream.

        Returns:
            True, so the stream keeps running.
        """
        dict_data = json.loads(data)

        # Overriding on_data means every stream message arrives here, not
        # only tweets (e.g. delete or limit notices). Those carry no "text"
        # key, so skip them instead of crashing with a KeyError.
        if "text" not in dict_data:
            return True

        tweet = TextBlob(dict_data["text"])
        polarity = tweet.sentiment.polarity
        print(polarity)

        if polarity < 0:
            sentiment = "negative"
        elif polarity == 0:
            sentiment = "neutral"
        else:
            sentiment = "positive"
        print(sentiment)

        elastic_search.index(
            index="sentiment",
            doc_type="test-type",
            body={
                "author": dict_data["user"]["screen_name"],
                "date": dict_data["created_at"],
                "message": dict_data["text"],
                "polarity": polarity,
                "subjectivity": tweet.sentiment.subjectivity,
                "sentiment": sentiment,
            },
        )
        return True

    def on_failure(self, status):
        """Print the status that was reported for a failure."""
        print(status)

    def on_error(self, status_code):
        """Report a non-200 HTTP status and stop the stream.

        tweepy invokes ``on_error`` (not ``on_failure``) for HTTP errors, so
        delegate to ``on_failure`` to make sure the status is actually shown.
        Returning False keeps the base class behaviour of disconnecting.
        """
        self.on_failure(status_code)
        return False
if __name__ == '__main__':
listener = MyStreamListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, listener)
stream.filter(track=['congress'])
# user_choice = input("Please choose a Hashtag... : ")
# retrieve_tweets = api.search(user_choice)
The error message :
File "sentiment.py", line 21
tweet = TextBlob(dict_data["text"])
^
IndentationError: unindent does not match any outer indentation level

You do have tabs there.
def on_data(self, data):
dict_data = json.loads(data)
# ^ tab and 4 spaces here
tweet = TextBlob(dict_data["text"])
# ^ 8 spaces here
print(tweet.sentiment.polarity)
# ^ ^ two tabs here (equal 16 spaces)
Note that the representation in SO site translates the tabs to spaces, but if you copy the source into a code editor, it reveals the tabs:

Related

how to get specific data key in json response from api twitter in python

I am trying to get only the "id" and "text" attributes from the Twitter API response, but the code below outputs every key. How can I get just "id" and "text"?
from tweepy import Stream
from tweepy.streaming import StreamListener
class StdOutListener(StreamListener):
    """Stream listener that appends every raw stream message to python2.json."""

    def on_data(self, data):
        """Append one raw message to ``python2.json``.

        Args:
            data: The raw JSON string delivered by the stream.

        Returns:
            True in every case, so a failed write does not stop the stream.
        """
        try:
            with open('python2.json', 'a') as f:
                f.write(data)
        except Exception as e:
            # Exception rather than BaseException: KeyboardInterrupt and
            # SystemExit must still be able to terminate the program.
            print("Error on_data: %s" % str(e))
        return True

    def on_error(self, status_code):
        """Print the status code and stop streaming on HTTP 420.

        Returns False only for status 420; any other status falls through
        and returns None.
        """
        if status_code == 420:
            print(status_code)
            return False
if __name__ == '__main__':
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
hasil = stream.filter(track=['wedding','sunday'])
the result :
{"created_at":"Sun Apr 04 17:16:28,
"id":1378758380722946049,
"id_str":"1378758380722946049",
"text":"Nonton wedding atta aurel jadi pen nikah",
"source":"Twitter Web",
"truncated":false,
"in_reply_to_status_id":null,
"in_reply_to_status_id_str":null
}
the expected result just id and text
{
"id":1378758380722946049,
"text":"Nonton wedding atta aurel jadi pen nikah"
}
It looks like you only want to write a JSON object containing the id and text fields, so this modified version of your code takes both of those fields out of jsonData, creates its own new JSON object containing only id and text (newJSON), and then writes it to your file.
import json
from tweepy import Stream
from tweepy.streaming import StreamListener
class StdOutListener(StreamListener):
    """Stream listener that stores only the id and text of each tweet."""

    def on_data(self, data):
        """Append the ``id`` and ``text`` of one tweet to ``python2.json``.

        Args:
            data: The raw JSON string delivered by the stream.

        Returns:
            True in every case, so a bad message does not stop the stream.
        """
        try:
            jsonData = json.loads(data)
            tweet_id = jsonData["id"]  # not named `id`: that shadows the builtin
            text = jsonData["text"]
            newJSON = {'id': tweet_id, 'text': text}
            with open('python2.json', 'a') as f:
                # Write the reduced record (newJSON), not the original
                # payload. One object per line keeps appended records
                # separable when the file is read back.
                f.write(json.dumps(newJSON) + "\n")
        except Exception as e:
            # Also reached for messages without "id"/"text" (KeyError).
            # Exception, not BaseException, so Ctrl-C still works.
            print("Error on_data: %s" % str(e))
        return True
P.S.: Your code in your question has severe formatting problems, on_data and on_error should be indented under the StdOutListener class.

TabError: inconsistent use of tabs and spaces in indentation (except: ^)

I have code like the one below, but there is something wrong in my main code. Can anyone help me spot the problem? I am stuck on this indentation error: it says an indentation error occurred at `except:` in the main block.
import tweepy
from datetime import datetime,timedelta
import csv
def get_all_tweets(screen_name):
    """Download a user's timeline and save it as ``<screen_name>_tweets.csv``.

    Requests up to 20 pages of 100 tweets each through ``tweepy.Cursor`` and
    writes one CSV row per tweet with the columns id, created_at and text.

    Args:
        screen_name: Twitter handle whose timeline is fetched.
    """
    # Redacted credentials: replace the placeholders with real keys.
    # (A bare `*` is not valid Python, so they are written as strings.)
    consumer_key = "*"
    consumer_secret = "*"
    access_key = "*"
    access_secret = "*"

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit_notify=True)

    pages = tweepy.Cursor(api.user_timeline,
                          screen_name=screen_name,
                          tweet_mode="extended",
                          wait_on_rate_limit=True,
                          include_retweets=False,
                          count=100).pages(20)

    # Keep full_text as str: under Python 3 an encoded bytes object would be
    # written to the CSV as its repr ("b'...'"). The encoding is applied
    # once, when the file is opened.
    noRT = [
        [status.id_str, status.created_at, status.full_text]
        for page in pages
        for status in page
    ]

    # newline='' is what the csv module expects for files it writes;
    # without it extra blank rows can appear on Windows.
    csv_name = '{}_tweets.csv'.format(screen_name)
    with open(csv_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(noRT)
    print('{}_tweets.csv was successfully created.'.format(screen_name))
if __name__ == '__main__':
    # Fetch the timeline of every account; one failing account must not
    # abort the remaining downloads.
    usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        except Exception:
            # `x` is the loop variable; the previous `twitter_id` was never
            # defined and raised NameError inside the handler. Catching
            # Exception (not a bare except) keeps Ctrl-C working.
            print("%s does not exist" % x)
What's wrong with this code?
if __name__ == '__main__':
usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
for x in usernames:
try:
get_all_tweets(x)
except:
print "%s does not exist" % (twitter_id)
pass
You're mixing tabs and spaces inside your for loops, e.g.:
Python 3 disallows mixing the use of tabs and spaces for indentation.

Keeping track of which (tweepy) filter caught a tweet

I need to track many keywords on twitter and send the tweets to MongoDB. I used this for my code :
How can I consume tweets from Twitter's streaming api and store them in mongodb
import json
import pymongo
import tweepy
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that stores every incoming message in MongoDB."""

    def __init__(self, api):
        """Set up the listener and open the ``test`` database.

        Args:
            api: The ``tweepy.API`` instance kept on ``self.api``.
        """
        # super() must name this class. super(tweepy.StreamListener, self)
        # starts the lookup *after* StreamListener and so skipped its
        # __init__ entirely.
        super(CustomStreamListener, self).__init__(api)
        self.api = api
        self.db = pymongo.MongoClient().test

    def on_data(self, tweet):
        """Parse one raw JSON message and insert it into ``tweets``."""
        # NOTE(review): Collection.insert is deprecated in newer PyMongo
        # releases in favour of insert_one; confirm the installed version.
        self.db.tweets.insert(json.loads(tweet))

    def on_error(self, status_code):
        return True  # Don't kill the stream

    def on_timeout(self):
        return True  # Don't kill the stream
sapi = tweepy.streaming.Stream(auth, CustomStreamListener(api))
to_track = ['keyword1', 'keyword2', 'keyword3']
sapi.filter(track = to_track)
Is there a way for me to keep track of which keyword is responsible for each tweet coming in ? (Without doing a grep search in each one)
I'm not sure how the on_data function works but you could use on_status and do something like below:
import tweepy
consumer_key = ''
consumer_secret = ''
access_key = ''
access_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that records which tracked keyword matched a tweet."""

    def __init__(self, api=None,
                 keywords=('keyword1', 'keyword2', 'keyword3')):
        """Set up the listener.

        Args:
            api: Optional ``tweepy.API`` instance passed to the base class.
            keywords: Words looked for in each tweet; should match the
                ``track`` list given to ``Stream.filter``.
        """
        super(CustomStreamListener, self).__init__(api)
        # Local import: this snippet only imports tweepy at module level.
        import pymongo
        self.keywords = tuple(keywords)
        # on_status uses self.db, which the listener must create itself.
        self.db = pymongo.MongoClient().test

    def on_status(self, status):
        """Store the tweet once per matching keyword, tagged with it."""
        words = status.text.split()
        for keyword in self.keywords:
            if keyword in words:
                print("do something with %s" % keyword)
                # status.text is plain text, not JSON, so json.loads() on it
                # fails; status._json holds the parsed tweet. A fresh dict is
                # built for every insert because the driver adds an "_id" to
                # the document it is given.
                self.db.tweets.insert(
                    dict(status._json, matched_keyword=keyword))
sapi = tweepy.streaming.Stream(auth, CustomStreamListener(api))
to_track = ['keyword1', 'keyword2', 'keyword3']
sapi.filter(track = to_track)

Get tweets with hashtag from specific time period

I'm new to python programming and Twitter API.
I tried to collect tweets with a hashtag from a specific time period (say 11/24/216-11/27/2017). My goal is to get the coordinates from those extracted tweets and save the coordinates and the tweet text into a CSV file.
But my problem is that i don't know how to set the time filter and save them into a file. What's more, only a few tweets contained the coordinates, was that common?
Here are the python scripts that i found online.
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
#Enter Twitter API Key information
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''
file = open("C:\\Output.csv", "w") #This script didn't work on my Mac#
strong text
file.write("X,Y\n")
data_list = []
count = 0
class listener(StreamListener):
    """Stream listener that writes tweet coordinates as "lon,lat" rows.

    Uses the module-level ``file``, ``data_list`` and ``count`` objects.
    """

    def on_data(self, data):
        """Record the coordinates of one message, if it has any.

        Returns:
            True while ``count`` is at most 2000; once it exceeds 2000 the
            output file is closed and False is returned.
        """
        global count
        # How many tweets you want to find, could change to time based
        if count > 2000:
            file.close()
            return False

        json_data = json.loads(data)
        # .get(): messages that are not tweets have no "coordinates" key and
        # would otherwise raise KeyError.
        coords = json_data.get("coordinates")
        if coords is not None:
            print(coords["coordinates"])
            lon = coords["coordinates"][0]
            lat = coords["coordinates"][1]
            data_list.append(json_data)
            file.write(str(lon) + ",")
            file.write(str(lat) + "\n")
            count += 1
        return True

    def on_error(self, status):
        """Print the reported status."""
        print(status)
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
twitterStream = Stream(auth, listener())
#What you want to search for here
twitterStream.filter(track=[""])

How to encode/decode Tweets in Hindi language from Twitter to display in Hindi font when written to a file?

I am trying to crawl Twitter for Hindi Tweets using Hindi emotion words(eg.खुशी, गुस्सा) to get Tweets with these words using python 2.7. I am using the Streaming API and the code for it is below
import codecs
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
access_token = "xxxxxxxxxxxxxxxx"
access_token_secret = "xxxxxxxxxxxxxxx"
consumer_key = "xxxxxxxxxxxxxxxx"
consumer_secret = "xxxxxxxxxxxxxxxxx"
class StdOutListener(StreamListener):
    """Stream listener that echoes each raw message and appends it to a file."""

    def on_data(self, data):
        """Print the raw payload and append it, plus a newline, to the file.

        Returns:
            True after the payload has been written.
        """
        print(data)
        out = codecs.open('TweetPrjkhushh.txt', 'a', 'utf-8')
        out.write(data)
        out.write('\n')
        out.close()
        return True

    def on_error(self, status):
        """Print the reported status."""
        print(status)
if __name__ == '__main__':
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
t = u"खुशी"
stream.filter(languages=["hi"],track=[t])
I get tweets text in Unicode like this:
{"text":"RT #guru9899: \u092f\u0947 \u092c\u0947\u091c\u093e\u0928 \u0928\u0947 \u092c\u094b\u0932\u093e \u092f\u093e #abpnewshindi \u0915\u0940 \u092e\u0941\u0939\u0940\u092e \u0939\u0948 ??? \u0939\u093e\u0925 \u0935\u093e\u092a\u0938 \u092d\u0940 \u0924\u094b \u0916\u0940\u0902\u091a \u0938\u0915\u0924\u0947 \u0925\u0947 ??? \u091c\u092c\u0930\u0926\u0938\u094d\u0924\u0940 \u0925\u094b\u0921\u093c\u0940 \u0939\u0948 \ud83d\ude02\ud83d\ude02\ud83d\ude02 https:\/\/t.co\/BE0gSEj\u2026"}
I want to display it in Hindi font when we open the file where I am saving the tweets, but using codecs and utf-8 encoding while saving doesn't help. What am I missing here?
`data` holds the whole tweet payload, not just the message text.
Change your code to specify the key entry of data:
def on_data(self, data):
    """Print the text of one streamed tweet and append it to the file.

    Args:
        data: The raw JSON string delivered by the stream (an already
            parsed dict is accepted as well).

    Returns:
        True, so the stream keeps running.
    """
    import json  # local import: the snippet otherwise only uses codecs

    # The stream hands over raw JSON text. Parsing it turns the \uXXXX
    # escapes into real characters, so the file shows Hindi script.
    tweet = data if isinstance(data, dict) else json.loads(data)
    text = tweet.get("text")
    if text is None:
        # Not a tweet (e.g. a limit or delete notice): nothing to save.
        return True

    print(text)
    with codecs.open('TweetPrjkhushh.txt', 'a', 'utf-8') as saveFile:
        saveFile.write(text)
        saveFile.write('\n')
    return True

Categories

Resources