I am running a sentiment analysis by using twitter and I am having some difficulties on:
Counting how many 'Positive', 'Negative' and 'Neutral' results I have.
Any help will be me more than appreciated.
Please take a look at my code:
import tweepy
from textblob import TextBlob
consumer_key = ''
consumer_key_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
public_tweets = api.search('stackoverflow')
for tweet in public_tweets:
print(tweet.text)
analysis = TextBlob(tweet.text)
print(analysis.sentiment)
if analysis.sentiment[0]>0:
print ('Positive')
elif analysis.sentiment[0]<0:
print('Negative')
else:
print ('Neutral')
I think you can just create variables that keep track of how many labels were in the data. Something like this:
pos, neg, neu = 0, 0, 0
for tweet in public_tweets:
analysis = TextBlob(tweet.text)
if analysis.sentiment[0]>0:
pos += 1
elif analysis.sentiment[0]<0:
neg += 1
else:
neu += 1
print("positive: {}\nnegative: {}\nneutral: {}".format(pos,neg,neu))
Regarding the result dataframe, I was not sure what kind of data you want to save, so could not give a good answer for that.
Related
I want to return all hashtags that match my search, but it is currently taking a very long time to return all the data. In a perfect world I would like to return data where hashtag matches my search query. Get the count of how many times it was mentioned, and then see who tweeted it. Currently just to count the hashtags within a day takes a long time. Here is my current code.
def main():
consumer_key= 'key'
consumer_secret= 'key'
access_token= 'key'
access_token_secret= 'key'
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)
search_words = "#search"
date_since = "2021-04-26"
tweets = tw.Cursor(api.search,
q=search_words,
lang="en",
fromDate=date_since).items()
count = 0
for tweet in tweets:
count = count + 1
#print(tweet.text)
print(count)
if __name__ == "__main__":
main()
EDIT: I found out it is sleeping on the Wait Rate limit. Is their anyay around the wait limit?
in current version of tweepy counts are done like this:
import tweepy
client = tweepy.Client(bearer_token="TWITTER_API_BEARER")
# Replace with your own search query
query = 'search -is:retweet'
counts = client.get_recent_tweets_count(query=query, granularity='day')
for count in counts.data:
print(count)
I'm trying to scrape twitter data for my thesis work. But in this below's code, dataframe isn't working. I mean, dataframe isn't showing at the output line. How can I modify this code to build my dataframe? Another problem is that I want to scrape data by filtering location. How can I do this?
import tweepy
import re
import pandas as pd
import itertools
import collections
import nltk
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
from textblob import TextBlob
import os
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)
latitude = 56.130367 # geographical centre of search
longitude = -106.346771 # geographical centre of search
max_range = 1
tweets = tweepy.Cursor(api.search,
q="Shopify" + " -filter:retweets",
#geocode = "%f,%f,%dkm" % (latitude,longitude,max_range),
lang="en",
since="2020-01-01").items(10)
for tweet in tweets:
print(tweet.text)
analysis = TextBlob(tweet.text)
print('Date=', tweet.created_at,'Location=', tweet.user.location)
print(analysis.sentiment)
if analysis.sentiment[0] > 0:
print('Positive')
elif analysis.sentiment[0] < 0:
print('Negative')
else:
print('Neutral')
print('====================================================================')
print()
user_data = [[tweet.created_at, remove_characters(tweet.user.name), tweet.user.location,
remove_characters(tweet.text), TextBlob(tweet.text).sentiment[0],
'Positive' if TextBlob(tweet.text).sentiment[0] > 0
else 'Negative' if TextBlob(tweet.text).sentiment[0] < 0
else 'Nuetral']
for tweet in tweets]
tweet_df = pd.DataFrame(data=user_data,
columns=['Created At', "User", 'Location', 'Text', 'Sentiment', 'Polarity', 'favorite_count'])
tweet_df.head(10)
I am using tweepy to download tweets about a particular topic but nobody which tutorial I follow I cannot get the tweet to output as a full tweet. There is always an ellipse that cuts it off after a certain number of characters.
Here is the code I am using
import json
import tweepy
from tweepy import OAuthHandler
import csv
import sys
from twython import Twython
nonBmpMap = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
with open ('Twitter_Credentials.json') as cred_data:
info = json.load(cred_data)
consumer_Key = info['Consumer_Key']
consumer_Secret = info['Consumer_Secret']
access_Key = info['Access_Key']
access_Secret = info['Access_Secret']
maxTweets = int(input('Enter the Number of tweets that you want to extract '))
userTopic = input('What topic do you want to search for ')
topic = ('"' + userTopic + '"')
tweetCount = 0
auth = OAuthHandler(consumer_Key, consumer_Secret)
auth.set_access_token(access_Key, access_Secret)
api = tweepy.API(auth, wait_on_rate_limit=True)
tweets = api.search(q=topic, count=maxTweets, tweet_mode= 'extended')
for tweet in tweets:
tweetCount = (tweetCount+1)
with open ('TweetsAbout' + userTopic, 'a', encoding='utf-8') as the_File:
print(tweet.full_text.translate(nonBmpMap))
tweet = (str(tweet.full_text).translate(nonBmpMap).replace(',','').replace('|','').replace('\n','').replace('’','\'').replace('…',"end"))
the_File.write(tweet + "\n")
print('Extracted ' + str(tweetCount) + ' tweets about ' + topic)
Try this, see if it works!
try:
specific_tweets = tweepy.Cursor(api.search, tweet_mode='extended', q=<your_query_string> +" -filter:retweets", lang='en').items(500)
except tweepy.error.TweepError:
pass
for tweet in specific_tweets:
extracted_text = tweet.full_text
all the text your trying to extract should be in extracted_text. Good Luck!!
here is my code..i want to extract tweets from twitter with some keywords....my code dont give any errors but i am not getting the output file generated...please help me........
import re
import csv
import tweepy
from tweepy import OAuthHandler
#TextBlob perform simple natural language processing tasks.
from textblob import TextBlob
def search():
#text = e.get() **************************
consumer_key = ''
consumer_secret = ''
access_token = ' '
access_token_secret = ' '
# create OAuthHandler object
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# set access token and secret
auth.set_access_token(access_token, access_token_secret)
# create tweepy API object to fetch tweets
api = tweepy.API(auth)
def get_tweets(query, count = 300):
# empty list to store parsed tweets
tweets = []
target = open("tweets.txt", 'w',encoding="utf-8")
t1 = open("review.txt", 'w',encoding="utf-8")
# call twitter api to fetch tweets
q=str(query)
a=str(q+" sarcasm")
b=str(q+" sarcastic")
c=str(q+" irony")
fetched_tweets = api.search(a, count = count)+ api.search(b, count = count)+ api.search(c, count = count)
# parsing tweets one by one
print(len(fetched_tweets))
for tweet in fetched_tweets:
# empty dictionary to store required params of a tweet
parsed_tweet = {}
# saving text of tweet
parsed_tweet['text'] = tweet.text
if "http" not in tweet.text:
line = re.sub("[^A-Za-z]", " ", tweet.text)
target.write(line+"\n")
t1.write(line+"\n")
return tweets
# creating object of TwitterClient Class
# calling function to get tweets
tweets = get_tweets(query =text, count = 20000)
root.mainloop()
From this code i am nor getting the output generated file. Can anyone tell me what i am doing wrong ?
Thanks in advance!
I just made some slight changes and it was working perfectly for me. Removed or commented some unnecessary statements (like the review file). Changed the open function to io.open since I have python version 2.7. Here is the running code, hope it helps!!
`
import re
import io
import csv
import tweepy
from tweepy import OAuthHandler
#TextBlob perform simple natural language processing tasks.
#from textblob import TextBlob
consumer_key = 'sz6x0nvL0ls9wacR64MZu23z4'
consumer_secret = 'ofeGnzduikcHX6iaQMqBCIJ666m6nXAQACIAXMJaFhmC6rjRmT'
access_token = '854004678127910913-PUPfQYxIjpBWjXOgE25kys8kmDJdY0G'
access_token_secret = 'BC2TxbhKXkdkZ91DXofF7GX8p2JNfbpHqhshW1bwQkgxN'
# create OAuthHandler object
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# set access token and secret
auth.set_access_token(access_token, access_token_secret)
# create tweepy API object to fetch tweets
api = tweepy.API(auth)
def get_tweets(query, count = 300):
# empty list to store parsed tweets
tweets = []
target = io.open("mytweets.txt", 'w', encoding='utf-8')
# call twitter api to fetch tweets
q=str(query)
a=str(q+" sarcasm")
b=str(q+" sarcastic")
c=str(q+" irony")
fetched_tweets = api.search(a, count = count)+ api.search(b, count = count)+ api.search(c, count = count)
# parsing tweets one by one
print(len(fetched_tweets))
for tweet in fetched_tweets:
# empty dictionary to store required params of a tweet
parsed_tweet = {}
# saving text of tweet
parsed_tweet['text'] = tweet.text
if "http" not in tweet.text:
line = re.sub("[^A-Za-z]", " ", tweet.text)
target.write(line+"\n")
return tweets
# creating object of TwitterClient Class
# calling function to get tweets
tweets = get_tweets(query ="", count = 20000)
`
I wrote the code.
But I don't think it's going to work.
I want to extract words from the concept of " or " rather than the concept of " and ".
It seems like only ' keyword 1 ' is extracted.
How do I make corrections?
import tweepy
import time
import os
search_term = 'keyword1'
search_term2= 'keyword2'
lat = "37.6"
lon = "127.0"
radius = "200km"
location = "%s,%s,%s" % (lat, lon, radius)
API_key = "11111"
API_secret = "22222"
Access_token = "33333"
Access_token_secret = "444"
auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(Access_token, Access_token_secret)
api = tweepy.API(auth)
c=tweepy.Cursor(api.search,
q=(search_term or search_term2),
rpp=1000,
geocode=location,
include_entities=True)
data = {}
i = 1
for tweet in c.items():
data['text'] = tweet.text
print(i, ":", data)
i += 1
time.sleep(1)
wfile = open(os.getcwd()+"/twtw2.txt", mode='w')
data = {}
i = 0
for tweet in c.items():
data['text'] = tweet.text
wfile.write(data['text']+'\n')
i += 1
time.sleep(1)
wfile.close()
Maybe change this line
q=(search_term or search_term2),
to
q="{}+OR+{}".format(search_term,search_term2),
Case matters here for the OR operator
enter q as a string, not as an expression that is short-circuit evaluated
By the way, your credentials (from your post) also work for me.