Cannot get Arabic data from Twitter using Tweepy? - Python

I'm trying to get data from Twitter using the Tweepy library in Python. I get the error below. I've already tried some solutions, like changing the keys, but it still doesn't work.
import tweepy
import xlsxwriter
from TweetClassifier import TweetClassifier
from DataCleaner import DataCleaner
import pandas as pd
import os
from tweepy import OAuthHandler

class TwitterAPI:
    tweets = None
    query = None
    number_of_tweets = 100
    date = None
    consumer_key = "vxxxxxxxxxxxxxxxx6"
    consumer_secret = "Exxxxxxxxxxxxxxxxxxxxxxxxxxxxxxvv"
    access_token = "295xxx24-eYxxxxxxerE9"
    access_secret = "V2xxxxxxxxWadL"
    data_clean = DataCleaner()
    tweets_classifier = TweetClassifier()

    def __init__(self):
        return

    def Auth(self):
        auth = tweepy.OAuthHandler(self.consumer_key, self.consumer_secret)
        auth.set_access_token(self.access_token, self.access_secret)
        api = tweepy.API(auth)
        return api

    def retrieve_tweets(self, query, api):
        tweets = []
        for tweet in tweepy.Cursor(api.search, q=query).items(100):
            tweets.append(tweet.text)
        return tweets
error
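The actual error text isn't included, but a common failure with exactly this code on current Tweepy versions is AttributeError: 'API' object has no attribute 'search', because Tweepy v4 renamed API.search to API.search_tweets. A minimal sketch of the search with that rename, plus an explicit Arabic language filter (the credentials, query string, and lang value here are illustrative assumptions):

import tweepy

auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", "ACCESS_SECRET")
api = tweepy.API(auth, wait_on_rate_limit=True)

query = "مرحبا"  # assumed Arabic keyword; Python 3 strings are Unicode, so this is safe to pass
tweets = []
for status in tweepy.Cursor(api.search_tweets,  # renamed from api.search in Tweepy v4
                            q=query,
                            lang="ar",          # restrict results to Arabic tweets
                            tweet_mode="extended").items(100):
    tweets.append(status.full_text)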

Related

How to get the likes of every tweet containing a specific hashtag with tweepy

I can retrieve tweets with a specific hashtag using tweepy:
Code:
import tweepy
import configparser
import pandas as pd

# config = configparser.ConfigParser()
# config.read('config.ini')

api_key = ''
api_key_secret = ''
access_token = ''
access_token_secret = ''

auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# user = '#veritasium'
keywords = '#SheHulk'
limit = 1200

tweets = tweepy.Cursor(api.search_tweets, q=keywords, count=100, tweet_mode='extended').items(limit)

columns = ['User', 'Tweet']
data = []
for tweet in tweets:
    data.append([tweet.user.screen_name, tweet.full_text])

df = pd.DataFrame(data, columns=columns)
df.to_excel("output.xlsx")
What I want to know is whether I can get the number of likes with every tweet that is retrieved. Any help would be appreciated.
In the Twitter API V1.1 (see documentation here), that field was called favorite_count.
for tweet in tweets:
    print(f"That tweet has {tweet.favorite_count} likes")

Is it possible to use the Tweepy module to get the date followers were added?

I apologize in advance if I don't know how to search the Tweepy documentation. I am quite new to python/programming in general.
I have written a small script to pull Twitter follower data for an account I manage for work. I would like to investigate when followers added us, to see if our posts are increasing engagement. What I cannot figure out is whether I can use the Tweepy module to pull this particular information (when the follower added us).
Thank you in advance for any help. My MWE:
import tweepy
import pandas as pd

# Load API keys
consumer_key = "my_consumer_key"
consumer_secret = "my_consumer_secret"
access_token = "my_access_token"
access_token_secret = "my_access_token_secret"

# Authenticate access to Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Get the list of followers for the account
followers = api.followers_ids()

# Create a user map
userMap = {}

# Loop over all users
for f in followers:
    # create a temporary list
    tempList = []
    try:
        tweets = api.user_timeline(f, count=33)  # pull the 33 most recent tweets
    except tweepy.TweepError:
        print('Failed to run command.')  # Tweepy throws an error if a user hasn't tweeted
        continue  # skip this user so the loop below doesn't reuse the previous user's tweets
    # Loop over all tweets per each user f
    for t in tweets:
        tempList.append(t)
    userMap[f] = tempList

# Create lists of pertinent data
dateList = []
favList = []
rtList = []
keyList = []

def genList(tweetList):
    for tweets in tweetList:
        for t in tweets:
            keyList.append(str(t.id))
            dateList.append(str(t.created_at))
            favList.append(str(t.favorite_count))
            rtList.append(str(t.retweet_count))

genList(userMap.values())

# Create a pandas data frame
df = pd.DataFrame(list(zip(keyList, dateList, favList, rtList)),
                  columns=['userID', 'created_at', 'favorited', 'retweeted'])
This information is not provided by Twitter.
The followers/list endpoint (Tweepy's followers() method) returns a list of User objects, and none of their fields record when the follow happened. It looks like the only solution is to monitor the changes and manage the history yourself.
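A minimal sketch of that monitoring approach, assuming a local JSON file as the history store (the file name and layout are just illustrative):

import json
import os
from datetime import date

SNAPSHOT_FILE = "followers_snapshot.json"  # hypothetical checkpoint file

def diff_followers(api):
    """Compare today's follower IDs against the last saved snapshot."""
    current = set(api.followers_ids())
    previous = set()
    if os.path.exists(SNAPSHOT_FILE):
        with open(SNAPSHOT_FILE) as fh:
            previous = set(json.load(fh)["ids"])
    # anyone in current but not in previous followed since the last run
    new_followers = current - previous
    lost_followers = previous - current
    with open(SNAPSHOT_FILE, "w") as fh:
        json.dump({"date": date.today().isoformat(), "ids": sorted(current)}, fh)
    return new_followers, lost_followers

Run on a schedule (a daily cron job, say), this gives follow dates accurate to the polling interval, which is the best you can do without the data from Twitter.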

Get Timeline from Twitter List

I'm trying to get status updates from a list in Twitter, then save them to a CSV file, but I keep getting the following error:
AttributeError: 'Status' object has no attribute 'screen_name'.
Any suggestions?
import tweepy
from tweepy import OAuthHandler
import csv
import pandas as pd
consumer_key = 'x'
consumer_secret = 'x'
access_token = 'x'
access_secret = 'x'
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
f = csv.writer(open('TodaysNews.csv', 'w'))
f.writerow(["screenName", "name", "text", "followers_count", "listed_count", "statuses_count"])
number_of_tweets = 100
tweets_for_csv = []
for tweet in tweepy.Cursor(api.list_timeline, 'darrenmeritz', 'News',
                           result_type='recent',
                           include_entities=True,
                           trim_user=True,
                           truncated=False,
                           lang='en').items(number_of_tweets):
    twtHandle = tweet.screen_name
    name = tweet.name
    text = tweet.text
    followers_count = tweet.followers_count
    listed_count = tweet.listed_count
    statuses_count = tweet.statuses_count
    try:
        f.writerow([twtHandle, name, text, followers_count, listed_count, statuses_count])
    except UnicodeEncodeError:
        pass
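For what it's worth, the AttributeError means screen_name is being looked up on the Status object; in the v1.1 models it lives on the embedded User object (tweet.user), and trim_user=True strips that object down to just an ID. A sketch of the loop with those two changes (reusing the CSV writer f from above):

for tweet in tweepy.Cursor(api.list_timeline, 'darrenmeritz', 'News',
                           include_entities=True).items(number_of_tweets):
    try:
        # user-level fields come from tweet.user, not the Status itself
        f.writerow([tweet.user.screen_name, tweet.user.name, tweet.text,
                    tweet.user.followers_count, tweet.user.listed_count,
                    tweet.user.statuses_count])
    except UnicodeEncodeError:
        pass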

Tweepy: Ignore previous tweets to improve optimization

Problem: Trying to pull tweets via tweepy using Cursor. I want to make sure I don't pull tweets I previously pulled.
Here is working code:
import csv
import tweepy
import pandas as pd
import numpy as np

ACCESS_TOKEN = ""
ACCESS_TOKEN_SECRET = ""
CONSUMER_KEY = ""
CONSUMER_SECRET = ""

# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Creation of the actual interface, using authentication
api = tweepy.API(auth, wait_on_rate_limit=True)

csvFile = open(r'filename', 'a')

# Use csv writer
headers = ['UserName', 'Tweet', 'TweetId', 'tweet_date', 'source', 'fav_count',
           'retweet_count', 'coordinates', 'geo']

# definitions for writing to CSV
csvWriter = csv.writer(csvFile, lineterminator='\n')

# write the headers once
csvWriter.writerow(headers)

handles = ['pycon', 'gvanrossum']

# a set of ints, so membership checks are O(1) and match the type of status.id
previousTweets = {
    222288832031240000,
    222287080586362000,
    222277240178741000,
    221414283844653000,
    221188011906445000,
    205274818877210000,
}

for handle in handles:
    for status in tweepy.Cursor(api.user_timeline, screen_name=handle, tweet_mode="extended").items():
        if status.id not in previousTweets:
            csvWriter.writerow([status.user.name.encode('utf-8'), status.full_text.encode('utf-8'),
                                status.id, status.created_at, status.source,
                                status.favorite_count, status.retweet_count,
                                status.coordinates, status.geo])
    print(handle)
This takes a long time and becomes unusable if you want a previousTweets list of over 75 tweets. Does anyone know a better way to filter out old tweets when using Tweepy and the Cursor function?
You can pass the since_id argument to the cursor. This fetches only statuses more recent than the specified ID (see http://docs.tweepy.org/en/v3.5.0/api.html#API.user_timeline).
try:
    since_id = previous_tweets[-1]
except IndexError:
    since_id = None

for handle in handles:
    last_tweet_id = None
    for status in tweepy.Cursor(
        api.user_timeline, screen_name=handle,
        tweet_mode="extended", since_id=since_id
    ).items():
        # ... persist tweets to flat file or database
        last_tweet_id = status.id
    # this keeps the last_tweet_id in memory;
    # you may find that persisting it to a database is a better way to go.
    previous_tweets.append(last_tweet_id)
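If the checkpoint should survive between runs, a minimal sketch of persisting the newest ID to a plain text file (the file name is an assumption):

def load_since_id(path="since_id.txt"):
    # returns None on the first run, so the cursor fetches everything
    try:
        with open(path) as fh:
            return int(fh.read().strip())
    except (FileNotFoundError, ValueError):
        return None

def save_since_id(since_id, path="since_id.txt"):
    with open(path, "w") as fh:
        fh.write(str(since_id))

Since since_id only needs the newest ID you have seen, a single integer is enough; there is no need to keep the whole previousTweets list around.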

Twitter's streaming API, sample tweets, getting rate limited?

I am quite new to the Twitter API and Tweepy, and I am confused by the rate-limiting concept. I am using the streaming API and want to gather sample tweets without any filters such as hashtags or location. Some sources state I should not get rate limited with sample tweets, since I am only getting 1% of tweets, while others state otherwise. I keep getting error 420 very often, and I was wondering if there is a way to avoid it or make it smoother?
Thank you so much for your help
My code:
import json
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from textblob import TextBlob
from elasticsearch import Elasticsearch
from datetime import datetime
# import twitter keys and tokens
from config import *
# create instance of elasticsearch
es = Elasticsearch()
indexName = "test_new_fields"
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
class TweetStreamListener(StreamListener):

    # on success
    def on_data(self, data):
        # decode json
        dict_data = json.loads(data)  # data is a json string
        # print(data) # to print the twitter json string
        print(dict_data)

        # pass tweet into TextBlob
        tweet = TextBlob(dict_data["text"])

        # determine if sentiment is positive, negative, or neutral
        if tweet.sentiment.polarity < 0:
            sentiment = "negative"
        elif tweet.sentiment.polarity == 0:
            sentiment = "neutral"
        else:
            sentiment = "positive"

        # output polarity sentiment and tweet text
        print(str(tweet.sentiment.polarity) + " " + sentiment + " " + dict_data["text"])

        # initialise before the try block, so the es.index call below
        # never sees an unbound name if the lookup fails
        hashtags = []
        try:
            # check if there are any hashtags
            if len(dict_data["entities"]["hashtags"]) != 0:
                hashtags = dict_data["entities"]["hashtags"]
        except KeyError:
            pass

        es.indices.put_settings(index=indexName, body={"index.blocks.write": False})

        # add text and sentiment info to elasticsearch
        es.index(index=indexName,
                 doc_type="test-type",
                 body={"author": dict_data["user"]["screen_name"],
                       "date": dict_data["created_at"],  # unfortunately this gets stored as a string
                       "location": dict_data["user"]["location"],  # user location
                       "followers": dict_data["user"]["followers_count"],
                       "friends": dict_data["user"]["friends_count"],
                       "time_zone": dict_data["user"]["time_zone"],
                       "lang": dict_data["user"]["lang"],
                       # "timestamp": float(dict_data["timestamp_ms"]), # double not recognised as date
                       "timestamp": dict_data["timestamp_ms"],
                       "datetime": datetime.now(),
                       "message": dict_data["text"],
                       "hashtags": hashtags,
                       "polarity": tweet.sentiment.polarity,
                       "subjectivity": tweet.sentiment.subjectivity,
                       # handle geo data
                       # "coordinates": dict_data[coordinates],
                       "sentiment": sentiment})
        return True

    # on failure
    def on_error(self, error):
        print("error: " + str(error))


if __name__ == '__main__':
    # create instance of the tweepy tweet stream listener
    listener = TweetStreamListener()

    # set twitter keys/tokens
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    while True:
        try:
            # create instance of the tweepy stream
            stream = Stream(auth, listener)
            # search twitter for sample tweets
            stream.sample()
        except KeyError:
            pass
OK, I have found the solution to this problem: changing the method from on_data to on_status removed all the issues causing error 420.
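For reference, a minimal sketch of that change; the on_error handler follows the Tweepy documentation's advice to return False on a 420 so the client disconnects instead of reconnecting immediately, which is what escalates the back-off:

class TweetStreamListener(StreamListener):

    def on_status(self, status):
        # status arrives as an already-parsed Status object,
        # so no json.loads is needed here
        print(status.text)

    def on_error(self, status_code):
        if status_code == 420:
            # returning False disconnects the stream; reconnecting
            # in a tight loop is what keeps triggering the 420s
            return False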
