Set tweets counts for each items in tweepy stream

Set tweets counts for each items in tweepy stream - python

I have a problem and can't find a solution.
I have written a Python script to stream tweets from Twitter.
My issue is that I need to read 5 tweets for each word in the given list.
Below is the code:
class TweetListener(StreamListener):
    """Stream listener that prints a short summary of every tweet it receives."""

    def on_status(self, status):
        """Print the text, author, creation time and source of ``status``.

        Always returns True after waiting ten seconds.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("TWEET ARRIVED!!!")
        print("Tweet Text : %s" % status.text)
        print("Author's name : %s" % status.author.screen_name)
        print("Time of creation : %s" % status.created_at)
        print("Source of Tweet : %s" % status.source)
        time.sleep(10)
        return True

    def on_error(self, status):
        """Print the error ``status``; return False when it is 420."""
        print(status)
        if status == 420:
            print("Too soon reconnected, Exiting!!")
            return False
        # NOTE(review): the original indentation was lost; this call is
        # assumed to sit outside the 420 branch (inside it, it would be
        # unreachable after the return) -- confirm the intended behaviour.
        sys.exit()
def search_tweets():
    """Start a background stream of English tweets matching the tracked words."""
    tracked_words = ['Cricket', 'Maths', 'Army', 'Sports']
    twitterStream = Stream(connect().auth, TweetListener())
    # ``async`` is a reserved word from Python 3.7 on, so ``async=True`` no
    # longer parses; tweepy 3.7+ spells the flag ``is_async``.
    twitterStream.filter(track=tracked_words, languages=["en"], is_async=True)
Here I need to get 5 tweets each for Cricket, Maths, Army & Sports
What I am getting is an infinite number of tweets for the above elements.
Any help will be highly appreciated.
Thanks & regards.

class TweetListener(StreamListener):
    """Stream listener that handles a limited number of tweets per keyword.

    ``dict_`` maps every tracked keyword to the number of tweets still wanted
    for it.  ``performAction(key, status)`` is called for each accepted tweet
    and is not defined here; it has to be supplied alongside this class.
    """

    def __init__(self, list_=None, dict_=None):
        # Let the base listener run its own initialisation before adding state.
        super(TweetListener, self).__init__()
        self.keys_ = list_
        self.dict = dict_

    def on_status(self, status):
        """Count ``status`` against every keyword it mentions.

        Returns False once every counter has reached zero, True otherwise.
        """
        text = status.text.lower()
        for key in self.dict:
            if key.lower() not in text:
                continue
            if self.dict[key] <= 0:
                # This keyword already has its quota.  Keep checking the other
                # keywords instead of returning, otherwise a tweet that also
                # matches a keyword with quota left would be dropped.
                continue
            self.dict[key] -= 1
            self.performAction(key, status)
        if all(value == 0 for value in self.dict.values()):
            return False
        return True

    def on_error(self, status):
        """Print the error ``status``; return False when it is 420."""
        print(status)
        if status == 420:
            print("Too soon reconnected . Will terminate the program")
            return False
        # NOTE(review): the original indentation was lost; this call is
        # assumed to sit outside the 420 branch (inside it, it would be
        # unreachable after the return) -- confirm the intended behaviour.
        sys.exit()
def create_dict(list_, no_of_tweets=5):
    """Return a dict mapping every word in ``list_`` to ``no_of_tweets``.

    ``no_of_tweets`` is the per-word quota and defaults to 5, the value that
    used to be hard-coded.
    """
    return dict.fromkeys(list_, no_of_tweets)
def search_tweets():
    """Stream English tweets for the search words with a per-word quota."""
    search_word = ['Cricket', 'Maths', 'Army', 'Sports']
    listener = TweetListener(list_=search_word, dict_=create_dict(search_word))
    twitterStream = Stream(connect().auth, listener)
    # ``async`` is a reserved word from Python 3.7 on, so ``async=True`` no
    # longer parses; tweepy 3.7+ spells the flag ``is_async``.
    twitterStream.filter(track=search_word, languages=["en"], is_async=True)
Here I initialize a list with all the required words that are to be searched for tweets, then I create a dictionary with key:value as word_to_be_searched:count_as_5 in the create_dict(list_) function, like Cricket:5, Maths:5, Army:5, Sports:5 and so on. Then I pass the list along with the dictionary to the TweetListener class.
I override the on_status function to retrieve tweets and then compare the tweets with the key field of my dictionary. It is obvious there will be a match and then, in that case, I decrease the value(as counter here) by 1.
When all the values become 0, then I return false to break the loop and close the thread.
[Note, if any value corresponding to a key has become zero, it indicates that the required no of tweets are already captured so we will not proceed with any more tweets on that word.]
Then in the performAction(key, status) function {key=one of the searched words and status = tweet captured} I perform my required task.

Related

Extract timestamp and username when streaming tweets using tweepy

I have the following class, in order to extract tweets in real time containing a given hashtag #Today:
class TweetListener(StreamingClient):
    """Streaming client that forwards every raw payload to ``producer``."""

    def on_data(self, raw_data):
        """Log ``raw_data`` and send it, unmodified, to ``topic_name``."""
        logging.info(raw_data)
        producer.send(topic_name, value=raw_data)
        return True

    def on_error(self, status_code):
        """Return False for status code 420; otherwise return nothing."""
        # NOTE(review): whether StreamingClient ever invokes a hook with this
        # name is not visible here -- confirm against the tweepy version used.
        if status_code == 420:
            return False

    def start_streaming_tweets(self):
        """Register the ``#Today lang:en`` rule and start the filtered stream."""
        rule = StreamRule(value="#Today lang:en")
        self.add_rules(rule)
        self.filter()
However, in this way, the object sent is something like:
ConsumerRecord(topic='twitter', partition=0, offset=46, timestamp=1675201799030, timestamp_type=0, key=None, value=b'{"data":{"edit_history_tweet_ids":["16205398989347923"],"id":"16205398989347923","text":"#Today is a great day!"},"matching_rules":[{"id":"16238748236833856","tag":""}]}', headers=[], checksum=None, serialized_key_size=-1, serialized_value_size=196, serialized_header_size=-1
And so, I don't have any info about the user, the time of publication, the number of likes... Is there any way to get this info?

prevent bot from replying to an already replied tweet?

Is it possible to have a list to store the ID's of recent tweets that the bot replied to instead of saving it in a text file?
def on_status(self, status):
    """Reply to the newest tweet on the timeline unless already replied to.

    The ids of the most recent replies (at most 20) are kept in a list on the
    listener instance, so they persist between calls without a text file.
    Always returns True.
    """
    # The history must outlive a single call; a local list would be rebuilt
    # every time and nothing would ever count as "already replied".
    replied_ids = getattr(self, "_already_replied", None)
    if replied_ids is None:
        replied_ids = self._already_replied = []

    timelineTweets = api.user_timeline(screen_name="USERNAME")
    # Guard against an empty timeline before looking at the newest tweet.
    if timelineTweets and timelineTweets[0].id not in replied_ids:
        lastTweet = timelineTweets[0]
        api.update_status('#USERNAME' + gettext(), in_reply_to_status_id=lastTweet.id)
        replied_ids.append(lastTweet.id)
        if len(replied_ids) > 20:
            # Drop the oldest id, not the one that was just added.
            replied_ids.pop(0)
    return True

Python class: The data is somehow lost

I am trying to define a Python class which analyzes subreddit data via the praw package.
I am fairly experienced with OOP in C++, but have not had much experience with OOP in Python. Here is the code I have so far:
import praw
class SubRedditAnalyzer:
    """Fetch and display the top submissions of one subreddit.

    ``reddit_session`` must provide ``get_subreddit(name)``; the object it
    returns must provide the ``get_top_from_<timeframe>(limit=...)`` methods.
    """

    # Timeframes for which a ``get_top_from_<timeframe>`` method is used.
    _TIMEFRAMES = ('hour', 'day', 'week', 'month', 'year', 'all')

    def __init__(self, reddit_session, name='dataisbeautiful'):
        self.name = name  # subreddit name
        self.reddit_session = reddit_session  # the reddit session
        self.subreddit = self.reddit_session.get_subreddit(self.name)
        self.timeframe = 'day'
        self.max_post_count = 10
        # Materialise the result: a one-shot iterable would be empty the
        # second time it is looped over (combine_titles, then display_titles).
        self.submissions = list(self.subreddit.get_top_from_hour(limit=10))

    def __del__(self):
        print("%s destroyed" % self.__class__.__name__)

    def get_top_submissions(self, max_post_count):
        """Reload ``self.submissions`` with the top posts for ``self.timeframe``.

        At most ``max_post_count`` posts are requested.  An unrecognised
        timeframe leaves ``self.submissions`` unchanged.
        """
        timeframe = self.timeframe
        if timeframe not in self._TIMEFRAMES:
            return
        fetch = getattr(self.subreddit, 'get_top_from_' + timeframe)
        self.submissions = list(fetch(limit=max_post_count))

    def combine_titles(self):
        """Concatenate all submission titles into ``self.titles``."""
        self.titles = "".join(
            submission.title for submission in self.submissions
        )

    def display_titles(self):
        """Print every submission as ``T<n>- [<score>] <title>``."""
        for counter, sub in enumerate(self.submissions, 1):
            print('T%d- [%d] %s \n' % (counter, sub.score, sub.title))
def main():
    """Fetch, combine and print the top titles of r/dataisbeautiful."""
    session = praw.Reddit('Request to fetch data by user')
    analyzer = SubRedditAnalyzer(session, 'dataisbeautiful')
    # Ask for the 15 highest-ranked posts, merge their titles, then list them.
    analyzer.get_top_submissions(15)
    analyzer.combine_titles()
    analyzer.display_titles()


main()
For some unknown (to me) reason, it seems that the data in class 'sr' is lost after calling:
sr.combine_titles()
When I try to call this method, the data in class is empty:
sr.display_titles()
In fact, I do see the message that the class is destroyed:
SubRedditAnalyzer destroyed
What is it that I am doing wrong?
In advance, thanks for your attention.

It seems that self.submissions may be an iterable but not a collection (e.g. a list). The docs call get_top_from_hour() a generator method (although they state also that what is returned is a list...). If it is indeed a generator method, the result can be iterated over only once. All other attempts at iteration will fail silently (the loop in display_titles() executes nothing).
So, the solution would be:
self.submissions = list(self.subreddit.get_top_from_hour(limit=10))
in __init__() to convert an iterable into a permanent collection (list) that can be iterated over multiple times.

According to the PRAW docs, get_content and its associated methods like get_top_from_hour return a generator. A generator can only be iterated once, which you do in combine_titles. After that iteration, the generator is exhausted and cannot be iterated again.
You could presumably convert the submissions to a list when you get them in __init__:
self.submissions = list(self.subreddit.get_top_from_hour(limit=10))

Python SMS store program using class and methods - has_been_viewed status

from datetime import datetime
class sms_store:
    """Very small SMS inbox that keeps messages as formatted strings."""

    # NOTE: both lists are class attributes, so every instance shares them.
    store = []  # formatted message strings
    read = []   # index of each message, recorded when it arrives

    def add_new_arrival(self, number, time, text):
        """Append a formatted message and record its index in ``read``."""
        sms_store.read.append(len(sms_store.store))
        sms_store.store.append(("From: {}, Recieved: {}, Msg: {}".format(number, time, text)))

    def delete(self, i):
        """Delete message ``i``; print a notice if the index is out of range."""
        try:
            del sms_store.store[i]
        except IndexError:
            print("Index is out of range. Cannot delete")

    def message_count(self):
        """Print the number of stored messages; returns None."""
        print("Amt of messages in inbox: {}".format(len(sms_store.store)))

    def viewall(self):
        """Print the whole message list."""
        print(sms_store.store)

    def get_unread_indexes(self):
        """Placeholder for the method that should list unread message indexes."""
        # TODO: not implemented yet.  The original had no body here, which is
        # a syntax error; failing loudly keeps the class importable.
        raise NotImplementedError("get_unread_indexes is not implemented yet")

    def get_message(self, i):
        """Print message ``i``."""
        print(sms_store.store[i])
### tests ####
arrival_time = datetime.now().strftime('%H:%M:%S')
my_inbox = sms_store()  # create an inbox object
my_inbox.add_new_arrival("12345", arrival_time, "Hello how are you?")
my_inbox.add_new_arrival("1111111", arrival_time, "BYE BYE BYE")
my_inbox.viewall()
# The class defines message_count(); the original called a nonexistent msgcount().
my_inbox.message_count()
Thanks for viewing this.
This is what I need to do:
my_inbox.add_new_arrival()
When adding a new message, its has_been_viewed status is set False.
my_inbox.get_unread_indexes()
Returns list of indexes of all not-yet-viewed SMS messages
my_inbox.get_message(i)**
Return (from_number, time_arrived, text_of_sms) for message[i]
Also change its state to "has been viewed".
If there is no message at position i, return None
Please help me on those above methods!?
Thank you so much!

Hi, I tweaked your code a bit. I think I have done this exercise before, in the book "How to Think Like a Computer Scientist". I hope it works for you.
from datetime import datetime
and
class SMS_store:
    """SMS inbox storing each message as a tuple of labelled strings.

    Every stored tuple is ``(read-flag, from, received, text)`` where the
    first element is either ``"Read: False"`` or ``"Read: True"``.
    """

    def __init__(self):
        self.store = []

    def __str__(self):
        # Format the message list; formatting ``self`` here would call
        # __str__ again and recurse without end.
        return "{0}".format(self.store)

    def add_new_arrival(self, number, time, text):
        """Store a new message, marked as not yet read."""
        self.store.append(("Read: False", "From: "+number, "Recieved: "+time, "Msg: "+text))

    def message_count(self):
        """Return the number of stored messages."""
        return len(self.store)

    def get_unread_indexes(self):
        """Return the indexes of all messages still marked unread."""
        return [i for i, v in enumerate(self.store) if v[0] == "Read: False"]

    def get_message(self, i):
        """Mark message ``i`` as read and return its (from, received, text).

        Returns None when there is no message at position ``i``.
        """
        try:
            msg = self.store[i]
        except IndexError:
            return None
        self.store[i] = ("Read: True",) + msg[1:]
        return self.store[i][1:]

    def delete(self, i):
        """Remove message ``i``."""
        del self.store[i]

    def clear(self):
        """Remove every message."""
        self.store = []

Why don't you add another list to your class, called unread? Change add_new_arrival to add the message to unread.
Then, in the get_message method, move the specified message from unread to read.
Lastly, your get_unread method just lists the indexes of the unread list.

Python SMS store program using class and methods - has_been_viewed status
import time
class SMS_store:
    """SMS inbox storing ``[time_received, number, text, read_status]`` lists."""

    def __init__(self):
        self.inbox = []

    def add_new_arrival(self, from_number, text_of_sms, read_status=False):
        """Append a message stamped with the current local time."""
        number = str(from_number)
        # Spelled-out equivalent of "%D %T", which not every platform supports.
        time_received = time.strftime("%m/%d/%y %H:%M:%S")
        self.inbox.append([time_received, number, text_of_sms, read_status])

    def message_count(self):
        """Return a sentence stating how many messages are stored."""
        return "There are {0} messages in your Inbox".format(len(self.inbox))

    def get_unread_indexes(self):
        """Return ``("Unread Messages in:", indexes)`` for unread messages."""
        # Look only at the status field: a ``False in message`` test would
        # also fire when the text itself equals False or 0.
        unread = [
            index for index, message in enumerate(self.inbox)
            if not message[3]
        ]
        return "Unread Messages in:", unread

    def get_message(self, index):
        """Mark message ``index`` as read and return its first three fields.

        Returns None when there is no message at ``index``.
        """
        try:
            message = self.inbox[index]
        except IndexError:
            return None
        # Keep the status a bool, matching the ``read_status`` parameter.
        message[3] = True
        return message[:3]

    def delete(self, index):
        """Remove message ``index`` and return ``("Deleted Message", index)``."""
        del self.inbox[index]
        return "Deleted Message", index

    def clear(self):
        """Empty the inbox and return ``"Empty Inbox"``."""
        self.inbox = []
        return "Empty Inbox"

'Queue' object has no attribute 'size'

I have seen other examples of this happening on StackOverflow, but I didn't understand any of the answers (I'm still a new programmer,) nor did the other examples I saw look quite like mine, else I wouldn't post this question.
I'm running Python 3.2 on Windows 7.
I have never had this happen to me before and I've done classes this way many times, so I don't really know what is different this time. The only difference is that I didn't make all of the Class file; I was given a template to fill in and a test file to try it on. It worked on the test file, but is not working on my file. I have been calling on the methods in the class in the exact same way as the test file (e.g. Lineup.size())
This is my Class:
class Queue:
    """First-in, first-out queue."""

    # Constructor, which creates a new empty queue:
    def __init__(self):
        # A deque adds at the back in O(1); list.insert(0, item) has to shift
        # every element on each call.
        from collections import deque
        self.__items = deque()

    # Adds a new item to the back of the queue, and returns nothing:
    def queue(self, item):
        self.__items.appendleft(item)

    # Removes and returns the front-most item in the queue.
    # Returns None if the queue is empty.
    def dequeue(self):
        if not self.__items:
            return None
        return self.__items.pop()

    # Returns the front-most item in the queue, and DOES NOT change the queue.
    # Returns None if the queue is empty.
    def peek(self):
        if not self.__items:
            return None
        return self.__items[-1]

    # Returns True if the queue is empty, and False otherwise:
    def is_empty(self):
        return len(self.__items) == 0

    # Returns the number of items in the queue:
    def size(self):
        return len(self.__items)

    # Removes all items from the queue, and sets the size to 0:
    def clear(self):
        self.__items.clear()

    # Returns a string representation of the queue (back-most item first):
    def __str__(self):
        return "".join(str(i) for i in self.__items)
This is my program:
from queue import Queue
# Interactive loop: add people to a line of at most three, serve them in
# arrival order, or exit.
Lineup = Queue()
while True:
    decision = input("Add, Serve, or Exit: ").lower()
    if decision == "add":
        if Lineup.size() == 3:
            print("There cannot be more than three people in line.")
        else:
            person = input("Enter the name of the person to add: ")
            Lineup.queue(person)
    elif decision == "serve":
        if Lineup.is_empty():
            print("The lineup is already empty.")
        else:
            # dequeue() returns the front-most person, the one peek() showed.
            print("%s has been served." % Lineup.dequeue())
    elif decision in ("exit", "quit"):
        break
    else:
        # The original never substituted the command into the message.
        print("%s is not a valid command." % decision)
And this is my error message when I enter "add" as my decision variable:
line 8, in
builtins.AttributeError: 'Queue' object has no attribute 'size'
So, what is going on here? What is different about this one?

Python 3 already has a queue module (which you might want to take a look at). When you import queue, Python finds that queue.py file before it finds your queue.py.
Rename your queue.py file to my_queue.py, change your import statements to from my_queue import Queue, and your code will work as you intend.

Try renaming size to something else, or implement a counter over the list __items, something like:
def get_size(self):
    """Return the number of items by counting them one at a time."""
    cnt = 0
    for _ in self.__items:
        # Python has no ``++`` operator; increment with ``+= 1``.
        cnt += 1
    return cnt

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Set tweets counts for each items in tweepy stream - python

Related

Extract timestamp and username when streaming tweets using tweepy

prevent bot from replying to an already replied tweet?

Python class: The data is somehow lost

Python SMS store program using class and methods - has_been_viewed status

'Queue' object has no attribute 'size'

Categories

Resources