This is a simple script I wrote to scrape data from Twitter using Tweepy.
import tweepy
import csv
import pandas as pd
from datetime import date
####input your credentials here
consumer_key = '(Hidden)'
consumer_secret = '(Hidden)'
access_token = '(Hidden)'
access_token_secret = '(Hidden)'
today = date.today()
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True)
csvFile = open('remotejob.csv', 'a')
#Use csv Writer
csvWriter = csv.writer(csvFile)
for tweet in tweepy.Cursor(api.search,q="#jobs #remote",count=5000,
lang="en",
since=today,tweet_mode = 'extended').items():
print (tweet.created_at, tweet.id)
csvWriter.writerow([tweet.created_at, tweet.full_text.encode('utf-8'), tweet.id, tweet.user.name.encode('utf-8'), tweet.user.screen_name.encode('utf-8'), tweet.user.statuses_count, tweet.retweet_count, tweet.favorite_count])
How can I schedule it to run every hour automatically?
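One minimal way to do this, assuming you move the scraping logic above into a function (scrape_remote_jobs below is a hypothetical name), is to loop forever and sleep for an hour between runs; cron or the schedule package would work just as well:

import time

def scrape_remote_jobs():
    # hypothetical wrapper: put the Cursor/csv code from above in here,
    # so each run opens the file, appends the new tweets, and closes it
    ...

if __name__ == '__main__':
    while True:
        scrape_remote_jobs()
        time.sleep(60 * 60)  # wait one hour before the next run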
I am doing research on tweet sentiment analysis.
I have a number of different keywords that I want to track (track=['Trump', 'ocean', 'Democrat', 'Republican', 'gardening', 'BMCSoftware', 'school', 'CT', '#metoo', 'governor', 'Iran']).
The problem is that I cannot tell which tweet corresponds to which keyword.
I tried my best to solve this but could not.
Please help me identify which keyword(s) each tweet relates to.
Here is my code:
import tweepy
from tweepy import Stream
from tweepy import StreamListener
import json
from textblob import TextBlob
import re  # Regular expressions
import csv
import nltk
nltk.download('punkt')

class MyListener(StreamListener):
    def on_data(self, data):
        raw_tweet = json.loads(data)
        try:
            created_at = raw_tweet['created_at']
            text = raw_tweet['text']
            tweet_id = raw_tweet['id']
            screen_name = raw_tweet['user']['screen_name']
            hashtags = raw_tweet['entities']['hashtags']
            description = raw_tweet['user']['description']
            retweet = raw_tweet['retweet_count']
            with open('Stream_data.csv', 'a') as myFile:
                writer = csv.writer(myFile)
                writer.writerow([created_at, tweet_id, screen_name, hashtags,
                                 description, text, retweet])
            print("created_at: ", created_at)
            print("id: ", tweet_id)
            print("screen_name: ", screen_name)
            print("hashtags: ", hashtags)
            print("description: ", description)
            print("text: ", text)
            print("retweet: ", retweet)
        except KeyError as e:
            # Not every message on the stream is a tweet (e.g. delete notices),
            # so anything without the expected keys is skipped.
            print("Skipping message, missing key:", e)

    def on_error(self, status):
        print(status)

# Credentials are assumed to be defined as in the first script above
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
twitter_stream = Stream(auth, MyListener())
twitter_stream.filter(track=['Trump', 'ocean', 'Democrat', 'Republican', 'gardening',
                             'BMCSoftware', 'school', 'CT', '#metoo', 'governor', 'Iran'])
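One approximate way to recover the keyword is to check the tweet text against the same track list inside on_data; the sketch below assumes the list is kept in a module-level TRACK_TERMS variable. Keep in mind that Twitter can also match a track term in other fields (expanded URLs, screen names, hashtags), so a text-only check may occasionally miss the term that actually triggered the match.

TRACK_TERMS = ['Trump', 'ocean', 'Democrat', 'Republican', 'gardening',
               'BMCSoftware', 'school', 'CT', '#metoo', 'governor', 'Iran']

def matched_keywords(text, terms=TRACK_TERMS):
    # return the track terms that appear (case-insensitively) in the tweet text
    lowered = text.lower()
    return [term for term in terms if term.lower() in lowered]

# Inside on_data, after extracting text, you could then write e.g.:
# keywords = matched_keywords(text)
# writer.writerow([created_at, tweet_id, screen_name, hashtags,
#                  description, text, retweet, keywords])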
I am coding a Twitter bot but I am running into problems along the way.
The code gets data from an API and should tweet that data out daily. Unfortunately, I am having trouble getting the return call on the last line to work so that it actually sends the tweet out.
import urllib.request
from pprint import pprint
import json
import datetime
import tweepy
import time
import os
import logging

def importVax():
    link = 'https://data.ontario.ca/api/3/action/datastore_search?resource_id=8a89caa9-511c-4568-af89-7f2174b4378c&limit=100'
    query = urllib.request.urlopen(link)
    query = json.loads(query.read())
    for r in query['result']['records']:
        date = datetime.datetime.strptime(r['report_date'][0:10], "%Y-%m-%d").date()
        previous_day_admin = r['previous_day_doses_administered']
        total_admin = r['total_doses_administered']
        total_complete = r['total_vaccinations_completed']
        if previous_day_admin == '':
            previous_day_admin = 0
        print(date, previous_day_admin, total_admin)

if __name__ == '__main__':
    importVax()

consumer_key = 'REDACTED'
consumer_secret = 'REDACTED'
access_token = 'REDACTED'
access_token_secret = 'REDACTED'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

def daily_update(date, previous_day_admin, total_admin, total_complete):
    message = str(
        f'''
        [{date}]
        Doses Administered: {previous_day_admin}
        Total Completed Vaccinations: {total_complete}
        % Immune: {round(total_complete/14570000)}
        '''
    )
    return api.update_status(message)
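The last line never runs because daily_update is only defined, never called, and importVax prints its values instead of returning them. Below is a minimal sketch of one way to wire the two together, reusing the imports and the daily_update definition from the snippet above; picking the last record as the newest one is an assumption. Also note that round(total_complete/14570000) rounds the fraction to 0 or 1, so multiplying by 100 before rounding would be needed to show a percentage.

def importVax():
    link = ('https://data.ontario.ca/api/3/action/datastore_search'
            '?resource_id=8a89caa9-511c-4568-af89-7f2174b4378c&limit=100')
    query = json.loads(urllib.request.urlopen(link).read())
    latest = query['result']['records'][-1]  # assumes the last record is the newest
    date = datetime.datetime.strptime(latest['report_date'][0:10], "%Y-%m-%d").date()
    previous_day_admin = latest['previous_day_doses_administered'] or 0
    total_admin = latest['total_doses_administered']
    total_complete = latest['total_vaccinations_completed']
    return date, previous_day_admin, total_admin, total_complete

if __name__ == '__main__':
    date, previous_day_admin, total_admin, total_complete = importVax()
    # daily_update builds the message and posts it via api.update_status
    daily_update(date, previous_day_admin, total_admin, total_complete)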
My code is:
import tweepy
import csv
import pandas as pd
import sys
#input your credentials here
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True)
# Open/Create a file to append data
csvFile = open('amaravathi.csv', 'a')
#Use csv Writer
csvWriter = csv.writer(csvFile)
for tweet in tweepy.Cursor(api.search,q="#amaravathi",count=10,
lang="en").items():`
csvWriter.writerow([tweet.created_at, tweet.text.encode('utf-8')])
print(tweet.created_at,tweet.text)
csvFile.close()
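One detail worth knowing when adapting a loop like this: count only sets the page size per API request, not the total number of tweets collected. To cap the total, pass a limit to .items(); a small sketch:

# collect at most 100 tweets in total; count=10 is just the page size per request
for tweet in tweepy.Cursor(api.search, q="#amaravathi", count=10, lang="en").items(100):
    print(tweet.created_at, tweet.text)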
In Python 3 with Tweepy, I have this script to do hashtag searches on Twitter:
import tweepy
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
#test = api.get_user('some user')._json
#test
#The test worked
search_result = api.search('#maconhamedicinal' or '#cannabismedicinal')
search_result
[]
The result is an empty list. Does anyone know what the problem is?
In Python, '#maconhamedicinal' or '#cannabismedicinal' evaluates to just the first string, so only one hashtag was actually searched. Query each keyword separately and combine the results instead:
keywords = ['#maconhamedicinal', '#cannabismedicinal']
results = []
for key in keywords:
    search_results = api.search(q=key, count=100)
    results = results + search_results
for result in results:
    # do whatever you want with each status here
    pass
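If you also want to remember which hashtag each status came from, one option (a sketch reusing the api object above) is to keep the results per keyword instead of in one flat list. Also keep in mind that the standard search API only indexes roughly the last week of tweets, so a rarely used hashtag can legitimately return nothing.

keywords = ['#maconhamedicinal', '#cannabismedicinal']
results_by_keyword = {}
for key in keywords:
    # 100 is the maximum page size for the standard search API
    results_by_keyword[key] = api.search(q=key, count=100)

for key, statuses in results_by_keyword.items():
    print(key, "->", len(statuses), "tweets")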
I did (pip install json, pip install simplejson). However, errors occur:
simplejson.scanner.JSONDecodeError: Unterminated string starting at:
line 1 column 65922 (char 65921)
tweepy.error.TweepError: Failed to parse JSON payload: Unterminated string starting at: line 1 column 65922 (char 65921)
What should I do?
import tweepy
import time
import os
import json
import simplejson

search_term = 'word1'
search_term2 = 'word2'
search_term3 = 'word3'
lat = "xxxx"
lon = "xxxx"
radius = "xxxx"
location = "%s,%s,%s" % (lat, lon, radius)

API_key = "xxxx"
API_secret = "xxxx"
Access_token = "xxxx"
Access_token_secret = "xxxx"

auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(Access_token, Access_token_secret)
api = tweepy.API(auth)

c = tweepy.Cursor(api.search,
                  # three placeholders so the third term is not silently dropped
                  q="{} OR {} OR {}".format(search_term, search_term2, search_term3),
                  count=100,  # the standard search API returns at most 100 tweets per page
                  geocode=location,
                  include_entities=True)

data = {}
i = 1
for tweet in c.items():
    data['text'] = tweet.text
    print(i, ":", data)
    i += 1
    time.sleep(1)

wfile = open(os.getcwd() + "/workk2.txt", mode='w')
data = {}
i = 0
for tweet in c.items():
    data['text'] = tweet.text
    wfile.write(data['text'] + '\n')
    i += 1
wfile.close()
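The "Unterminated string" error means Tweepy received a truncated or otherwise invalid JSON response for one page of results, which is usually transient. One common workaround (a sketch, not a guaranteed fix for the underlying network issue) is to step through the cursor manually so a bad page can be skipped or retried instead of killing the whole run:

iterator = c.items()
while True:
    try:
        tweet = next(iterator)
    except StopIteration:
        break  # no more results
    except tweepy.TweepError as e:
        # one page came back malformed; wait briefly and keep going
        print("Skipping a bad response:", e)
        time.sleep(5)
        continue
    print(tweet.text)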