I am trying to print tweet data, but I get an error that I can't fix. When I try to run the code from the docs, I still get the same error. Is this a Python 3.8 issue?
Code in docs:
# Example quoted from the tweepy docs: iterate over recent-search results for
# the query "Tweepy" and print the id of each tweet.
# max_results=100 is forwarded to search_recent_tweets; flatten(limit=250) is
# presumably what caps the loop at 250 tweets in total -- confirm in the docs.
for tweet in tweepy.Paginator(client.search_recent_tweets, "Tweepy",
max_results=100).flatten(limit=250):
print(tweet.id)
Stack Trace:
Traceback (most recent call last):
File "scraper.py", line 38, in <module>
main()
File "scraper.py", line 34, in main
for item in paginator:
File "/Users/troy/Desktop/streamlit/env/lib/python3.8/site-packages/tweepy/pagination.py", line 100, in __next__
self.previous_token = response.meta.get("previous_token")
AttributeError: 'Response' object has no attribute 'meta'
My Code:
import tweepy
import requests
import os
import pandas as pd
# Twitter credentials, read once from the environment when the module loads.
# Each name is None when its environment variable is not set.
api_key = os.getenv('Twitter_API_Key')
api_secret = os.getenv('Twitter_API_Secret')
access_token = os.getenv('Twitter_Access_Token')
access_secret = os.getenv('Twitter_Access_Secret')
bearer = os.getenv('bearer_token')
def create_client():
    """Build the tweepy client used by the rest of the script.

    The client is created with the module-level ``bearer`` token,
    ``return_type=requests.Response`` and ``wait_on_rate_limit=True``.
    """
    # NOTE(review): return_type=requests.Response looks like the source of the
    # "'Response' object has no attribute 'meta'" error raised from
    # tweepy/pagination.py -- confirm against the tweepy Client/Paginator docs.
    return tweepy.Client(
        bearer_token=bearer,
        return_type=requests.Response,
        wait_on_rate_limit=True,
    )
def create_paginator(authenticated_client):
    """Return a tweepy.Paginator over ``search_recent_tweets``.

    The search is fixed to the query 'from:elonmusk' and requests the
    author_id, id and created_at tweet fields; max_results=100 and limit=5
    are passed through to the paginator unchanged.
    """
    search_options = {
        'query': 'from:elonmusk',
        'tweet_fields': ['author_id', 'id', 'created_at'],
        'max_results': 100,
        'limit': 5,
    }
    return tweepy.Paginator(
        authenticated_client.search_recent_tweets, **search_options)
def main():
    """Create the client and paginator, then print the paginator and every item it yields."""
    paginator = create_paginator(create_client())
    print(paginator)
    for item in paginator:
        print(item)


if __name__ == "__main__":
    main()
Turns out I needed .flatten(). Don't know why but hey that's show business.
def create_paginator(authenticated_client, query):
    """Return a flattened iterator over recent tweets matching ``query``.

    Args:
        authenticated_client: object exposing ``search_recent_tweets``.
        query: search query string forwarded to the search call.

    Returns:
        The result of ``tweepy.Paginator(...).flatten(limit=5)``.
    """
    paginator = tweepy.Paginator(
        authenticated_client.search_recent_tweets,
        query=query,
        tweet_fields=['author_id', 'id', 'created_at'],
        max_results=10,
    ).flatten(limit=5)
    # Hand the iterator back; without this return every caller receives None.
    return paginator
Related
I want to read the humidity data from the API, but I keep getting an attribute error.
Can anybody help? I am new to coding and Python.
Error:
Traceback (most recent call last):
File "C:\Users...HidenForPrivacy", line 27, in <module>
test.current_humidity()
File "C:\Users...HidenForPrivacy", line 20, in current_humidity
response = requests.get(self.url)
AttributeError: 'humidity' object has no attribute 'url'
import requests
import json
# Wrapper that reads the current humidity from the OpenWeatherMap "onecall"
# endpoint for a fixed latitude/longitude.
class humidity():
# NOTE(review): this method is named `init`, not `__init__`, so it is not run
# when `humidity()` is constructed below; the attributes assigned here
# (humidity, api_key, lat, lon, url) do not exist on the instance unless
# init(...) is called explicitly.
def init(self,humidity, url):
self.humidity = humidity
self.api_key = "hiddenforprivacy"
self.lat = "53.5502"
self.lon = "9.9920"
self.url = url
# Request the endpoint, decode the JSON body and store data["current"]["humidity"].
def current_humidity(self):
# self.url is read here, but the only other assignment to it is two lines
# further down, after the request has already been attempted.
response = requests.get(self.url)
data = json.loads(response.text)
self.url = "https://api.openweathermap.org/data/2.5/onecall?lat=%s&lon=%s&appid=%s&units=metric" % (self.lat, self.lon, self.api_key)
self.humidity = data["current"]["humidity"]
# NOTE(review): `humidity` is the class name here, not self.humidity.
print(humidity)
# Constructed with no arguments, so `init` above is never invoked.
test = humidity()
test.current_humidity()
The problem is that you have not yet set any value to self.url
When you call test = humidity(), you do not call the init(self, humidity, url) method, but the empty __init__(self) method (the constructor in Python is called __init__). So there the url is not set.
In your code you do set the url in line 22 self.url = "https://api.openweath..., but that happens after you already called response = requests.get(self.url).
One solution might be to put the line self.url = "https://api.openweath... before response = requests.get(self.url)
I want to run some example code from GitHub (https://github.com/kaantas/spark-twitter-sentiment-analysis). I followed the steps below:
Started zkserver
Started Kafka 2.5.0 (I am also using Apache Spark 3.0.0 and JDK 8)
Started tweet_listener.py (tweets start to stream; I can see them in the cmd window)
I opened twitter_topic_avg_sentiment_val.py with Spyder and it just shows the output below
Note: I don't know anything about jars; if I need to use an external jar, please explain how.
THANKS A LOT...
Traceback (most recent call last):
File "C:\Users\merha\Desktop\spark-twitter-sentiment-analysis-master\twitter_topic_avg_sentiment_val.py", line 40, in <module>
query.awaitTermination()
File "C:\Anaconda3\lib\site-packages\pyspark\sql\streaming.py", line 103, in awaitTermination
return self._jsq.awaitTermination()
File "C:\Anaconda3\lib\site-packages\py4j\java_gateway.py", line 1305, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "C:\Anaconda3\lib\site-packages\pyspark\sql\utils.py", line 137, in deco
raise_from(converted)
File "<string>", line 3, in raise_from
StreamingQueryException: org/apache/spark/kafka010/KafkaConfigUpdater
=== Streaming Query ===
Identifier: [id = f5dd9cb5-fcea-42ec-a20e-93a2ad233e1f, runId = 6cffdd89-3792-4500-a508-e4abc76425fb]
Current Committed Offsets: {}
Current Available Offsets: {}
Current State: INITIALIZING
Thread State: RUNNABLE
------------------<<<<<<<<<<<<<<<<<<tweet_listener.py>>>>------------------------
from tweepy import Stream
from tweepy.streaming import StreamListener
import json
import twitter_config
import pykafka
from afinn import Afinn
import sys
from sys import exit
class TweetListener(StreamListener):
    """Stream listener that scores each tweet's text and publishes it to Kafka.

    Every incoming payload is reduced to a JSON object with the keys
    ``text`` and ``senti_val`` and produced to the ``twitter3`` topic on the
    Kafka broker at localhost:9092.
    """

    def __init__(self):
        # Let the tweepy base class run its own set-up before adding ours.
        super().__init__()
        self.client = pykafka.KafkaClient("localhost:9092")
        self.producer = self.client.topics[bytes('twitter3', 'ascii')].get_producer()

    def on_data(self, data):
        """Score one raw stream payload and forward it to Kafka.

        Args:
            data: JSON text of a single stream message.

        Returns:
            True in every handled case.
        """
        try:
            json_data = json.loads(data)
            # Start from an empty dict instead of parsing the string '{}'.
            json_send_data = {}
            json_send_data['text'] = json_data['text']
            # `afinn` is the module-level Afinn instance created by the script.
            json_send_data['senti_val'] = afinn.score(json_data['text'])
            print(json_send_data['text'], " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ", json_send_data['senti_val'])
            self.producer.produce(bytes(json.dumps(json_send_data), 'ascii'))
            return True
        except KeyError:
            # Payloads without the expected keys are skipped.
            return True

    def on_error(self, status):
        """Print the error status and return True."""
        print(status)
        return True
# OAuthHandler is needed below; the script's import header only brings in
# Stream and StreamListener from tweepy, so import it here.
from tweepy import OAuthHandler

# Placeholder credentials for the Twitter application.
consumer_key = "xxxxxxxxxx"
consumer_secret = "xxxxxxxxxxx"
access_token = "xxxxxxxxxxxx"
access_secret = "xxxxxxxxxx"

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# create AFINN object for sentiment analysis
afinn = Afinn()

# Stream English tweets that match the tracked phrase into the listener.
twitter_stream = Stream(auth, TweetListener())
twitter_stream.filter(languages=['en'], track=["big data"])
----------------------<<<twitter_topic_avg_sentiment_val.py>>>>>>---------------
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
import json
import sys
from pyspark.sql.types import *
def fun(avg_senti_val):
    """Map an average sentiment value to a status label.

    Args:
        avg_senti_val: the averaged sentiment score; may be None.

    Returns:
        'NEGATIVE' for values below zero, 'POSITIVE' for values above zero,
        and 'NEUTRAL' for zero, NaN, or anything that cannot be compared
        with a number (such as None).
    """
    try:
        if avg_senti_val < 0:
            return 'NEGATIVE'
        # Testing "> 0" explicitly keeps NaN, which fails both comparisons,
        # from being labelled POSITIVE.
        if avg_senti_val > 0:
            return 'POSITIVE'
    except TypeError:
        # Non-comparable input (e.g. None) is treated as neutral.
        pass
    return 'NEUTRAL'
if __name__ == "__main__":
    # Shape of the JSON written to Kafka: the tweet text and its sentiment score.
    schema = StructType([
        StructField("text", StringType(), True),
        StructField("senti_val", DoubleType(), True)
    ])

    spark = SparkSession.builder.appName("TwitterSentimentAnalysis").getOrCreate()

    # The "kafka" source is not bundled with Spark; the spark-sql-kafka-0-10
    # package has to be supplied (for example via --packages) or the stream
    # fails to start.
    kafka_df = (
        spark.readStream.format("kafka")
        .option("kafka.bootstrap.servers", "localhost:9092")
        .option("subscribe", "twitter3")
        .option("startingOffsets", "earliest")
        .load()
    )
    kafka_df_string = kafka_df.selectExpr("CAST(value AS STRING)")
    tweets_table = kafka_df_string.select(from_json(col("value"), schema).alias("data")).select("data.*")
    sum_val_table = tweets_table.select(avg('senti_val').alias('avg_senti_val'))

    # udf = USER DEFINED FUNCTION
    udf_avg_to_status = udf(fun, StringType())

    # average of senti_val column to status column
    new_df = sum_val_table.withColumn("status", udf_avg_to_status("avg_senti_val"))

    # NOTE(review): new_df is built above, but the query streams
    # kafka_df_string, so the averaged status never reaches the console --
    # confirm which frame was meant to be written.
    query = kafka_df_string.writeStream.format("console").option("truncate", "false").start()
    query.awaitTermination()
After I downloaded this jar file
spark-token-provider-kafka-0-10
and copied it to the Spark jars folder (or added it to SPARK_CLASSPATH), my problem was resolved.
Have you tried submitting the Spark job with the Kafka package passed as a configuration option? See the third line.
spark-submit --master yarn --deploy-mode cluster \
--py-files "${PY_ZIP}" \
--packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1" \
I wanna execute this code.
This is the error I'm getting in terminal after running the code
Traceback (most recent call last):
File "/private/var/folders/6j/n37bd5r92sj8wfkn3k_k9k580000gp/T/Cleanup At Startup/bday-459934533.707.py", line 68, in <module>
print 'total='+str(process_posts(url))
File "/private/var/folders/6j/n37bd5r92sj8wfkn3k_k9k580000gp/T/Cleanup At Startup/bday-459934533.707.py", line 32, in process_posts
posts=res_obj["data"]
KeyError: 'data'
The code is -
import httplib, urllib
from bs4 import BeautifulSoup
import os
import json
import time
import calendar
# Graph API access token ('value' is presumably a placeholder).
access_token='value'
# Posts whose local date equals this YYYY-MM-DD string are the ones replied to.
dob='2015-07-30'
# NOTE(review): this module-level connection does not appear to be used;
# process_posts() opens its own connections.
conn = httplib.HTTPSConnection("graph.facebook.com")
print 'requesting...'
#conn.request("GET",path,urllib.urlencode(data),{})
# NOTE(review): has_more is never read in the visible code.
has_more=False
def convert_to_local(s):
    """Return the local calendar date (YYYY-MM-DD) for a UTC timestamp string.

    Only the first 19 characters of ``s`` are parsed, in the form
    ``YYYY-MM-DDTHH:MM:SS``; anything after them is ignored.
    """
    utc_struct = time.strptime(s[:19], "%Y-%m-%dT%H:%M:%S")
    epoch_seconds = calendar.timegm(utc_struct)
    return time.strftime("%Y-%m-%d", time.localtime(epoch_seconds))
def getRandomThnx(msg):
    """Return the reply text for a message.

    ``msg`` is accepted but not consulted; the same reply is always returned.
    """
    reply = 'thanks :)'
    return reply
# Fetch the feed page at `url` from graph.facebook.com, post a thank-you
# comment on matching posts, then recurse into the next page.
# Returns the number of posts counted as processed, summed over the pages.
def process_posts(url):
conn = httplib.HTTPSConnection("graph.facebook.com")
conn.request("GET",url)
res = conn.getresponse()
# NOTE(review): attribute reference without a call -- this line has no effect.
conn.getresponse
data=res.read()
res_obj=json.loads(data)
# Raises KeyError when the decoded response has no "data" key
# (presumably an error payload -- print `data` to confirm).
posts=res_obj["data"]
processed=0
for post in posts:
# Posts without a message are skipped.
if not "message" in post:
continue
msg=post["message"]
post_date=convert_to_local(post["created_time"])
# Only posts whose local date equals the module-level `dob` are handled.
if dob == post_date:
if "from" in post and "message" in post:
user= post["from"]["name"]
path='/'+post['id']+'/comments'
param_data={ 'format':'json',
'message':getRandomThnx(msg),
'access_token':access_token
}
conn = httplib.HTTPSConnection("graph.facebook.com")
# NOTE(review): assumes every such post carries post["comments"]["count"];
# a post without it would raise KeyError -- verify against the API response.
if post["comments"]["count"]==0:
print 'responding to :'+user+'->'+msg
conn.request("POST",path,urllib.urlencode(param_data),{})
res = conn.getresponse()
# NOTE(review): the likes path, parameters and connection are prepared here,
# but no request is sent with them in the visible code.
path='/'+post['id']+'/likes'
param_data={ 'format':'json',
'access_token':access_token
}
conn = httplib.HTTPSConnection("graph.facebook.com")
processed+=1
# Follow the "next" link with the "https://graph.facebook.com" prefix sliced
# off, so the recursive call receives a path like the original `url`.
if "paging" in res_obj:
return processed+process_posts(res_obj["paging"]["next"] [len("https://graph.facebook.com"):])
else:
print "Finished"
return processed
# Start from the /me/feed path and print the total returned by process_posts().
url='/me/feed?access_token='+access_token
print 'total='+str(process_posts(url))
print 'Thanx to all wisher :)'
It means that the JSON you receive in response to your GET request doesn't contain a "data" key.
You can see what the JSON data looks like by doing something like:
import httplib, urllib
from bs4 import BeautifulSoup
import os
import json
import time
import calendar
# Graph API access token ('value' is presumably a placeholder).
access_token='value'
# Birthday as a YYYY-MM-DD string.
dob='2015-07-30'
# NOTE(review): this module-level connection does not appear to be used;
# process_posts() opens its own connection.
conn = httplib.HTTPSConnection("graph.facebook.com")
print 'requesting...'
#conn.request("GET",path,urllib.urlencode(data),{})
# NOTE(review): has_more is never read in the visible code.
has_more=False
def convert_to_local(s):
    """Return the local calendar date (YYYY-MM-DD) for a UTC timestamp string.

    Only the first 19 characters of ``s`` are parsed, in the form
    ``YYYY-MM-DDTHH:MM:SS``; anything after them is ignored.
    """
    utc_struct = time.strptime(s[:19], "%Y-%m-%dT%H:%M:%S")
    epoch_seconds = calendar.timegm(utc_struct)
    return time.strftime("%Y-%m-%d", time.localtime(epoch_seconds))
def getRandomThnx(msg):
    """Return the reply text for a message.

    ``msg`` is accepted but not consulted; the same reply is always returned.
    """
    reply = 'thanks :)'
    return reply
def process_posts(url):
conn = httplib.HTTPSConnection("graph.facebook.com")
conn.request("GET",url)
res = conn.getresponse()
conn.getresponse
data=res.read()
res_obj=json.loads(data)
try:
posts=res_obj["data"]
except:
print "res_obj does not contain 'data', here is what res_obj looks like:"
print data
...
I'm trying to create a Collection Class in Python to access the various collections in my db. Here's what I've got:
import sys
import os
import pymongo
from pymongo import MongoClient
class Collection():
    """Handle on one MongoDB collection, identified by database and collection name."""

    # A single MongoClient, created when the class body runs and shared by
    # every instance.
    client = MongoClient()

    def __init__(self, db, collection_name):
        """Store the database name and the collection name."""
        self.collection_name = collection_name
        self.db = db

    def getCollection(self):
        """Return the collection object, looked up by attribute access on the client."""
        return getattr(getattr(self.client, self.db), self.collection_name)

    def getCollectionKeys(self, collection):
        """Get a set of keys from a collection"""
        found_keys = set()
        for document in collection.find():
            found_keys.update(document.keys())
        return found_keys
# Script entry: build a Collection for hkpr_restore.agents and print its keys.
if __name__ == '__main__':
print"Begin Main"
agents = Collection('hkpr_restore','agents')
print "agents is" , agents
# NOTE(review): no parentheses -- this binds the bound method itself instead
# of calling it, so agents_collection is not a collection object and the
# .find() call inside getCollectionKeys fails.
agents_collection = agents.getCollection
print agents_collection
print agents.getCollectionKeys(agents_collection)
I get the following output:
Begin Main
agents is <__main__.Collection instance at 0x10ff33e60>
<bound method Collection.getCollection of <__main__.Collection instance at 0x10ff33e60>>
Traceback (most recent call last):
File "collection.py", line 52, in <module>
print agents.getCollectionKeys(agents_collection)
File "collection.py", line 35, in getCollectionKeys
collection_list = collection.find()
AttributeError: 'function' object has no attribute 'find'
The function getCollectionKeys works fine outside of a class. What am I doing wrong?
This line:
agents_collection = agents.getCollection
Should be:
agents_collection = agents.getCollection()
Also, you don't need to use getattr the way you are. Your getCollection method can be:
def getCollection(self):
    """Return the collection via dictionary-style lookup on the client."""
    database = self.client[self.db]
    return database[self.collection_name]
I have the following function, which makes a POST request to a provider. I need to add a new parameter to the POST request to increase the timeout (the default is 5 minutes; I want to increase it to 1 hour). I made changes, but I keep getting errors:
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib64/python2.6/threading.py", line 532, in __bootstrap_inner
self.run()
File "/opt/lvptest/lvp_upload.py", line 226, in run
op = uploadMedia(mediaName, "PyUploader", env)
File "/opt/lvptest/lvp_upload.py", line 121, in uploadMedia
expires = math.ceil(time() + 3000) ["expires"]
TypeError: 'module' object is not callable
Here is my function
# Upload the file at `filepath` to the organisation's media endpoint: build a
# signed URL, then send a multipart POST with pycurl.
# NOTE(review): the snippet is cut off inside the `try` block, so the error
# handling and the return value are not visible here.
def uploadMedia(filepath, description, env):
# Both names are module-level settings defined outside this snippet.
global verbose
global config
orgId = config[env]["org_id"]
accessKey = config[env]["access_key"]
secret = config[env]["secret"]
# NOTE(review): two problems on this line. `time()` fails with
# "'module' object is not callable" when `time` is the module rather than the
# function, and the number returned by math.ceil is then subscripted with
# ["expires"], which a number does not support.
expires = math.ceil(time() + 3000) ["expires"]
filename = os.path.basename(filepath)
baseUrl = "http://api.videoplatform.limelight.com/rest/organizations/%s/media" %(orgId)
# `expires` is handed to the request signer together with the credentials.
signedUrl = lvp_auth_util.authenticate_request("POST", baseUrl, accessKey, secret, expires)
c = pycurl.Curl()
c.setopt(c.POST, 1)
c.setopt(c.HEADER, 0)
# Multipart form: title, description and the file itself.
c.setopt(c.HTTPPOST, [('title', filename), ("description", description), (("media_file", (c.FORM_FILE, filepath)))])
if verbose:
c.setopt(c.VERBOSE, 1)
# Response body and headers are captured in separate in-memory buffers.
bodyOutput = StringIO()
headersOutput = StringIO()
c.setopt(c.WRITEFUNCTION, bodyOutput.write)
c.setopt(c.URL, signedUrl)
c.setopt(c.HEADERFUNCTION, headersOutput.write)
try:
c.perform()
c.close()
Any tips if I am adding the "expires" parameter incorrectly?
Here is an example of what my POST request looks like:
POST /rest/organizations/9fafklsdf/media?access_key=sfdfsdfsdfsdfsdf89234 &expires=1400406364&signature=Mc9Qsd4sdgdfg0iEOFUaRC4iiAJBtP%2BMCot0sFKM8A$
Two errors:
You should use from time import time instead of just import time, because the time module has a time function inside it.
math.ceil returns a float and you are trying to use it as a dict after:
expires = math.ceil(time() + 3000) ["expires"]
This doesn't make sense. math.ceil(time() + 3000) will be equal to something like 1400406364 and you can't retrieve a data from it.
Removing the ["expires"] should solve the problem.
The time module is not callable; you need to call the time function from it:
>>> import time
>>> import math
>>> math.ceil(time())
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'module' object is not callable
>>> math.ceil(time.time())
1400657920.0
Then you need to get rid of ["expires"] after it, since math.ceil returns a floating-point number, not a dictionary.
I don't know why you are using cURL here; with requests your code is a lot simpler:
import math
import os
import time
import urllib

import requests

# Placeholder: the organisation id that is substituted into the URL path.
org_id = 'your-org-id'

# Explicit field numbers ({0}, {1}) because auto-numbered '{}' fields are
# rejected by str.format on Python 2.6.
url = 'http://api.videoplatform.limelight.com/rest/organizations/{0}/media'
filename = 'foo/bar/zoo.txt'

params = {}
params['access_key'] = 'dfdfdeef'
# math.ceil() returns a float on Python 2; cast it so the query string
# carries a whole number of seconds.
params['expires'] = int(math.ceil(time.time() + 3000))

# urllib.urlencode() turns the dict into "key=value&key=value"
# (Python 2; on Python 3 the function lives at urllib.parse.urlencode).
url = '{0}?{1}'.format(url.format(org_id), urllib.urlencode(params))

payload = {}
payload['title'] = os.path.basename(filename)
payload['description'] = 'description'

# The with-block closes the file handle once the upload has been sent.
with open(filename, 'rb') as media_file:
    file_data = {'media_file': media_file}
    result = requests.post(url, data=payload, files=file_data)

result.raise_for_status()  # This will raise an exception if
                           # there is a problem with the request