I'm trying to build a scraper for my smart mirror. I have built a Python script that gets the weather, pollution level, and news, and what I don't get is why I am getting an error in this one function when scraping the data but not in the other functions, like the weather and pollution level ones. This is the code I am getting errors for:
def news(self):
    reddit1 = "https://www.reddit.com/r/LifeProTips/hot.json"
    reddit2 = "https://reddit.com/r/technology/hot.json"
    reddit3 = "https://reddit.com/r/Futurology/hot.json"
    reddit4 = "https://reddit.com/r/Showerthoughts/hot.json"
    lpt = requests.get(reddit1)
    tech = requests.get(reddit2)
    futu = requests.get(reddit3)
    show = requests.get(reddit4)
    lptd = json.loads(lpt.text)
    techd = json.loads(tech.text)
    futud = json.loads(futu.text)
    showd = json.loads(show.text)
    lpttitle = lptd['data']['children'][0]['data']['title']
    print(lpttitle)
    lptcontent = lptd['data']['children'][0]['data']['selftext']
    print(lptcontent)
    techtitle = techd['data']['children'][0]['data']['title']
    print(techtitle)
    techcontent = techd['data']['children'][0]['data']['selftext']
    print(techcontent)
    fututitle = futud['data']['children'][2]['data']['title']
    print(fututitle)
    futucontent = futud['data']['children'][0]['data']['selftext']
    print(futucontent)
    showtitle = showd['data']['children'][0]['data']['title']
    print(showtitle)
    showcontent = showd['data']['children'][0]['data']['selftext']
    print(showcontent)
    lptdump = requests.put('firebaseurl', json={"Title": lpttitle, "Content": lptcontent})
    techdump = requests.put('firebaseurl', json={"Title": techtitle, "Content": techcontent})
    futudump = requests.put('firebaseurl', json={"Title": fututitle, "Content": futucontent})
    showdump = requests.put('firebaseurl', json={"Title": showtitle, "Content": showcontent})
but for a function like this:
def pollution(self):
    url = "api"
    url2 = "api"
    pm10 = requests.get(url)
    pm25 = requests.get(url2)
    pm10leveldata = json.loads(pm10.text)
    pm25leveldata = json.loads(pm25.text)
    pm10level = pm10leveldata[0]['data']
    pm25level = pm25leveldata[0]['data']
    print(pm10level)
    print(pm25level)
    # pm10 = round(pm10level)
    # pm25 = round(pm25level)
    pdump = requests.put('firebaseurl', json={"Pm10": pm10level, "Pm25": pm25level})
there are no errors. Why am I getting a KeyError in the news function but not in one like this?
Print out lptd before you try to use it:
{'error': 429, 'message': 'Too Many Requests'}
You have angered reddit. This could happen on any request, so it could appear as though the error is jumping around.
As an aside, you can get the json from requests directly:
lptd = requests.get(reddit1).json()
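For illustration, here is a minimal sketch (not from the original code) of checking the response before indexing into it; the User-Agent string is just a made-up example, but Reddit throttles clients that use the default requests one much more aggressively:
import requests

def fetch_top_post(url):
    # A descriptive User-Agent reduces (but does not eliminate) 429 responses.
    resp = requests.get(url, headers={"User-Agent": "smart-mirror-scraper/0.1"})
    data = resp.json()
    if "error" in data:
        # e.g. {'error': 429, 'message': 'Too Many Requests'}
        raise RuntimeError("Reddit returned an error: %s" % data)
    post = data["data"]["children"][0]["data"]
    return post["title"], post["selftext"]

title, content = fetch_top_post("https://www.reddit.com/r/LifeProTips/hot.json")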
I was wondering if I could get some input from some seasoned Python experts; I have a couple of questions.
I am extracting data from an API request and calculating the total vulnerabilities.
What is the best way to return this data so that I can call it in another function?
How can I add up all the vulnerabilities? Right now it only sums 500 at a time; I'd like the sum of every vulnerability.
def _request():
    third_party_patching_filer = {
        "asset": "asset.agentKey IS NOT NULL",
        "vulnerability": "vulnerability.categories NOT IN ['microsoft patch']"}
    headers = _headers()
    print(headers)
    url1 = f"https://us.api.insight.rapid7.com/vm/v4/integration/assets"
    resp = requests.post(url=url1, headers=headers, json=third_party_patching_filer, verify=False).json()
    jsonData = resp
    #print(jsonData)
    has_next_cursor = False
    nextKey = ""
    if "cursor" in jsonData["metadata"]:
        has_next_cursor = True
        nextKey = jsonData["metadata"]["cursor"]
    while has_next_cursor:
        url2 = f"https://us.api.insight.rapid7.com/vm/v4/integration/assets?&size=500&cursor={nextKey}"
        resp2 = requests.post(url=url2, headers=headers, json=third_party_patching_filer, verify=False).json()
        cursor = resp2["metadata"]
        print(cursor)
        if "cursor" in cursor:
            nextKey = cursor["cursor"]
            print(f"next key {nextKey}")
            #print(desktop_support)
            for data in resp2["data"]:
                for tags in data['tags']:
                    total_critical_vul_osswin = []
                    total_severe_vul_osswin = []
                    total_modoer_vuln_osswin = []
                    if tags["name"] == 'OSSWIN':
                        print("OSSWIN")
                        critical_vuln_osswin = data['critical_vulnerabilities']
                        severe_vuln_osswin = data['severe_vulnerabilities']
                        modoer_vuln_osswin = data['moderate_vulnerabilities']
                        total_critical_vul_osswin.append(critical_vuln_osswin)
                        total_severe_vul_osswin.append(severe_vuln_osswin)
                        total_modoer_vuln_osswin.append(modoer_vuln_osswin)
                        print(sum(total_critical_vul_osswin))
                        print(sum(total_severe_vul_osswin))
                        print(sum(total_modoer_vuln_osswin))
                    if tags["name"] == 'DESKTOP_SUPPORT':
                        print("Desktop")
                        total_critical_vul_desktop = []
                        total_severe_vul_desktop = []
                        total_modorate_vuln_desktop = []
                        critical_vuln_desktop = data['critical_vulnerabilities']
                        severe_vuln_desktop = data['severe_vulnerabilities']
                        moderate_vuln_desktop = data['moderate_vulnerabilities']
                        total_critical_vul_desktop.append(critical_vuln_desktop)
                        total_severe_vul_desktop.append(severe_vuln_desktop)
                        total_modorate_vuln_desktop.append(moderate_vuln_desktop)
                        print(sum(total_critical_vul_desktop))
                        print(sum(total_severe_vul_desktop))
                        print(sum(total_modorate_vuln_desktop))
                    else:
                        pass
        else:
            has_next_cursor = False
If you have a lot of parameters to pass, consider using a dict to combine them. Then you can just return the dict and pass it along to the next function that needs that data. Another approach would be to create a class and either access the variables directly or have helper functions that do so. The latter is a cleaner solution than a dict, since with a dict you have to quote every variable name, and with a class you can easily add functionality beyond just being a container for a bunch of instance variables.
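For example, a minimal sketch of the dict approach (the function and key names here are made up for illustration, not taken from your code):
def summarise(critical_counts, severe_counts, moderate_counts):
    # Bundle the sums into one dict so a single value can be returned.
    return {
        "critical": sum(critical_counts),
        "severe": sum(severe_counts),
        "moderate": sum(moderate_counts),
    }

def report(totals):
    # Any other function can consume the returned dict by key.
    print(f"critical: {totals['critical']}, severe: {totals['severe']}, moderate: {totals['moderate']}")

report(summarise([3, 1], [5, 2], [0, 4]))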
If you want the total across all the data, you should put these initializations:
total_critical_vul_osswin = []
total_severe_vul_osswin = []
total_modoer_vuln_osswin = []
before the while has_next_cursor loop (and similarly for the desktop totals). The way your code is written now, they are re-initialized for each cursor (i.e., for every page of 500 records, based on the size parameter in the URL).
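Here is a runnable sketch of that structure, using a fabricated two-page response in place of the Rapid7 call, just to show where the lists should live:
# Fake pages standing in for the paginated API responses.
pages = [
    {"data": [{"tags": [{"name": "OSSWIN"}], "critical_vulnerabilities": 3}],
     "metadata": {"cursor": "abc"}},
    {"data": [{"tags": [{"name": "OSSWIN"}], "critical_vulnerabilities": 2}],
     "metadata": {}},
]

total_critical_vul_osswin = []   # initialised once, before the loop

page_index = 0
has_next_cursor = True
while has_next_cursor:
    page = pages[page_index]
    for data in page["data"]:
        for tags in data["tags"]:
            if tags["name"] == "OSSWIN":
                total_critical_vul_osswin.append(data["critical_vulnerabilities"])
    if "cursor" in page["metadata"]:
        page_index += 1
    else:
        has_next_cursor = False

print(sum(total_critical_vul_osswin))   # 5 -- the total across both pages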
I am making a Telegram bot using Python 3 on a Raspberry Pi, and I use the requests library for HTTP requests.
I wrote code that should answer the &start command:
import requests as rq

updateURL = "https://api.telegram.org/bot925438333:AAGEr3pf3c4Fz91sL79mwJ6aGYm-Y6BM7_4/getUpdates"

while True:
    r = rq.post(url=updateURL)
    data = r.json()
    messageArray = data['result']
    lastMsgID = len(messageArray) - 1
    lastMsgData = messageArray[lastMsgID]
    lastMsgSenderID = lastMsgData['message']['from']['id']
    lastMsgUsername = lastMsgData['message']['from']['username']
    lastMsgText = lastMsgData["message"]["text"]
    lastMsgChatType = lastMsgData['message']['chat']['type']
    if lastMsgChatType == "group":
        lastMsgGroupID = lastMsgData['message']['chat']['id']
    if lastMsgText == "&start":
        if lastMsgChatType == "private":
            URL = "https://api.telegram.org/bot925438333:AAGEr3pf3c4Fz91sL79mwJ6aGYm-Y6BM7_4/sendMessage"
            chatText = "Witamy w KozelBot"
            chatID = lastMsgSenderID
            Params = {"chat_id": chatID, "text": chatText}
            rs = rq.get(url=URL, params=Params)
        if lastMsgChatType == "group":
            URL = "https://api.telegram.org/bot925438333:AAGEr3pf3c4Fz91sL79mwJ6aGYm-Y6BM7_4/sendMessage"
            chatText = "Witamy w KozelBot"
            chatID = lastMsgGroupID
            Params = {"chat_id": chatID, "text": chatText}
            rs = rq.get(url=URL, params=Params)
but the code outputs an error:
File "/home/pi/telegramResponse.py", line 16, in
lastMsgText = lastMsgData["message"]["text"]
KeyError: 'text'
I don't know how to solve this problem because this fragment is working fine in my other scripts!
Please help!
The reason was simple!
The last message the program finds doesn't contain any text because it was a new-user notification. The KeyError occurred simply because that message doesn't have a ['text'] field.
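For illustration, a minimal sketch (not from the original code) of guarding against updates that carry no text by using dict.get() instead of indexing; the sample update below is fabricated:
# Fabricated update resembling a new-member notification (no 'text' key).
update = {"message": {"from": {"id": 1, "username": "someone"},
                      "chat": {"type": "group", "id": -1001},
                      "new_chat_members": [{"id": 2}]}}

message = update.get("message", {})
text = message.get("text", "")   # empty string instead of a KeyError
if text == "&start":
    print("handle the start command")
else:
    print("ignore updates that carry no text")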
I am teaching myself how to use Python and Django to access the Google Places API and make Nearby Search requests for different types of gyms.
I was only taught how to use Python and Django with databases you build locally.
I wrote out a full GET request for each of the four searches I am doing. I looked up examples, but none seem to work for me.
allgyms = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
all_text = allgyms.text
alljson = json.loads(all_text)
healthclubs = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&keyword=healthclub&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
health_text = healthclubs.text
healthjson = json.loads(health_text)
crossfit = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&keyword=crossfit&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
cross_text = crossfit.text
crossjson = json.loads(cross_text)
I really would like to be pointed in the right direction on how to reference the API key only once while changing the keywords.
Try this for better readability and reusability:
BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'
LOCATION = '38.9208,-77.036'
RADIUS = '2500'
TYPE = 'gym'
API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'
KEYWORDS = ''
allgyms = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&key='+API_KEY)
all_text = allgyms.text
alljson = json.loads(all_text)
KEYWORDS = 'healthclub'
healthclubs = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&keyword='+KEYWORDS+'&key='+API_KEY)
health_text = healthclubs.text
healthjson = json.loads(health_text)
KEYWORDS = 'crossfit'
crossfit = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&keyword='+KEYWORDS+'&key='+API_KEY)
cross_text = crossfit.text
crossjson = json.loads(cross_text)
As V-R suggested in a comment, you can go further and define a function, which makes things more reusable and allows you to call that function from other places in your application.
Function implementation
def makeRequest(location, radius, type, keywords):
    BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'
    API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'
    result = requests.get(BASE_URL+'location='+location+'&radius='+radius+'&type='+type+'&keyword='+keywords+'&key='+API_KEY)
    jsonResult = json.loads(result.text)
    return jsonResult
Function invocation
gyms_json = makeRequest('38.9208,-77.036', '2500', 'gym', '')
Let me know if there is an issue
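A further variant, sketched here and not part of the original answer, lets requests build the query string itself, so the key and the shared parameters are written exactly once:
import requests

BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'  # key from the question

def nearby_gyms(keyword=''):
    # requests URL-encodes and joins the params dict for us.
    params = {
        'location': '38.9208,-77.036',
        'radius': '2500',
        'type': 'gym',
        'key': API_KEY,
    }
    if keyword:
        params['keyword'] = keyword
    return requests.get(BASE_URL, params=params).json()

alljson = nearby_gyms()
healthjson = nearby_gyms('healthclub')
crossjson = nearby_gyms('crossfit')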
I am having issues with my API request to Flickr below. My function takes as input a list of 10 photo IDs, but when I print the data from the function I only get information for one photo ID. Looking at the function below, any ideas on what may be causing the contents of only one photo ID to print? Any help would be great.
for item in get_flickr_data(word)["photos"]["photo"]:
    photo_ids = item["id"].encode('utf-8')
    lst_photo_ids.append(photo_ids)
print lst_photo_ids
lst_photo_ids = ['34117701526', '33347528313', '34158745075', '33315997274', '33315996984', '34028007021', '33315995844', '33347512113', '33315784134', '34024299271']
def get_photo_data(lst_photo_ids):
    baseurl = "https://api.flickr.com/services/rest/"
    params_d = {}
    params_d["method"] = "flickr.photos.getInfo"
    params_d["format"] = "json"
    params_d["photo_id"] = photo_ids
    params_d["api_key"] = FLICKR_KEY
    unique_identifier = params_unique_combination(baseurl, params_d)
    if unique_identifier in CACHE_DICTION:
        flickr_data_diction = CACHE_DICTION[unique_identifier]
    else:
        resp = requests.get(baseurl, params_d)
        json_result_text = resp.text[14:-1]
        flickr_data_diction = json.loads(json_result_text)
        CACHE_DICTION[unique_identifier] = flickr_data_diction
        fileref = open(CACHE_FNAME, "w")
        fileref.write(json.dumps(CACHE_DICTION))
        fileref.close()
    return flickr_data_diction
print get_photo_data(photo_ids)
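For what it's worth, flickr.photos.getInfo looks up a single photo_id per call, and the function body builds its params from the leftover photo_ids variable rather than the lst_photo_ids argument, so every call asks about the same one photo. A rough sketch of a per-ID version (caching omitted for brevity; FLICKR_KEY and lst_photo_ids as defined above):
def get_photo_info(photo_id):
    baseurl = "https://api.flickr.com/services/rest/"
    params_d = {}
    params_d["method"] = "flickr.photos.getInfo"
    params_d["format"] = "json"
    params_d["photo_id"] = photo_id      # use the argument: one ID per call
    params_d["api_key"] = FLICKR_KEY
    resp = requests.get(baseurl, params_d)
    return json.loads(resp.text[14:-1]) # strip the jsonFlickrApi(...) wrapper

all_photo_data = [get_photo_info(pid) for pid in lst_photo_ids]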
Here is a piece of the request:
<ContinuousMove xmlns="http://www.onvif.org/ver20/ptz/wsdl">
  <ProfileToken>0</ProfileToken>
  <Velocity>
    <PanTilt x="-0.5" y="0" xmlns="http://www.onvif.org/ver10/schema"/>
  </Velocity>
</ContinuousMove>
This request is called like self._client.service.ContinuousMove(0, params), where params is something like params = {'PanTilt': [{'x': -0.5}, {'y': 0.5}]}. The main problem is that I can't figure out how to add attributes, rather than values, to the request. With objects it throws an error that the type is not found, and with tuples it generates this XML:
<ns0:ContinuousMove>
  <ns0:ProfileToken>0</ns0:ProfileToken>
  <ns0:Velocity>
    <ns1:PanTilt>x</ns1:PanTilt>
    <ns1:PanTilt>0.5</ns1:PanTilt>
    <ns1:PanTilt>y</ns1:PanTilt>
    <ns1:PanTilt>0</ns1:PanTilt>
  </ns0:Velocity>
</ns0:ContinuousMove>
You can use suds.client.Client.factory.create to create the parameter.
Here is an example I once wrote:
query_param = client.factory.create("QueryCriteriaBaseOfMyTaskCriteria")
pagination_model = client.factory.create("PaginationModel")
pagination_model.PageSize = 5000
query_param.PagingInfo = pagination_model
task_criteria = client.factory.create("MyTaskCriteria")
task_criteria.LoginId = user
task_criteria.OriginatorLoginId = client.factory.create("ArrayOfInt")
task_criteria.OriginatorLoginId.int.append(int(APPLICANT))
task_criteria.ProcInstId = client.factory.create("ArrayOfInt")
if instId is not None:
    task_criteria.ProcInstId.int.append(instId)
task_criteria.ProcessCode = client.factory.create("ArrayOfString")
task_criteria.ProcessCode.string.append(PROCESSCODE)
task_criteria.Folio = ""
task_criteria.TaskStartDate = client.factory.create("DatePeriodModel")
task_criteria.ProcessStartDate = client.factory.create("DatePeriodModel")
query_param.QueryCriteria = task_criteria
ret = client.service.GetTaskList(query_param,apiKey)
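Applied to the ContinuousMove request in the question, a rough sketch; the type names ('PTZSpeed', 'Vector2D') are guesses that depend on your WSDL, and suds generally exposes schema attributes (as opposed to child elements) with a leading underscore:
# Build typed objects instead of passing a plain dict/tuple.
velocity = self._client.factory.create('ns0:PTZSpeed')   # type name per your WSDL
pan_tilt = self._client.factory.create('ns0:Vector2D')   # type name per your WSDL
pan_tilt._x = -0.5   # leading underscore -> rendered as the x= attribute
pan_tilt._y = 0      # leading underscore -> rendered as the y= attribute
velocity.PanTilt = pan_tilt
self._client.service.ContinuousMove('0', velocity)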