Results not showing after search term entered from keyboard - python

I am learning to develop an addon for Kodi and need to implement search functionality. I found some resources online for getting user input from the keyboard and then calling an API with the search term to fetch results. The API request works fine, but the results are not being shown as ListItems. Below is my code:
import sys
import json

import requests
import xbmc
import xbmcgui
import xbmcplugin
from urllib.parse import urlencode, parse_qsl

_url = sys.argv[0]
_handle = int(sys.argv[1])


def get_url(**kwargs):
    return '{0}?{1}'.format(_url, urlencode(kwargs))


def display_main_menu():
    list_item = xbmcgui.ListItem(label="Search")
    url = get_url(action='search')
    xbmcplugin.addDirectoryItem(_handle, url, list_item)


def perform_search(search_term):
    link = "api_url_here" + search_term
    r = requests.get(link)
    resp = json.loads(r.text)
    for result in resp:
        list_item = xbmcgui.ListItem(label=result["name"])
        list_item.setArt({'thumb': result["img"], 'icon': result["img"], 'fanart': result["img"]})
        url = ''  # blank url for testing
        is_folder = True
        xbmcplugin.addDirectoryItem(_handle, url, list_item, is_folder)
    xbmcplugin.endOfDirectory(_handle)


def search():
    keyb = xbmc.Keyboard('', "Search for Videos", False)
    keyb.setDefault('')
    keyb.doModal()
    if keyb.isConfirmed() and len(keyb.getText()) > 0:
        perform_search(keyb.getText())


def router(paramstring):
    params = dict(parse_qsl(paramstring))
    if params:
        if params['action'] == 'search':
            search()
        else:
            raise ValueError('Invalid paramstring: {0}!'.format(paramstring))
    else:
        display_main_menu()


if __name__ == '__main__':
    router(sys.argv[2][1:])
When I select Search and type in my search word, the keyboard is dismissed but nothing happens. The same menu is displayed and the new ListItems from the perform_search function are not shown. There is also no error. Please help.

Add some logging; the interesting part is whether you actually hit the for loop. So add some xbmc.log('hit') calls, or even log your values.
If you want full-blown logging, check this example: https://github.com/xbmc/generator-kodi-addon/blob/master/generators/app/templates/resources/lib/kodilogging.py
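For example, a minimal sketch of where such log calls could go in perform_search (the messages and the LOGINFO level are just illustrative):

def perform_search(search_term):
    link = "api_url_here" + search_term
    xbmc.log('perform_search called with: ' + search_term, xbmc.LOGINFO)
    r = requests.get(link)
    resp = json.loads(r.text)
    xbmc.log('response items: ' + str(len(resp)), xbmc.LOGINFO)
    for result in resp:
        # if this never shows up in kodi.log, the loop body is never reached
        xbmc.log('hit: ' + str(result.get("name")), xbmc.LOGINFO)
        # ... build and add the ListItem as before
    xbmc.log('calling endOfDirectory', xbmc.LOGINFO)
    xbmcplugin.endOfDirectory(_handle)

Checking kodi.log after a search then shows whether the loop and endOfDirectory are reached at all.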

Related

How to compare variables if not http 200 status

I have written a web scraper where I compare two values to see whether anything has been added in the new request compared to the previous request.
import json
import re
import time
from dataclasses import dataclass
from typing import Optional, List

import requests
from bs4 import BeautifulSoup


@dataclass
class Product:
    name: Optional[str]
    price: Optional[str]
    image: Optional[str]
    sizes: List[str]

    @staticmethod
    def get_sizes(doc: BeautifulSoup) -> List[str]:
        pat = re.compile(
            r'^<script>var JetshopData='
            r'(\{.*\})'
            r';</script>$',
        )
        for script in doc.find_all('script'):
            match = pat.match(str(script))
            if match is not None:
                break
        else:
            return []
        data = json.loads(match[1])
        return [
            variation
            for get_value in data['ProductInfo']['Attributes']['Variations']
            if get_value.get('IsBuyable')
            for variation in get_value['Variation']
        ]

    @classmethod
    def from_page(cls, url: str) -> Optional['Product']:
        with requests.get(url) as response:
            response.raise_for_status()
            doc = BeautifulSoup(response.text, 'html.parser')
        name = doc.select_one('h1.product-page-header')
        price = doc.select_one('span.price')
        image = doc.select_one('meta[property="og:image"]')
        return cls(
            name=name and name.text.strip(),
            price=price and price.text.strip(),
            image=image and image['content'],
            sizes=cls.get_sizes(doc),
        )


def main():
    product = Product.from_page("https://shelta.se/sneakers/nike-air-zoom-type-whiteblack-cj2033-103")
    previous_request = product.sizes
    while True:
        product = Product.from_page("https://shelta.se/sneakers/nike-air-zoom-type-whiteblack-cj2033-103")
        if set(product.sizes) - set(previous_request):
            print("new changes on the webpage")
            previous_request = product.sizes
        else:
            print("No changes made")
        time.sleep(500)


if __name__ == '__main__':
    main()
The problem I am facing is that there is a scenario where the product can be taken down. For example, suppose I have found the sizes ['US 9,5/EUR 43', 'US 10,5/EUR 44,5'] and the page is taken down by the admin, so it returns 404. A few hours later they re-add the page with the same values ['US 9,5/EUR 43', 'US 10,5/EUR 44,5']. That would not print the values, because we already had them in our previous valid request.
I wonder what would be the best way to print out the values when a webpage goes from 404 back to 200 (even if they add the same values)?
The use of response.raise_for_status() is incorrect in this case. It will simply raise an exception if the website returns a 404, 500 or similar, exiting your program. Replace response.raise_for_status() with:
if response.status_code != 200:
    return cls(None, None, None, None)
EDIT, as I misinterpreted the question:
An empty product will now be returned if an error occurred. The only check required now is whether the sizes have changed.
def main():
    url = "https://shelta.se/sneakers/nike-air-zoom-type-whiteblack-cj2033-103"
    previous_product = Product.from_page(url)
    while True:
        product = Product.from_page(url)
        if product.sizes != previous_product.sizes:
            print("new changes on the webpage")
        else:
            print("No changes made")
        previous_product = product
        time.sleep(500)
previous_product has been moved outside. In this exact case, it does not matter, but it improves readability.
The use of set(...) - set(...) has been removed because it does not catch when something is removed from the website, only when something is added. If something were first removed and then re-added, it would not have been caught by your program either.
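As a quick illustration of that point (not from the original code), a one-way set difference misses removals, while a symmetric difference would catch changes in both directions:

old_sizes = {'US 9,5/EUR 43', 'US 10,5/EUR 44,5'}
new_sizes = {'US 9,5/EUR 43'}  # one size was removed

print(new_sizes - old_sizes)  # set(): the one-way difference reports nothing
print(new_sizes ^ old_sizes)  # {'US 10,5/EUR 44,5'}: the symmetric difference catches the removal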

Handling final page in Python paginated API request

I'm querying Microsoft's Graph API, using the following function to request multiple pages. I'm trying to request all pages, merge the JSON results, and finally write them to a pandas DataFrame.
v = "v1.0"
r = "/users?$filter=userType eq 'Member'&$select=displayName,givenName,jobTitle,mail,department&$top=200"
def query(v, r):
all_records = []
url = uri.format(v=v, r=r)
while True:
if not url:
break
result = requests.get(url, headers=headers)
if result.status_code == 200:
json_data = json.loads(result.text)
all_records = all_records + json_data["value"]
url = json_data["#odata.nextLink"]
return all_records
The while loop goes through all the pages, but when I run the function I get an error:
KeyError: '@odata.nextLink'
I assume this is because the loop reaches the final page, so the '@odata.nextLink' key cannot be found. But how can I handle this?
You are doing
url = json_data["@odata.nextLink"]
which suggests json_data is a dict, so you should be able to use the .get method, which returns a default value when the key is not found (None by default). Please try the following and check whether it works as expected:
url = json_data.get("@odata.nextLink")
if url is None:
    print("nextLink not found")
else:
    print("nextLink found")

data scraping on discord using python

I'm currently trying to learn web scraping and decided to scrape some discord data. Code follows:
import requests
import json


def retrieve_messages(channelid):
    num = 0
    headers = {
        'authorization': 'here we enter the authorization code'
    }
    r = requests.get(
        f'https://discord.com/api/v9/channels/{channelid}/messages?limit=100', headers=headers
    )
    jsonn = json.loads(r.text)
    for value in jsonn:
        print(value['content'], '\n')
        num = num + 1
    print('number of messages we collected is', num)


retrieve_messages('server id goes here')
The problem: when I tried changing the limit in messages?limit=100, it apparently only accepts numbers between 0 and 100, meaning the maximum number of messages I can get is 100. I tried changing this number to 900, for example, to scrape more messages, but then I get the error TypeError: string indices must be integers.
Any ideas on how I could get, possibly, all the messages in a channel?
Thank you very much for reading!
APIs that return a bunch of records are almost always limited to some number of items.
Otherwise, if a large quantity of items is requested, the API may fail due to being out of memory.
For that purpose, most APIs implement pagination using limit, before and after parameters where:
limit: tells you how many messages to fetch
before: get messages before this message ID
after: get messages after this message ID
Discord API is no exception as the documentation tells us.
Here's how you do it:
First, you will need to query the data multiple times.
For that, you can use a while loop.
Make sure to add a condition that prevents the loop from running indefinitely; here I added a check for whether any messages are left.
while True:
    # ... requests code
    jsonn = json.loads(r.text)
    if len(jsonn) == 0:
        break
    for value in jsonn:
        print(value['content'], '\n')
        num = num + 1
Define a variable that holds the ID of the last message you fetched, and update it as you print messages:
def retrieve_messages(channelid):
    last_message_id = None
    while True:
        # ...
        for value in jsonn:
            print(value['content'], '\n')
            last_message_id = value['id']
            num = num + 1
On the first run last_message_id is None, and on subsequent requests it holds the ID of the last message you printed.
Use that to build your query
while True:
    query_parameters = f'limit={limit}'
    if last_message_id is not None:
        query_parameters += f'&before={last_message_id}'
    r = requests.get(
        f'https://discord.com/api/v9/channels/{channelid}/messages?{query_parameters}', headers=headers
    )
    # ...
Note: Discord returns the latest messages first, so you have to use the before parameter.
Here's a fully working example of your code
import requests
import json


def retrieve_messages(channelid):
    num = 0
    limit = 10
    headers = {
        'authorization': 'auth header here'
    }
    last_message_id = None
    while True:
        query_parameters = f'limit={limit}'
        if last_message_id is not None:
            query_parameters += f'&before={last_message_id}'
        r = requests.get(
            f'https://discord.com/api/v9/channels/{channelid}/messages?{query_parameters}', headers=headers
        )
        jsonn = json.loads(r.text)
        if len(jsonn) == 0:
            break
        for value in jsonn:
            print(value['content'], '\n')
            last_message_id = value['id']
            num = num + 1
    print('number of messages we collected is', num)


retrieve_messages('server id here')
To answer this question, we must look at the discord API. Googling "discord api get messages" gets us the developer reference for the discord API. The particular endpoint you are using is documented here:
https://discord.com/developers/docs/resources/channel#get-channel-messages
The limit is documented here, along with the around, before, and after parameters. Using one of these parameters (most likely after) we can paginate the results.
In pseudocode, it would look something like this:
offset = 0
limit = 100
all_messages=[]
while True:
r = requests.get(
f'https://discord.com/api/v9/channels/{channelid}/messages?limit={limit}&after={offset}',headers=headers
)
all_messages.append(extract messages from response)
if (number of responses < limit):
break # We have reached the end of all the messages, exit the loop
else:
offset += limit
By the way, you will probably want to print(r.text) right after the response comes in so you can see what the response looks like. It will save a lot of confusion.
Here is my solution. Feedback is welcome as I'm newish to Python. Kindly provide me w/ credit/good-luck if using this. Thank you =)
import requests

CHANNELID = 'REPLACE_ME'
HEADERS = {'authorization': 'REPLACE_ME'}
LIMIT = 100

all_messages = []
r = requests.get(f'https://discord.com/api/v9/channels/{CHANNELID}/messages?limit={LIMIT}', headers=HEADERS)
all_messages.extend(r.json())
print(f'len(r.json()) is {len(r.json())}', '\n')

while len(r.json()) == LIMIT:
    last_message_id = r.json()[-1].get('id')
    r = requests.get(f'https://discord.com/api/v9/channels/{CHANNELID}/messages?limit={LIMIT}&before={last_message_id}', headers=HEADERS)
    all_messages.extend(r.json())
    print(f'len(r.json()) is {len(r.json())} and last_message_id is {last_message_id} and len(all_messages) is {len(all_messages)}')

how to make text clickable in python

How do I make text clickable?
class ComplainceServer():
    def __init__(self, jira_server, username, password, encoding='utf-8'):
        if jira_server is None:
            error('No server provided.')
        # print(jira_server)
        self.jira_server = jira_server
        self.username = username
        self.password = password
        self.encoding = encoding

    def checkComplaince(self, appid, toAddress):
        query = "/rest/api/2/search?jql=issuetype = \"Application Security\" AND \"Prod Due Date\" < now()"
        request = self._createRequest()
        response = request.get(query, contentType='application/json')
        # Parse result
        if response.status == 200 and action == "warn":
            data = Json.loads(response.response)
            print "#### Issues found"
            issues = {}
            msg = "WARNING: The below tickets are non-complaint in fortify, please fix them or raise exception.\n"
            issue1 = data['issues'][0]['key']
            for item in data['issues']:
                issue = item['key']
                issues[issue] = item['fields']['summary']
                print u"* {0} - {1}".format(self._link(issue), item['fields']['summary'])
                print "\n"
                data = u" {0} - {1}".format(self._link(issue), item['fields']['summary'])
                msg += '\n' + data
            SOCKET_TIMEOUT = 30000  # 30s
            email = SimpleEmail()
            email.setHostName('smtp.com')
            email.setSmtpPort(25)
            email.setSocketConnectionTimeout(SOCKET_TIMEOUT)
            email.setSocketTimeout(SOCKET_TIMEOUT)
            email.setFrom('R@group.com')
            for toAddress in toAddress.split(','):
                email.addTo(toAddress)
            email.setSubject('complaince report')
            email.addHeader('X-Priority', '1')
            email.setMsg(str(msg))
            email.send()

    def _createRequest(self):
        return HttpRequest(self.jira_server, self.username, self.password)

    def _link(self, issue):
        return '[{0}]({1}/browse/{0})'.format(issue, self.jira_server['url'])
This is the calling code. appid and toAddress will be passed in from a different UI.
from Complaince import ComplainceServer
jira = ComplainceServer(jiraServer, username, password)
issues = jira.checkComplaince(appid, toAddress)
I want the issue ID to be an embedded link.
Currently the email renders it as:
MT-4353(https://check.com/login/browse/MT-4353) - Site Sc: DM isg_cq5
but I want [MT-4353] to be a hyperlink to the URL https://check.com/login/browse/MT-4353.
Firstly, you need to send your email as HTML. I'm not familiar with the library you are using, so I cannot give an example of this.
I have replaced a snippet of your code with HTML syntax just to illustrate the point that you need HTML markup to get clickable links in an email.
msg = "<p>WARNING: The below tickets are non-compliant in fortify, please fix them or raise exception.</p>"
issue1 = data['issues'][0]['key']
for item in data['issues']:
issue = item['key']
issues[issue] = item['fields']['summary']
data = u"<a href='{0}'>{1}</a>".format(self._link(issue), item['fields']['summary'])
msg += '<br />'+ data
In future, please word your questions carefully, as your title does not indicate what you actually mean. You also have spelling mistakes: Compliant.
Oh, I missed the point of self._link(issue) not returning the correct link. It returns MT-4353(https://check.com/login/browse/MT-4353) so you are going to need to extract the link part between the brackets. I suggest a regular expression.
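As a rough sketch of that suggestion (to_html_anchor is a hypothetical helper, and it assumes the [KEY](url) string that _link currently builds):

import re


def to_html_anchor(link_text):
    # _link() builds strings like '[MT-4353](https://check.com/login/browse/MT-4353)'
    match = re.match(r'\[(?P<key>[^\]]+)\]\((?P<url>[^)]+)\)', link_text)
    if match is None:
        return link_text  # leave the text unchanged if it does not look like a markdown-style link
    return u"<a href='{url}'>{key}</a>".format(**match.groupdict())


print(to_html_anchor('[MT-4353](https://check.com/login/browse/MT-4353)'))
# <a href='https://check.com/login/browse/MT-4353'>MT-4353</a>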

Web Crawler not working with Python

I'm having issues with a simple web crawler: when I run the following script, it does not iterate through the sites and gives me no results.
This is what I get:
1 Visiting: https://www.mongodb.com/
Word never found
Process finished with exit code 0
Any tips on why this is not working correctly? I'm using the following example: http://www.netinstructions.com/how-to-make-a-web-crawler-in-under-50-lines-of-python-code/
Here is the code:
from html.parser import HTMLParser
from urllib.request import urlopen
from urllib import parse


class LinkParser(HTMLParser):
    # This is a function that HTMLParser normally has,
    # but we are adding some functionality to it
    def handle_starttag(self, tag, attrs):
        """We are looking for the beginning of a link.
        Links normally look
        like """
        if tag == 'a':
            for (key, value) in attrs:
                if key == 'href':
                    # We are grabbing the new URL. We are also adding the
                    # base URL to it. For example:
                    # www.netinstructions.com is the base and
                    # somepage.html is the new URL (a relative URL)
                    #
                    # We combine a relative URL with the base URL to create
                    # an absolute URL like:
                    # www.netinstructions.com/somepage.html
                    newUrl = parse.urljoin(self.baseUrl, value)
                    # And add it to our collection of links:
                    self.links = self.links + [newUrl]

    def getLinks(self, url):
        self.links = []
        # Remember the base URL, which will be important when creating
        # absolute URLs
        self.baseUrl = url
        # Use the urlopen function from the standard Python 3 library
        response = urlopen(url)
        # Make sure that we are looking at HTML and not other things that
        # are floating around on the internet (such as
        # JavaScript files, CSS, or .PDFs for example)
        if response.getheader('Content-Type') == 'text/html':
            htmlBytes = response.read()
            # Note that feed() handles Strings well, but not bytes
            # (a change from Python 2.x to Python 3.x)
            htmlString = htmlBytes.decode("utf-8")
            self.feed(htmlString)
            return htmlString, self.links
        else:
            return "", []


# And finally here is our spider. It takes in a URL, a word to find,
# and the number of pages to search through before giving up
def spider(url, word, maxPages):
    pagesToVisit = [url]
    numberVisited = 0
    foundWord = False
    # The main loop. Create a LinkParser and get all the links on the page.
    # Also search the page for the word or string.
    # In our getLinks function we return the web page
    # (this is useful for searching for the word)
    # and we return a set of links from that web page
    # (this is useful for where to go next)
    while numberVisited < maxPages and pagesToVisit != [] and not foundWord:
        numberVisited = numberVisited + 1
        # Start from the beginning of our collection of pages to visit:
        url = pagesToVisit[0]
        pagesToVisit = pagesToVisit[1:]
        try:
            print(numberVisited, "Visiting:", url)
            parser = LinkParser()
            data, links = parser.getLinks(url)
            if data.find(word) > -1:
                foundWord = True
            # Add the pages that we visited to the end of our collection
            # of pages to visit:
            pagesToVisit = pagesToVisit + links
            print(" **Success!**")
        except:
            print(" **Failed!**")
    if foundWord:
        print("The word", word, "was found at", url)
    else:
        print("Word never found")


if __name__ == "__main__":
    spider("https://www.mongodb.com/", "MongoDB", 400)
First, edit the content-type checker line to:
if response.getheader('Content-Type') == 'text/html; charset=utf-8':
as suggested by @glibdud.
If you would like your program to check all links until maxPages is reached or pagesToVisit is empty, simply remove the foundWord condition from the line:
while numberVisited < maxPages and pagesToVisit != [] and not foundWord:
to:
while numberVisited < maxPages and pagesToVisit != []:
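As a side note (not part of the original answer), hard-coding the charset makes the check brittle; a small sketch of a more tolerant check that only compares the media type might be:

def is_html(response):
    # Treat 'text/html' and 'text/html; charset=utf-8' (or any other charset) the same way
    content_type = response.getheader('Content-Type') or ''
    return content_type.split(';')[0].strip() == 'text/html'

getLinks could then use if is_html(response): instead of comparing against one exact header string.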
