I'm trying to make my bot send a message to a Discord channel when certain conditions are met, but I can't get the code working. The code checks every 5 seconds whether a scraped list contains the string '.12.' and should then send the message.
import requests
import time
import discord
from discord.ext import commands, tasks
from bs4 import BeautifulSoup

while True:
    client = commands.Bot(command_prefix='.')

    @client.event
    async def on_ready():
        print('bot is active')

    url = 'website link'
    res = requests.get(url)
    html = res.text
    soup = BeautifulSoup(html, 'html.parser')
    html_element = soup.find_all('td', {"class": "eksam-ajad-aeg"})
    ret = []
    for t in html_element:
        ret.append(t.text)
    print(ret)

    if '.12.' in ret:
        @client.event
        async def send():
            channel = client.get_channel(758088198852182037)
            await channel.send('message')

    client.run('token')
    time.sleep(5)
Here is a bot script that appears to work. Without the url you're attempting to scrape I can't verify it end to end, but give this a try and see if it works for you:
import discord
import requests
from bs4 import BeautifulSoup
import asyncio

client = discord.Client()

@client.event
async def on_ready():
    # Create a task that runs check_html with a 5-second interval
    client.loop.create_task(check_html(5))
    print("Bot is active")

async def check_html(time):
    while True:
        url = 'url here'
        res = requests.get(url)
        html = res.text
        soup = BeautifulSoup(html, 'html.parser')
        html_element = soup.find_all('td', {"class": "eksam-ajad-aeg"})
        ret = []
        for t in html_element:
            ret.append(t.text)
        print(ret)
        if '.12.' in ret:
            for guild in client.guilds:
                for channel in guild.channels:
                    if channel.id == 758088198852182037:
                        await channel.send('message')
        # Asynchronously sleep for 'time' seconds
        await asyncio.sleep(time)

client.run('token')
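As a side note (my addition, not part of the original answer): the question already imports discord.ext.tasks, and the same 5-second polling can be written with the tasks.loop decorator, which handles the scheduling for you. A minimal sketch under discord.py 1.x, with the scraping elided:

import discord
from discord.ext import commands, tasks

client = commands.Bot(command_prefix='.')

@tasks.loop(seconds=5)
async def check_html():
    # ... scrape the page as in the answer above and build `ret` ...
    ret = []  # placeholder for the scraped cell texts
    if '.12.' in ret:
        channel = client.get_channel(758088198852182037)
        if channel is not None:
            await channel.send('message')

@client.event
async def on_ready():
    print('bot is active')
    if not check_html.is_running():
        check_html.start()

client.run('token')

Also note that a blocking requests.get call inside the loop stalls the event loop while it runs; for slow pages an async client such as aiohttp (used in later snippets on this page) avoids that.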
async def get_html(self, url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            return await resp.text()

async def getrank(self, url):
    response = await self.get_html(url)
    print(f'{url} | got site')
    soup = BeautifulSoup(response, "html.parser")
    rank = soup.find("div", {"id": "productDetails_db_sections"})
    test2 = rank.find_all("span")
    rank = str(test2[-2].text).replace(",", "")
    finalRank = int(re.search(r"\d+", rank).group())
    if finalRank < 20000:
        print('product has low rank, starting new function')
        await self.getPriceFinal(url, finalRank)

async def getPriceFinal(self, url, rank):
    try:
        print('Checking for Price....')  # THIS PRINTS
        s = aiohttp.ClientSession()
        response = await s.get(url, headers=self.headers)  # THIS WAITS UNTIL getrank finished
        print(response.status)
The main problem is that getPriceFinal() runs up to the print and then waits for getrank() to finish. What I would like instead is to start getPriceFinal() with the url from getrank() concurrently. Any ideas on how to solve this?
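One way to get that behavior (my suggestion, not from the post) is to schedule getPriceFinal() with asyncio.create_task() instead of awaiting it, so getrank() moves on immediately. A minimal standalone sketch of the pattern, with placeholder coroutines standing in for the question's methods:

import asyncio

async def get_price_final(url, rank):
    # Stands in for getPriceFinal; the sleep simulates the HTTP request
    print(f'Checking for Price of {url} (rank {rank})....')
    await asyncio.sleep(2)
    print(f'Price check for {url} done')

async def get_rank(url):
    final_rank = 10000  # pretend this was parsed from the page
    if final_rank < 20000:
        # create_task schedules the coroutine on the running loop and
        # returns immediately, so get_rank continues without waiting
        asyncio.create_task(get_price_final(url, final_rank))
    print(f'{url} | got rank, moving on')
    await asyncio.sleep(3)  # keep the loop alive so the task can finish

asyncio.run(get_rank('https://example.com/product'))

Inside the question's class the same one-line change applies: replace await self.getPriceFinal(url, finalRank) with asyncio.create_task(self.getPriceFinal(url, finalRank)).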
import threading
import asyncio
import discord
import requests
from bs4 import BeautifulSoup

client = discord.Client()

def set_interval(func, sec):
    async def func_wrapper():
        set_interval(func, 1)
        await func()
    t = threading.Timer(sec, func_wrapper)
    t.start()
    return t

async def takip():
    url = ""
    R = requests.get(url)
    Soup = BeautifulSoup(R.text, "html5lib")
    Title = Soup.find("h1", {"class": "pr-new-br"}).getText()
    List = Soup.find("div", {"class": "pr-bx-nm with-org-prc"})
    fiyat = List.find("span", {"class": "prc-dsc"}).getText()
    degisenfiyat = float(fiyat.replace(",", ".").replace(" TL", ""))
    if degisenfiyat <= 200:
        channel = client.get_channel(973939538357522474)
        await channel.send("Fiyat düştü.")  # "The price dropped."
    print(Title)
    print(fiyat)
    print(degisenfiyat)

@client.event
async def on_ready():
    print(f'{client.user} has connected to Discord!')
    set_interval(takip, 1)

@client.event
async def on_message(message):
    if message.author == client.user:
        return
    if message.content.startswith('$hello'):
        await message.channel.send('Hello!')

client.run("")
RuntimeWarning: coroutine 'set_interval.<locals>.func_wrapper' was never awaited
self.function(*self.args, **self.kwargs)
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
This function works with discord.py to solve a scraping problem, but using the set_interval function gives this error. Any idea how to solve it?
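The warning appears because threading.Timer simply calls func_wrapper(), which builds a coroutine object that nothing ever awaits, and the timer thread has no event loop to run it on. One fix (my suggestion, not from the post) is to drop the timer entirely and drive the repetition from discord.py's own event loop with asyncio.sleep. A minimal sketch, reusing takip() as defined in the question:

import asyncio

async def set_interval(func, sec):
    # Runs on the bot's event loop: await the coroutine, then sleep
    # asynchronously before the next call -- no threads involved.
    while True:
        await func()
        await asyncio.sleep(sec)

@client.event
async def on_ready():
    print(f'{client.user} has connected to Discord!')
    # Schedule the periodic check as a task on the existing loop
    client.loop.create_task(set_interval(takip, 1))

discord.ext.tasks.loop (shown earlier on this page) is an equivalent, more declarative option.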
I am trying to scrape the search result URLs with Pyppeteer in my Python program, but it doesn't work...
And here is my code:
import asyncio
from pyppeteer import launch

URL = 'https://hk.appledaily.com/search/apple'

async def main():
    browser = await launch()
    page = await browser.newPage()
    await page.goto(URL)
    await page.waitForSelector(".flex-feature")
    elements = await page.querySelectorAll('.flex-feature')
    for el in elements:
        text = await page.evaluate('(el) => el.innerHTML.querySelectorAll("story-card")', el)
        print(text)
    await browser.close()

asyncio.get_event_loop().run_until_complete(main())
Hope anyone can help! Thanks!
Sorry for the stupid question! I just figured it out, haha...
import asyncio
from pyppeteer import launch
# https://pypi.org/project/pyppeteer/

URL = 'https://hk.appledaily.com/search/apple'

async def main():
    browser = await launch()
    page = await browser.newPage()
    await page.goto(URL)
    await page.waitForSelector(".flex-feature")
    # Select the story-card links directly instead of the .flex-feature wrappers
    elements = await page.querySelectorAll('.story-card')
    for el in elements:
        text = await page.evaluate('(el) => el.textContent', el)
        text2 = await page.evaluate('(el) => el.href', el)
        print(text2)
    await browser.close()

asyncio.get_event_loop().run_until_complete(main())
And the result will be:
https://hk.appledaily.com/entertainment/20201227/LJL5DQ64QZHLTHI7LFKHVXB7JM/
https://hk.appledaily.com/sports/20201227/7MQKJNXPQNA6HDXTFUCMWNGUAU/
https://hk.appledaily.com/local/20201227/SWIBOUDSLZB5JBULTIT4DPSEIQ/
https://hk.appledaily.com/entertainment/20201227/TA457F5YYRGQZCNDIR5OFJDLPU/
https://hk.appledaily.com/china/20201227/DY2RQZJVSZHJBDV6XDYBH5G73I/
https://hk.appledaily.com/sports/20201227/4FLJFIHZOFF3JMWPOOSTO5QLCQ/
https://hk.appledaily.com/local/20201227/NIWG4U4LBFGPHLA73RTWHEQCY4/
https://hk.appledaily.com/china/20201227/SUR6Q4UEIVE5HD7OLSCAYIVUUY/
https://hk.appledaily.com/international/20201227/N2P5IPMBKBEGRALQWMDFXJCVGY/
https://hk.appledaily.com/entertainment/20201227/MGG6H2JIJVGODEV3EE7OI6HEGI/
https://hk.appledaily.com/local/20201227/N3TQO3VOBRC3NKT2ILES76CSKY/
https://hk.appledaily.com/international/20201227/GJXFM53DAFAUVOFFZIRKBH3X24/
https://hk.appledaily.com/sports/20201227/2UQC7A4HCBFD5IF7IGJWVK3AOA/
https://hk.appledaily.com/entertainment/20201226/AI7CAJD6O5D5XP7UMZCWSQ5VU4/
https://hk.appledaily.com/entertainment/20201227/3BIOQMUCQVGHXKNP3A4KF7VC6A/
https://hk.appledaily.com/local/20201227/OOYOPLI5WFGJZGAFKGLHSVINPM/
https://hk.appledaily.com/local/20201227/6FXZ5FKNMVHS5JTTO6YWO55JZY/
https://hk.appledaily.com/local/20201227/VQTZMOKCUZGMFL4PYBZ5YZYOSQ/
https://hk.appledaily.com/international/20201227/4VPFDXJFKZH5ZFRXSKZW3OASAA/
https://hk.appledaily.com/entertainment/20201227/TCVCDXKK4JHE7HHEJ7U6MFSS5U/
https://hk.appledaily.com/local/20201227/NIWG4U4LBFGPHLA73RTWHEQCY4/
https://hk.appledaily.com/entertainment/20201227/GY4WJIFLPREKJHGJ2VQO7LDZAU/
https://hk.appledaily.com/entertainment/20201227/3BIOQMUCQVGHXKNP3A4KF7VC6A/
https://hk.appledaily.com/local/20201227/OOYOPLI5WFGJZGAFKGLHSVINPM/
https://hk.appledaily.com/local/20201227/N3TQO3VOBRC3NKT2ILES76CSKY/
https://hk.appledaily.com/local/20201227/Z4CRG7TLUJFMLO3JIY2KWBTL5A/
https://hk.appledaily.com/local/20201227/353WEBFTBZFHBCP2O4IXIARBEM/
Process finished with exit code 0
I am looking for guidance around best practices with asyncio and aiohttp in Python 3. I have a basic scraper but I am not sure how to:
Properly implement error handling, specifically around my fetch function.
Do I really need the final main() function to wrap my async crawler in?
Here is my code so far. It works, but I would like feedback on the two items above.
import asyncio
from aiohttp import ClientSession
from bs4 import BeautifulSoup

urls = []

async def fetch(url, payload={}):
    async with ClientSession() as s:
        async with s.get(url, params=payload) as resp:
            content = await resp.read()
            return content

async def get_profile_urls(url, payload):
    content = await fetch(url, payload)
    soup = BeautifulSoup(content, 'html.parser')
    soup = soup.find_all(attrs={'class': 'classname'})
    if soup:
        urls.extend([s.find('a')['href'] for s in soup])

async def main():
    tasks = []
    payload = {
        'page': 0,
        'filter': 88}
    for i in range(max_page + 1):  # max_page and search_url are defined elsewhere
        payload['page'] += 1
        tasks.append(get_profile_urls(search_url, payload))
    await asyncio.wait(tasks)

asyncio.run(main())
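On the two points (my take, not part of the post): error handling usually belongs inside fetch, around the request itself, and yes, a top-level main() coroutine is the normal pattern, because asyncio.run() needs a single entry awaitable. A hedged sketch under the same assumptions (search_url and max_page defined elsewhere); it also gives each task its own payload dict, since the original mutates one shared dict that every queued coroutine ends up reading:

import asyncio
from aiohttp import ClientError, ClientSession

async def fetch(url, payload=None):
    # Catch transport errors and bad HTTP statuses; return None so the
    # caller can skip the page instead of crashing the whole gather()
    try:
        async with ClientSession() as s:
            async with s.get(url, params=payload or {}) as resp:
                resp.raise_for_status()
                return await resp.read()
    except (ClientError, asyncio.TimeoutError) as e:
        print(f'fetch failed for {url} ({payload}): {e}')
        return None

async def main():
    # Pages 1 .. max_page+1, matching the original increment-before-use loop
    tasks = [fetch(search_url, {'page': page, 'filter': 88})
             for page in range(1, max_page + 2)]
    results = await asyncio.gather(*tasks)
    return [r for r in results if r is not None]

# asyncio.run(main()) creates and closes the event loop for you,
# which is exactly why the wrapper coroutine is needed.
results = asyncio.run(main())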
I'm trying to use aiohttp and asyncio to make the requests, but I get the error
'An asyncio.Future, a coroutine or an awaitable is required'
Here's my code. How can I fix it?
import requests
from bs4 import BeautifulSoup
import asyncio
import aiohttp

res = requests.get('https://www.rottentomatoes.com/top/')
soup = BeautifulSoup(res.text, 'lxml')
movie_list = []
for link in soup.select('section li a[href]'):
    movie_list.append('https://www.rottentomatoes.com' + link.get('href'))

async def request(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            body = await resp.text(encoding='utf-8')
            soup = BeautifulSoup(body, 'lxml')
            movie = []
            async for link in soup.select('tbody tr td a '):
                await movie.append(link.get('href'))
            return movie

async def main():
    results = await asyncio.gather(*[request(url) for url in movie_list])
    print(results)
    return results

print(movie_list)
loop = asyncio.get_event_loop()
results = loop.run_until_complete(main)
You need to call loop.run_until_complete(main()), not pass the function main itself (without parentheses). Also, you don't need the async keyword with soup.select(); it returns a plain list. I also changed the select string so it actually parses something:
import requests
from bs4 import BeautifulSoup
import asyncio
import aiohttp

res = requests.get('https://www.rottentomatoes.com/top/')
soup = BeautifulSoup(res.text, 'lxml')
movie_list = []
for link in soup.select('section li a[href]'):
    movie_list.append('https://www.rottentomatoes.com' + link.get('href'))

async def request(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            body = await resp.text(encoding='utf-8')
            soup = BeautifulSoup(body, 'lxml')
            movie = []
            # no need for async for here -- soup.select() returns a plain list
            for link in soup.select('section#top_movies_main table a'):
                movie.append(link['href'])
            return movie

async def main():
    results = await asyncio.gather(*[request(url) for url in movie_list])
    print(results)
    return results

print(movie_list)
loop = asyncio.get_event_loop()
results = loop.run_until_complete(main())  # pass a coroutine, not the function
Prints:
['https://www.rottentomatoes.com/top/bestofrt/top_100_action__adventure_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_animation_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_art_house__international_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_classics_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_comedy_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_documentary_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_drama_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_horror_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_kids__family_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_musical__performing_arts_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_mystery__suspense_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_romance_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_science_fiction__fantasy_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_special_interest_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_sports__fitness_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_television_movies/', 'https://www.rottentomatoes.com/top/bestofrt/top_100_western_movies/']
[['/m/mad_max_fury_road', '/m/1013775-metropolis', '/m/wonder_woman_2017', '/m/logan_2017', '/m/1011615-king_kong', '/m/zootopia', '/m/1000355-adventures_of_robin_hood', '/m/star_wars_episode_vii_the_force_awakens',
... and so on