from urllib.request import urlopen
from bs4 import BeautifulSoup
import random
# NOTE(review): the name says "google", but the URL is a Flickr search for "Nike".
google = "https://www.flickr.com/search/?text=Nike"
# Download the raw bytes of the search-results page.
page = urlopen(google).read()
# Parse the downloaded markup with the "html.parser" backend.
soup = BeautifulSoup(page, "html.parser")
# Collect every <img> tag in the parsed document; no image is downloaded yet.
img_tags = soup.find_all('img')
Now I'm stuck.
I'm trying to download all the images on the linked page. Can someone help me?
Related
This is the code I used to get the data from a website with all the possible Wordle words. I'm trying to put them in a list so I can create a Wordle clone, but I get a weird output when I do this. Please help.
import requests
from bs4 import BeautifulSoup
# Word list hosted as a raw file on GitHub.
url = "https://raw.githubusercontent.com/tabatkins/wordle-list/main/words"
# Fetch the file over HTTP.
page = requests.get(url)
# Parse the response body as if it were HTML.
soup = BeautifulSoup(page.content, 'html.parser')
# NOTE(review): list(soup) collects the parsed document's child nodes; it does
# not split the text into individual words — presumably the cause of the odd
# output described above.
word_list = list(soup)
You do not need BeautifulSoup; simply split the text of the response:
import requests
url = "https://raw.githubusercontent.com/tabatkins/wordle-list/main/words"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of splitting an error page into "words".
response.raise_for_status()
# The body is plain text, so str.split() on whitespace yields the word list.
word_list = response.text.split()
Or, if you would like to do it with BeautifulSoup anyway:
import requests
from bs4 import BeautifulSoup
url = "https://raw.githubusercontent.com/tabatkins/wordle-list/main/words"
# A timeout keeps the script from hanging forever on a stalled connection.
page = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')
# Take the document's text and split it on whitespace to get one entry per word.
word_list = soup.text.split()
print(word_list)
Output:
['women',
'nikau',
'swack',
'feens',
'fyles',
'poled',
'clags',
'starn',...]
Hey I can't seem to scrape the images from this website
https://www.nike.com/gb/w/new-mens-shoes-3n82yznik1zy7ok
I am using the following code
# Read the `src` attribute of the product card's hero <img>; `product` is
# defined in code that is not shown here. (The stray trailing "]" was removed.)
product.find('img', {'class': 'css-1fxh5tw product-card__hero-image'})['src']
It returns this
data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7
Your code was not wrong. I have extracted images
import requests
from bs4 import BeautifulSoup
url = "https://www.nike.com/gb/w/new-mens-shoes-3n82yznik1zy7ok"
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
# src=True keeps only the <img> tags that actually carry a src attribute.
images = soup.find_all('img', {'class': 'css-1fxh5tw product-card__hero-image'}, src=True)
for image in images:
    # Skip inline "data:image" URIs (the placeholder value shown in the
    # question) and print only real image URLs.
    if 'data:image' not in image['src']:
        print(image['src'])
I am trying to download PDF files from this website.
I am new to Python and am currently learning about the software. I have downloaded packages such as urllib and bs4. However, there is no .pdf extension in any of the URLs. Instead, each one has the following format: http://www.smv.gob.pe/ConsultasP8/documento.aspx?vidDoc={.....}.
I have tried to use the soup.find_all command. However, this was not successful.
from urllib import request
from bs4 import BeautifulSoup
import re
import os
import urllib
url="http://www.smv.gob.pe/frm_hechosdeImportanciaDia?data=38C2EC33FA106691BB5B5039DACFDF50795D8EC3AF"
# Fetch the raw page bytes with urllib.
response = request.urlopen(url).read()
soup= BeautifulSoup(response, "html.parser")
# Keep only <a> tags whose href matches the document-viewer URL pattern.
# NOTE(review): "." and "?" are unescaped regex metacharacters here, so the
# pattern is looser than the literal URL prefix — consider re.escape().
links = soup.find_all('a', href=re.compile(r'(http://www.smv.gob.pe/ConsultasP8/documento.aspx?)'))
# Prints the matching Tag objects rather than just their URLs.
print(links)
This works for me:
import re
import requests
from bs4 import BeautifulSoup
url = "http://www.smv.gob.pe/frm_hechosdeImportanciaDia?data=38C2EC33FA106691BB5B5039DACFDF50795D8EC3AF"
response = requests.get(url).content
soup = BeautifulSoup(response, "html.parser")
# re.escape() makes "." and "?" match literally; left unescaped, "." matches
# any character and "x?" merely makes the final "x" optional.
doc_url = re.compile(re.escape("http://www.smv.gob.pe/ConsultasP8/documento.aspx?"))
anchors = soup.find_all('a', href=doc_url)
# Keep just the URL of each matching <a> tag.
links = [anchor['href'] for anchor in anchors]
print(links)
Only difference is that I use requests because I'm used to it, and I take the href attribute for each of the returned Tag from BeautifulSoup.
Please let me know how to find the class from HTML Code from the URL: https://knoema.com/MIG_UNEMP_GENDER_2015
from bs4 import BeautifulSoup as bs
import requests
# Fetch the page and parse the returned markup with the lxml backend.
r = requests.get('https://knoema.com/MIG_UNEMP_GENDER_2015')
soup = bs(r.text, 'lxml')
# Look for <div> elements whose class is "metadata-block validate".
# NOTE(review): this searches only the HTML that requests received; if the page
# builds that block with JavaScript it will not be present — TODO confirm.
soup.findAll('div', {'class': 'metadata-block validate'})
The result comes back blank.
I am trying to get the HTML source of a web page using beautifulsoup.
import bs4 as bs
import requests
import urllib.request
# Forum thread whose HTML source is wanted.
sourceUrl='https://www.pakwheels.com/forums/t/planing-a-trip-from-karachi-to-lahore-by-road-in-feb-2017/414115/2.html'
# Read the raw response body as bytes.
source=urllib.request.urlopen(sourceUrl).read()
# Parse the bytes with the "html.parser" backend.
soup=bs.BeautifulSoup(source,'html.parser')
# Print the parsed document.
print(soup)
I want the HTML source of the page. This is what I am getting now:
'ps.store("siteSettings", {"title":"PakWheels Forums","contact_email":"sami.ullah#pakeventures.com","contact_url":"https://www.pakwheels.com/main/contact_us","logo_url":"https://www.pakwheels.com/assets/logo.png","logo_small_url":"/images/d-logo-sketch-small.png","mobile_logo_url":"data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4NCjwhLS0gR2VuZXJhdG9yOiBBZG9iZSBJbGx1c3RyYXRvciAxNi4wLjAsIFNWRyBFeHBvcnQgUGx1Zy1JbiAuIFNWRyBWZXJzaW9uOiA2LjAwIEJ1aWxkIDApICAtLT4NCjwhRE9DVFlQRSBzdmcgUFVCTElDICItLy9XM0MvL0RURCBTVkcgMS4xLy9FTiIgImh0dHA6Ly93d3cudzMub3JnL0dyYXBoaWNzL1NWRy8xLjEvRFREL3N2ZzExLmR0ZCI+DQo8c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxheWVyXzEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHg9IjBweCIgeT0iMHB4Ig0KCSB3aWR0aD0iMjQwcHgiIGhlaWdodD0iNjBweCIgdmlld0JveD0iMCAwIDI0MCA2MCIgZW5hYmxlLWJhY2tncm91bmQ9Im5ldyAwIDAgMjQwIDYwIiB4bWw6c3BhY2U9InByZXNlcnZlIj4NCjxwYXRoIGZpbGw9IiNGRkZGRkYiIGQ9Ik02LjkwMiwyMy4yODZDMzQuNzc3LDIwLjI2Miw1Ny4yNC'
Have a look at this code:
from urllib import request
from bs4 import BeautifulSoup
url_1 = "http://www.google.com"
# The with-block closes the HTTP response as soon as it has been parsed.
with request.urlopen(url_1) as page:
    # Name the parser explicitly: without it bs4 picks whichever parser happens
    # to be installed and emits a warning, so results can differ by machine.
    soup = BeautifulSoup(page, "html.parser")
print(soup.prettify())
Import everything you need correctly. Read this.