How to extract all data having same class using python

How to extract all data having same class using python - python

i need to find all title data and stock number. I wrote a code and its works well with single item when I use find_all method it shows en error please have a ook at my code and guide me how can I handle this. thank you!
here is my code:
import requests
from bs4 import BeautifulSoup
#import pandas as pd
#import numpy as
import csv
def get_page(url):
response = requests.get(url)
if not response.ok:
print('server responded:', response.status_code)
else:
soup = BeautifulSoup(response.text, 'html.parser') # 1. html , 2. parser
return soup
def get_detail_page(soup):
title = soup.find_all('div',class_="vehicle-location-name mts bold",id=False).text
print(title)
stock = soup.find_all('div',class_="text-lightgray",id=False).find('span').text
print(stock)
def main():
url = "https://www.superbrightleds.com/vehicle/2002-acura-cl-vehicle-led-lights?make=1&model=554&year=2002"
get_detail_page(get_page(url))
if __name__ == '__main__':
main()

Try:
def get_detail_page(soup):
titles = soup.findAll('div', attrs={"class": "vehicle-location-name mts bold"})
stocks = soup.findAll('div', attrs={"class": "text-lightgray"})
title = [title.get_text() for title in titles if title]
stock = [stock.get_text() for stock in stocks if stock and 'Stock #' in str(stock)]
for idx in range(len(stock)):
print(f'{title[idx]}\n\t{stock[idx]}')

Related

Web scraping doesn't iterate over entire webpage

Im trying to scrape the information of all the player names and player rating from this website:
https://www.fifaindex.com/players/?gender=0&league=1&order=desc
But i only get the information from the first player on the page.
The code im using:
from bs4 import BeautifulSoup
import requests
url = "https://www.fifaindex.com/players/?gender=0&league=1&order=desc"
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
results = soup.find_all('div', class_="responsive-table table-rounded")
for result in results:
rating = result.find("span", class_="badge badge-dark rating r3").text
name = result.find("a", class_="link-player")
info = [rating, name]
print(info)
The HTML parsed is attached in the picture

I tinkered around a little bit and I think I got a version that does what you want
from bs4 import BeautifulSoup
import requests
page = requests.get("https://www.fifaindex.com/players/?
gender=0&league=1&order=desc")
soup = BeautifulSoup(page.content, "html.parser")
results = soup.find_all("tr")
for result in results:
try:
result["data-playerid"]
except KeyError:
continue
rating = result.find("span", class_="badge badge-dark rating r3").text
name = result.find("a", class_="link-player")
info = [rating, name]
print(info)

Get all table lines with a data-playerid attribute will fix it:
#!/usr/bin/env python3
from bs4 import BeautifulSoup
import requests
url = "https://www.fifaindex.com/players/?gender=0&league=1&order=desc"
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')
results = soup.find_all('tr', {'data-playerid': True})
for res in results:
rating = res.find("span", class_="badge badge-dark rating r3").text
name = res.find("a", class_="link-player")
info = [rating, name]
print(info)

Is there a way to extract only the string of a paragraph when webscraping in python?

We want to know if there is a way to extract only the string of a paragraph when web scraping in python?
The problem lies in our definition of 'price'
Anastacia <3
Code:
import requests
from bs4 import BeautifulSoup
def scraper(url):
url.status_code
url.headers
c = url.content
soup = BeautifulSoup(c, "html.parser")
samples2 = soup.find_all(class_="product-price font-weight-bold mb-0")
for p in samples2:
price = (p)
print(price)
print()
url = "https://www.bevco.dk/spiritus/"
url = requests.get(url)
scraper(url)

You only have to modify the variable price. Extract the text from between the tags and for esthetic purposes, strip the extracted string.
import requests
from bs4 import BeautifulSoup
def scraper(url):
url.status_code
url.headers
c = url.content
soup = BeautifulSoup(c, "html.parser")
samples2 = soup.find_all(class_="product-price font-weight-bold mb-0")
for p in samples2:
price = (p)
print(price.get_text().strip())
print()
url = "https://www.bevco.dk/spiritus/"
url = requests.get(url)
scraper(url)

import requests
from bs4 import BeautifulSoup
def getdata(url):
r = requests.get(url)
return r.text
htmldata = getdata("https://www.bevco.dk/spiritus")
soup = BeautifulSoup(htmldata, 'html.parser')
data = ''
for data in soup.find_all("p",{'class': 'product-price font-weight-bold mb-0'}):
print(data.get_text()) `enter code here`

How to return only hotel description from booking?

How to print only description from specific hotel from booking.com
Ex:
I use this code but return None!
import requests
from scrapy.selector import Selector
url = 'https://www.booking.com/hotel/eg/mediterranean-azur.html?'
response2 = requests.get(url)
if response2.ok is True:
selector = Selector(text=response2.content)
print(selector.xpath("//div[#class='hp__hotel-name']").get())
Any kind of help please?

Try this:
import requests
from bs4 import BeautifulSoup
url = "https://www.booking.com/hotel/eg/mediterranean-azur.uk.html?"
r = requests.get(url)
soup = BeautifulSoup(r.text, "lxml")
answ = soup.find("div", {"id":"property_description_content"}).text

How to get all listings urls from main page with python web scraping

I wrote a code for web scraping, My code is ok just except two issues. From detail page, everything is ok just ISBN NO, and from main page, I need all listing URLs so that my code could scrape date from aa listings. Please guide me how can I fix this issue. Both(main page and details page )URLs are in the code. Thank you!
here is my code:
import requests
from bs4 import BeautifulSoup
import csv
def get_page(url):
response = requests.get(url)
if not response.ok:
print('server responded:', response.status_code)
else:
soup = BeautifulSoup(response.text, 'html.parser') # 1. html , 2. parser
return soup
def get_detail_data(soup):
try:
title = soup.find('span',class_="title product-field",id=False).text
except:
title = 'empty'
print(title)
try:
writer = soup.find('a',class_="contributor-name",id=False).text
except:
writer = 'empty'
print(writer)
try:
original_price = soup.find('div',class_="original-price",id=False).find('span').text
except:
original_price = 'empty'
print(original_price)
try:
active_price = soup.find('div',class_="active-price",id=False).find('span').text
except:
active_price = 'empty'
print(active_price)
try:
img = soup.find('div',class_="image-actions image-container product-type-icon-container book",id=False).find('img').attrs['src']
except:
img = 'empty'
print(img)
try:
isbn = soup.find('div',class_="bookitem-secondary-metadata",id=False).find('li').attrs['ISBN: ']
except:
isbn = 'empty'
print(isbn)
data = {
'title' : title,
'writer' : writer,
'original_price' : original_price,
'active_price' : active_price,
'image' : img,
'isbn' : isbn
}
return data
def get_index_data(soup):
titles_link = soup.find_all('a',class_="body_link_11")
try:
inks = soup.find('div', class_="item-info",id=False).find('p').find('a').get('href')
except:
inks = "empty"
print(inks)
def main():
#detail_page_url = "https://www.kobo.com/ww/en/ebook/mum-dad-1"
mainurl = "https://www.kobo.com/ww/en/list/new-hot-in-fiction/youL53408U25RHrVu3wR5Q"
#get_page(url)
#get_detail_data(get_page(detail_page_url))
get_index_data(get_page(mainurl))
if __name__ == '__main__':
main()

import requests
import re
import json
from bs4 import BeautifulSoup
import csv
def Soup(content):
soup = BeautifulSoup(content, 'html.parser')
return soup
def Main(url):
r = requests.get(url)
soup = Soup(r.content)
scripts = soup.findAll("script", type="application/ld+json",
text=re.compile("data"))
prices = [span.text for span in soup.select(
"p.product-field.price span span") if span.text != "USD"]
with open("data.csv", 'w', newline="") as f:
writer = csv.writer(f)
writer.writerow(["Title", "Writer", "Price", "ISBN", "IMG", "URL"])
for script, price in zip(scripts, prices):
script = json.loads(script.text)
title = script["data"]["name"]
author = script["data"]["author"][0]["name"]
img = f'https:{script["data"]["thumbnailUrl"]}'
isbn = script["data"]["isbn"]
url = script["data"]["url"]
writer.writerow([title, author, price, isbn, img, url])
Main("https://www.kobo.com/ww/en/list/new-hot-in-fiction/youL53408U25RHrVu3wR5Q")
Output: View-Online
Output Sample:

How to scrape text from div with empty class value

Hi how to scrape text from div without any class? At first I try to scrape all data from div with class 'jobs page' then without class value but it doesn't work.
from bs4 import BeautifulSoup
import requests
a = {}
def antal_pl(name=''):
try:
page_response = requests.get('https://antal.pl/oferty-pracy?s=&sid=&did=Accountancy', timeout=40).text
page_content = BeautifulSoup(page_response, 'lxml')
data = page_content.find_all(class_ = 'jobs_page')
data_in = data.find_all('div', class_ = None)
print(data_in)
except:
''
antal_pl( name='Accontancy')

Try the below approach to get the text out of that webpage as you mentioned above. I've tried to organize your code a little to make it look cleaner.
from bs4 import BeautifulSoup
import requests
URL = "https://antal.pl/oferty-pracy?s=&sid=&did={}"
def antal_pl(name):
res = requests.get(URL.format(name))
soup = BeautifulSoup(res.text, 'lxml')
data = soup.find(class_='header').find_next_sibling().text.strip()
print(data)
if __name__ == '__main__':
antal_pl("Accountancy")
Result:
Znaleziono 47 ofert pracy.

use XPATH
html = etree.HTML(wb_data)
html_data = html.xpath('/html/body/div/ul/li/a')
enter image description here

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to extract all data having same class using python - python

Related

Web scraping doesn't iterate over entire webpage

Is there a way to extract only the string of a paragraph when webscraping in python?

How to return only hotel description from booking?

How to get all listings urls from main page with python web scraping

How to scrape text from div with empty class value

Categories

Resources