Is there any way to fix bad currency read? - python

I would like to scrape the prices of items on the Steam Community Market with Python and BeautifulSoup, but when I scrape one, the price comes back in Swedish kronor. Is there any way to change that to EUR?
from bs4 import BeautifulSoup
import requests

# Steam Community Market listing page for the item being priced.
listing_url = 'https://steamcommunity.com/market/listings/730/USP-S%20%7C%20Blueprint%20%28Factory%20New%29'

# Download the page and parse it with the lxml backend.
response = requests.get(listing_url)
html_text = response.text
soup = BeautifulSoup(html_text, 'lxml')

# find() returns the first matching <span> for the item name and for the price.
name_span = soup.find('span', class_='market_listing_item_name')
price_span = soup.find('span', class_='market_listing_price')

skin_name = name_span.text
# Every space in the scraped price text is replaced with 'sm '.
sm = price_span.text.replace(' ', 'sm ')
print(skin_name, sm)

Related

Text vanishes when soup produces

I am trying to extract the date from the Yahoo Finance site, but when the text is produced only the time appears, without the date.
import requests
import re
from bs4 import BeautifulSoup
# Scraped values keyed by label; only "CMP" is filled in by the lines shown here.
technicals = {}
ticker = "INFY.NS"
# NOTE(review): the statements below belong inside this try block, but their
# indentation was lost and the matching except/finally clause is not shown.
try:
# Build the Yahoo Finance key-statistics URL for the ticker and fetch it.
url = "https://finance.yahoo.com/quote/" + ticker + "/key-statistics?p=" + ticker
r = requests.get(url)
# soup = BeautifulSoup(open("Yahoo Stats.html"), 'lxml')
soup = BeautifulSoup(r.text, 'lxml')
# Find a <span> by its class string and store its text under "CMP".
t = soup.find('span', {"class": "Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(ib)"})
technicals["CMP"] = t.text
# d = soup.find('span', {"class": "C($tertiaryColor) D(b) Fz(12px) Fw(n) Mstart(0)--mobpsm Mt(6px)--mobpsm"})
# Find the <div> with id "quote-market-notice" and print its text.
d = soup.find('div', attrs={'id': 'quote-market-notice'})
print(d.text)
The code runs further on.
Can somebody help me get both the date and the time?
When I checked the page source, the data was not in the span tag. It seems to be loaded dynamically.
<span data-reactid="36">At close: 3:30PM IST</span>
I recommend using the Selenium library instead to get the required data.

Creating a for Loop in bs4 for Ebay listings

I have the following code that goes to an ebay url and extracts the name of a listing and sale price, see below:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests

# Fetch the eBay search-results page inside a context manager so the HTTP
# response is closed as soon as its body has been parsed.
with urlopen('https://www.ebay.com/sch/i.html?_from=R40&_nkw=manga&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1') as html:
    soup = BeautifulSoup(html.read(), 'html.parser')

# find() returns only the first matching element, so these two lookups
# describe a single listing on the page.
soldItem = soup.find('h3', class_='s-item__title s-item__title--has-tags')
salePrice = soup.find('span', class_='POSITIVE')
#data = soup.find('div', class_='s-item__info clearfix')

# One-entry mapping: listing title -> sale price text.
itemData = {soldItem.get_text(): salePrice.get_text()}
I want to create a for loop that iterates over the first page and gives me the name and sale price of every listing.
However, every single attempt that I've made returns either the same listing five times or all sold items and thereafter all sale prices.
Any hints as to how to format my for loop ?
You can try this code, which builds a dictionary of all the listings, with the item name as the key and the sale price as the value.
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests

# Fetch the eBay search-results page; the context manager closes the HTTP
# response once its body has been parsed.
with urlopen('https://www.ebay.com/sch/i.html?_from=R40&_nkw=manga&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1') as html:
    soup = BeautifulSoup(html.read(), 'html.parser')

# Walk the page one listing container at a time so every title is paired with
# the price found in the SAME container. Zipping two page-wide find_all()
# results pairs the wrong items as soon as one listing lacks a matching title
# or price.
itemsData = {}
for listing in soup.find_all('div', class_='s-item__info clearfix'):
    title = listing.find('h3', class_='s-item__title s-item__title--has-tags')
    price = listing.find('span', class_='POSITIVE')
    if title is None or price is None:
        # Skip containers that do not carry both pieces of data.
        continue
    # Mapping: item name -> sale price text.
    itemsData[title.text] = price.text
print(itemsData)

How can I scrape Songs Title from this request that I have collected using python

import requests
from bs4 import BeautifulSoup

# Download the playlist page and parse it with the built-in HTML parser.
r = requests.get("https://gaana.com/playlist/gaana-dj-hindi-top-50-1")
soup = BeautifulSoup(r.text, "html.parser")

# First <div> whose class is "s_c".
result = soup.find("div", {"class": "s_c"})
# `result.class` is a syntax error because `class` is a reserved word;
# print the element itself to dump the scraped markup.
print(result)
From the above code, I am able to scrape this data
https://www.pastiebin.com/5f08080b8db82
Now I would like to scrape only the title of the songs and then make a list out of them like the below:
Meri Aashiqui
Genda Phool
Any suggestions are much appreciated!
Try this :
import requests
from bs4 import BeautifulSoup

# Download the playlist page and parse it with the built-in HTML parser.
r = requests.get("https://gaana.com/playlist/gaana-dj-hindi-top-50-1")
soup = BeautifulSoup(r.text, "html.parser")
result = soup.find("div", {"class": "s_c"})
#print(result)

# All track-detail divs found inside the "s_c" container.
div = result.find_all('div', class_='track_npqitemdetail')

# Collect the text of the first <span> in each track div as the song name.
name_list = []
for x in div:
    span = x.find('span')
    if span is not None:
        # Skip divs without a <span> instead of crashing on `.text`.
        name_list.append(span.text)
print(name_list)
This code collects all the song names in the list name_list.

Scraping Indeed with Beautiful Soup

I'm unfamiliar with html and web scraping with beautiful soup. I'm trying to retrieve Job titles, salaries, location and company name from various indeed job postings. This is my code so far:
import urllib2
import bs4
from bs4 import BeautifulSoup

# Indeed search-results page to scrape.
URL = "http://www.indeed.com/jobs?q=data+scientist+%2420%2C000&l=New+York&start=10"

# Download the page and hand the raw HTML to BeautifulSoup (no parser is named).
page_html = urllib2.urlopen(URL).read()
soup = BeautifulSoup(page_html)

# Every element whose id is "resultsCol".
resultcol = soup.find_all(id='resultsCol')
# Every <span> whose class is "company".
company = soup.findAll('span', attrs={"class": "company"})
# NOTE(review): the dict is passed positionally here rather than as attrs=;
# confirm that this is the intended lookup.
jobs = soup.find_all({'class': " row result"})
though I have the commands to find jobs and company, I can't get the contents. I'm aware there's a contents command, but none of my variables so far have that attribute. Thanks!
First I find every div that holds a single job, and then I search for the elements inside each of those divs.
import urllib2
from bs4 import BeautifulSoup

URL = "http://www.indeed.com/jobs?q=data+scientist+%2420%2C000&l=New+York&start=10"
soup = BeautifulSoup(urllib2.urlopen(URL).read(), 'html.parser')

# One <div data-tn-component="organicJob"> per job; the fields of a job are
# then searched inside its own div only.
results = soup.find_all('div', attrs={'data-tn-component': 'organicJob'})

for x in results:
    # find() returns None when a field is absent, so each field is checked
    # before its text is read. A parenthesised single-argument print is valid
    # both as a Python 2 statement and as a Python 3 call.
    company = x.find('span', attrs={"itemprop": "name"})
    if company is not None:
        print('company: %s' % company.text.strip())

    job = x.find('a', attrs={'data-tn-element': "jobTitle"})
    if job is not None:
        print('job: %s' % job.text.strip())

    salary = x.find('nobr')
    if salary is not None:
        print('salary: %s' % salary.text.strip())

    # Separator line between jobs.
    print('----------')
Updated @furas's example for Python 3:
import urllib.request
from bs4 import BeautifulSoup

URL = "https://www.indeed.com/jobs?q=data+scientist+%2420%2C000&l=New+York&start=10"

# Read the page inside a context manager so the HTTP response is closed.
with urllib.request.urlopen(URL) as response:
    soup = BeautifulSoup(response.read(), 'html.parser')

# One <div data-tn-component="organicJob"> per job; the fields of a job are
# then searched inside its own div only.
results = soup.find_all('div', attrs={'data-tn-component': 'organicJob'})

for x in results:
    # find() returns None when a field is absent, so each one is checked
    # before its text is printed.
    company = x.find('span', attrs={"class": "company"})
    if company:
        print('company:', company.text.strip())

    job = x.find('a', attrs={'data-tn-element': "jobTitle"})
    if job:
        print('job:', job.text.strip())

    salary = x.find('nobr')
    if salary:
        print('salary:', salary.text.strip())

    # Separator line between jobs.
    print('----------')

Extracting Most Read Titles with BS4

I want to extract the titles in the Most Read section of a news page. This is what I have so far, but I'm getting all the titles. I just want the ones in the Most Read section.
import requests
from bs4 import BeautifulSoup

base_url = 'https://www.michigandaily.com/section/opinion'
r = requests.get(base_url)
soup = BeautifulSoup(r.text, "html5lib")

# This searches the whole page, so every title element is printed.
for story_heading in soup.find_all(class_="views-field views-field-title"):
    if story_heading.a:
        # Title is wrapped in a link: print the link text on a single line.
        print(story_heading.a.text.replace("\n", " ").strip())
    else:
        # No link: fall back to the element's first child.
        print(story_heading.contents[0].strip())
You need to limit your scope to only the div container for the most read articles.
import requests
from bs4 import BeautifulSoup

base_url = 'https://www.michigandaily.com/section/opinion'
r = requests.get(base_url)
soup = BeautifulSoup(r.text, "html5lib")

# Narrow the search to the first div carrying the "view-id-most_read" class,
# so only titles inside that container are visited.
most_read_soup = soup.find_all('div', {'class': 'view-id-most_read'})[0]

for story_heading in most_read_soup.find_all(class_="views-field views-field-title"):
    if story_heading.a:
        # Title is wrapped in a link: print the link text on a single line.
        print(story_heading.a.text.replace("\n", " ").strip())
    else:
        # No link: fall back to the element's first child.
        print(story_heading.contents[0].strip())
You can use a css selector to get the specific tags from the top most read div:
import requests  # required by requests.get() below; missing from the original snippet
from bs4 import BeautifulSoup

base_url = 'https://www.michigandaily.com/section/opinion'
r = requests.get(base_url)
soup = BeautifulSoup(r.text, "html5lib")

# CSS selector: every <a> nested inside the div with class
# "pane-most-read-panel-pane-1".
css = "div.pane-most-read-panel-pane-1 a"
# Stripped link text of each selected anchor.
links = [a.text.strip() for a in soup.select(css)]
Which will give you:
[u'Michigan in Color: Anotha One', u'Migos trio ends 2016 SpringFest with Hill Auditorium concert', u'Migos dabs their way to a seminal moment for Ann Arbor hip hop', u'Best of Ann Arbor 2016', u'Best of Ann Arbor 2016: Full List']

Categories

Resources