I need to write a Python script that takes data from a website like the one at this link and puts it into a pandas DataFrame.
My attempt is:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

url = "https://www.w3schools.com/sql/trysql.asp?filename=trysql_op_in"
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')

# Bug fix: the `class_` argument expects a class NAME, but
# "table.ws-table-all.notranslate" is a CSS *selector* (tag + two classes),
# so find() could never match and always returned None.
# Use select_one(), which takes a CSS selector.
league_table = soup.select_one("table.ws-table-all.notranslate")

# NOTE(review): the TrySQL result table appears to be rendered client-side
# after the page loads, so even the corrected selector may find nothing in
# the raw HTML that requests receives — confirm by inspecting r.text.
print(league_table)
And I got the output: None
Related
I would like to scrape the reviews from this page and save them as a data frame, but I am downloading the star ratings together with the text of each review, and I want just the text. What did I do wrong?
import csv
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Fetch the review page and collect the full text of every review container.
response = requests.get("https://www.morele.net/pralka-candy-cs4-1062d3-950636/?sekcja=reviews-all")
document = BeautifulSoup(response.content, "html.parser")
review_divs = document.find_all("div", {"class":"reviews-item"})

# One string per review block (stars and body text come out concatenated,
# because the whole container's text is taken).
texts = []
for review in review_divs:
    texts.append(review.getText(strip=True))
print(texts)

# Build a DataFrame and prepend an explicit row-number column.
csv_table = pd.DataFrame(texts)
csv_table = csv_table.reset_index(drop=True)
csv_table.insert(0, 'No.', csv_table.index)
You are mostly there — just navigate further into the DOM and you can get just the text.
import requests
import pandas as pd  # bug fix: the snippet used pd.DataFrame without importing pandas
from bs4 import BeautifulSoup

page = requests.get("https://www.morele.net/pralka-candy-cs4-1062d3-950636/?sekcja=reviews-all")
soup = BeautifulSoup(page.content, "html.parser")

# One record per review: the body text and the star rating live in separate
# child divs ("rev-desc" / "rev-stars") of every "reviews-item" container,
# so they can be pulled out individually.
data = [
    {
        "text": ri.find("div", {"class":"rev-desc"}).getText(strip=True),
        "stars": ri.find("div", {"class":"rev-stars"}).getText(strip=True),
    }
    for ri in soup.find_all("div", {"class":"reviews-item"})
]
pd.DataFrame(data)
I would like to scrape comments from this site: https://www.ceneo.pl/sklepy/morele.net-s379
But after scraping I got an empty file. What did I do wrong?
This is my code:
import csv
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Fetch the shop page and search for review containers by an exact
# multi-class match.
page = requests.get("https://www.ceneo.pl/sklepy/morele.net-s379")
soup = BeautifulSoup(page.content, "html.parser",
).find_all("div", class_="js_shop-reviews js_shop reviews-offer")
# NOTE(review): if no div carries all of these classes, find_all() returns an
# empty list and everything below silently produces an empty CSV — this is
# the reported symptom. Confirm the class names against the live page.
# Iterating `div.find("div")` walks the CHILDREN of the first inner <div>
# (a bs4 Tag is iterable); it raises TypeError when find() returns None.
morele = [[ i.getText(strip=True) for i in div.find("div") if i.getText()] for div in soup]
csv_table = pd.DataFrame(morele)
csv_table = csv_table.reset_index(drop=True)
# Prepend an explicit row-number column.
csv_table.insert(0,'No.',csv_table.index)
#print(csv_table)
#Export to Csv file
csv_table.to_csv(r'C:/Users/admin/Desktop/morele.csv',";",encoding='utf-8-sig',index = False, header=True)
Try this.
I found that the comment text was under the class user-post__text, so I changed the selector accordingly.
import csv
import pandas as pd
import requests
from bs4 import BeautifulSoup

# The review body sits in <div class="user-post__text">, so match that class
# directly instead of the outer review container.
response = requests.get("https://www.ceneo.pl/sklepy/morele.net-s379")
document = BeautifulSoup(response.content, "html.parser")
comment_divs = document.find_all("div", {"class":"user-post__text"})

# One stripped string per comment.
comments = []
for node in comment_divs:
    comments.append(node.getText(strip=True))
print(comments)

# Number the rows and export them as a semicolon-separated CSV.
csv_table = pd.DataFrame(comments)
csv_table = csv_table.reset_index(drop=True)
csv_table.insert(0, 'No.', csv_table.index)
csv_table.to_csv(r'morele.csv', ";", encoding='utf-8-sig', index=False, header=True)
Does this solve your problem?
I am trying to get the names of all organizations from https://www.devex.com/organizations/search using BeautifulSoup. However, I am getting an error. Can someone please help?
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from time import sleep
from random import randint

headers = {"Accept-Language": "en-US,en;q=0.5"}
titles = []
# Page numbers to visit; arange(1, 2, 1) yields just page 1.
pages = np.arange(1, 2, 1)

# Structure fix: the loop bodies lost their indentation in the paste; the
# code below restores the intended nesting without changing any statement.
for page in pages:
    page = requests.get("https://www.devex.com/organizations/search?page%5Bnumber%5D=" + str(page) + "", headers=headers)
    soup = BeautifulSoup(page.text, 'html.parser')
    movie_div = soup.find_all('div', class_='info-container')
    sleep(randint(2, 10))  # polite random delay between page fetches
    for container in movie_div:
        # NOTE(review): this page looks Angular-rendered ("ng-binding"), so
        # the divs may not exist in the raw HTML requests receives, and
        # container.a can be None — either would raise the reported error.
        # Confirm by inspecting page.text.
        name = container.a.find('h3', class_='ng-binding').text
        titles.append(name)

movies = pd.DataFrame({
    'movie': titles,
})

# to see your dataframe
print(movies)
# to see the datatypes of your columns
print(movies.dtypes)
# to see where you're missing data and how much data is missing
print(movies.isnull().sum())
# to move all your scraped data to a CSV file
movies.to_csv('movies.csv')
You may try something like:
name = bs.find("h3", {"class": "ng-binding"})
I need help from a Python expert. This is a site where I have to scrape table data, separate it into four different categories, and then convert it into an Excel file. The problem is that all the table categories' classes are the same.
There should be four different classes, but the same class is used for all four.
Thanks
Mariful
Website for scrape
import requests
from bs4 import BeautifulSoup
import csv
import re
import pandas as pd

url = "https://www.kpaa.or.kr/kpaa/eng/list.do?"
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')

# Collect the text of every element carrying the 'title' class.
# Structure fix: the loop body lost its indentation in the paste, and the
# original built the DataFrame from only the LAST item.
names = []
for item in soup.find_all(class_='title'):
    n = item.text
    print(n)
    names.append(n)

# Bug fix: the constructor is pd.DataFrame (capital F) — pd.Dataframe raises
# AttributeError — and the column values must be lists, not a lone string.
# NOTE(review): all four columns get the same list here because only 'title'
# elements were scraped; office/phone/email need their own selectors.
df = pd.DataFrame({'name': names, 'office': names, 'phone': names, 'email': names})
Here I try to convert the data into a 2-D list to use in a pandas data frame.
import requests  # bug fix: the snippet called requests.get() without importing requests
from bs4 import BeautifulSoup
import csv
import re
import pandas as pd

url = "https://www.kpaa.or.kr/kpaa/eng/list.do?"
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')

# One list of cell texts per table row: getText() with separator=',' joins a
# row's cells with commas, then split(',') turns the row into a list, giving
# a 2-D list overall. The [:-1] slice drops the final (non-data) row.
data_list = [
    td.getText(strip=True, separator=',').split(',')
    for td in soup.find('div', {'class':'cont_box2'}).find_all('tr')[:-1]
]
df = pd.DataFrame(data_list)
df.to_excel('x.xlsx')
I have to write some code to scrape data from a website and then analyse it for university.
My problem is that I wrote this code to get data for all products, but when I run it, it only shows a single value for each variable.
Can you help me resolve this error?
from bs4 import BeautifulSoup as soup
import urllib
from urllib.request import urlopen as uReq
import requests

myurl = 'https://boutique.orange.fr/mobile/choisir-un-mobile'
Uclient = uReq(myurl)
page = Uclient.read()
Uclient.close()

pagesoup = soup(page, 'html.parser')
containers = pagesoup.findAll('div', {'class':'box-prod pointer'})

# One-off probe of the first product card (kept from the original).
container = containers[0]
produit = container.img['alt']
price = container.findAll('span', {'class':'price'})
price2 = container.findAll('div', {'class':'prix-seul'})
avis = container.footer.div.a.img['alt']

file = "orange.csv"
f = open(file, 'w')
headers = 'produit,prix avec abonnement, prix seul, avis\n'
f.write(headers)
# Structure fix: the loop body lost its indentation in the paste.
for container in containers:
    produit = container.img['alt']
    price = container.findAll('span', {'class':'price'})
    price2 = container.findAll('div', {'class':'prix-seul'})
    avis = container.footer.div.a.img['alt']
    # Bug fix: the original loop only reassigned the variables and never
    # wrote a row, so the CSV contained nothing but the header line —
    # the "single response for each variable" symptom.
    f.write('%s,%s,%s,%s\n' % (produit,
                               ' '.join(p.getText(strip=True) for p in price),
                               ' '.join(p.getText(strip=True) for p in price2),
                               avis))
f.close()  # bug fix: the file handle was never closed
You could use different selectors. Separate two prices per product by index. Extract price specific info using join and findall.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re  # bug fix: the snippet used re.compile without importing re

url = 'https://boutique.orange.fr/mobile/choisir-un-mobile'
res = requests.get(url)
soup = BeautifulSoup(res.content, "lxml")
#print(len(soup.select('#resultat .box-prod.pointer')))

# Keep only digits, commas and the euro sign from each price string.
p = re.compile('[0-9,€]+')
altText = [item.get('alt').strip() for item in soup.select('#resultat .box-prod.pointer .lazy')]
titles = [item.text.strip().replace('\n', ' ') for item in soup.select('#resultat .box-prod.pointer .titre-produit')]
allPrices = [''.join(p.findall(item.text)) for item in soup.select('#resultat span.price')]
# Two <span class="price"> per product: even indices are the subsidised
# ("a partir de") price, odd indices the standalone price.
aPartirPrice = allPrices[0::2]
prixSeul = allPrices[1::2]

items = list(zip(titles, altText, aPartirPrice, prixSeul))
df = pd.DataFrame(items, columns=['title', 'altText', 'aPartirPrice', 'prixSeul'])
df.to_csv(r'C:\Users\User\Desktop\Data.csv', sep=',', encoding='utf-8', index=False)
Transpose with:
df = df.T