Spotipy - Can't add tracks to playlist - python

I've been trying to create a simple tool that converts apple music playlists to Spotify playlists using web scraping and Spotipy.
I broke each part of this tool into various functions. My function to scrape the apple website and create a playlist works fine but adding the tracks doesn't. It just shows an empty playlist on my Spotify account
This is the function that is supposed to search and add the tracks.
I commented out previous search/query methods.
def get_spotify_tracks(songs, artists, sp, user_id):
'''
Step 4: Transfer fetched song data
Step 5: Search for song on Spotify
Step 6: Add the songs to the Spotify playlist
'''
list_of_tracks = []
prePlaylist = sp.user_playlists(user=user_id)
playlist = prePlaylist["items"][0]["id"]
for playlist_song, song_artist in zip(songs, artists):
track_id = sp.search(q='artist:' + song_artist + ' track:' + playlist_song, type='track')
#track_data = list(f'{playlist_song} by {song_artist}')
#result = sp.search(q=track_data, )
#query = f"https://api.spotify.com/v1/serch?query=track%3A{playlist_song}" \
# f"+artist%3A{song_artist}&type=track&offset=0&limit=5"
list_of_tracks.append(track_id["tracks"]["items"][0]["uri"])
print(list_of_tracks)
query = f"https://api.spotify.com/v1/playlists/{playlist}/tracks"
response = requests.post(
query,
data=list_of_tracks,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}"
}
)
return list_of_tracks
This is the complete code. Executing main() creates a playlist with the right name and description but no tracks.
'''
Making a program that converts an Apple music playlist to a Spotify Playlist
Step1: Fetch the Apple music playlist data
Step2: Get Authorization to spotify account
Step3: Create new spotify playlist
Step4: Transfer fetched song data
Step5: Search for song on Spotify
Step6: Add the songs to Spotify playlist
'''
clientid = os.environ['CLIENT_ID']
client_secret = os.environ['CLIENT_SECRET']
token = os.environ['ACCESS_TOKEN']
username = os.environ['USERNAME']
track_list = []
artist_list = []
def get_apple_playlist(URL):
'''
Step1: Fetch the Apple music playlist data
'''
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
"Accept-Language": "en-US,en;q=0.9"
}
# pass in apple playlist link
apple_link = requests.get(URL, headers=headers)
# Use Beautiful soup to retrieve page html
playlist_soup = BeautifulSoup(apple_link.content, "lxml")
# Find various playlist info with Beautiful soup
playlist_name = playlist_soup.find('h1', attrs={'data-testid': 'non-editable-product-title'}).get_text(strip=True)
playlist_description = playlist_soup.find('p', attrs={'data-testid': 'truncate-text'}).get_text(strip=True)
songs_raw = playlist_soup.find_all("div", class_="songs-list-row__song-name")
artists_raw = playlist_soup.find_all("div", class_="songs-list__col songs-list__col--artist typography-body")
#song_art = [playlist_soup.find_all("div picture source", type="image/jpeg").get_]
#Refining data
for song in songs_raw:
track_list.append(song.get_text(strip=True))
for artist in artists_raw:
artist_list.append(artist.get_text(strip=True))
return playlist_name, playlist_description, track_list, artist_list
def spotify_playlist(playlist_name, playlist_description):
'''
Step 2: Get Authorization to spotify account
Step 3: Create new spotify playlist
'''
token = SpotifyOAuth(client_id=clientid,
client_secret=client_secret,
redirect_uri="http://127.0.0.1:8080/",
scope="playlist-modify-public")
sp = spotipy.Spotify(auth_manager=token)
user_id = sp.current_user()['id']
new_playlist = sp.user_playlist_create(user=user_id, name=playlist_name, description=playlist_description, public=True)
playlist_id = new_playlist['id']
return playlist_id, user_id, sp
def get_spotify_tracks(songs, artists, sp, user_id):
'''
Step 4: Transfer fetched song data
Step 5: Search for song on Spotify
Step 6: Add the songs to the Spotify playlist
'''
list_of_tracks = []
prePlaylist = sp.user_playlists(user=user_id)
playlist = prePlaylist["items"][0]["id"]
for playlist_song, song_artist in zip(songs, artists):
track_id = sp.search(q='artist:' + song_artist + ' track:' + playlist_song, type='track')
#track_data = list(f'{playlist_song} by {song_artist}')
#result = sp.search(q=track_data, )
#query = f"https://api.spotify.com/v1/serch?query=track%3A{playlist_song}" \
# f"+artist%3A{song_artist}&type=track&offset=0&limit=5"
list_of_tracks.append(track_id["tracks"]["items"][0]["uri"])
print(list_of_tracks)
query = f"https://api.spotify.com/v1/playlists/{playlist}/tracks"
response = requests.post(
query,
data=list_of_tracks,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}"
}
)
return list_of_tracks
def main(url):
'''
Execution
'''
playlist_data = get_apple_playlist(url)
playlist = spotify_playlist(playlist_data[0], playlist_data[1])
tracks = get_spotify_tracks(playlist_data[2], playlist_data[3], playlist[2], playlist[1])
main("https://music.apple.com/ng/playlist/angry/pl.u-PDbYmE4Ie084DqR")

Related

how do I add API code to Discord.py command

so i have this idea to make a weather command from a friend and i started working on it today. installed the packages, got the code set up, ect. so i have this code below for my weather API code and i want to get it into a command. (!weather). someone in the DPY guild recommended aiohttp and that looks too difficult because i am a newer programmer. is there any solution to this that someone can spoonfeed me? API code is below:
import requests
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
def weather(city):
city = city.replace(" ", "+")
res = requests.get(
f'https://www.google.com/search?q={city}&oq={city}&aqs=chrome.0.35i39l2j0l4j46j69i60.6128j1j7&sourceid=chrome&ie=UTF-8', headers=headers)
print("Searching...\n")
soup = BeautifulSoup(res.text, 'html.parser')
location = soup.select('#wob_loc')[0].getText().strip()
time = soup.select('#wob_dts')[0].getText().strip()
info = soup.select('#wob_dc')[0].getText().strip()
weather = soup.select('#wob_tm')[0].getText().strip()
print(location)
print(time)
print(info)
print(weather+"°C")
city = input("Enter the Name of City -> ")
city = city+" weather"
weather(city)
print("Have a Nice Day:)")
# This code is contributed by adityatri
Get a API key from the weathermap website.
This is an example of what this could look like. Its easily translatable into a command.
In case you want to make it its own function, you can just use this code.
What will be returned is a tuple.
list_weather = weatherFrog()
print(list_weather[0])
0 would be the temperature, 1 the humidity and 2 the description.
Code:
def weatherFrog(city):
api_key = YOUR_API_KEY
base_url = "https://api.openweathermap.org/data/2.5/weather?"
complete_url = base_url + "appid=" + api_key + "&q=" + city
response = requests.get(complete_url)
x = response.json()
if x["cod"] != "404":
y = x["main"]
current_temperature = y["temp"]
current_humidiy = str(y["humidity"]) + "%"
z = x["weather"]
current_temperaturetwo = str(round(current_temperature - 273.15, 2)) + " Degrees Celsius"
en_weather_description = z[0]["description"]
return current_temperaturetwo, current_humidiy, en_weather_description

Spotify & Youtube API integration: Liked Youtube music videos in Spotify

An overview of the project.
API Used
Using Spotify API to Create a Playlist and add music to the playlist
Using Youtube Data API to retrieve liked videos
OAuth 2.0 for verification
Goal:
The liked youtube videos of my youtube account should automatically come inside my Spotify newly created playlist
Code:
import json
import os
import google_auth_oauthlib.flow
import google.oauth2.credentials
import googleapiclient.discovery
import googleapiclient.errors
import requests
import youtube_dl
from secret import spotify_token, spotify_user_id
from exceptions import ResponseException
class CreatePlaylist:
def __init__(self):
self.user_id = spotify_user_id
self.spotify_token = spotify_token
self.youtube_client = self.get_youtube_client()
self.all_song_info = {}
# connect to youtube data api
def get_youtube_client(self):
# Disable OAuthlib's HTTPS verification when running locally.
# *DO NOT* leave this option enabled in production.
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
api_service_name = "youtube"
api_version = "v3"
client_secrets_file = "youtube_auth.json"
# Get credentials and create an API client
scopes = ["https://www.googleapis.com/auth/youtube.readonly"]
flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
client_secrets_file, scopes)
credentials = flow.run_console()
# from the Youtube DATA API
youtube_client = googleapiclient.discovery.build(
api_service_name, api_version, credentials=credentials)
return youtube_client
# remove **kwarg that are not set
def get_liked_videos(self):
request = self.youtube_client.videos().list(
part="snippet,contentDetails,statistics", myRating="like"
)
response = request.execute()
# collect each video and get important information
for item in response['items']:
video_title = item['snippet']['title']
youtube_url = "https://www.youtube.com/watch?v={}".format(
item["id"])
# use youtube_dl to collect song name and artist name
video = youtube_dl.YoutubeDL({}).extract_info(
youtube_url, download=False)
song_name = video['track']
artist = video['artist']
# save all important info and skip any missing song and artist
self.all_song_info[video_title] = {
"youtube_url": youtube_url,
"song_name": song_name,
"artist": artist,
# add the uri, easy to get song to put into playlist
"spotify_uri": self.get_spotify_uri(song_name, artist)
}
# create a new playlist
def create_playlist(self):
request_body = json.dumps({
"name": "Youtube Liked Songs",
"description": "All liked youtube video songs",
"public": True
})
query = "https://api.spotify.com/v1/users/{}/playlists".format(
self.user_id)
response = requests.post(
query,
data=request_body,
headers={
"Content-Type": "application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
}
)
response_json = response.json()
# playlis id
return response_json["id"]
# search for the song on Spotify
def get_spotify_uri(self, song_name, artist):
query = "https://api.spotify.com/v1/search?query=track%3A{}+artist%3A{}&type=track&offset=0&limit=20".format(
song_name,
artist
)
response = requests.get(
query,
headers={
"Content-Type": "application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
}
)
response_json = response.json()
songs = response_json["tracks"]["items"]
# only use the first song
uri = songs[0]["uri"]
return uri
# add the song in new spotify_playlist
def add_song_to_playlist(self):
# populate dictionary with our liked songs
self.get_liked_videos()
# collect all of uri
uris = []
for song, info in self.all_song_info.items():
uris.append(info['spotify_uri'])
# create a new playlist
playlist_id = self.create_playlist()
# add all songs into new playlist
request_data = json.dumps(uris)
query = "https://api.spotify.com/v1/playlists/{}/tracks".format(
playlist_id)
response = requests.post(
query,
data=request_data,
headers={
"Content-Type": "application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
})
if response.status_code < 200 or response.status_code > 300:
raise ResponseException(response.status_code)
response_json = response.json()
return response_json
if __name__ == '__main__':
cp = CreatePlaylist()
cp.add_song_to_playlist()
Output
A new playlist is made inside spotify libray but the song in the list doesn't belong to my liked videos and the songs are repeated in the playlist the number of songs are almost 5-6 and all are same
Link of the song: https://www.youtube.com/watch?v=4awXLGzlf7E
Thanks in advance kinda help.

Only valid bearer authentication supported - Python - Spotify API

I was coding this script that can get into my liked videos and make a playlist on Spotify with the title and artist of each video.
I already tried to renew the Token from the Spotify API manager, but for some reason it's still showing the following error:
status': 400, 'message': 'Only valid bearer authentication supported'}}
Traceback (most recent call last):
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 159, in <module>
cp.add_song_to_playlist()
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 129, in add_song_to_playlist
self.get_liked_videos()
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 76, in get_liked_videos
"spotify_uri": self.get_spotify_uri(song_name, artist)
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 119, in get_spotify_uri
songs = response_json["tracks"]["items"]
KeyError: 'tracks'
I noticed that the KeyError is showing up because the call is returning an error.
Here follows the code for the project:
import json
import requests
import os
from secrets import spotify_user_id, spotify_token
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
import youtube_dl
class CreatePlaylist:
def __init__(self):
self.user_id = spotify_user_id
self.spotify_token = spotify_token
self.youtube_client = self.get_youtube_client()
self.all_song_info = {}
#Step 1
def get_youtube_client(self):
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
api_service_name = "youtube"
api_version = "v3"
#Get Credentials for API Client
flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file('client_secret.json', scopes=['https://www.googleapis.com/auth/youtube'])
credentials = flow.run_console()
youtube_client = googleapiclient.discovery.build(api_service_name, api_version, credentials=credentials)
return youtube_client
#Step 2
def get_liked_video(self):
request = self.youtube_client.videos().list(
part="snippet,contentDetails,statistics",
myRating="Like"
)
response = request.execute()
#Get information on each video on the list
for item in response["items"]:
video_title = item["snippet"]["title"]
youtube_url = "https://www.youtube.com/watch?v={}".format(item["id"])
#use youtube_dl to colect the name of the song and the artist
video = youtube_dl.YoutubeDL({}).extract_info(youtube_url,download = False)
song_name = video["track"]
artist = video["artist"]
self.all_song_info[video_title] = {
"youtube_url":youtube_url,
"song_name":song_name,
"artist":artist,
"spotify_uri":self.get_spotify_uri(song_name,artist)
}
#Step 3
def create_playlist(self):
request_body = json.dumps({
"name":"Youtube Liked Videos",
"description":"Todos os Videos com Like do YT",
"public": True
})
query = "https://api.spotify.com/v1/users/{user_id}/playlists".format()
response = requests.post(
query,
data= request_body,
headers={
"Content-type": "application/json",
"Authorization": "Bearer {}".format(spotify_token)
}
)
response_json = response.json
#playlistId
return response_json["id"]
#Step 4
def get_spotify_uri(self, song_name, artist):
query = "https://api.spotify.com/v1/search".format(
song_name,
artist
)
response = requests.get(
query,
headers={
"Content-type": "application/json",
"Authorization": "Bearer {}".format(spotify_token)
}
)
response_json = response.json()
songs = response_json["tracks"]["items"]
#configurar para utilizar somente a primeira musica
uri = songs[0]["uri"]
return uri
#Step 5
def add_song_to_playlist(self):
self.get_liked_video()
uris = []
for song ,info in self.all_song_info():
uris.apend(info["spotify_uri"])
#create new playlist
playlist_id = self.create_playlist
#add musics to the created playlist
request_data = json.dumps(uris)
query = "https://api.spotify.com/v1/playlists/{playlist_id}/tracks".format(playlist_id)
response = requests.post(
query,
data=request_data,
headers = {
"Content-Type":"application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
}
)
response_json = response.json()
return response_json
CreatePlaylist()
I think the spotify_token and spotify_user_id are the issue. If you go to:
https://pypi.org/project/spotify-token/ it is a Python script where you can generate a Spotify token.
As for the spotify_user_id that is your username on Spotify. To find your username, go to: https://www.spotify.com/us/ , click on Profile > Account > Account overview
Hope it helps.

Cannot get access token for Azure Cognitive Services (for tts)

I can't seem to get authorization for the Azure cognitive services access token. I'm using the example code (modified to take my key) that the azure team posted to github.
I've gone through the documentation and as far as I can tell I'm doing everything right. I've also used the "Unified Speech Services for free trials" and that also doesn't work.
class TextToSpeech(object):
def __init__(self, subscription_key):
self.subscription_key = subscription_key
self.tts = "testing the TTS abilities of Azure using python"
#self.tts = input("What would you like to convert to speech: ")
self.timestr = time.strftime("%Y%m%d-%H%M")
self.access_token = None
'''
The TTS endpoint requires an access token. This method exchanges your
subscription key for an access token that is valid for ten minutes.
'''
def get_token(self):
fetch_token_url = "https://eastus.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
headers = {
'Ocp-Apim-Subscription-Key': self.subscription_key
}
response = requests.post(fetch_token_url, headers=headers)
self.access_token = str(response.text)
if __name__ == "__main__":
app = TextToSpeech(subscription_key)
app.get_token()
Here is the output of the access token
'{"error":{"code":"401","message": "Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource."}}'
What I should be getting is the temporary access token but for some reason I get the above error and I have no idea why.
This error is due to you called the wrong endpoint . Pls try the code below to get started with your own subscription params in main method :
import os, requests, time
from xml.etree import ElementTree
try: input = raw_input
except NameError: pass
class TextToSpeech(object):
def __init__(self, subscription_key,region):
self.subscription_key = subscription_key
self.region =region
self.tts = input("What would you like to convert to speech: ")
self.timestr = time.strftime("%Y%m%d-%H%M")
self.access_token = None
def get_token(self):
fetch_token_url = 'https://'+self.region+'.api.cognitive.microsoft.com/sts/v1.0/issuetoken'
headers = {
'Ocp-Apim-Subscription-Key': self.subscription_key
}
response = requests.post(fetch_token_url, headers=headers)
self.access_token = str(response.text)
def save_audio(self):
base_url = 'https://'+self.region+'.tts.speech.microsoft.com/'
path = 'cognitiveservices/v1'
constructed_url = base_url + path
headers = {
'Authorization': 'Bearer ' + self.access_token,
'Content-Type': 'application/ssml+xml',
'X-Microsoft-OutputFormat': 'riff-24khz-16bit-mono-pcm',
'User-Agent': 'YOUR_RESOURCE_NAME'
}
xml_body = ElementTree.Element('speak', version='1.0')
xml_body.set('{http://www.w3.org/XML/1998/namespace}lang', 'en-us')
voice = ElementTree.SubElement(xml_body, 'voice')
voice.set('{http://www.w3.org/XML/1998/namespace}lang', 'en-US')
voice.set('name', 'en-US-Guy24kRUS') # Short name for 'Microsoft Server Speech Text to Speech Voice (en-US, Guy24KRUS)'
voice.text = self.tts
body = ElementTree.tostring(xml_body)
response = requests.post(constructed_url, headers=headers, data=body)
'''
If a success response is returned, then the binary audio is written
to file in your working directory. It is prefaced by sample and
includes the date.
'''
if response.status_code == 200:
with open('sample-' + self.timestr + '.wav', 'wb') as audio:
audio.write(response.content)
print("\nStatus code: " + str(response.status_code) + "\nYour TTS is ready for playback.\n")
else:
print("\nStatus code: " + str(response.status_code) + "\nSomething went wrong. Check your subscription key and headers.\n")
def get_voices_list(self):
base_url = 'https://'+self.region+'.tts.speech.microsoft.com/'
path = 'cognitiveservices/voices/list'
constructed_url = base_url + path
headers = {
'Authorization': 'Bearer ' + self.access_token,
}
response = requests.get(constructed_url, headers=headers)
if response.status_code == 200:
print("\nAvailable voices: \n" + response.text)
else:
print("\nStatus code: " + str(response.status_code) + "\nSomething went wrong. Check your subscription key and headers.\n")
if __name__ == "__main__":
region = '<your region here , in your case , the value should be eastus>'
subscription_key = '<your subscription key here>'
app = TextToSpeech(subscription_key,region)
app.get_token()
app.save_audio()
You can find your region and subscription key value on Azure portal here :
I have tested on my side and it works for me .Once you go through the codes , a wav file will be created, that is the thing you need :

How to extend the scraping done to more than 1st page using python

Hi I was going through a Python code (pasted below). The code is working fine for scraping the 1st page results (25 listing per page). However, I would want to extend its usability to scrape results from atleast 10 more pages
For example I want to generate results for zip code - 98021 which has 80 listings in total (until page 4). However, when I run the code below using python zillow.py 980021 newest, it displays only 25 listings
Since I am a newbie in python I request you to please help me out with this objective.
from lxml import html
import requests
import unicodecsv as csv
import argparse
def parse(zipcode,filter=None):
if filter=="newest":
url = "https://www.zillow.com/homes/for_sale/{0}/0_singlestory/days_sort".format(zipcode)
elif filter == "cheapest":
url = "https://www.zillow.com/homes/for_sale/{0}/0_singlestory/pricea_sort/".format(zipcode)
else:
url = "https://www.zillow.com/homes/for_sale/{0}_rb/?fromHomePage=true&shouldFireSellPageImplicitClaimGA=false&fromHomePageTab=buy".format(zipcode)
for i in range(10):
# try:
headers= {
'accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'accept-encoding':'gzip, deflate, sdch, br',
'accept-language':'en-GB,en;q=0.8,en-US;q=0.6,ml;q=0.4',
'cache-control':'max-age=0',
'upgrade-insecure-requests':'1',
'user-agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
}
response = requests.get(url,headers=headers)
print(response.status_code)
parser = html.fromstring(response.text)
search_results = parser.xpath("//div[#id='search-results']//article")
properties_list = []
for properties in search_results:
raw_address = properties.xpath(".//span[#itemprop='address']//span[#itemprop='streetAddress']//text()")
raw_city = properties.xpath(".//span[#itemprop='address']//span[#itemprop='addressLocality']//text()")
raw_state= properties.xpath(".//span[#itemprop='address']//span[#itemprop='addressRegion']//text()")
raw_postal_code= properties.xpath(".//span[#itemprop='address']//span[#itemprop='postalCode']//text()")
raw_price = properties.xpath(".//span[#class='zsg-photo-card-price']//text()")
raw_info = properties.xpath(".//span[#class='zsg-photo-card-info']//text()")
raw_broker_name = properties.xpath(".//span[#class='zsg-photo-card-broker-name']//text()")
url = properties.xpath(".//a[contains(#class,'overlay-link')]/#href")
raw_title = properties.xpath(".//h4//text()")
address = ' '.join(' '.join(raw_address).split()) if raw_address else None
city = ''.join(raw_city).strip() if raw_city else None
state = ''.join(raw_state).strip() if raw_state else None
postal_code = ''.join(raw_postal_code).strip() if raw_postal_code else None
price = ''.join(raw_price).strip() if raw_price else None
info = ' '.join(' '.join(raw_info).split()).replace(u"\xb7",',')
broker = ''.join(raw_broker_name).strip() if raw_broker_name else None
title = ''.join(raw_title) if raw_title else None
property_url = "https://www.zillow.com"+url[0] if url else None
is_forsale = properties.xpath('.//span[#class="zsg-icon-for-sale"]')
properties = {
'address':address,
'city':city,
'state':state,
'postal_code':postal_code,
'price':price,
'facts and features':info,
'real estate provider':broker,
'url':property_url,
'title':title
}
if is_forsale:
properties_list.append(properties)
return properties_list
# except:
# print ("Failed to process the page",url)
if __name__=="__main__":
argparser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
argparser.add_argument('zipcode',help = '')
sortorder_help = """
available sort orders are :
newest : Latest property details,
cheapest : Properties with cheapest price
"""
argparser.add_argument('sort',nargs='?',help = sortorder_help,default ='Homes For You')
args = argparser.parse_args()
zipcode = args.zipcode
sort = args.sort
print ("Fetching data for %s"%(zipcode))
scraped_data = parse(zipcode,sort)
print ("Writing data to output file")
with open("properties-%s.csv"%(zipcode),'wb')as csvfile:
fieldnames = ['title','address','city','state','postal_code','price','facts and features','real estate provider','url']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for row in scraped_data:
writer.writerow(row)
You need to grab the link to the next page from the current page and then update the url that you use to scrape from.
Here's a rough example of how that could work:
def parse(zipcode, url, filter=None):
# get results how you are
# get url from next page button
return results, next_page_url
full_results = []
results, next_page_url = parse(zipcode, initial_page_url, filter=filter)
full_results += results
while (len(results) >= 25 and next_page_url):
results, next_page_url = parse(zipcode, next_page_url, filter=filter)
full_results += results
So in this example parse takes the url to scrape from as a second positional argument and returns the results and the url for the next page to scrape.
This will just keep scraping as long as there are the max results (25) on the page and a url to the next page is returned.

Categories

Resources