Trying to Scrape Instagram Post Data from a .csv of Links - For Master's Thesis - Python

I am trying to scrape Instagram post data (number of likes, caption, hashtags, mentions, and number of comments) from a collection of links in a .csv for data analysis to put towards my Master's thesis. However, I am coming across an error where the XPath or element cannot be found. Here is the error message:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button"}
Here is the code block I have written using Selenium:
def scrape_post_data():
    influencerpostsdata = []
    # Specify the path to chromedriver.exe
    chromedriver_path = r"C:\Users\stuar\Instagram Scraper\ChromeDrivers\chromedriver.exe"
    driver = webdriver.Chrome(executable_path=chromedriver_path)
    time.sleep(2)
    # Open the webpage
    url = "https://www.instagram.com"
    driver.get(url)
    time.sleep(3)
    # Alert number 1
    time.sleep(5)
    alert = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Accept All")]'))).click()
    # Target Username Entry
    username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
    password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
    # Enter Username and Password
    login_username = str(enter_loginusername_entry.get())
    login_password = str(enter_password_entry.get())
    username.clear()
    username.send_keys(login_username)
    password.clear()
    password.send_keys(login_password)
    button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
    # Alert number 2
    time.sleep(5)
    alert2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
    # Alert number 3
    time.sleep(5)
    alert3 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
    with open(r"C:\Users\stuar\Instagram Scraper\SourceCode\influencerpostlinks1.csv", 'r') as csv_file:
        csv_reading = csv.reader(csv_file)
        for line in csv_reading:
            links = line[1]
            try:
                Page = driver.get(links)
            except Exception as e:
                Page = None
            time.sleep(20)
            try:
                # This captures the standard like count.
                likes = driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button').text.split()[0]
                post_type = 'photo'
            except:
                # This captures the like count for videos, which is stored in a span instead.
                likes = driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/span').text.split()[0]
                post_type = 'video'
            age = driver.find_element_by_css_selector('a time').text
            comment = driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/div[1]/ul/div/li/div/div/div[2]/span').text
            hashtags = find_hashtags(comment)
            mentions = find_mentions(comment)
            post_details = {'link': url, 'type': post_type, 'likes/views': likes,
                            'age': age, 'comment': comment, 'hashtags': hashtags,
                            'mentions': mentions}
            time.sleep(10)
            # Turning data into a .csv file
            influencerpostsdata.append(post_details)
    df = pd.DataFrame(influencerpostsdata)
    print(df)
    df.to_csv('influencerpostsdata.csv')
    driver.close()

Not to worry, I have resolved the problem myself. Here is the working code:
with open(r"C:\\Users\\stuar\\Instagram Scraper\\SourceCode/influencerpostlinks1.csv",'r') as csv_file:
csv_reading = csv.reader(csv_file)
for line in csv_reading:
links = line[1]
try:
Page = driver.get(links)
except Exception as e:
Page = None
time.sleep(20)
try:
likes = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/div/a/span')
except Exception as e:
likes = None
try:
likes2 = likes.text
except Exception as e:
likes2 = None
time.sleep(20)
try:
age = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[2]/a/time')
except Exception as e:
age = None
try:
age2 = age.text
except Exception as e:
age2 = None
time.sleep(20)
try:
caption = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[1]/ul/div/li/div/div/div[2]/span')
except Exception as e:
caption = None
try:
caption2 = caption.text
except Exception as e:
caption2 = None
time.sleep(20)
try:
AccountName = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/header/div[2]/div[1]/div/span/a')
except Exception as e:
AccountName = None
try:
AccountName2 = AccountName.text
except Exception as e:
AccountName2 = None
time.sleep(20)
post_details = {'Username': AccountName2,'Caption': caption2, 'Likes/Views': likes2,
'Age': age2 }
#turning data into a .csv file
influencerpostsdata.append(post_details)
df = pd.DataFrame(influencerpostsdata)
print(df)
df.to_csv('influencerpostsdata.csv')
driver.close()
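As an aside, the four near-identical try/except pairs above can be collapsed into a small helper. This is just a sketch assuming the same driver and XPaths as in the working code; the name safe_text is mine, not part of the original:

def safe_text(driver, xpath):
    # Return the element's text, or None if the element is missing or unreadable.
    try:
        return driver.find_element_by_xpath(xpath).text
    except Exception:
        return None

likes2 = safe_text(driver, '/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/div/a/span')
age2 = safe_text(driver, '/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[2]/a/time')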

Related

How can I select the Suburb/City dropdown on Amazon using Selenium and Python?

I'm trying to make a bot to order a product on Amazon using Python and Selenium. I have it working so that it enters text input details such as name, but the way Amazon does dropdowns doesn't use normal select elements. I'm trying to click on the element which I believe to be the trigger with Selenium, but that just isn't working. If anyone has any ideas, that would be greatly appreciated.
The code is below; you will have to replace the details part so you can log in and test it.
from selenium import webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.support.ui as ui
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
import logging

AMAZON_PRODUCT_LINK = "https://www.amazon.com.au/PlayStation-5-Console/dp/B08HHV8945/ref=sr_1_2?crid=1ZDXB7YRF1RXT&keywords=playstation+5&qid=1648454510&rnid=5367991051&s=videogames&sprefix=playstatio%2Caps%2C303&sr=1-2"
TEST_LINK = "https://www.amazon.com.au/PlayStation-4-500GB-Console-Black/dp/B0773RV962/ref=sr_1_3?crid=1O9CWY87HOD3T&keywords=playstation&qid=1648454640&s=videogames&sprefix=playstation+%2Cvideogames%2C238&sr=1-3"

# AMAZON XPATHS
UNAVAILABLE_XPATH = "/html/body/div[2]/div[2]/div[6]/div[4]/div[4]/div[19]/div[1]/span"
AVAILABLE_XPATH = "/html/body/div[2]/div[2]/div[6]/div[4]/div[1]/div[2]/div/div/div/div/div/div/form/div/div/div/div/div[3]/div/div[4]/div/div[1]/span"

# FORM INFO
BUY_BUTTON_XPATH = '//*[@id="buy-now-button"]'
EMAIL_INPUT_XPATH = '//*[@id="ap_email"]'
CONTINUE_BUTTON_XPATH = '//*[@id="continue"]'
PASSWORD_INPUT_XPATH = '//*[@id="ap_password"]'
SUBMIT_BUTTON_XPATH = '//*[@id="signInSubmit"]'

# BILLING INFO
FULL_NAME_XPATH = '//*[@id="address-ui-widgets-enterAddressFullName"]'
PHONE_XPATH = '//*[@id="address-ui-widgets-enterAddressPhoneNumber"]'
COUNTRY_XPATH = '//*[@id="address-ui-widgets-countryCode-dropdown-nativeId"]'
ADDRESS_1_XPATH = '//*[@id="address-ui-widgets-enterAddressLine1"]'
ADDRESS_2_XPATH = '//*[@id="address-ui-widgets-enterAddressLine2"]'
POST_CODE_XPATH = '//*[@id="address-ui-widgets-enterAddressPostalCode"]'
CITY_XPATH = '/html/body/div[5]/div[2]/div[3]/div[1]/div/div[1]/form/div/div[1]/div/div[14]/span/span/span/span'
CONTINUE_XPATH = '/html/body/div[5]/div[2]/div[2]/div[1]/div/div[1]/form/div/span/span/span/input'

# DETAILS
ACCOUNT_EMAIL = "EMAIL@email.com"
ACCOUNT_PASSWORD = "password123"
FULL_NAME = "FULL NAME"
PHONE_NUMBER = "PHONE"
COUNTRY = 'COUNTRY'
ADDRESS_1 = "ADDRESS 1"
ADDRESS_2 = "ADDRESS 2"
POST_CODE = "CODE"
CITY = 'SUBURB'

driver = webdriver.Firefox()
wait = ui.WebDriverWait(driver, 10)

try:
    #driver.get(AMAZON_PRODUCT_LINK)
    driver.get(TEST_LINK)
    try:
        try:
            print(driver.find_element(by=By.XPATH, value=UNAVAILABLE_XPATH).text)
        except:
            value = wait.until(EC.presence_of_element_located((By.XPATH, AVAILABLE_XPATH)))
            print(value.text)
            buy_button = wait.until(EC.presence_of_element_located((By.XPATH, BUY_BUTTON_XPATH)))
            buy_button.click()
            email_input = wait.until(EC.presence_of_element_located((By.XPATH, EMAIL_INPUT_XPATH)))
            email_input.send_keys(ACCOUNT_EMAIL)
            continue_button = wait.until(EC.presence_of_element_located((By.XPATH, CONTINUE_BUTTON_XPATH)))
            continue_button.click()
            password_input = wait.until(EC.presence_of_element_located((By.XPATH, PASSWORD_INPUT_XPATH)))
            password_input.send_keys(ACCOUNT_PASSWORD)
            continue_button = wait.until(EC.presence_of_element_located((By.XPATH, SUBMIT_BUTTON_XPATH)))
            continue_button.click()
            try:
                full_name = wait.until(EC.presence_of_element_located((By.XPATH, FULL_NAME_XPATH)))
                full_name.send_keys(FULL_NAME)
                phone = wait.until(EC.presence_of_element_located((By.XPATH, PHONE_XPATH)))
                phone.send_keys(PHONE_NUMBER)
                address_1 = wait.until(EC.presence_of_element_located((By.XPATH, ADDRESS_1_XPATH)))
                address_1.send_keys(ADDRESS_1)
                address_2 = wait.until(EC.presence_of_element_located((By.XPATH, ADDRESS_2_XPATH)))
                address_2.send_keys(ADDRESS_2)
                post_code = wait.until(EC.presence_of_element_located((By.XPATH, POST_CODE_XPATH)))
                post_code.clear()
                post_code.send_keys(POST_CODE)
                country = wait.until(EC.presence_of_element_located((By.XPATH, COUNTRY_XPATH)))
                country.click()
                city = wait.until(EC.presence_of_element_located((By.XPATH, CITY_XPATH)))
                #city.set_attribute('aria-pressed', 'true')
                city.click()
                city_chosen = wait.until(EC.presence_of_element_located((By.XPATH, CITY)))
                city_chosen.click()
                continue_button = wait.until(EC.presence_of_element_located((By.XPATH, CONTINUE_XPATH)))
                continue_button.click()
            except Exception as Argument:
                logging.exception("Something went wrong when logging in.")
    except Exception as Argument:
        logging.exception("Something went wrong when checking the availability of this product.")
except Exception as Argument:
    logging.exception("Something went wrong with the URL you provided.")
#driver.quit()
P.S. I don't want any comments about my use of try and except. Right now I just need something that's working.
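For the dropdown itself: Amazon renders it as a styled trigger plus a popover list rather than a native <select>, which is why Select won't help. One pattern that often works is to click the trigger, wait for the desired option to become clickable, and click it by its visible text. A sketch under that assumption (the option XPath is illustrative, so inspect the real popover to confirm the structure; note the option text is the suburb name, not an XPath like the CITY constant above):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def pick_dropdown_option(wait, trigger_xpath, option_text):
    # Open the custom dropdown, then click the option with matching visible text.
    trigger = wait.until(EC.element_to_be_clickable((By.XPATH, trigger_xpath)))
    trigger.click()
    option = wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//*[normalize-space(text())="{}"]'.format(option_text))))
    option.click()

# e.g. pick_dropdown_option(wait, CITY_XPATH, "Melbourne")

If a plain .click() on the option gets intercepted, driver.execute_script("arguments[0].click();", option) is a common fallback.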

LinkedIn bot Selenium Beautiful Soup not reading email in /overlay/contact-info

So whenever I run the following code, it writes 'no email found' in the emails.txt file. I've checked the classes in inspect and they're correct. Does anyone have any idea what the problem could be?
visitingProfileID = profilesQueued.pop()
visitedProfiles.append(visitingProfileID)
fullLink = 'https://www.linkedin.com' + visitingProfileID
linkoverlay = fullLink + '/overlay/contact-info/'
with open('visitedUsers.txt', 'a') as visitedUsersFile:
    visitedUsersFile.write(str(visitingProfileID) + '\n')
browser.get(linkoverlay)
soup2 = BeautifulSoup(browser.page_source)
with open('emails.txt', 'a') as visitedEmailFile:
    try:
        pava2 = soup2.find('section', {'class': 'pv-contact-info__contact-type ci-email'})
        sto = pava2.find('a', {'class': 'pv-contact-info__contact-link link-without-visited-state t-14'}).get('href')
        visitedEmailFile.write(str(sto) + '\n')
    except:
        visitedEmailFile.write('no email found \n')
I personally do not use Beautiful Soup, but you may need to experiment a bit more with XPaths. This works for me:
# driver is already on a LinkedIn profile
import time
import numpy as np
from selenium.common.exceptions import NoSuchElementException

contact_infos = []
mails = []
try:
    contact = driver.find_element_by_xpath(
        '//*[contains(@href,"contact-info")]'
    ).click()  # click on contact-info
except NoSuchElementException:
    contact_infos.append(np.nan)
    mails.append(np.nan)
else:
    contact_info = driver.find_element_by_xpath(
        '//*[contains(@class,"pv-contact-info")]')
    # save everything from contact info
    contact_infos.append(contact_info.text.split('\n'))
    print(contact_info.text.split('\n'))
    try:
        mail = driver.find_element_by_xpath('//*[contains(@class,"mail")]')
    except NoSuchElementException:
        mails.append(np.nan)
        # this closes the overlay window
        driver.find_element_by_xpath('//*[contains(@type,"cancel-icon")]').click()
        time.sleep(<rndm>)
    else:
        mail = [x.strip() for x in mail.text.split('\n')][1]
        mails.append(mail)
        print(mail)
        driver.find_element_by_xpath('//*[contains(@type,"cancel-icon")]').click()
        time.sleep(<rndm>)
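If you would rather stay with Beautiful Soup, one likely cause is that browser.page_source is read immediately after browser.get(linkoverlay), often before the contact-info overlay has rendered, so the section simply is not in the HTML yet. A minimal sketch of waiting for the overlay before parsing (the class name is reused from the question's own selector):

from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser.get(linkoverlay)
# Wait until the contact-info section is actually in the DOM before parsing.
WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'pv-contact-info__contact-type')))
soup2 = BeautifulSoup(browser.page_source, 'html.parser')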

Trying to close popover - python - selenium - Glassdoor

Trying to close a popover while scraping Glassdoor for jobs. It keeps popping up from time to time, so I need to close it every time. I've tried quite a few things. Please help!
I tried closing it by looking for the close button:
driver.find_element_by_class_name("SVG_Inline modal_closeIcon").click()
I also tried catching an ElementClickInterceptedException wherever the bot couldn't click on the next company, and everywhere else there was a click:
element = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CLASS_NAME, "SVG_Inline-svg modal_closeIcon-svg")))
element.click()
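One likely reason both attempts fail: "SVG_Inline modal_closeIcon" is two class names, and By.CLASS_NAME (like find_element_by_class_name) only accepts a single class, so the lookup never matches. A CSS selector that requires both classes usually works; a sketch assuming the class names in the screenshot are current:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# ".SVG_Inline.modal_closeIcon" matches an element carrying both classes.
close_btn = WebDriverWait(driver, 3).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, ".SVG_Inline.modal_closeIcon")))
close_btn.click()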
This is the website:
https://www.glassdoor.co.uk/Job/web-developer-jobs-SRCH_KO0,13.htm
This is the complete code:
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, StaleElementReferenceException
from selenium import webdriver
import time
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def get_jobs(keyword, num_jobs, verbose, place):
    '''Gathers jobs as a dataframe, scraped from Glassdoor'''

    # Initializing the webdriver
    options = webdriver.ChromeOptions()
    # Uncomment the line below if you'd like to scrape without a new Chrome window every time.
    #options.add_argument('headless')
    # Change the path to where chromedriver is in your home folder.
    #driver = webdriver.Chrome(executable_path="/Users/omersakarya/Documents/GitHub/scraping-glassdoor-selenium/chromedriver", options=options)
    driver = webdriver.Chrome()
    driver.set_window_size(1120, 1000)

    url = "https://www.glassdoor.co.uk/Job/web-developer-jobs-SRCH_KO0,13.htm"
    driver.get(url)
    jobs = []
    time.sleep(3)
    driver.find_element_by_id("onetrust-accept-btn-handler").click()
    time.sleep(3)

    while len(jobs) < num_jobs:  # If true, should be still looking for new jobs.
        job_buttons = driver.find_elements_by_class_name("jl")  # jl for Job Listing. These are the buttons we're going to click.
        try:
            for job_button in job_buttons:
                if len(jobs) >= num_jobs:
                    break
                print("Progress: {}".format("" + str(len(jobs)) + "/" + str(num_jobs)))
                job_button.click()
                collected_successfully = False
                while not collected_successfully:
                    try:
                        company_name = driver.find_element_by_xpath('.//div[@class="employerName"]').text
                        location = driver.find_element_by_xpath('.//div[@class="location"]').text
                        job_title = driver.find_element_by_xpath('.//div[contains(@class, "title")]').text
                        job_description = driver.find_element_by_xpath('.//div[@class="jobDescriptionContent desc"]').text
                        collected_successfully = True
                    except:
                        time.sleep(5)
                try:
                    #salary_estimate = driver.find_element_by_xpath('.//span[@class="gray salary"]').text
                    salary_estimate = driver.find_element_by_xpath('//*[@id="HeroHeaderModule"]/div[3]/div[1]/div[4]/span').text
                except NoSuchElementException:
                    salary_estimate = -1  # You need to set a "not found" value. It's important.
                try:
                    rating = driver.find_element_by_xpath('.//span[@class="rating"]').text
                except NoSuchElementException:
                    rating = -1  # You need to set a "not found" value. It's important.

                # Printing for debugging
                if verbose:
                    print("Job Title: {}".format(job_title))
                    print("Salary Estimate: {}".format(salary_estimate))
                    print("Job Description: {}".format(job_description[:500]))
                    print("Rating: {}".format(rating))
                    print("Company Name: {}".format(company_name))
                    print("Location: {}".format(location))

                # Going to the Company tab...
                # clicking on this:
                # <div class="tab" data-tab-type="overview"><span>Company</span></div>
                try:
                    driver.find_element_by_xpath('.//div[@class="tab" and @data-tab-type="overview"]').click()
                    try:
                        # <div class="infoEntity">
                        #   <label>Headquarters</label>
                        #   <span class="value">San Francisco, CA</span>
                        # </div>
                        headquarters = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Headquarters"]//following-sibling::*').text
                    except NoSuchElementException:
                        headquarters = -1
                    try:
                        size = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Size"]//following-sibling::*').text
                    except NoSuchElementException:
                        size = -1
                    try:
                        founded = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Founded"]//following-sibling::*').text
                    except (NoSuchElementException, StaleElementReferenceException):
                        founded = -1
                    try:
                        type_of_ownership = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Type"]//following-sibling::*').text
                    except NoSuchElementException:
                        type_of_ownership = -1
                    try:
                        industry = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Industry"]//following-sibling::*').text
                    except NoSuchElementException:
                        industry = -1
                    try:
                        sector = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Sector"]//following-sibling::*').text
                    except NoSuchElementException:
                        sector = -1
                    try:
                        revenue = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Revenue"]//following-sibling::*').text
                    except NoSuchElementException:
                        revenue = -1
                    try:
                        competitors = driver.find_element_by_xpath('.//div[@class="infoEntity"]//label[text()="Competitors"]//following-sibling::*').text
                    except NoSuchElementException:
                        competitors = -1
                except (NoSuchElementException, ElementClickInterceptedException, StaleElementReferenceException):  # Rarely, some job postings do not have the "Company" tab.
                    if NoSuchElementException:
                        time.sleep(1)
                        headquarters = -1
                        size = -1
                        founded = -1
                        type_of_ownership = -1
                        industry = -1
                        sector = -1
                        revenue = -1
                        competitors = -1
                    else:
                        driver.find_element_by_class_name("selected").click()
                        driver.find_element_by_class_name("SVG_Inline modal_closeIcon").click()
                        element = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CLASS_NAME, "SVG_Inline-svg modal_closeIcon-svg")))
                        element.click()
                        pass

                if verbose:
                    print("Headquarters: {}".format(headquarters))
                    print("Size: {}".format(size))
                    print("Founded: {}".format(founded))
                    print("Type of Ownership: {}".format(type_of_ownership))
                    print("Industry: {}".format(industry))
                    print("Sector: {}".format(sector))
                    print("Revenue: {}".format(revenue))
                    print("Competitors: {}".format(competitors))
                    print("####################################################")

                jobs.append({"Job Title": job_title,
                             "Salary Estimate": salary_estimate,
                             "Job Description": job_description,
                             "Rating": rating,
                             "Company Name": company_name,
                             "Location": location,
                             "Headquarters": headquarters,
                             "Size": size,
                             "Founded": founded,
                             "Type of ownership": type_of_ownership,
                             "Industry": industry,
                             "Sector": sector,
                             "Revenue": revenue,
                             "Competitors": competitors})
                #You might
                #time.sleep(0.5)
        except (ElementClickInterceptedException, StaleElementReferenceException):
            alertObj = driver.switch_to.alert
            alertObj.accept()
            alertObj.dismiss()
            driver.find_element_by_class_name("selected").click()
            driver.find_element_by_class_name("SVG_Inline modal_closeIcon").click()
            element = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CLASS_NAME, "SVG_Inline-svg modal_closeIcon-svg")))
            element.click()
            pass
        #add job to jobs

        # Clicking on the "next page" button
        # try:
        #     driver.find_element_by_xpath('.//li[@class="page"]//a').click()
        # except NoSuchElementException:
        #     print("Scraping terminated before reaching target number of jobs. Needed {}, got {}.".format(num_jobs, len(jobs)))
        #     break
        # time.sleep(5)
        try:
            driver.find_element_by_xpath('.//li[@class="next"]//a').click()
        except ElementClickInterceptedException:
            #print("Scraping terminated before reaching target number of jobs. Needed {}, got {}.".format(num_jobs, len(jobs)))
            driver.find_element_by_class_name("selected").click()
            driver.find_element_by_class_name("SVG_Inline modal_closeIcon").click()
            element = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CLASS_NAME, "SVG_Inline-svg modal_closeIcon-svg")))
            element.click()
            element.text
            pass
        #break

    return pd.DataFrame(jobs)  # This line converts the list of dictionaries into a pandas DataFrame.

df = gs.get_jobs(keyword, num_jobs, False, place)
Trying to get rid of this:
[Screenshot: the popover close button that needs clicking so the loop can continue]

I can't export scraped data to CSV

I can't get all the data into the CSV, only the last record. When scraping finishes, only the last item scraped is saved to the CSV file, but I want to save the data from all pages.
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
mainurl = 'https://austin.craigslist.org/search/cta?s=0'
driver.get(mainurl)
res = driver.execute_script("return document.documentElement.outerHTML")
page_soup = BeautifulSoup(res, 'html.parser')
lnk_opener = driver.find_element_by_xpath('//*[@id="sortable-results"]/ul/li[1]/p/a').click()
time.sleep(4)
records = []
i = 1
while i < 3:
    i += 1
    try:
        print(driver.current_url)
    except Exception:
        print('Internet Error Detected')
    try:
        title = driver.find_element_by_xpath('//*[@id="titletextonly"]').text
        print(title)
    except Exception:
        print('No Title Given')
    try:
        price = driver.find_element_by_xpath('/html/body/section/section/h2/span/span[2]').text
        print(price)
    except Exception:
        print('No Price Given')
    try:
        phone = driver.find_element_by_xpath('//*[@id="postingbody"]/h2[1]/big').text
        print(phone)
        records.append((phone))
    except Exception:
        print('No Mobile number available')
    try:
        loc = driver.find_element_by_xpath('/html/body/section/section/section/div[1]/div/div[2]').text
        print(loc)
    except Exception:
        print('No Location Data Available')
    try:
        img = page_soup.find('img')
        immg = print(img.get('src', '\n'))
    except Exception:
        print('No img Found')
    nxtpg = driver.find_element_by_xpath('/html/body/section/section/header/div[1]/div/a[3]')
    nxtpg.click()
    time.sleep(4)
    url = driver.find_element_by_xpath("/html/body/section/section/header/div[1]/div/a[3]").get_attribute("href")
    if url is None:
        bckbtn = driver.find_element_by_class_name('backup').click()
        time.sleep(5)
        nextbuttton = driver.find_element_by_xpath('//*[@id="searchform"]/div[3]/div[3]/span[2]/a[3]').click()
        time.sleep(6)
print(records)
records.append((driver.current_url, title, price, loc, immg))
df = pd.DataFrame(records, columns=['Product Url', 'Title/Model/Make', 'Price', 'GM Location', 'Image Link'])
print(df)
df.to_csv('zzz.csv')
time.sleep(4)
driver.quit()
I think this line:
records.append((driver.current_url, title, price, loc, immg))
should be inside the while loop. Also, move i += 1 to the end of the loop body, otherwise you're skipping the first iteration.
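Concretely, a sketch of the corrected loop shape (structure only; the scraping of title, price, loc, and immg stays exactly as in the question):

records = []
i = 1
while i < 3:
    # ... scrape title, price, loc, immg for the current listing ...
    records.append((driver.current_url, title, price, loc, immg))
    i += 1  # increment last, so every iteration actually collects a record

df = pd.DataFrame(records, columns=['Product Url', 'Title/Model/Make', 'Price', 'GM Location', 'Image Link'])
df.to_csv('zzz.csv')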

StaleElementException while writing a text file in python using selenium webdriver

I am trying to scrape the reviews of a hotel from TripAdvisor and write them to a text file. So far the code is working well, except that every now and then it throws a StaleElementReferenceException on the line where I am writing the text file. Here is my code:
for num in range(page_count):
    try:
        if num != 0:
            try:
                nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a.nav.next.rndBtn.ui_button.primary.taLnk")))
                #nxt = driver.find_element_by_css_selector("a.nav.next.rndBtn.ui_button.primary.taLnk")
                nxt.click()
                driver.implicitly_wait(5)
            except NoSuchElementException:
                driver.refresh()
                #driver.implicitly_wait(5)
                nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a.nav.next.rndBtn.ui_button.primary.taLnk")))
                nxt.click()
                driver.implicitly_wait(5)
        try:
            more = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
            #more = driver.find_element_by_css_selector("span.taLnk.ulBlueLinks")
            more.click()
            time.sleep(1)
        except TimeoutException:
            print("There is no 'more' button on page %d" % (num+1))
        except WebDriverException:
            nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
            nxt.click()
            driver.implicitly_wait(5)
        review_result = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'entry')))
        with open('New_Review.txt', 'a') as fid:
            for review in review_result:
                fid.write(unidecode(review.text))
                fid.write(sep)
                fid.write(line_break)
        print("processing done till page number %d" % (num+1))
    except StaleElementReferenceException:
        driver.refresh()
        driver.implicitly_wait(5)
        try:
            more = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
            #more = driver.find_element_by_css_selector("span.taLnk.ulBlueLinks")
            more.click()
        except TimeoutException:
            print("There is no 'more' button on page %d" % (num+1))
        except WebDriverException:
            nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
            nxt.click()
            driver.implicitly_wait(5)
        review_result = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'entry')))
        #print(review_result[1].text)
        with open('New_Review.csv', 'a') as fid:
            writer = csv.writer(fid, delimiter=',', lineterminator='\n')
            for review in review_result:
                fid.write(unidecode(review.text))
                fid.write(sep)
                fid.write(line_break)
        print("processing done till page number %d" % (num+1))
Here is the error:
StaleElementReferenceException: stale element reference: element is not attached to the page document
The traceback gives this line:
fid.write(unidecode(review.text))
I have already tried to handle the exception, but it's not working for me, and I am having a hard time figuring out where exactly I am wrong. Any help is appreciated!
Try creating a helper method such as:
def get_text(locator):
    staled = True
    while staled:
        try:
            return WebDriverWait(driver, 10).until(EC.presence_of_element_located(locator)).text
        except StaleElementReferenceException:
            pass  # log something here, or limit the retries to a certain number
then change how you get the text:
review_result = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'entry')))
num_of_review = len(review_result)
with open('New_Review.txt', 'a') as fid:
    for index in range(1, num_of_review + 1):
        review_text = get_text((By.XPATH, "(//*[@class='entry'])[%d]" % index))
        fid.write(unidecode(review_text))
        fid.write(sep)
        fid.write(line_break)
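The idea behind this approach: the original loop holds WebElement references from a single lookup, and any re-render of the review list between that lookup and the .text call detaches them from the DOM. Re-locating each review by locator inside get_text means a stale reference only costs one retry instead of crashing the write loop.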
