AttributeError: 'unicode' object has no attribute 'sleep' - python

I am getting AttributeError: 'unicode' object has no attribute 'sleep', as in the title of this question, and I cannot figure out why it is thrown. This is an automation script that posts to Facebook automatically. If someone knows what's wrong with my code, please tell me. The code is here:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import traceback
import string
import xlrd

# taking inputs from user -----------------------------------
delay = input('Set Time: ')
limit = input('Set Post Limit: ')
valuee = limit
count = 0
start = 0
end = valuee
# ----------------------------------------------------------
# open file for taking multiple accounts -------------------
f = open('Accounts.csv')
data = f.readlines()
i = 0
for value in data:
    values = value.split(',')
    password = values[1]
    user_name = values[0]
    # condition for next URLs --------------------------
    if count >= 1:
        start = limit
        end = start + valuee
        limit = end
    count += 1
    # -----------------------------------------------
    chrome_options = webdriver.ChromeOptions()  # going to chrome options
    chrome_options.add_argument("--start-maximized")
    prefs = {
        "profile.default_content_setting_values.notifications": 2,  # turn off all notifications
        "profile.managed_default_content_settings.images": 2,       # disable images
    }
    chrome_options.add_experimental_option("prefs", prefs)
    driver = webdriver.Chrome(chrome_options=chrome_options)  # passing parameters to chrome
    # ----------------------------------------------------------------------------------------------
    try:
        driver.get('https://www.facebook.com/')
        time.sleep(2)
        # putting userName and password ---------------------------------------------------------
        driver.find_element_by_css_selector('#email').send_keys(user_name)
        time.sleep(1)
        driver.find_element_by_css_selector('#pass').send_keys(password, Keys.RETURN)
        time.sleep(2)
        driver.get('https://www.facebook.com/pages/?category=your_pages')
        driver.find_element_by_css_selector('._1vgt.ellipsis._349g a').click()
        pageUrl = driver.current_url
        raw_input('>>')
    except:
        traceback.print_exc()
        raw_input('Something Wrong..! please hit Enter >>\n')
        driver.quit()
        continue
    # file for report -----
    report = open('Report/' + user_name + '.csv', 'w')
    # reading the excel file --------------------------------------------------------------------
    workbook = xlrd.open_workbook('links.xlsx')
    sheet = workbook.sheet_by_index(0)
    # print commands:
    print '\n******************** Auto Posting Start ********************\n'
    # print 'Using Proxy:', proxy
    print 'Using Email:', user_name
    for row in range(start, end):
        driver.get(pageUrl)
        time.sleep(2)
        try:
            # title = value = sheet.cell(row, 0).value
            disc = value = sheet.cell(row, 0).value
            try:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % 5)
                time.sleep(2)
                try:
                    driver.find_element_by_css_selector('._1hib._4bl9').click()
                    time.sleep(4)
                except:
                    pass
                try:
                    driver.find_element_by_css_selector('.notranslate._5rpu').send_keys(disc)
                    time.sleep(3)
                except:
                    driver.find_element_by_css_selector('._4h98.navigationFocus').send_keys(disc)
                    time.sleep(3)
                driver.find_element_by_css_selector('._1mf7._4jy0._4jy3._4jy1._51sy.selected._42ft').click()
                time.sleep(6)
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(4)
                # get latest post url ---------------------------
                for elem in driver.find_elements_by_css_selector('._5pcq'):
                    try:
                        tim = elem.find_element_by_css_selector('._5ptz.timestamp.livetimestamp')
                        time = tim.text
                        if time == 'Just now':
                            print 'great'
                            href = elem.get_attribute('href')
                            print href
                            raw_input('>>')
                            break
                        else:
                            continue
                    except:
                        continue
                print 'Report Url:', href
                report.write(href + '\n')
            except:
                traceback.print_exc()
                pass
            time.sleep(delay)
        except:
            traceback.print_exc()
            print 'Urls Completed!'
            break
    report.close()
    driver.quit()
print 'Thanks For Using..'

This is because you overwrote time with a string in the lines below:

tim = elem.find_element_by_css_selector('._5ptz.timestamp.livetimestamp')
time = tim.text
if time == 'Just now':

So time is no longer the module; it is just a unicode string, which is why time.sleep(delay) later raises AttributeError: 'unicode' object has no attribute 'sleep'. You need to rename that variable to something else.
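A minimal sketch of the fix inside that loop, with an illustrative replacement name (posted_at is not from the original code):

for elem in driver.find_elements_by_css_selector('._5pcq'):
    tim = elem.find_element_by_css_selector('._5ptz.timestamp.livetimestamp')
    posted_at = tim.text  # was: time = tim.text, which shadowed the time module
    if posted_at == 'Just now':
        href = elem.get_attribute('href')
        break
# time.sleep(delay) further down still refers to the module and works again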

Related

Scraping Twitter followers using Selenium: scroll issue

I have written a script which can scrape followers' usernames. The issue is that I get all the usernames on the first attempt, but when I scroll the page using JavaScript, the page keeps going down instead of scrolling once, scraping the ids, and then scrolling again. I get data up to the 34th username, but after that it's just messed up. I am sharing the code here; you can use your own username and password to check what the issue is. If you copy-paste this code (entering your username and password in the empty strings), it will run on your PC completely fine.
import warnings
warnings.filterwarnings('ignore')
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from getpass import getpass
from time import sleep

chrome_options = Options()
driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=chrome_options)
driver.maximize_window()
website = 'https://twitter.com/i/flow/login'
driver.get(website)
print('website getting')
sleep(5)
username = driver.find_element_by_xpath('//input[@name="text"]')
username.send_keys('')
print('username running')
username.send_keys(Keys.RETURN)
sleep(3)
password = driver.find_element_by_xpath('//input[@name="password"]')
print('password running')
sleep(2)
password.send_keys('')
password.send_keys(Keys.RETURN)
website = 'https://twitter.com/MehroozW/followers'
driver.get(website)
print('got it')
import csv
data = []
tweet_ids = set()
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
count = 1
i = 1
while scrolling:
    for i in range(1, 190):
        try:
            follower_username = driver.find_element_by_xpath(f'//div[@data-testid="primaryColumn"]/div[1]/section/div[1]/div[1]/div[{count}]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/a/div[1]/div[1]/span').text
            print('index', count, follower_username)
            data.append(follower_username)
            count += 1
            sleep(1)
        except Exception:
            pass
    scroll_attempt = 0
    while True:
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        sleep(2)
        curr_position = driver.execute_script("return window.pageYOffset;")
        print('curr_position', curr_position, last_position, scroll_attempt, scrolling)
        if last_position == curr_position:
            scroll_attempt += 1
            # end of scroll region
            if scroll_attempt >= 3:
                scrolling = False
                break
            else:
                sleep(2)  # attempt another scroll
        else:
            last_position = curr_position
            break
data

How to go to next page until the last page in Python Selenium when scraping website?

The image (not reproduced here) showed the CSS selector and XPath for the pagination control.
I also wanted to apply a regex to separate Apple, iPhone 12, Neo Galactic Silver, printing each on a new line.
After finishing the product list of the current page, I want to be able to click next and perform the same procedure with the products on the next page.
This is the problem: when it reaches the 10 items of the current page, I have no idea how to change to another page and start all over again.
import xlwt
from selenium import webdriver
import re
import time

class cometmobiles:
    def __init__(self):
        self.url = 'https://www.mediaworld.it/catalogo/telefonia/smartphone-e-cellulari/smartphone'

    def comet(self):
        try:
            driver = webdriver.Chrome()
            driver.get(self.url)
            time.sleep(5)
            cookies = driver.find_element_by_id("onetrust-accept-btn-handler")
            cookies.click()
            print("accepted cookies")
            driver.maximize_window()
            print("window maximized")
            mylist = []
            hasNextPate = True
            while hasNextPate:
                containers = []
                containters = driver.find_elements_by_css_selector('article[class="product clearfix p-list-js"]')
                for container in containters:
                    # Title
                    try:
                        title = container.find_element_by_css_selector('h3[class="product-name"]').text
                        print(title)
                    except:
                        pass
                    # price
                    try:
                        price = container.find_element_by_css_selector('span[class="price mw-price enhanced"]').text
                        print(price)
                    except:
                        pass
                try:
                    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                    time.sleep(5)
                    nxt = driver.find_elements_by_css_selector('span[class="pages"] a')
                    time.sleep(5)
                    nxt.click()
                except:
                    break
        except:
            pass

comets = cometmobiles()
comets.comet()
Instead of this part:

try:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)
    nxt = driver.find_elements_by_css_selector('span[class="pages"] a')
    time.sleep(5)
    nxt.click()
except:
    break
You can use the following. Note also that if the page number doesn't exist, the website returns to the main page, so that is the moment to break:

try:
    x = 0
    while True:
        x += 1
        driver.get(url + "?pageNumber=" + str(x))  # get the next page
        if driver.current_url == url:  # if there is no next page, the site returns to the main page, so break here
            break
except:
    pass
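Put together, a minimal sketch of that approach, reusing the URL and selectors from the question (the sleep lengths are a guess at the page's load time):

import time
from selenium import webdriver

url = 'https://www.mediaworld.it/catalogo/telefonia/smartphone-e-cellulari/smartphone'
driver = webdriver.Chrome()
x = 0
while True:
    x += 1
    driver.get(url + "?pageNumber=" + str(x))  # request page x directly
    time.sleep(5)
    if x > 1 and driver.current_url == url:  # no such page: the site fell back to the main page
        break
    for container in driver.find_elements_by_css_selector('article[class="product clearfix p-list-js"]'):
        try:
            print(container.find_element_by_css_selector('h3[class="product-name"]').text)
        except Exception:
            pass
driver.quit()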

Scraping with Selenium: can't click on clickable text

I am trying to scrape some data from Yahoo Finance; for each stock, I want to get the historical data. Taking the Apple stock as an example, I go to https://finance.yahoo.com/quote/AAPL/history?p=AAPL and choose "MAX" from "Time Period".
I believe the script I wrote so far is getting the date element, but somehow clicking on it so that "MAX" can be chosen is not working.
Here is my whole script:
# using linux here
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException

project_path = os.getcwd()
driver_path = project_path + "/" + "chromedriver"
yahoo_finance = "https://finance.yahoo.com/quote/"
driver = webdriver.Chrome(driver_path)

def get_data(symbol='AAPL'):
    stock_history_link = yahoo_finance + symbol + '/history?p=' + symbol
    driver.get(stock_history_link)
    date_picker = '//div[contains(@class, "D(ib)") and contains(@class, "Pos(r)") and contains(@class, "Cur(p)")' \
                  ' and contains(@class, "O(n):f")]'
    try:
        print("I am inside")
        date_picker_2 = "//div[@class='Pos(r) D(ib) O(n):f Cur(p)']"
        date_picker_element = driver.find_element_by_xpath(date_picker_2)
        print("date_picker_element: ", date_picker_element)
        date_picker_element.click()
        try:
            print("I will be waiting for the date")
            my_dropdown = WebDriverWait(driver, 100).until(
                EC.presence_of_element_located((By.ID, 'dropdown-menu'))
            )
            print(my_dropdown)
            print("I am not waiting anymore")
        except TimeoutException as e:
            print("wait timed out")
            print(e)
    except WebDriverException:
        print("Something went wrong while trying to pick the max date")

if __name__ == '__main__':
    try:
        get_data()
    except:
        pass
    # finally:
    #     driver.quit()
To click the button with Max, just open the date picker and target the button:
driver.get("https://finance.yahoo.com/quote/AAPL/history?p=AAPL")
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//span[#class='C($linkColor) Fz(14px)']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[#data-value='MAX']"))).click()
Element:
<button class="Py(5px) W(45px) Fz(s) C($tertiaryColor) Cur(p) Bd Bdc($seperatorColor) Bgc($lv4BgColor) Bdc($linkColor):h Bdrs(3px)" data-value="MAX"><span>Max</span></button>
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
You have the wrong XPath for date_picker_2:
date_picker_2 = '//*[@id="Col1-1-HistoricalDataTable-Proxy"]/section/div[1]/div[1]/div[1]/div/div/div/span'
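For completeness, a short sketch of that locator dropped into the question's function (using the question's older find_element_by_xpath API):

date_picker_2 = '//*[@id="Col1-1-HistoricalDataTable-Proxy"]/section/div[1]/div[1]/div[1]/div/div/div/span'
date_picker_element = driver.find_element_by_xpath(date_picker_2)
date_picker_element.click()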
Using requests:
import requests
import datetime
end = int(datetime.datetime.strptime(datetime.date.today().isoformat(), "%Y-%m-%d").timestamp())
url = f"https://finance.yahoo.com/quote/AAPL/history?period1=345427200&period2={end}&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true"
requests.get(url)
Gets you to the same end page.

How to stop selenium scraper from redirecting to another internal weblink of the scraped website?

I was wondering if anyone knows of a way to instruct a Selenium script to avoid visiting/redirecting to an internal page that wasn't part of the code. Essentially, my code opens up this page:
https://cryptwerk.com/companies/?coins=1,6,11,2,3,8,17,7,13,4,25,29,24,32,9,38,15,30,43,42,41,12,40,44,20
and keeps clicking on the show-more button until there is none (at the end of the page), by which point it should have collected the links of all the products listed on the page it scrolled through, and then visit each one respectively.
What happens instead: it successfully clicks show more until the end of the page, but then it visits a weird promotion page of the same website instead of following each of the gathered links respectively and scraping further data points from each of those newly opened pages.
In a nutshell, I would greatly appreciate it if someone could explain how to avoid this automated redirection. Here is the code, in case someone can nudge me in the right direction :)
from selenium.webdriver import Chrome
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException
import json
import selenium.common.exceptions as exception
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

webdriver = '/Users/karimnabil/projects/selenium_js/chromedriver-1'
driver = Chrome(webdriver)
driver.implicitly_wait(5)
url = 'https://cryptwerk.com/companies/?coins=1,6,11,2,3,8,17,7,13,4,25,29,24,32,9,38,15,30,43,42,41,12,40,44,20'
driver.get(url)
links_list = []
coins_list = []
all_names = []
all_cryptos = []
all_links = []
all_twitter = []
all_locations = []
all_categories = []
all_categories2 = []
wait = WebDriverWait(driver, 2)
sign_in = driver.find_element_by_xpath("//li[@class='nav-item nav-guest']/a")
sign_in.click()
time.sleep(2)
user_name = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@name='login']")))
user_name.send_keys("karimnsaber95@gmail.com")
password = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@name='password']")))
password.send_keys("PleomaxCW@2")
signIn_Leave = driver.find_element_by_xpath("//div[@class='form-group text-center']/button")
signIn_Leave.click()
time.sleep(3)
while True:
    try:
        loadMoreButton = driver.find_element_by_xpath("//button[@class='btn btn-outline-primary']")
        time.sleep(2)
        loadMoreButton.click()
        time.sleep(2)
    except exception.StaleElementReferenceException:
        print('stale element')
        break
print('no more elements to show')
try:
    company_links = driver.find_elements_by_xpath("//div[@class='companies-list items-infinity']/div[position() > 3]/div[@class='media-body']/div[@class='title']/a")
    for link in company_links:
        links_list.append(link.get_attribute('href'))
except:
    pass
try:
    with open("links_list.json", "w") as f:
        json.dump(links_list, f)
    with open("links_list.json", "r") as f:
        links_list = json.load(f)
except:
    pass
try:
    for link in links_list:
        driver.get(link)
        name = driver.find_element_by_xpath("//div[@class='title']/h1").text
        try:
            show_more_coins = driver.find_element_by_xpath("//a[@data-original-title='Show more']")
            show_more_coins.click()
            time.sleep(1)
        except:
            pass
        try:
            categories = driver.find_elements_by_xpath("//div[contains(@class, 'categories-list')]/a")
            categories_list = []
            for category in categories:
                categories_list.append(category.text)
        except:
            pass
        try:
            top_page_categories = driver.find_elements_by_xpath("//ol[@class='breadcrumb']/li/a")
            top_page_categories_list = []
            for category in top_page_categories:
                top_page_categories_list.append(category.text)
        except:
            pass
        coins_links = driver.find_elements_by_xpath("//div[contains(@class, 'company-coins')]/a")
        all_coins = []
        for coin in coins_links:
            all_coins.append(coin.get_attribute('href'))
        try:
            location = driver.find_element_by_xpath("//div[@class='addresses mt-3']/div/div/div/div/a").text
        except:
            pass
        try:
            twitter = driver.find_element_by_xpath("//div[@class='links mt-2']/a[2]").get_attribute('href')
        except:
            pass
        try:
            print('-----------')
            print('Company name is: {}'.format(name))
            print('Potential Categories are: {}'.format(categories_list))
            print('Potential top page categories are: {}'.format(top_page_categories_list))
            print('Supporting Crypto is:{}'.format(all_coins))
            print('Registered location is: {}'.format(location))
            print('Company twitter profile is: {}'.format(twitter))
            time.sleep(1)
        except:
            pass
        all_names.append(name)
        all_categories.append(categories_list)
        all_categories2.append(top_page_categories_list)
        all_cryptos.append(all_coins)
        all_twitter.append(twitter)
        all_locations.append(location)
except:
    pass
df = pd.DataFrame(list(zip(all_names, all_categories, all_categories2, all_cryptos, all_twitter, all_locations)), columns=['Company name', 'Categories1', 'Categories2', 'Supporting Crypto', 'Twitter Handle', 'Registered Location'])
CryptoWerk_Data = df.to_csv('CryptoWerk4.csv', index=False)
Redirects happen for two reasons: in your case, either some JavaScript code is executed when you click the load-more button for the last time, or the server returns an HTTP 3xx code, which is the least likely here.
So you need to identify when this JavaScript code is executed and send an ESC key before the new page loads, then execute the rest of your script.
You could also scrape the links and append them to your list before clicking the load-more button, and each time it is clicked, check the URL of the page you are on: if it is that of the promotion page, execute the rest of your code; else, click load more.
while page_is_same:
    scrape_elements_add_to_list()
    click_load_more()
    current_link = verify_current_page_link()
    if current_link != link_of_scraped_page:
        page_is_same = False
        # rest of the code here
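A more concrete sketch of that second suggestion, assuming the promotion page lives at a different URL from the companies listing (selectors and variable names are reused from the question's code):

listing_url = url  # the companies listing URL opened at the start of the script
while True:
    # collect the links that are visible right now, before clicking again
    for a in driver.find_elements_by_xpath("//div[@class='companies-list items-infinity']//div[@class='title']/a"):
        href = a.get_attribute('href')
        if href not in links_list:
            links_list.append(href)
    # if the site has redirected away from the listing, go back and stop loading more
    if driver.current_url.split('?')[0] != listing_url.split('?')[0]:
        driver.get(listing_url)
        break
    try:
        driver.find_element_by_xpath("//button[@class='btn btn-outline-primary']").click()
        time.sleep(2)
    except Exception:
        break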

NameError: name 'attach_and_send_screenshot' is not defined in Python

I am trying to send an attachment through Selenium, using the self._attach_and_send_screenshot() function to automate it. After scanning the QR code and entering anything, I get:
Enter anything after scanning QR code
Traceback (most recent call last):
  File "wht.py", line 21, in <module>
    attach_and_send_screenshot()
NameError: name 'attach_and_send_screenshot' is not defined
import os
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException, ElementNotVisibleException
from urllib.parse import quote_plus

driver = webdriver.Chrome()
driver.get('https://web.whatsapp.com/')
all_names = ['Anas Cse']
msg = 'testing'
count = 1
input('Enter anything after scanning QR code')
for name in all_names:
    user = driver.find_element_by_xpath('//span[@title = "{}"]'.format(name))
    user.click()
    msg_box = driver.find_element_by_class_name('_2S1VP')
    for i in range(count):
        self._attach_and_send_screenshot()
        # msg_box.send_keys(msg)
        # button = driver.find_element_by_class_name('_2lkdt')
        # button.click()

def _attach_and_send_screenshot(self):
    # TODO - ElementNotVisibleException - this shouldn't happen, but when would it?
    # local variables for XPath elements on the browser
    attach_xpath = '//*[@id="main"]/header/div[3]/div/div[2]/div'
    send_file_xpath = '//*[@id="app"]/div/div/div[1]/div[2]/span/div/span/div/div/div[2]/span[2]/div/div'
    if self.attachment_type == "img":
        attach_type_xpath = '//*[@id="main"]/header/div[3]/div/div[2]/span/div/div/ul/li[1]/input'
    elif self.attachment_type == "cam":
        attach_type_xpath = '//*[@id="main"]/header/div[3]/div/div[2]/span/div/div/ul/li[2]/button'
    elif self.attachment_type == "doc":
        attach_type_xpath = '//*[@id="main"]/header/div[3]/div/div[2]/span/div/div/ul/li[3]/input'
    try:
        # open attach menu
        attach_btn = driver.find_element_by_xpath(attach_xpath)
        attach_btn.click()
        # find the attach-file button and send the screenshot path to the input
        time.sleep(1)
        attach_img_btn = driver.find_element_by_xpath(attach_type_xpath)
        # TODO - might need to click on transportation mode if url doesn't work
        attach_img_btn.send_keys(os.getcwd() + "/screenshot.png")  # current script path + img path
        time.sleep(1)
        send_btn = driver.find_element_by_xpath(send_file_xpath)
        send_btn.click()
        # close attach menu
        time.sleep(1)
        attach_btn = driver.find_element_by_xpath(attach_xpath)
        attach_btn.click()
    except (NoSuchElementException, ElementNotVisibleException) as e:
        print(str(e))
        send_message(str(e))
        send_message("Bot failed to retrieve search content, try again...")

def send_message(msg):
    whatsapp_msg = driver.find_element_by_class_name('_2S1VP')
    whatsapp_msg.send_keys(msg)
    whatsapp_msg.send_keys(Keys.ENTER)
Move the function definitions before your main logic. You are trying to call a function that has not been defined yet.
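A minimal sketch of the ordering (also note the stray self: _attach_and_send_screenshot is written like a class method, so either drop the self parameter as below or move the function into a class):

# define first...
def attach_and_send_screenshot():  # plain function, so no `self`
    print('attaching and sending screenshot...')

# ...then call it, after the definition has been executed
count = 1
for i in range(count):
    attach_and_send_screenshot()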
