I am trying to create a Python script that completes the form on this page http://segas.gr/index.php/el/2015-best-athlete by selecting the radio button labelled "Γιώργος Μηλιαράς (ΣΑΚΑ) 800 μ./1,500 μ./3,000 μ", aka id="male_kids_3".
Here is my code:
import urllib
import urllib2
import webbrowser
url = "http://segas.gr/index.php/el/2015-best-athlete"
data = urllib.urlencode({'male_kids_3': 'checked'})
results = urllib2.urlopen(url, data)
with open("results.html", "w") as f:
    f.write(results.read())
webbrowser.open("results.html")
I found a solution using Selenium:
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
def malakia():
    # Get url
    browser.get("http://segas.gr/index.php/el/2015-best-athlete")
    miliaras = browser.find_element_by_id("male_kids_3")
    miliaras.click()
    validate = browser.find_element_by_name("input_submit_4")
    validate.click()

if __name__ == "__main__":  # Because we are bad-ass and we know Python
    # Let's make some magic
    times = int(input("How many times do you want to vote for G #babas??\n"))
    # create browser object
    browser = webdriver.Chrome()
    for i in range(times):
        malakia()
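One caveat if you run this today: Selenium 4 removed the find_element_by_* helpers, so on current versions the two lookups above become, roughly:

from selenium.webdriver.common.by import By

# Selenium 4 style for the same two elements
miliaras = browser.find_element(By.ID, "male_kids_3")
validate = browser.find_element(By.NAME, "input_submit_4")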
I was trying to automate a post to Facebook using Python Selenium, and it was 90% complete. The only issue is that the string I give is "test," but when Facebook posts, it just sends the first character of "test," which is "t."
This is the code:
#libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
from time import sleep
import pyautogui
# fetching hashtags
def hashtags(hash_idea):
    url = 'http://best-hashtags.com/hashtag/' + hash_idea
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        page = urlopen(req, timeout=10)
        page_html = page.read()
        page.close()
        page_soup = soup(page_html, 'html.parser')
        result = page_soup.find('div', {'class': 'tag-box tag-box-v3 margin-bottom-40'})
        tags = result.decode()
        start_index = tags.find('#')
        end_index = tags.find('</p1>')
        tags = tags[start_index:end_index]
        return tags
    except:
        print('Something went wrong while fetching hashtags')
def login(username, password):
    try:
        url = 'https://facebook.com'
        driver.get(url)
        user = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.NAME, 'email')))
        user.send_keys(username)
        pas = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.NAME, 'pass')))
        pas.send_keys(password)
        login_btn = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.NAME, 'login')))
        login_btn.click()
    except:
        print('Something went wrong during the login process')
def upload(img_path, caption):
    try:
        btn1 = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[3]/div/div[2]/div/div/div/div[1]/div/div[1]')))
        btn1.click()
        btn2 = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[1]/div[2]/div/div[1]/div/span/div/div/div[1]/div/div/div[1]/i')))
        btn2.click()
        btn3 = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/div/div/div[1]/div/i')))
        btn3.click()
        pyautogui.write(img_path)
        pyautogui.press('enter')
        cap = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/div/div/div[1]')))
        cap.send_keys(caption)
        sleep(5)  # this is mandatory when driving the page with a bot
        btn_post = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[2]/div/div/div[1]/div')))
        btn_post.click()
    except:
        print('Something went wrong while posting the image or video')
if __name__== "__main__":
#turn for credentials, driver, and caption
username = input('username : ')
password = input('pass : ')
img_path = 'pic1.jpg'
hash_idea = 'covid'
caption = 'test' # if you want to
caption = caption + '\n' + hashtags(hash_idea)
driver = webdriver.Firefox(executable_path="C:/Users/Asus/Downloads/Compressed/geckodriver-v0.32.0-win64/geckodriver.exe")
login(username,password)
upload(img_path,caption)
I wanted to automate the post with the text I provided in the code.
You can try several alternatives:
1. In the definition of cap, replace presence_of_element_located with element_to_be_clickable.
2. Do what is in 1. and moreover add
cap = ...
cap.clear()
cap.click()
cap.send_keys(caption)
3. Do what is in 1. and moreover use ActionChains:
from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)
cap = ...
actions.move_to_element(cap)  # move the mouse to the middle of the element
actions.click()
actions.send_keys(caption).perform()
If none of these works, then you can always send one character at a time:
[cap.send_keys(c) for c in caption]
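Combining 1. and 2., the caption step inside upload would look roughly like this (a sketch; CAPTION_XPATH is a placeholder for the long XPath from the question):

from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# wait until the caption box is actually clickable, not merely present
cap = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.XPATH, CAPTION_XPATH))  # CAPTION_XPATH: the question's locator
)
cap.clear()  # drop any stray characters already in the box
cap.click()  # make sure the box has focus before typing
cap.send_keys(caption)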
I know questions very similar to mine have been asked many times, but I have just about reviewed them all and cannot solve my own code, so I'm hoping someone has the answer.
I'm trying to loop through csv downloads and append them together inside a user-defined function that calls a for loop. I've added print lines so that I can see where the function fails. The function prints 1, 2, 3 and then returns an empty dataframe df.
Why is the for loop getting skipped and df returned empty? When run outside of the user-defined function, it works perfectly.
Thanks in advance!
# LoadPackages
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import pandas as pd
# ================================================== download spp tie flows
# set directories
directory = r"E:/Data/sophie/line vision/sources/spp public/downloaded/"
driverPath = r"/Users/sophi/Downloads/chromedriver_win32/chromedriver.exe"
# define urls
page_url = "https://marketplace.spp.org/pages/historical-tie-flow"
prefix_download_url = ("https://marketplace.spp.org/file-browser-api/download/" +
"historical-tie-flow?path=%2F")
xpath = "//*[#id='main-content']/div/div/div[2]/div/div[3]/div/ul/li/a/span[#class='fname name' and contains(text(), '2021')]"
driver = webdriver.Chrome(ChromeDriverManager().install())
def download_and_append(page_url, prefix_download_url, xpath):
    driver.get(page_url)
    print(1)
    # create empty dataframe to append to
    df = pd.DataFrame()
    print(2)
    # retrieve data from page
    elements = driver.find_elements(By.XPATH, xpath)
    print(3)
    for element in elements:
        index = element.text.split()[0]
        print(index)
        data = pd.read_csv(prefix_download_url + index)
        print(4)
        # clean dataframe and concatenate to df
        df = pd.concat([df, data])
        print(5)
    return df
hourly_tie_flows_2021 = download_and_append(page_url, prefix_download_url, xpath)
hourly_tie_flows_2021
# ========================================================== export data
hourly_tie_flows_2021.to_csv(directory + "/spp_tie_flows_by_ba_2021.csv")
Short answer (add a sleep to let the JavaScript load completely), which took 205.64 seconds to complete:
from time import sleep
sleep(2)
elements = driver.find_elements(By.XPATH, xpath)
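If you'd rather not hard-code the delay, an explicit wait does the same job (a sketch; it returns as soon as at least one element matches):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# wait up to 20 s for the JavaScript-rendered file links to appear
elements = WebDriverWait(driver, 20).until(
    EC.presence_of_all_elements_located((By.XPATH, xpath))
)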
A longer answer (only use Selenium to get the URLs; use requests and concurrent.futures to download the files concurrently) took 35.08 seconds to complete:
import os
from concurrent.futures import ProcessPoolExecutor
from io import StringIO
from time import sleep
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
class Scrape:
    def __init__(self, **kwargs):
        self.year = kwargs.get("year")
        self.urls = self.get_urls()
        self.output_file_path = r"E:/Data/sophie/line vision/sources/spp public/downloaded/"
        self.driver_path = r"/Users/sophi/Downloads/chromedriver_win32/chromedriver.exe"

    def get_urls(self) -> list:
        root_url = "https://marketplace.spp.org"
        page_url = f"{root_url}/pages/historical-tie-flow"
        download_url = f"{root_url}/file-browser-api/download/historical-tie-flow?path="
        driver = webdriver.Chrome(ChromeDriverManager().install())
        driver.get(page_url)
        sleep(2)
        elements = driver.find_elements(By.XPATH, f"//*[contains(text(),'{self.year}')]")
        return [f"{download_url}{x.text}" for x in elements]

    def process_urls(self) -> None:
        with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
            (pd
             .concat(executor.map(self.download_data, self.urls))
             .sort_values("GMTTIME")
             .reset_index(drop=True)
             ).to_csv(f"{self.output_file_path}/spp_tie_flows_by_ba_{self.year}.csv")

    @staticmethod
    def download_data(url: str) -> pd.DataFrame:
        with requests.Session() as request:
            response = request.get(url)
            if response.status_code != 200:
                print(response.raise_for_status())
            return pd.read_csv(StringIO(response.text), sep=",")

if __name__ == "__main__":
    Scrape(year=2021).process_urls()
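A side note on the executor choice: these downloads are I/O-bound (waiting on HTTP), so a ThreadPoolExecutor would serve just as well and avoids pickling the Scrape instance across processes; only process_urls changes (a sketch under that assumption):

from concurrent.futures import ThreadPoolExecutor

# drop-in replacement for Scrape.process_urls: threads instead of processes,
# since the work is network waiting, not CPU
def process_urls(self) -> None:
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        (pd
         .concat(executor.map(self.download_data, self.urls))
         .sort_values("GMTTIME")
         .reset_index(drop=True)
         ).to_csv(f"{self.output_file_path}/spp_tie_flows_by_ba_{self.year}.csv")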
The code below retrieves all the data from the URL below, but I want to save the results to an output text file, like info.txt. Would you mind helping me?
url: https://www150.statcan.gc.ca/n1/pub/71-607-x/2021004/exp-eng.htm?r1=(1)&r2=0&r3=0&r4=12&r5=0&r7=0&r8=2022-02-01&r9=2022-02-01
from selenium import webdriver
import time
url = "https://www150.statcan.gc.ca/n1/pub/71-607-x/2021004/exp-eng.htm?r1=(1)&r2=0&r3=0&r4=12&r5=0&r7=0&r8=2022-02-01&r9=2022-02-01"
driver = webdriver.Chrome("C:\Program Files\Python310\chromedriver.exe")
driver.get(url)
table = driver.find_element_by_id('report_table')
body = table.find_element_by_tag_name('tbody')
cells = body.find_elements_by_tag_name('td')
for cell in cells:
    print(cell.text)
# run the loop 26 times
for i in range(26):
    # your code
    table = driver.find_element_by_id('report_table')
    body = table.find_element_by_tag_name('tbody')
    cells = body.find_elements_by_tag_name('td')
    for cell in cells:
        print(cell.text)
    # click on the Next button
    driver.find_element_by_xpath('//*[@id="report_results_next"]').click()
If you want to write the results from a variable (seems like cell.text in your example), we just need to open a file and write that data to it:
with open("info.txt", "w", encoding='utf-8') as f:
    for cell in cells:
        f.write(cell.text + "\n")  # newline so the cells don't run together
This assumes cell.text is a string, by the way.
This may help; you can read further about file handling here.
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

options = webdriver.ChromeOptions()
options.add_experimental_option("w3c", True)
f = open("datafile.txt", "a", encoding="utf-8")
url = "https://www150.statcan.gc.ca/n1/pub/71-607-x/2021004/exp-eng.htm?r1=(1)&r2=0&r3=0&r4=12&r5=0&r7=0&r8=2022-02-01&r9=2022-02-01"
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
driver.get(url)
table = driver.find_element_by_id('report_table')
body = table.find_element_by_tag_name('tbody')
cells = body.find_elements_by_tag_name('td')
for cell in cells:
    f.write(cell.text + "\n")  # newline so the cells don't run together
# run the loop 26 times
for i in range(26):
    table = driver.find_element_by_id('report_table')
    body = table.find_element_by_tag_name('tbody')
    cells = body.find_elements_by_tag_name('td')
    for cell in cells:
        f.write(cell.text + "\n")
    # click on the Next button
    driver.find_element_by_xpath('//*[@id="report_results_next"]').click()
f.close()
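As a small optional hardening (a sketch using the same locators as above): opening the file with a context manager guarantees it is closed even if one of the find_element calls throws:

with open("datafile.txt", "a", encoding="utf-8") as f:
    for _ in range(26):
        table = driver.find_element_by_id('report_table')
        for cell in table.find_elements_by_tag_name('td'):
            f.write(cell.text + "\n")
        driver.find_element_by_xpath('//*[@id="report_results_next"]').click()
# no f.close() needed: the with block closes the file on exit, error or not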
On a YouTube channel, I'm trying to get a list of the videos listed on the channel (i.e. link, title, views, etc.).
Yet, my code doesn't return any objects. Any help will be appreciated!
from bs4 import BeautifulSoup as bs
import requests
from selenium.webdriver import Chrome
import re
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
address = "https://www.youtube.com/channel/UCepWEz3BW6EMKA4CU-yGDMw/videos"
#driver = webdriver.Chrome('./chromedriver')
#driver.get(address)
#driver.maximize_window()
#body = driver.find_element_by_css_selector('body')
#for i in range(250):
# body.send_keys(Keys.PAGE_DOWN)
# time.sleep(1)
r = requests.get(address)
page = r.text
soup = bs(page, 'html.parser')
result = soup.find_all('div', attrs={"class": 'videoId'})
print(result)
requests only sees the initial HTML, and YouTube renders the video grid with JavaScript, so those videoId divs never appear in what you downloaded. Try it with a real browser instead:
from selenium import webdriver
url = "https://www.youtube.com/channel/UCepWEz3BW6EMKA4CU-yGDMw/videos"
browser = webdriver.Firefox()
browser.get(url)
datas = browser.find_elements_by_css_selector(".ytd-grid-renderer")
result = {"title":[], "link":[], "views":[]}
for data in datas:
try:
title = data.find_element_by_css_selector("#video-title").text
result["title"].append(title)
except:
result["title"].append("")
try:
link = data.find_element_by_css_selector("#video-title").get_attribute("href")
result["link"].append(link)
except:
result["link"].append("")
try:
views = data.find_element_by_css_selector("#metadata-line .ytd-grid-video-renderer:nth-child(1)").text
result["views"].append(views)
except:
result["views"].append("")
# print(result)
browser.close()
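If you need more than the first screenful of videos, the PAGE_DOWN loop from the question still applies before collecting datas (a sketch; the scroll count is arbitrary):

from selenium.webdriver.common.keys import Keys
import time

# the grid lazy-loads as you scroll, so page down a few times first
body = browser.find_element_by_css_selector("body")
for _ in range(10):
    body.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)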
The link is below:
https://www.doximity.com/sign_ups/9e016f85-d589-4cdf-8240-09c356d4434f/edit?sign_up[user_attributes][firstname]=Jian&sign_up[user_attributes][lastname]=Cui
I need to pull the occupation and its corresponding speciality.
But my code only manages to pull the occupations.
import requests, bs4
r = requests.get('https://www.doximity.com/sign_ups/9e016f85-d589-4cdf-8240-09c356d4434f/edit?sign_up[user_attributes][firstname]=Jian&sign_up[user_attributes][lastname]=Cui')
soup = bs4.BeautifulSoup(r.text, 'lxml')
spec = soup.find_all('select')
for sub in spec:
    print(sub.text)
Please give me some ideas.
Check the code below and let me know in case of any issues:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time
driver = webdriver.Chrome()
url = 'https://www.doximity.com/sign_ups/9e016f85-d589-4cdf-8240-09c356d4434f/edit?sign_up[user_attributes][firstname]=Jian&sign_up[user_attributes][lastname]=Cui'
driver.get(url)
spec = driver.find_element_by_id("sign_up_user_attributes_credential_id")
for sub in spec.find_elements_by_xpath('./option | ./optgroup/option'):
    if sub.get_attribute('value') != '':
        print(sub.text)
        selected_spec = Select(driver.find_element_by_id("sign_up_user_attributes_credential_id"))
        selected_spec.select_by_visible_text(sub.text)
        time.sleep(0.5)  # give the page's JavaScript time to populate the dependent specialty list
        occup = driver.find_element_by_xpath('//select[@id="sign_up_user_attributes_user_professional_detail_attributes_specialty_id"]')
        for oc in occup.find_elements_by_xpath('./option'):
            if oc.text != '' and oc.get_attribute('value') != '':
                print(oc.text)
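If you'd rather collect the pairs than print them, the same two loops can fill a dict (a sketch; results is a hypothetical container, everything else is from the code above):

results = {}  # hypothetical: occupation -> list of its specialties
for sub in spec.find_elements_by_xpath('./option | ./optgroup/option'):
    if sub.get_attribute('value') != '':
        Select(driver.find_element_by_id("sign_up_user_attributes_credential_id")).select_by_visible_text(sub.text)
        time.sleep(0.5)  # let the dependent specialty list refresh
        occup = driver.find_element_by_id("sign_up_user_attributes_user_professional_detail_attributes_specialty_id")
        results[sub.text] = [oc.text for oc in occup.find_elements_by_xpath('./option')
                             if oc.text != '' and oc.get_attribute('value') != '']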