I am developing a Nitro Type bot, and I don't see a reason I should be getting an error, but around 20 races in it jumps straight to sending me an email. Am I using the try/except/finally statements wrong? I am new to Python, so if it is a really simple mistake, or something I could easily have found on the web, I apologize.
try:
    time.sleep(4)
    driver.get('https://www.nitrotype.com/garage')
    driver.implicitly_wait(20)
    driver.find_element_by_css_selector('a.btn--light:nth-child(2)').click()
    time.sleep(5)
    driver.find_element_by_css_selector('button.btn--primary').click()
    driver.implicitly_wait(10)
    driver.find_element_by_css_selector('.dash-copyContainer')
    time.sleep(4.25)
    html = driver.page_source.replace(' ', ' ')
    f = open("word.html", "w")
    f.write(html)
    f.close()
    with open("word.html", "r") as html_file:
        content = html_file.read()
        soup = BeautifulSoup(content, 'lxml')
    words = soup.find_all('span', class_='dash-letter')
    stuff = ""
    for span in words:
        if span.text.isascii():
            stuff += span.text
    with open("Sentence.txt", "w") as wf:
        wf.write(stuff)
        wf.close()
    e = open('Sentence.txt', 'r')
    s = e.read()
    Words = (s.split())
    Delay = ((len(s.split()) / WPM) * 60)
    int(Delay)
    Delay1 = Delay / len(s.split())
    for Word in Words:
        pyautogui.typewrite(Word + " ")
        time.sleep(Delay1)
    time.sleep(2)
    driver.get('https://www.nitrotype.com/garage')
except:
    time.sleep(4)
    driver.get('https://www.nitrotype.com/garage')
    driver.implicitly_wait(20)
    driver.find_element_by_css_selector('a.btn--light:nth-child(2)').click()
    time.sleep(5)
    driver.find_element_by_css_selector('button.btn--primary').click()
    driver.implicitly_wait(10)
    driver.find_element_by_css_selector('.dash-copyContainer')
    time.sleep(4.25)
    html = driver.page_source.replace(' ', ' ')
    f = open("word.html", "w")
    f.write(html)
    f.close()
    with open("word.html", "r") as html_file:
        content = html_file.read()
        soup = BeautifulSoup(content, 'lxml')
    words = soup.find_all('span', class_='dash-letter')
    stuff = ""
    for span in words:
        if span.text.isascii():
            stuff += span.text
    with open("Sentence.txt", "w") as wf:
        wf.write(stuff)
        wf.close()
    e = open('Sentence.txt', 'r')
    s = e.read()
    Words = (s.split())
    Delay = ((len(s.split()) / WPM) * 60)
    int(Delay)
    Delay1 = Delay / len(s.split())
    for Word in Words:
        pyautogui.typewrite(Word + " ")
        time.sleep(Delay1)
    time.sleep(2)
    driver.get('https://www.nitrotype.com/garage')
finally:
    driver1 = webdriver.Chrome(executable_path='/Users/Braeden/Downloads/chromedriver.exe')
    driver1.get('https://accounts.google.com/ServiceLogin/signinchooser?service=mail&passive=true&rm=false&continue=https%3A%2F%2Fmail.google.com%2Fmail%2F&ss=1&scc=1&ltmpl=default&ltmplcache=2&emr=1&osid=1&flowName=GlifWebSignIn&flowEntry=ServiceLogin')
    time.sleep(2)
    driver1.find_element_by_css_selector('#identifierId').send_keys(EU)
    time.sleep(2)
    driver1.find_element_by_css_selector('.VfPpkd-vQzf8d').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('<div class="VfPpkd-RLmnJb"></div>').send_keys(EP)
    time.sleep(1)
    driver1.find_element_by_css_selector('.VfPpkd-LgbsSe-OWXEXe-k8QpJ > span:nth-child(2)').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('.VfPpkd-LgbsSe-OWXEXe-k8QpJ > div:nth-child(1)').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('.T-I-KE').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('#\:c1').send_keys(TO)
    driver1.find_element_by_css_selector('#\:co').send_keys('Nitro type requires Captcha')
    driver1.find_element_by_css_selector('#\:b9').click()
    driver1.close()
input('Did you complete the captcha:')
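One way to see why it jumps to the email branch is to log the exception instead of swallowing it with a bare except:. A minimal sketch, where run_race is a hypothetical helper standing in for the race steps above (not part of the original script):

import traceback

try:
    run_race(driver)          # hypothetical helper wrapping the garage/race steps above
except Exception as exc:      # name the exception instead of using a bare `except:`
    print("Race attempt failed:", exc)
    traceback.print_exc()     # the stack trace points at the exact Selenium call that failed
    # only fall back to the email/captcha flow for errors you actually expect,
    # e.g. selenium.common.exceptions.NoSuchElementException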
I'm working on my tool, and I have this function:
import subprocess, os, platform, ctypes, requests, random, threading
from urllib.parse import unquote
from requests.utils import requote_uri
from bs4 import BeautifulSoup as bs

temptotal = 0
totalurl = 0
retry = 0
load = 0
load2 = 0
loaded = 0
dorksdone = 0
tempourl = 0

# Import proxy list
selecting = 1
while selecting == 1:
    try:
        option = int(input("Choose Type Proxy(1 = http, 2=socks4, 3 = socks5) :"))
    except:
        option = 404
    if option == 1:
        selecting = 0
        prox = 'http'
        proxyyyy = 'http'
    elif option == 2:
        selecting = 0
        prox = 'socks4'
        proxyyyy = 'socks4'
    elif option == 3:
        selecting = 0
        prox = 'socks5'
        proxyyyy = 'socks5'
    else:
        print("Choose a valid number such as 1, 2 or 3!")

proxy_list = input("Give me Proxylist :")
with open(proxy_list, mode="r", encoding="utf-8") as mf:
    for line in mf:
        load2 += 1
print(" ")
print("Total Proxy loaded :" + str(load2))
print(" ")

# Import keyword file
dorkslist = input("Give me KeywordList/Dorklist :" + bcolors.ENDC + " ")
with open(dorkslist, mode="r", encoding="utf-8") as mf:
    for line in mf:
        load += 1
    mf.close()
print(" ")
print("Total Dorks loaded:" + str(load))
print(" ")

# Define URLs to check
yahoourl = {"https://fr.search.yahoo.com/search?p=&fr=yfp-search-sb",
            "https://fr.search.yahoo.com/search?p=&fr=yfp-search-sb&b=11&pz=10"}

# Function I want to speed up
def checker():
    global temptotal
    global loaded
    global dorksdone
    global tempourl
    proxy = set()
    with open(proxy_list, "r") as f:
        file_lines1 = f.readlines()
        for line1 in file_lines1:
            proxy.add(line1.strip())
    with open(dorkslist, mode="r", encoding="utf-8") as my_file:
        for line in my_file:
            loaded += 1
            threading.Thread(target=titre).start()
            indorks = line
            encode = requote_uri(indorks)
            for yahoo in yahoourl:
                yahooo = yahoo.replace("&fr", encode + "&fr")
                try:
                    proxies = {
                        'http': prox + '://' + random.choice(list(proxy))
                    }
                    r = requests.get(yahooo, proxies=proxies)
                    print("Dorks used :" + indorks)
                    dorksdone += 1
                    soup = bs(r.text, 'html.parser')
                    links = soup.find_all('a')
                    for link in soup.find_all('a'):
                        a = link.get('href')
                        unquote(a)
                        temptotal += 1
                        with open("Bing.txt", mode="a", encoding="utf-8") as fullz:
                            fullz.write(a + "\n")
                            fullz.close()
                    lines_seen = set()  # holds lines already seen
                    outfile = open("Bingnodup.txt", "w", encoding="utf-8")
                    for line in open("Bing.txt", "r", encoding="utf-8"):
                        if line not in lines_seen:  # not a duplicate
                            outfile.write(line)
                            lines_seen.add(line)
                    outfile.close()
                    with open("Bingnodup.txt", mode="r", encoding="utf-8") as cool:
                        for url in cool:
                            try:
                                proxies = {
                                    'http': prox + '://' + random.choice(list(proxy))
                                }
                                response = requests.get(url, proxies=proxies)
                                save = response.url
                                with open("Bingtemp.txt", mode="a", encoding="utf-8") as cool1:
                                    cool1.write(save + "\n")
                                    tempourl += 1
                                    cool1.close()
                            except:
                                pass
                except:
                    raise
    fin()

# Start the bot
bot1 = threading.Thread(target=checker)
bot1.start()
bot1.join()
Example file for keywords:
python
wordpress
Example file for proxies (HTTP, so option 1 above):
46.4.96.137:8080
223.71.167.169:80
219.248.205.117:3128
198.24.171.34:8001
51.158.123.35:9999
But this function is very, very slow when it runs. Could someone let me know how I can speed it up?
I have tried to use this topic: How can I use threading in Python?
But I didn't understand how to build it into my function the right way.
Your script is what's called I/O bound. What this means is that it is not slow because the CPU needs to perform long computations, but because it needs to wait a lot every time it requests a URL (the bottleneck is the requests to the internet).
For concurrency you have 3 options:
asyncio
threading
multiprocessing
The first two are the ones which can help you in I/O bound problems like yours. The first one is the recommended approach in a problem like this, since there is a library available with support for async/await.
This is an adapted example from the above link, which does exactly what you need:
import asyncio
import os
import platform
import random
import time

import aiohttp


def get_proxies():
    if platform.system() == "Linux":
        clear = lambda: os.system('clear')
        clear()
    if platform.system() == "Windows":
        clear = lambda: os.system('cls')
        clear()
    proxy = set()
    with open("proxy.txt", "r") as f:
        file_lines1 = f.readlines()
        for line1 in file_lines1:
            proxy.add(line1.strip())
    return proxy


async def download_site(session, url, proxy):
    # aiohttp takes a single proxy URL via the `proxy` argument,
    # not a requests-style `proxies` dict
    async with session.get(url, proxy=proxy) as response:
        save = str(response.url)
        with open("Yahootemp.txt", mode="a", encoding="utf-8") as cool1:
            cool1.write(save + "\n")


async def download_all_sites(sites, proxy):
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in sites:
            task = asyncio.ensure_future(download_site(session, url, proxy))
            tasks.append(task)
        await asyncio.gather(*tasks, return_exceptions=True)


if __name__ == "__main__":
    proxies = get_proxies()
    proxy = prox + '://' + random.choice(list(proxies))  # prox comes from your proxy-type prompt
    sites = []
    with open("Yahoonodup.txt", mode="r", encoding="utf-8") as cool:
        for url in cool:
            sites.append(url.strip())
    asyncio.get_event_loop().run_until_complete(download_all_sites(sites, proxy))
You could make it even faster if saving the files seems to still be too slow; read this.
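For the file writes specifically, one option (a sketch assuming Python 3.9+ for asyncio.to_thread) is to push the blocking open/write onto a worker thread so the event loop can keep the other downloads going:

import asyncio
import aiohttp


def append_line(path, line):
    # plain blocking write, executed off the event loop
    with open(path, mode="a", encoding="utf-8") as f:
        f.write(line + "\n")


async def download_site(session, url, proxy):
    async with session.get(url, proxy=proxy) as response:
        # asyncio.to_thread (Python 3.9+) runs the blocking write in a worker
        # thread so other downloads can continue while the disk is busy
        await asyncio.to_thread(append_line, "Yahootemp.txt", str(response.url))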
I want to make real-time scrapes that run at different intervals. For example, if the last data I scraped has T = 1 it should loop once every 6 hours, if T = 2 it should loop once every hour, and if T = 3 it should loop once every minute.
But after thinking through the logic, I am confused about how to implement it.
I use T as the reference; here is an example of the data.
[1]: https://i.stack.imgur.com/H427J.png
Here is the code snippet that I made.
headers = ["Year", "Month", "Day", "Hour", "Minute", "Second", "T", "Height"]

page = requests.get('https://www.ndbc.noaa.gov/station_page.php?station=52406')
soup = BeautifulSoup(page.text, 'html.parser')
datas = []
dt = soup.find_all('textarea')[0].text
datas = dt.split('\n')[2:-1]

# read the scraped rows into an array and take field 6 (the T column)
arr = []
arr = np.array([datas])

def listToString(s):
    str1 = ""
    for ele in s:
        str1 += ele
    return str1

coba = []
for item_list in arr:
    item_string = listToString(item_list)
    coba.append(item_string.split()[6])
print(coba)

# -----------------------------------------------
# loop whose interval depends on the T value
while True:
    if coba[0] == 1:
        while True:
            page = requests.get('https://www.ndbc.noaa.gov/station_page.php?station=52406')
            soup = BeautifulSoup(page.text, 'html.parser')
            datas1 = []
            dt = soup.find_all('textarea')[0].text
            datas1 = dt.split('\n')[2:-1]
            with open("52406.csv", "w") as f:
                writer = csv.writer(f, lineterminator="\n")
                writer.writerow(headers)
                for line in soup.select_one("#data").text.split("\n"):
                    if re.fullmatch(r"[\d. ]{30}", line) and len(line.split()) == len(headers):
                        writer.writerow(line.split())
            print('Data 1')
            addDate()
            insertSQL()
            time.sleep(3600)
            break
    if coba[0] == 2:
        while True:
            page = requests.get('https://www.ndbc.noaa.gov/station_page.php?station=52406')
            soup = BeautifulSoup(page.text, 'html.parser')
            datas1 = []
            dt = soup.find_all('textarea')[0].text
            datas1 = dt.split('\n')[2:-1]
            with open("52406.csv", "w") as f:
                writer = csv.writer(f, lineterminator="\n")
                writer.writerow(headers)
                for line in soup.select_one("#data").text.split("\n"):
                    if re.fullmatch(r"[\d. ]{30}", line) and len(line.split()) == len(headers):
                        writer.writerow(line.split())
            print('Data 1')
            addDate()
            insertSQL()
            time.sleep(3600)
            break
    if coba[0] == 3:
        while True:
            page = requests.get('https://www.ndbc.noaa.gov/station_page.php?station=52406')
            soup = BeautifulSoup(page.text, 'html.parser')
            datas1 = []
            dt = soup.find_all('textarea')[0].text
            datas1 = dt.split('\n')[2:-1]
            with open("52406.csv", "w") as f:
                writer = csv.writer(f, lineterminator="\n")
                writer.writerow(headers)
                for line in soup.select_one("#data").text.split("\n"):
                    if re.fullmatch(r"[\d. ]{30}", line) and len(line.split()) == len(headers):
                        writer.writerow(line.split())
            print('Data 1')
            addDate()
            insertSQL()
            time.sleep(3600)
            break
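One way to collapse the three almost-identical branches into a single loop is to look the sleep interval up from T. This is only a sketch: scrape_station() is a hypothetical helper wrapping the requests/BeautifulSoup/CSV block above and returning the latest T, and addDate()/insertSQL() are the functions already used in the snippet:

import time

# seconds to wait per T value: T=1 -> 6 hours, T=2 -> 1 hour, T=3 -> 1 minute
INTERVALS = {"1": 6 * 3600, "2": 3600, "3": 60}

while True:
    t_value = scrape_station()   # hypothetical helper: scrape, write the CSV, return the latest T
    addDate()
    insertSQL()
    # the textarea gives strings, so the keys are "1"/"2"/"3", not the ints 1/2/3
    time.sleep(INTERVALS.get(t_value, 3600))  # fall back to hourly if T is unexpected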
I'm making a bot to play Nitro Type for me. I'm using pyautogui's typewrite() to type out the text, but it is way slower than it should be. Without a time.sleep() added it only types at 111 WPM, and when I add the delay, which is set to 0.019546153826607692, it gets 70. I'm pretty sure it is my fault, but I don't know what I did wrong; any help would be appreciated. Here is the code for that part of it.
for i in range(1):
    driver.get('https://www.nitrotype.com/race')
    time.sleep(5)
    html = driver.page_source.replace(' ', ' ')
    f = open("word.html", "w")
    f.write(html)
    f.close()
    with open("word.html", "r") as html_file:
        content = html_file.read()
        soup = BeautifulSoup(content, 'lxml')
    words = soup.find_all('span', class_='dash-letter')
    stuff = ""
    for span in words:
        if span.text.isascii():
            stuff += span.text
    with open("Sentence.txt", "w") as wf:
        wf.write(stuff)
        wf.close()
    e = open('Sentence.txt', 'r')
    s = e.read()
    Words = len(s.split())
    Characters = len(s)
    delay1 = (WPM * Characters) / Words
    delay2 = delay1 / 60
    delay3 = Characters / delay2
    Delay = (delay3 / Characters)
    print(Delay)
    for word in s:
        pyautogui.typewrite(word)
        time.sleep(Delay)
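For comparison, a small sketch (the WPM value here is just an example, not the one from the script): the usual typing-speed convention counts one word as five characters, spaces included, so the per-character delay for a target speed is 60 / (WPM * 5) seconds, and whatever time pyautogui spends generating the keystrokes comes on top of that, which is why the measured speed lands below the target.

import time
import pyautogui

WPM = 100                        # example target; the original script sets WPM elsewhere
per_char_delay = 60 / (WPM * 5)  # standard convention: 1 word = 5 characters (spaces included)

text = "the quick brown fox "
start = time.time()
pyautogui.typewrite(text, interval=per_char_delay)  # let typewrite space the keystrokes itself
elapsed = time.time() - start

# the measured value is below the target because each keystroke event takes time on top of
# `interval`, and pyautogui.PAUSE (0.1 s by default) is added after every pyautogui call
print("effective WPM:", (len(text) / 5) / (elapsed / 60))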
I have written a script that loops and prints the results, and I am trying to add saving to CSV, but I can't figure out how to do it.
I have code for saving that works in my other scripts, but here it either writes only one line or writes three lines (one for each loop).
How do I make it write all the results?
This is the code I am working with:
from selenium import webdriver
import time

browser = webdriver.Firefox(executable_path="/Users/**/Downloads/geckodriver")
browser.get('https://www.tripadvisor.co.uk/Restaurants-g186338-zfn29367-London_England.html#EATERY_OVERVIEW_BOX')

meci = browser.find_elements_by_class_name('listing')

filename = "scrape1.1.csv"
f = open(filename, 'w')
headers = "Title, URL, Rating\n "
f.write("")

while True:
    try:
        meci = browser.find_elements_by_class_name('listing')
        for items in meci:
            title_cont = items.find_element_by_class_name('property_title')
            title = title_cont.text
            href = title_cont.get_attribute('href')
            rating = items.find_element_by_class_name('ui_bubble_rating')
            ratingbubble = rating.get_attribute('alt').replace(' of 5 bubbles', '')
            print(title)
            print(href)
            print(ratingbubble)
        time.sleep(3)
        browser.find_element_by_css_selector('.next').click()
        time.sleep(3)
    except:
        break

f.write(title + "," + href + "," + ratingbubble + "\n")
f.close()
browser.quit()
Try this:
from selenium import webdriver
import time

browser = webdriver.Firefox(executable_path="C:/Py/pythonv4/gecko/geckodriver")
browser.get('https://www.tripadvisor.co.uk/Restaurants-g186338-zfn29367-London_England.html#EATERY_OVERVIEW_BOX')

meci = browser.find_elements_by_class_name('listing')

filename = "scrape1.1.csv"
f = open(filename, 'w')
headers = "Title, URL, Rating\n "
f.write("")

while True:
    try:
        meci = browser.find_elements_by_class_name('listing')
        for items in meci:
            title_cont = items.find_element_by_class_name('property_title')
            title = title_cont.text
            href = title_cont.get_attribute('href')
            rating = items.find_element_by_class_name('ui_bubble_rating')
            ratingbubble = rating.get_attribute('alt').replace(' of 5 bubbles', '')
            print(title)
            print(href)
            print(ratingbubble)
            f.write(title + "," + href + "," + ratingbubble + "\n")
        time.sleep(5)
        browser.find_element_by_css_selector('.next').click()
        time.sleep(1)
    except:
        break

f.close()
browser.quit()
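One caveat with hand-built rows: a restaurant title containing a comma will shift the CSV columns. A small sketch using the standard-library csv module (the example rows here are placeholders, not scraped data) quotes such fields automatically:

import csv

rows = [
    ("The Ivy, Covent Garden", "https://example.com/restaurant1", "4.5"),  # placeholder data
]

with open("scrape1.1.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Title", "URL", "Rating"])       # header row
    for title, href, ratingbubble in rows:            # in the script above, write inside the listing loop
        writer.writerow([title, href, ratingbubble])  # csv quotes fields that contain commas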
I am learning web scraping, but while formatting the scraped data I ran into a problem: my two variables, first_line and second_line, both show the same value, and that value comes from second_line.
Inside the else, when I print first_line I get the expected result, but outside the if/else, first_line shows the value copied from second_line.
while current_page < 201:
    print(current_page)
    url = base_url + loc + "&start=" + str(current_page)
    yelp_r = requests.get(url)
    yelp_soup = BeautifulSoup(yelp_r.text, 'html.parser')
    file_path = 'yelp-{loc}-2.txt'.format(loc=loc)
    with open(file_path, "a") as textfile:
        business = yelp_soup.findAll('div', {'class': 'biz-listing-large'})
        for biz in business:
            title = biz.findAll('a', {'class': 'biz-name'})[0].text
            print(title)
            second_line = ""
            first_line = ""
            try:
                address = biz.findAll('address')[0].contents
                for item in address:
                    if "br" in str(item):
                        second_line = second_line + item.getText()
                    else:
                        first_line = item.strip(" \n\t\r")
                        print(first_line)
                print(first_line)
                print(second_line)
            except:
                pass
            print('\n')
            try:
                phone = biz.findAll('span', {'class': 'biz-phone'})[0].text
            except:
                phone = None
            print(phone)
            page_line = "{title}\n{address_1}\n{address_2}\n{phone}".format(
                title=title,
                address_1=first_line,
                address_2=second_line,
                phone=phone
            )
            textfile.write(page_line)
    current_page += 10
If you call .get_text() on a node, it gives you the full text. You can then split on newline to get your first and second line:
first_line, second_line = biz.findAll('address')[0].get_text().split('\n')
But since you just print f'{first_line}\n{second_line}', why do you need them separate at all?
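One edge case worth guarding (a sketch with made-up markup, since the real Yelp HTML may differ): when an address has only one line, the split returns a single element, and unpacking it into two names raises ValueError, so pad the result instead:

from bs4 import BeautifulSoup

# assumed sample markup; the real listing keeps its two address lines
# separated by a newline in the page source
biz = BeautifulSoup("<address>12 Example Street</address>", "html.parser")

parts = biz.find("address").get_text().split("\n")
first_line = parts[0].strip()
second_line = parts[1].strip() if len(parts) > 1 else ""  # no crash for one-line addresses
print(first_line, "|", second_line)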