I'm making a bot to play Nitro Type for me. I'm using PyAutoGUI's typewrite() to type out the text, but it is way slower than it should be. Without a time.sleep() added, it only types at 111 WPM, and when I add the delay, which comes out to 0.019546153826607692, it drops to 70 WPM. I'm pretty sure it is my fault, but I don't know what I did wrong; any help would be appreciated. Here is the code for that part of it:
for i in range(1):
    driver.get('https://www.nitrotype.com/race')
    time.sleep(5)
    html = driver.page_source.replace(' ', ' ')
    f = open("word.html", "w")
    f.write(html)
    f.close()
    with open("word.html", "r") as html_file:
        content = html_file.read()
        soup = BeautifulSoup(content, 'lxml')
        words = soup.find_all('span', class_='dash-letter')
        stuff = ""
        for span in words:
            if span.text.isascii():
                stuff += span.text
    with open("Sentence.txt", "w") as wf:
        wf.write(stuff)
        wf.close()
    e = open('Sentence.txt', 'r')
    s = e.read()
    Words = len(s.split())
    Characters = len(s)
    delay1 = (WPM * Characters) / Words
    delay2 = delay1 / 60
    delay3 = Characters / delay2
    Delay = (delay3 / Characters)
    print(Delay)
    for word in s:
        pyautogui.typewrite(word)
        time.sleep(Delay)
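For reference, the measured speed falls short because the sleep is added on top of the time each pyautogui call already takes: PyAutoGUI inserts a default pause (pyautogui.PAUSE, 0.1 s) after every call, which on its own caps a one-call-per-character loop at roughly 120 WPM. The usual typing convention also counts one "word" as 5 characters, so a target WPM maps to roughly 60 / (WPM * 5) seconds per character. A minimal sketch of that idea, with hypothetical values standing in for the WPM and text variables used above:

import pyautogui

pyautogui.PAUSE = 0          # drop the default 0.1 s pause added after every call

WPM = 100                    # hypothetical target speed
s = "the quick brown fox"    # hypothetical race text

per_char_delay = 60.0 / (WPM * 5)   # 5 characters per "word" by convention

# typewrite() accepts an interval between keystrokes, so no separate
# time.sleep() is needed on top of it.
pyautogui.typewrite(s, interval=per_char_delay)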
I am developing a Nitro Type bot and I don't see a reason I should be getting an error, but around 20 races in it goes directly to sending me an email. Am I using the try/except statements wrong? I am new to Python, so if it is a really simple, dumb mistake, please be nice, and if it is something I could easily have found on the web, I'm sorry.
try:
    time.sleep(4)
    driver.get('https://www.nitrotype.com/garage')
    driver.implicitly_wait(20)
    driver.find_element_by_css_selector('a.btn--light:nth-child(2)').click()
    time.sleep(5)
    driver.find_element_by_css_selector('button.btn--primary').click()
    driver.implicitly_wait(10)
    driver.find_element_by_css_selector('.dash-copyContainer')
    time.sleep(4.25)
    html = driver.page_source.replace(' ', ' ')
    f = open("word.html", "w")
    f.write(html)
    f.close()
    with open("word.html", "r") as html_file:
        content = html_file.read()
        soup = BeautifulSoup(content, 'lxml')
        words = soup.find_all('span', class_='dash-letter')
        stuff = ""
        for span in words:
            if span.text.isascii():
                stuff += span.text
    with open("Sentence.txt", "w") as wf:
        wf.write(stuff)
        wf.close()
    e = open('Sentence.txt', 'r')
    s = e.read()
    Words = (s.split())
    Delay = ((len(s.split()) / WPM) * 60)
    int(Delay)
    Delay1 = Delay / len(s.split())
    for Word in Words:
        pyautogui.typewrite(Word + " ")
        time.sleep(Delay1)
    time.sleep(2)
    driver.get('https://www.nitrotype.com/garage')
except:
    time.sleep(4)
    driver.get('https://www.nitrotype.com/garage')
    driver.implicitly_wait(20)
    driver.find_element_by_css_selector('a.btn--light:nth-child(2)').click()
    time.sleep(5)
    driver.find_element_by_css_selector('button.btn--primary').click()
    driver.implicitly_wait(10)
    driver.find_element_by_css_selector('.dash-copyContainer')
    time.sleep(4.25)
    html = driver.page_source.replace(' ', ' ')
    f = open("word.html", "w")
    f.write(html)
    f.close()
    with open("word.html", "r") as html_file:
        content = html_file.read()
        soup = BeautifulSoup(content, 'lxml')
        words = soup.find_all('span', class_='dash-letter')
        stuff = ""
        for span in words:
            if span.text.isascii():
                stuff += span.text
    with open("Sentence.txt", "w") as wf:
        wf.write(stuff)
        wf.close()
    e = open('Sentence.txt', 'r')
    s = e.read()
    Words = (s.split())
    Delay = ((len(s.split()) / WPM) * 60)
    int(Delay)
    Delay1 = Delay / len(s.split())
    for Word in Words:
        pyautogui.typewrite(Word + " ")
        time.sleep(Delay1)
    time.sleep(2)
    driver.get('https://www.nitrotype.com/garage')
finally:
    driver1 = webdriver.Chrome(executable_path='/Users/Braeden/Downloads/chromedriver.exe')
    driver1.get('https://accounts.google.com/ServiceLogin/signinchooser?service=mail&passive=true&rm=false&continue=https%3A%2F%2Fmail.google.com%2Fmail%2F&ss=1&scc=1&ltmpl=default&ltmplcache=2&emr=1&osid=1&flowName=GlifWebSignIn&flowEntry=ServiceLogin')
    time.sleep(2)
    driver1.find_element_by_css_selector('#identifierId')\
        .send_keys(EU)
    time.sleep(2)
    driver1.find_element_by_css_selector('.VfPpkd-vQzf8d').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('<div class="VfPpkd-RLmnJb"></div>')\
        .send_keys(EP)
    time.sleep(1)
    driver1.find_element_by_css_selector('.VfPpkd-LgbsSe-OWXEXe-k8QpJ > span:nth-child(2)').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('.VfPpkd-LgbsSe-OWXEXe-k8QpJ > div:nth-child(1)').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('.T-I-KE').click()
    time.sleep(2)
    driver1.find_element_by_css_selector('#\:c1')\
        .send_keys(TO)
    driver1.find_element_by_css_selector('#\:co')\
        .send_keys('Nitro type requires Captcha')
    driver1.find_element_by_css_selector('#\:b9').click()
    driver1.close()
    input('Did you complete the captcha:')
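One thing worth noting about the structure above: a bare except: treats every failure (a missing element, a slow page load, a typo) as "captcha time", which would explain suddenly being emailed around race 20 without ever seeing the real error. A small sketch of how the actual reason could at least be logged first, using the same old-style find_element_by_css_selector API as the question (the selector here is only illustrative):

import traceback
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException

driver = webdriver.Chrome()  # same kind of driver object as in the question

try:
    driver.get('https://www.nitrotype.com/race')
    driver.find_element_by_css_selector('.dash-copyContainer')  # illustrative check
except (NoSuchElementException, TimeoutException) as err:
    # Log what actually went wrong before deciding it must be a captcha
    print('Race page did not load as expected:', err)
    traceback.print_exc()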
Running a program in cmd; the print function
with open('test1.csv', 'wb') as csv_file:
    writer = csv.writer(csv_file)
    for index, url in enumerate(URL_LIST):
        page = requests.get(url)
        print '\r' 'Scraping URL ' + str(index+1) + ' of ' + str(len(URL_LIST)),
        if text2search in page.text:
            tree = html.fromstring(page.content)
            (title,) = (x.text_content() for x in tree.xpath('//title'))
            (price,) = (x.text_content() for x in tree.xpath('//div[@class="property-value__price"]'))
            (sold,) = (x.text_content().strip() for x in tree.xpath('//p[@class="property-value__agent"]'))
            writer.writerow([title, price, sold])
Which returns: Scraping URL 1 of 400
over and over till the count ends.
What I'm trying to learn today is printing 2 outcomes on 2 separate lines, over and over till the loop ends.
Example:
Scraping URL 1 of 400 (where the number is the only thing that changes)
Then, if the scraper finds a result in the list:
Adding Result 1 to CSV (where the number is the only thing that changes)
So far I have tried a few print commands, but it either overwrites the entire sentence on the same line:
with open('test1.csv', 'wb') as csv_file:
    writer = csv.writer(csv_file)
    for index, url in enumerate(URL_LIST):
        page = requests.get(url)
        print '\r' 'Scraping URL ' + str(index+1) + ' of ' + str(len(URL_LIST)),
        if text2search in page.text:
            tree = html.fromstring(page.content)
            (title,) = (x.text_content() for x in tree.xpath('//title'))
            (price,) = (x.text_content() for x in tree.xpath('//div[@class="property-value__price"]'))
            (sold,) = (x.text_content().strip() for x in tree.xpath('//p[@class="property-value__agent"]'))
            writer.writerow([title, price, sold])
            print '\r' 'URL_FOUND' + str(index+1) + 'adding to CSV',
Or, if I try to attach the two print statements to an else clause, it only prints the first statement and the second is never acknowledged:
with open('test1.csv', 'wb') as csv_file:
    writer = csv.writer(csv_file)
    for index, url in enumerate(URL_LIST):
        page = requests.get(url)
        print '\r' 'Scraping URL ' + str(index+1) + ' of ' + str(len(URL_LIST)),
    else:
        if text2search in page.text:
            tree = html.fromstring(page.content)
            (title,) = (x.text_content() for x in tree.xpath('//title'))
            (price,) = (x.text_content() for x in tree.xpath('//div[@class="property-value__price"]'))
            (sold,) = (x.text_content().strip() for x in tree.xpath('//p[@class="property-value__agent"]'))
            writer.writerow([title, price, sold])
            print '\n' 'title'
Just wondering if anyone could point me in the right direction for printing two outcomes on 2 lines.
Full code below if required:
import requests
import csv
import datetime
import pandas as pd
import csv
from lxml import html

df = pd.read_excel("C:\Python27\Projects\REA_SCRAPER\\REA.xlsx", sheetname="REA")
dnc = df['Property']
dnc_list = list(dnc)
url_base = "https://www.realestate.com.au/property/"
URL_LIST = []
for nd in dnc_list:
    nd = nd.strip()
    nd = nd.lower()
    nd = nd.replace(" ", "-")
    URL_LIST.append(url_base + nd)

text2search = '''RECENTLY SOLD'''

with open('test1.csv', 'wb') as csv_file:
    writer = csv.writer(csv_file)
    for index, url in enumerate(URL_LIST):
        page = requests.get(url)
        print '\r' 'Scraping URL ' + str(index+1) + ' of ' + str(len(URL_LIST)),
        if text2search in page.text:
            tree = html.fromstring(page.content)
            (title,) = (x.text_content() for x in tree.xpath('//title'))
            (price,) = (x.text_content() for x in tree.xpath('//div[@class="property-value__price"]'))
            (sold,) = (x.text_content().strip() for x in tree.xpath('//p[@class="property-value__agent"]'))
            writer.writerow([title, price, sold])
I would have recommended curses, but you're on Windows and just writing what appears to be a small script; reason enough to not go down that rabbit hole.
The reason you are seeing your lines overwrite each other is that you are printing carriage returns (\r), which move the cursor to the start of the line. Any text written thereafter overwrites the previously printed text.
I found this with a quick Google, which may be of interest to you.
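Building on that, one way to get the two-line behaviour the question describes is to keep rewriting the status line in place with \r, and emit a newline just before each "found" message so that message lands on its own line. A rough sketch of the idea, using sys.stdout.write so it reads the same under Python 2 and 3 (the list and the hit condition here are stand-ins for the question's URL_LIST and text2search check):

import sys

URL_LIST = ['https://example.com/a', 'https://example.com/b']  # hypothetical list
results_found = 0

for index, url in enumerate(URL_LIST):
    # Rewrite the status line in place; '\r' returns to the start of the line
    sys.stdout.write('\rScraping URL {} of {}'.format(index + 1, len(URL_LIST)))
    sys.stdout.flush()

    hit = (index % 2 == 0)  # stands in for the text2search check in the question
    if hit:
        results_found += 1
        # Move to a fresh line before reporting the hit, then carry on updating
        sys.stdout.write('\nAdding result {} to CSV\n'.format(results_found))
        sys.stdout.flush()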
While coding an image downloader I was testing some functions. I want to store all the links in links.txt, but it only writes one link, even though the run window shows all the links that have been found. Please help me fix this problem. Also, once it is fixed, I want to know how many links there are; I tried some functions but they don't seem to work the way I wanted. Thank you so much!
Here's the code so far:
# import random
# import urllib.request
import requests
from bs4 import BeautifulSoup

def Download_Image_from_Web(url):
    # name = random.randrange(1, 1000)
    # fullName = str(name) + ".jpg"
    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "html.parser")
    for link in soup.findAll('img'):
        image_links = link.get('src')
        if '.jpg' in image_links:
            raw_text = r'links.txt'
            fw = open(raw_text, 'w')
            for i in image_links.split("\\n"):
                fw.write(i+'\n')
            fw.close()
            fr = open('links.txt', 'r')
            text = fr.read()
            print(text)

Download_Image_from_Web("https://pixabay.com/")
Below is the program based on the original sample, rewritten to use a with context manager, for reference.
import requests
from bs4 import BeautifulSoup

def Download_Image_from_Web(url):
    # name = random.randrange(1, 1000)
    # fullName = str(name) + ".jpg"
    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "html.parser")
    raw_text = 'links.txt'
    with open(raw_text, 'w') as fw:
        for link in soup.findAll('img'):
            image_links = link.get('src')
            if '.jpg' in image_links:
                for i in image_links.split("\\n"):
                    fw.write(i+'\n')
    with open(raw_text, 'r') as fr:
        text = fr.read()
        # print(text)
        print("Total {} images found:\n{}".format(len(text.splitlines()), text))

Download_Image_from_Web("https://pixabay.com/")
EDIT1: Removed the description of the previous with sample from the first paragraph.
EDIT2: Added image counts to the output.
You have to open the file in append mode, not in write mode.
Basically, write mode overwrites the file, so your code would be:
fw = open(raw_text, 'a')  # this opens the file in append mode
for i in image_links.split("\\n"):
    fw.write(i+'\n')
fw.close()

fr = open('links.txt', 'r')
text = fr.read()  # this reads all the written content, line by line
print(text)
You can print the entire written content at the end like this
fw = open(raw_text, 'a')  # this opens the file in append mode
for i in image_links.split("\\n"):
    fw.write(i+'\n')
fw.close()

fr = open('links.txt', 'r')
text = fr.read()  # this prints all the written content at the end
print(text)
If you need a count of the images, you can call len() on image_links.split("\\n"); in that case your code would be:
fw = open(raw_text, 'a')  # this opens the file in append mode
Images = image_links.split("\\n")
print "no of images = ", len(Images)
for i in Images:
    fw.write(i+'\n')
fw.close()

fr = open('links.txt', 'r')
text = fr.read()  # this prints all the written content at the end
print(text)
I'm currently developing a web crawler that works through a list of URLs stored in a queue file. I need my spider to scrape all words from these pages before it moves on to the next link in the queue, and I need a pointer in the right direction for setting it up so that the scraper checks each word against my common.txt (to make sure the word isn't in there) and confirms it isn't already in the keyword list before adding it.
I have tried something like this with get_keywords in my Spider.py, but it isn't doing anything. I may be missing something simple, as I've been coding all day, but anyway, here is my code.
Spider.py
from Gen_info import *


class Spider:
    project_name = ''
    queue_file = ''
    crawled_file = ''
    keyword_file = ''
    queue = set()
    crawled = set()

    def __init__(self, project_name):
        Spider.project_name = project_name
        Spider.queue_file = Spider.project_name + '/Chrome_Hist.csv'
        Spider.crawled_file = Spider.project_name + '/CrawledUrls.txt'
        self.boot()
        # self.crawl_page('First spider', Spider.queue)

    # Creates directory and files for project on first run and starts the spider
    @staticmethod
    def boot():
        create_project_dir(Spider.project_name)
        create_files(Spider.project_name)
        Spider.queue = file_to_set(Spider.queue_file)
        Spider.crawled = file_to_set(Spider.crawled_file)

    # Updates user display, fills queue and updates files
    @staticmethod
    def crawl_page(thread_name, page_url):
        if page_url not in Spider.crawled:
            print(thread_name + ' now crawling ' + page_url)
            print('Queue ' + str(len(Spider.queue)) + ' | Crawled ' + str(len(Spider.crawled)))
            Spider.queue.remove(page_url)
            Spider.crawled.add(page_url)
            Spider.update_files()

    @staticmethod
    def update_files():
        set_to_file(Spider.queue, Spider.queue_file)
        set_to_file(Spider.crawled, Spider.crawled_file)

    @staticmethod
    def get_keywords(Page_words):
        common = open("Common_words.txt").read().split('\n')
        word_dict = {}
        word_list = Page_words.lower().split()
        for word in word_list:
            if word not in common and word.isalnum():
                if word not in word_dict:
                    word_dict[word] = 1
                if word in word_dict:
                    word_dict[word] += 1
main.py
import threading
from Queue import Queue
from Spider import Spider
from Gen_info import *
import urllib2
from bs4 import BeautifulSoup
from shutil import copyfile
import os

PROJECT_NAME = 'History Forensics'
QUEUE_FILE = PROJECT_NAME + '/Chrome_Hist.csv'
CRAWLED_FILE = PROJECT_NAME + '/CrawledUrls.txt'
NUMBER_OF_THREADS = 2
Queue = Queue()
Spider(PROJECT_NAME)
keywords = ''
src = 'C:\Users\Lewis Collins\Python Project\ChromeDBs\Chrome_Hist.csv'
dst = PROJECT_NAME
path = 'C:\Users\Lewis Collins\Python Project\ChromeDBs\Chrome_Hist.csv'


# Create worker threads (will die when main exits)
def create_workers():
    for _ in range(NUMBER_OF_THREADS):
        t = threading.Thread(target=work)
        t.daemon = True
        t.start()


# Do the next job in the queue
def work():
    while True:
        url = Queue.get()
        Spider.crawl_page(threading.current_thread().name, url)
        Queue.task_done()


# Each queued link is a new job
def create_jobs():
    for link in file_to_set(QUEUE_FILE):
        Queue.put(link)
    Queue.join()
    crawl()


# Check if there are items in the queue, if so crawl them
def crawl():
    queued_links = file_to_set(QUEUE_FILE)
    if len(queued_links) > 0:
        print(str(len(queued_links)) + ' links in the queue')
        create_jobs()


def get_keywords():
    common_words = open('File_Storage/common.txt', 'r').readlines()
    keywords = open(PROJECT_NAME + '/keywords.txt', 'r').read().split('\n')
    f = open(PROJECT_NAME + '/keywords.txt', 'a')
    urls = file_to_set(QUEUE_FILE)
    Hist_queue = urls
    for i in Hist_queue:
        html_content = urllib2.urlopen(i).read()
        soup = BeautifulSoup(html_content)
        for script in soup(["script", "style"]):
            script.extract()
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        (text.encode('utf-8'))
        visible_text = soup.getText()
        words = visible_text.split(' ')
        for word in words:
            if word not in common_words and word not in keywords and word.isalnum():
                f.write(word + '\n')
                keywords.append(word)
            else:
                continue


# copyfile(src, dst)
#
# os.remove(path)

create_workers()
get_keywords()
crawl()
Any questions about how it works, fire away, or ask for any other code you may need to see.
Thanks in advance, everyone.
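One detail that can quietly defeat the filtering in get_keywords: readlines() keeps the trailing newline on every line, so a bare word almost never matches an entry like 'the\n', and repeated membership tests against lists are slow on large pages. A possible direction, not the asker's final code, is to normalise both files into sets; the sketch below assumes the same file layout as the question (File_Storage/common.txt and a keywords.txt under the project folder) and takes the already-extracted visible page text as input:

def load_word_set(path):
    # Strip whitespace/newlines so membership tests actually match bare words
    with open(path, 'r') as fh:
        return {line.strip().lower() for line in fh if line.strip()}


def get_keywords(page_text,
                 common_path='File_Storage/common.txt',
                 keyword_path='History Forensics/keywords.txt'):
    common_words = load_word_set(common_path)
    keywords = load_word_set(keyword_path)
    new_words = []
    for word in page_text.lower().split():
        if word.isalnum() and word not in common_words and word not in keywords:
            keywords.add(word)
            new_words.append(word)
    # Append only the genuinely new words to the keyword file
    with open(keyword_path, 'a') as fh:
        for word in new_words:
            fh.write(word + '\n')
    return new_words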
I have currently this:
import urllib.request

def download_dropbox(url, pre_file_name):
    file = url[42:]
    file = file[:-5]
    file_name = pre_file_name + file
    print('Downloading from ' + url + ' to ' + file_name)
    print(file)
    u = urllib.request.urlopen(url)
    data = u.read()
    u.close()
    with open(file_name, "wb") as f:
        f.write(data)
    print('Download Completed from ' + url + ' and saved to ' + file_name)
This basically downloads files from Dropbox and saves them to a directory. However, I want to have some sort of text progress bar like:
[====      ] 50%
OR
50%
The hard part, I would think, is doing it without any external modules like a loading-bar module, etc. Also, as the title states, I need it in Python 3. Thank you.
Edit:
Thanks to Martin Evans for the data-read while loop and progress bar; here is the end result of the code:
# Get the total number of bytes of the file to download before downloading
print("opening url:", url)
u = urllib.request.urlopen(url)
meta = u.info()
print(str(meta).split())
metaInfo = str(meta).split()
print(len(metaInfo))
print("Content-Length:" + metaInfo[46] + " bytes")
fileTotalbytes = int(metaInfo[46])

data_blocks = []
# total = int(metaInfo[46])
total = 0

while True:
    block = u.read(1024)
    data_blocks.append(block)
    total += len(block)
    hash = ((60 * total) // fileTotalbytes)
    print("[{}{}] {}%".format('#' * hash, ' ' * (60 - hash), int(total / fileTotalbytes * 100)), end="\r")
    if not len(block):
        break

data = b''.join(data_blocks)  # had to add b because I was joining bytes not strings
u.close()

with open('test.zip', "wb") as f:
    f.write(data)
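As an aside on the code above, picking the length out of str(meta).split() by index (metaInfo[46]) is fragile, because that position shifts whenever the server sends a different set of headers. When the server provides a Content-Length header at all, it can be read by name instead; a small sketch (the URL is hypothetical):

import urllib.request

url = 'https://www.example.com/some-file.zip'  # hypothetical URL
u = urllib.request.urlopen(url)

# info() returns the response headers; get() returns None if the header is absent
length_header = u.info().get('Content-Length')
file_total_bytes = int(length_header) if length_header is not None else None
print('Content-Length:', file_total_bytes, 'bytes')
u.close()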
To answer your main question, how to make a text progress bar, you could use something like the following to give you an idea:
import time

for n in range(1, 101):
    hash = ((60 * n) // 100)
    print("[{}{}] {}%".format('#' * hash, ' ' * (60 - hash), n), end="\r")
    time.sleep(0.05)
This would give you the following:
[########################### ] 45%
Your main problem, though, is that there is no obvious way to determine how many bytes will eventually be downloaded unless you already know the exact size of the item beforehand, or the server reports it (for example in a Content-Length header). If you control the server end, you could arrange for the length to be obtained before starting.
You can, though, start by at least converting your read() call to something like the following:
u = urllib.request.urlopen(url)
data_blocks = []
total = 0

while True:
    block = u.read(1024)   # read in 1 KB chunks from the response object
    data_blocks.append(block)
    total += len(block)
    print("Downloaded {} bytes".format(total), end="\r")
    if not len(block):
        break

data = "".join(data_blocks)
u.close()
By doing it this way, you read it a bit at a time and can then provide feedback.
You can use print with \r at the start to go to the start of the line and write over the previous text (so you need to write spaces if you want to clear a character). Here's a simple example:
from time import sleep

x = 0
while x < 20:
    print('\r' + '.' * x, end="")
    x += 1
    sleep(0.1)