I am trying to get the short code below to work. My focus is on fetch_historical_yahoo, which does not seem to be working. I am trying to use it in a larger program.
import datetime
import matplotlib.finance as finance
import matplotlib.mlab as mlab
startdate = datetime.date(2005,1,1)
today = enddate = datetime.date.today()
ticker = 'nvda'
fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
r = mlab.csv2rec(fh); fh.close()
r.sort()
When I run the code, I get the error below. When I open finance.py I can't seem to put my finger on the URL issue.
Any ideas?
I tried mpl_finance but I was not able to install it.
fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
Traceback (most recent call last):
File "<ipython-input-61-e83eb3d28a19>", line 1, in <module>
fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
File "C:\Users\dvargas\Anaconda3\lib\site-packages\matplotlib\finance.py", line 362, in fetch_historical_yahoo
with contextlib.closing(urlopen(url)) as urlfh:
File "C:\Users\dvargas\Anaconda3\lib\urllib\request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\dvargas\Anaconda3\lib\urllib\request.py", line 466, in open
response = self._open(req, data)
File "C:\Users\dvargas\Anaconda3\lib\urllib\request.py", line 484, in _open
'_open', req)
File "C:\Users\dvargas\Anaconda3\lib\urllib\request.py", line 444, in _call_chain
result = func(*args)
File "C:\Users\dvargas\Anaconda3\lib\urllib\request.py", line 1282, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\dvargas\Anaconda3\lib\urllib\request.py", line 1256, in do_open
raise URLError(err)
URLError: <urlopen error [Errno 11004] getaddrinfo failed>
I had to use a workaround.
from urllib.request import urlopen
from bs4 import BeautifulSoup as bs
def get_historical_data(name, number_of_days):
    data = []
    url = "https://finance.yahoo.com/quote/" + name + "/history/"
    rows = bs(urlopen(url).read()).findAll('table')[0].tbody.findAll('tr')
    for each_row in rows:
        divs = each_row.findAll('td')
        if divs[1].span.text != 'Dividend':  # Ignore this row in the table
            # I'm only interested in the 'Open' price; for other values, play with divs[1 - 5]
            data.append({'Date': divs[0].span.text, 'Open': float(divs[1].span.text.replace(',', ''))})
    return data[:number_of_days]

# Test
for i in get_historical_data('googl', 5):
    print(i)
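As an aside, the Errno 11004 getaddrinfo failure means the hostname could not be resolved: the historical-data endpoint that the old matplotlib.finance module queries was shut down by Yahoo, so fetch_historical_yahoo cannot work regardless of the surrounding code. If installing a third-party package is an option, a minimal sketch using the yfinance library (my suggestion, not part of the original code) would be:

import yfinance as yf  # assumed to be installed, e.g. via pip install yfinance

# Daily history for the same ticker and start date as the original snippet
hist = yf.download('NVDA', start='2005-01-01')
print(hist.head())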
I've made a very basic share price prediction script which essentially uses linear regression. Not accurate, but it's proof of concept for a uni project.
I want to add these prices to my website using the new PyScript library. However, when I run the code on my website, I get this error:
PythonError: Traceback (most recent call last):
  File "/lib/python3.10/asyncio/futures.py", line 201, in result
    raise self._exception
  File "/lib/python3.10/asyncio/tasks.py", line 232, in __step
    result = coro.send(None)
  File "/lib/python3.10/site-packages/_pyodide/_base.py", line 506, in eval_code_async
    await CodeRunner(
  File "/lib/python3.10/site-packages/_pyodide/_base.py", line 357, in run_async
    coroutine = eval(self.code, globals, locals)
  File "", line 6, in
  File "/lib/python3.10/site-packages/pandas/util/_decorators.py", line 311, in wrapper
    return func(*args, **kwargs)
  File "/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 680, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 575, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 933, in __init__
    self._engine = self._make_engine(f, self.engine)
  File "/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 1217, in _make_engine
    self.handles = get_handle(  # type: ignore[call-overload]
  File "/lib/python3.10/site-packages/pandas/io/common.py", line 670, in get_handle
    ioargs = _get_filepath_or_buffer(
  File "/lib/python3.10/site-packages/pandas/io/common.py", line 339, in _get_filepath_or_buffer
    with urlopen(req_info) as req:
  File "/lib/python3.10/site-packages/pandas/io/common.py", line 239, in urlopen
    return urllib.request.urlopen(*args, **kwargs)
  File "/lib/python3.10/urllib/request.py", line 216, in urlopen
    return opener.open(url, data, timeout)
  File "/lib/python3.10/urllib/request.py", line 519, in open
    response = self._open(req, data)
  File "/lib/python3.10/urllib/request.py", line 541, in _open
    return self._call_chain(self.handle_open, 'unknown',
  File "/lib/python3.10/urllib/request.py", line 496, in _call_chain
    result = func(*args)
  File "/lib/python3.10/urllib/request.py", line 1419, in unknown_open
    raise URLError('unknown url type: %s' % type)
urllib.error.URLError:
This code worked absolutely fine in PyCharm, so I'm assuming it's something to do with PyScript?
I'll link the code below.
<py-script>
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
urlAAPL = "https://raw.githubusercontent.com/Dan168/-DanCreates/main/AAPL.csv"
dataAAPL = pd.read_csv(urlAAPL)
XAAPL = dataAAPL.iloc[:, 0].values.reshape(-1, 1)
YAAPL = dataAAPL.iloc[:, 1].values.reshape(-1, 1)
urlAMZN = "https://raw.githubusercontent.com/Dan168/-DanCreates/main/AMZN.csv"
dataAMZN = pd.read_csv(urlAMZN)
XAMZN = dataAMZN.iloc[:, 0].values.reshape(-1, 1)
YAMZN = dataAMZN.iloc[:, 1].values.reshape(-1, 1)
urlTSLA = "https://raw.githubusercontent.com/Dan168/-DanCreates/main/TSLA.csv"
dataTSLA = pd.read_csv(urlTSLA)
XTSLA = dataTSLA.iloc[:, 0].values.reshape(-1, 1)
YTSLA = dataTSLA.iloc[:, 1].values.reshape(-1, 1)
urlNIO = "https://raw.githubusercontent.com/Dan168/-DanCreates/main/NIO.csv"
dataNIO = pd.read_csv(urlNIO)
XNIO = dataNIO.iloc[:, 0].values.reshape(-1, 1)
YNIO = dataNIO.iloc[:, 1].values.reshape(-1, 1)
def Predict(X, Y, Name):
    lr = LinearRegression()
    lr.fit(X, Y)
    Y_pred = lr.predict(X)
    oneDay = 127
    oneWeek = 134
    oneMonth = 156
    print(str(Name))
    print("Price prediction one day from now: ")
    print(lr.predict(np.array([oneDay]).reshape(-1, 1)))
    print("Price prediction one week from now: ")
    print(lr.predict(np.array([oneWeek]).reshape(-1, 1)))
    print("Price prediction one month from now: ")
    print(lr.predict(np.array([oneMonth]).reshape(-1, 1)))
Predict(XAAPL, YAAPL, "AAPL")
Predict(XNIO, YNIO, "NIO")
Predict(XTSLA, YTSLA, "TSLA")
Predict(XAMZN, YAMZN, "AMZN")
</py-script>
I ran the code in PyCharm and it worked absolutely fine. It also ran fine on my website before, when it was only checking one CSV. It was when I added the method and more stocks to check that I encountered the error, which makes me think it is something to do with the method.
Thanks in advance
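For what it's worth, PyScript/Pyodide runs inside the browser sandbox, where the standard urllib stack that pandas.read_csv uses for remote URLs generally cannot make real network requests, which would explain why the same code works in PyCharm but not on the site. A minimal sketch of one common workaround, fetching the CSV through the browser with Pyodide's open_url helper and handing the result to pandas (this assumes the standard Pyodide runtime that PyScript ships with), would be:

from pyodide.http import open_url
import pandas as pd

urlAAPL = "https://raw.githubusercontent.com/Dan168/-DanCreates/main/AAPL.csv"

# open_url performs the request via the browser and returns an in-memory text buffer
dataAAPL = pd.read_csv(open_url(urlAAPL))
print(dataAAPL.head())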
I was trying to encode in the scraping part and decode in the cleaning part, but I kept getting the same error or an HTTP 400 code. I run the code in Spyder (Python 3.7) and also tried Jupyter with Python 3. I read a lot about it online and tried to apply what I found.
So my error:
runfile('C:/Users/sound/Desktop/dataPlot.py', wdir='C:/Users/sound/Desktop')
Traceback (most recent call last):
File "<ipython-input-248-b48d632496ca>", line 1, in <module>
runfile('C:/Users/sound/Desktop/dataPlot.py', wdir='C:/Users/sound/Desktop')
File "C:\Users\sound\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 786, in runfile
execfile(filename, namespace)
File "C:\Users\sound\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/sound/Desktop/dataPlot.py", line 16, in <module>
jobs = sns.load_dataset(needToPlot)
File "C:\Users\sound\Anaconda3\lib\site-packages\seaborn\utils.py", line 428, in load_dataset
urlretrieve(full_path, cache_path)
File "C:\Users\sound\Anaconda3\lib\urllib\request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Users\sound\Anaconda3\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\sound\Anaconda3\lib\urllib\request.py", line 525, in open
response = self._open(req, data)
File "C:\Users\sound\Anaconda3\lib\urllib\request.py", line 543, in _open
'_open', req)
File "C:\Users\sound\Anaconda3\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\sound\Anaconda3\lib\urllib\request.py", line 1360, in https_open
context=self._context, check_hostname=self._check_hostname)
File "C:\Users\sound\Anaconda3\lib\urllib\request.py", line 1317, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "C:\Users\sound\Anaconda3\lib\http\client.py", line 1229, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\sound\Anaconda3\lib\http\client.py", line 1240, in _send_request
self.putrequest(method, url, **skips)
File "C:\Users\sound\Anaconda3\lib\http\client.py", line 1107, in putrequest
self._output(request.encode('ascii'))
UnicodeEncodeError: 'ascii' codec can't encode character '\u0117' in position 1760: ordinal not in range(128)
And my code:
I started with the scraping part, to gather data about IT positions and salaries from a popular jobs page.
#Scraping part
import requests
import pandas as pd
import seaborn as sns
from bs4 import BeautifulSoup

url = 'https://www.cvbankas.lt/?padalinys%5B0%5D=76&page=1'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

all_data = []
for i in range(1, 9):
    url = 'https://www.cvbankas.lt/?padalinys%5B0%5D=76&page=' + str(i)
    print(url)
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
    for h3 in soup.select('h3.list_h3'):
        try:
            job_title = h3.get_text(strip=True)
            company = h3.find_next(class_="heading_secondary").get_text(strip=True)
            salary = h3.find_next(class_="salary_amount").get_text(strip=True)
            location = h3.find_next(class_="list_city").get_text(strip=True)
            print('{:<50} {:<15} {:<15} {}'.format(company, salary, location, job_title))
        except AttributeError:
            pass
        all_data.append({
            'Job Title': job_title,
            'Company': company,
            'Salary': salary,
            'Location': location
        })

df = pd.DataFrame(all_data)
df.to_csv('data.csv')
After that I saved the four text fields into a dictionary per row. Then I tried to clean the data so it would be easier to plot.
#Cleaning part
import re  # needed for re.split below; missing from the original snippet

needtoclean = pd.read_csv(r'C:\Users\sound\Desktop\data.csv')

#make everything a str, since the column mixes int, float and str
needtoclean['Salary'] = needtoclean['Salary'].astype(str)

cleanedSalary = []
columnSeriesObj = needtoclean['Salary']
for value in columnSeriesObj.values:
    value = value.replace('Nuo', "").replace('Iki', '')
    value = value.strip()
    value = re.split(r'[\s,-]+', value)
    if len(value) > 1:  # salary given as a range - take the average
        value = (int(value[0]) + int(value[1])) / 2
    else:
        value = float(value[0])
    value = round(value)
    cleanedSalary.append(value)
needtoclean['Salary'] = cleanedSalary

cleanTitles = []
for value in needtoclean['Job Title'].values:
    value = value.title()
    value = value.replace('/', ' ').replace('(-Ė)', '').replace('(-A)', '').replace('(-As)', '').replace('(0s)', '')
    value = value.strip()
    value = value.split()
    cleanTitles.append(value)
needtoclean['Job Title'] = cleanTitles

needtoclean.to_csv(r'C:/Users/sound/Desktop/manodata.csv')
After cleaning I cannot start plotting, because I get the ascii error; I am not sure which approach would be right for decoding.
#Ploting part
needToPlot = pd.read_csv(r'C:\Users\sound\Desktop\manodata.csv')
jobs = sns.load_dataset(needToPlot)
You are using it the wrong way.
sns.load_dataset() is used to load the example datasets bundled with seaborn - like sns.load_dataset('iris').
To plot your data you need to use the DataFrame directly:
needToPlot = pd.read_csv(r'C:\Users\sound\Desktop\manodata.csv')
sns.relplot(..., data=needToPlot)
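For example, a minimal sketch of plotting the cleaned data straight from the DataFrame (the column names 'Salary' and 'Location' come from the CSV built above; the choice of plot is just an illustration) might be:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

needToPlot = pd.read_csv(r'C:\Users\sound\Desktop\manodata.csv')

# Pass the DataFrame straight to seaborn instead of load_dataset()
sns.stripplot(x='Location', y='Salary', data=needToPlot)
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()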
I have a Python script which is used to scrape images from Google. To run the script, you need to create a file named imgsearch_list.txt containing a list of search terms, like cat, dog, so that those terms are searched on Google.
When I run the script it throws an error.
You can see the code:
import re, os, sys, datetime, time
import pandas
from selenium import webdriver
from contextlib import closing
from selenium.webdriver import Chrome
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from pattern.web import URL, extension, cache, plaintext, Newsfeed, DOM
class GoogleImageExtractor(object):

    def __init__(self, search_key = '' ):
        """ Google image search class
            Args:
                search_key to be entered.
        """
        if type(search_key) == str:
            ## convert to list even for one search keyword to standalize the pulling.
            self.g_search_key_list = [search_key]
        elif type(search_key) == list:
            self.g_search_key_list = search_key
        else:
            print ('google_search_keyword not of type str or list')
            raise

        self.g_search_key = ''

        ## user options
        self.image_dl_per_search = 200

        ## url construct string text
        self.prefix_of_search_url = "https://www.google.com.sg/search?q="
        self.postfix_of_search_url = '&source=lnms&tbm=isch&sa=X&ei=0eZEVbj3IJG5uATalICQAQ&ved=0CAcQ_AUoAQ&biw=939&bih=591'# non changable text
        self.target_url_str = ''

        ## storage
        self.pic_url_list = []
        self.pic_info_list = []

        ## file and folder path
        self.folder_main_dir_prefix = r'C:\Users\intel\Desktop\Scrappr'

    def reformat_search_for_spaces(self):
        """
            Method call immediately at the initialization stages
            get rid of the spaces and replace by the "+"
            Use in search term. Eg: "Cookie fast" to "Cookie+fast"

            steps:
            strip any lagging spaces if present
            replace the self.g_search_key
        """
        self.g_search_key = self.g_search_key.rstrip().replace(' ', '+')

    def set_num_image_to_dl(self, num_image):
        """ Set the number of image to download. Set to self.image_dl_per_search.
            Args:
                num_image (int): num of image to download.
        """
        self.image_dl_per_search = num_image

    def get_searchlist_fr_file(self, filename):
        """Get search list from filename. Ability to add in a lot of phrases.
            Will replace the self.g_search_key_list
            Args:
                filename (str): full file path
        """
        with open(filename,'r') as f:
            self.g_search_key_list = f.readlines()

    def formed_search_url(self):
        ''' Form the url either one selected key phrases or multiple search items.
            Get the url from the self.g_search_key_list
            Set to self.sp_search_url_list
        '''
        self.reformat_search_for_spaces()
        self.target_url_str = self.prefix_of_search_url + self.g_search_key +\
                              self.postfix_of_search_url

    def retrieve_source_fr_html(self):
        """ Make use of selenium. Retrieve from html table using pandas table.
        """
        driver = webdriver.Chrome(executable_path=r"C:\Users\intel\Downloads\setups\chromedriver.exe")
        driver.get(self.target_url_str)

        ## wait for log in then get the page source.
        try:
            driver.execute_script("window.scrollTo(0, 30000)")
            time.sleep(2)
            self.temp_page_source = driver.page_source
            #driver.find_element_by_css_selector('ksb _kvc').click()#cant find the class
            driver.find_element_by_id('smb').click() #ok
            time.sleep(2)
            driver.execute_script("window.scrollTo(0, 60000)")
            time.sleep(2)
            driver.execute_script("window.scrollTo(0, 60000)")
        except:
            print ('not able to find')
            driver.quit()

        self.page_source = driver.page_source
        driver.close()

    def extract_pic_url(self):
        """ extract all the raw pic url in list
        """
        dom = DOM(self.page_source)
        tag_list = dom('a.rg_l')
        for tag in tag_list[:self.image_dl_per_search]:
            tar_str = re.search('imgurl=(.*)&imgrefurl', tag.attributes['href'])
            try:
                self.pic_url_list.append(tar_str.group(1))
            except:
                print ('error parsing', tag)

    def multi_search_download(self):
        """ Mutli search download"""
        for indiv_search in self.g_search_key_list:
            self.pic_url_list = []
            self.pic_info_list = []
            self.g_search_key = indiv_search
            self.formed_search_url()
            self.retrieve_source_fr_html()
            self.extract_pic_url()
            self.downloading_all_photos() #some download might not be jpg?? use selnium to download??
            self.save_infolist_to_file()

    def downloading_all_photos(self):
        """ download all photos to particular folder
        """
        self.create_folder()
        pic_counter = 1
        for url_link in self.pic_url_list:
            print (pic_counter)
            pic_prefix_str = self.g_search_key + str(pic_counter)
            self.download_single_image(url_link.encode(), pic_prefix_str)
            pic_counter = pic_counter + 1

    def download_single_image(self, url_link, pic_prefix_str):
        """ Download data according to the url link given.
            Args:
                url_link (str): url str.
                pic_prefix_str (str): pic_prefix_str for unique label the pic
        """
        self.download_fault = 0
        file_ext = os.path.splitext(url_link)[1] #use for checking valid pic ext
        temp_filename = pic_prefix_str + file_ext
        temp_filename_full_path = os.path.join(self.gs_raw_dirpath, temp_filename)

        valid_image_ext_list = ['.png','.jpg','.jpeg', '.gif', '.bmp', '.tiff'] #not comprehensive

        url = URL(url_link)
        if url.redirect:
            return # if there is re-direct, return

        if file_ext not in valid_image_ext_list:
            return #return if not valid image extension

        f = open(temp_filename_full_path, 'wb') # save as test.gif
        print (url_link)
        self.pic_info_list.append(pic_prefix_str + ': ' + url_link)
        try:
            f.write(url.download())#if have problem skip
        except:
            #if self.__print_download_fault:
            print ('Problem with processing this data: ', url_link)
            self.download_fault = 1
        f.close()

    def create_folder(self):
        """
            Create a folder to put the log data segregate by date
        """
        self.gs_raw_dirpath = os.path.join(self.folder_main_dir_prefix, time.strftime("_%d_%b%y", time.localtime()))
        if not os.path.exists(self.gs_raw_dirpath):
            os.makedirs(self.gs_raw_dirpath)

    def save_infolist_to_file(self):
        """ Save the info list to file.
        """
        temp_filename_full_path = os.path.join(self.gs_raw_dirpath, self.g_search_key + '_info.txt')
        with open(temp_filename_full_path, 'w') as f:
            for n in self.pic_info_list:
                f.write(n)
                f.write('\n')


if __name__ == '__main__':
    choice = 4

    if choice == 4:
        """test the downloading of files"""
        w = GoogleImageExtractor('')  # leave blanks if get the search list from file
        searchlist_filename = r'C:\Users\intel\Desktop\Scrappr\imgsearch_list.txt'
        w.set_num_image_to_dl(200)
        w.get_searchlist_fr_file(searchlist_filename)  # replace the searclist
        w.multi_search_download()
Here's the error:
not able to find
Traceback (most recent call last):
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connection.py", line 156, in _new_conn
conn = connection.create_connection(
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\util\connection.py", line 84, in create_connection
raise err
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\util\connection.py", line 74, in create_connection
sock.connect(sa)
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 665, in urlopen
httplib_response = self._make_request(
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 387, in _make_request
conn.request(method, url, **httplib_request_kw)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 1230, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 1276, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 1225, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 1004, in _send_output
self.send(msg)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 944, in send
self.connect()
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connection.py", line 184, in connect
conn = self._new_conn()
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connection.py", line 168, in _new_conn
raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x0000000007017520>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:/Users/intel/Desktop/go.py", line 211, in <module>
w.multi_search_download()
File "c:/Users/intel/Desktop/go.py", line 133, in multi_search_download
self.retrieve_source_fr_html()
File "c:/Users/intel/Desktop/go.py", line 106, in retrieve_source_fr_html
self.page_source = driver.page_source
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 679, in page_source
return self.execute(Command.GET_PAGE_SOURCE)['value']
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 319, in execute
response = self.command_executor.execute(driver_command, params)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 374, in execute
return self._request(command_info[0], url, body=data)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 397, in _request
resp = self._conn.request(method, url, body=body, headers=headers)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\request.py", line 75, in request
return self.request_encode_url(
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\request.py", line 97, in request_encode_url
return self.urlopen(method, url, **extra_kw)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\poolmanager.py", line 330, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 747, in urlopen
return self.urlopen(
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 747, in urlopen
return self.urlopen(
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 747, in urlopen
return self.urlopen(
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 719, in urlopen
retries = retries.increment(
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\util\retry.py", line 436, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='127.0.0.1', port=50181): Max retries exceeded with url: /session/a473cdecf0cbd7a585ac13d08f156b4a/source (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000007017520>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
Any help would be appreciated...
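Judging from the traceback, the crash happens on self.page_source = driver.page_source after the except branch has printed 'not able to find' and already called driver.quit(): once quit() ends the WebDriver session, chromedriver stops listening, so the next command gets the connection-refused error from urllib3. A minimal sketch of one way to reorder retrieve_source_fr_html so the source is captured before the driver is shut down (this only addresses the crash shown, not whether the selectors still match current Google result pages):

    def retrieve_source_fr_html(self):
        """ Retrieve the page source with selenium, then shut the driver down. """
        driver = webdriver.Chrome(executable_path=r"C:\Users\intel\Downloads\setups\chromedriver.exe")
        driver.get(self.target_url_str)
        try:
            driver.execute_script("window.scrollTo(0, 30000)")
            time.sleep(2)
            driver.find_element_by_id('smb').click()
            time.sleep(2)
            driver.execute_script("window.scrollTo(0, 60000)")
        except Exception:
            print('not able to find')
        # grab the source while the session is still alive, then quit exactly once
        self.page_source = driver.page_source
        driver.quit()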
I am running a Python script with PhantomJS and Selenium. I am facing a timeout issue: the script stops after 20-50 minutes. I need a solution so that I can run my script without this timeout issue. Where is the problem, and how can I solve it?
The input file cannot be read or no in proper format.
Traceback (most recent call last):
File "links_crawler.py", line 147, in <module>
crawler.Run()
File "links_crawler.py", line 71, in Run
self.checkForNextPages()
File "links_crawler.py", line 104, in checkForNextPages
self.next.click()
File "/home/dev/.local/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 75, in click
self._execute(Command.CLICK_ELEMENT)
File "/home/dev/.local/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 454, in _execute
return self._parent.execute(command, params)
File "/home/dev/.local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 199, in execute
response = self.command_executor.execute(driver_command, params)
File "/home/dev/.local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 395, in execute
return self._request(command_info[0], url, body=data)
File "/home/dev/.local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 463, in _request
resp = opener.open(request, timeout=self._timeout)
File "/usr/lib/python2.7/urllib2.py", line 431, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 449, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1227, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1200, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib/python2.7/httplib.py", line 1127, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 453, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 417, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
Code:
class Crawler():
    def __init__(self, where_to_save, verbose = 0):
        self.link_to_explore = ''
        self.TAG_RE = re.compile(r'<[^>]+>')
        self.TAG_SCRIPT = re.compile(r'<(script).*?</\1>(?s)')
        if verbose == 1:
            self.driver = webdriver.Firefox()
        else:
            self.driver = webdriver.PhantomJS()
        self.links = []
        self.next = True
        self.where_to_save = where_to_save
        self.logs = self.where_to_save + "/logs"
        self.outputs = self.where_to_save + "/outputs"
        self.logfile = ''
        self.rnd = 0
        try:
            os.stat(self.logs)
        except:
            os.makedirs(self.logs)
        try:
            os.stat(self.outputs)
        except:
            os.makedirs(self.outputs)

try:
    fin = open(file_to_read, "r")
    FileContent = fin.read()
    fin.close()

    crawler = Crawler(where_to_save)
    data = FileContent.split("\n")
    for info in data:
        if info != "":
            to_process = info.split("|")
            link = to_process[0].strip()
            category = to_process[1].strip().replace(' ', '_')
            print "Processing the link: " + link
            crawler.Init(link, category)
            crawler.Run()
            crawler.End()
    crawler.closeSpider()
except:
    print "The input file cannot be read or no in proper format."
    raise
If you don't want Timeout to stop your script you can catch the exception
selenium.common.exceptions.TimeoutException and pass it.
You can set the default page load timeout using the set_page_load_timeout() method of webdriver.
Like this
driver.set_page_load_timeout(10)
This will throw a TimeoutException if your page didn't load in 10 seconds.
EDIT:
Forgot to mention that you will have to put your code in a loop.
Add import
from selenium.common.exceptions import TimeoutException
while True:
    try:
        # Your code here
        break  # Loop will exit
    except TimeoutException:
        pass
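Putting the two pieces together, a minimal sketch of how the page-load timeout and the retry loop might be combined (the URL and the retry limit are placeholders, not from the question):

from selenium import webdriver
from selenium.common.exceptions import TimeoutException

driver = webdriver.PhantomJS()
driver.set_page_load_timeout(10)  # raises TimeoutException if a page takes longer than 10 s

for attempt in range(3):  # placeholder retry limit
    try:
        driver.get("http://example.com")  # placeholder URL
        break  # page loaded, leave the loop
    except TimeoutException:
        print("page load timed out, retrying...")
driver.quit()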
I am using Python 2.7.3 and I am trying to post data to my local web server. The data I am posting is temperature readings from my Raspberry Pi. I know the URL is right because if I use the Postman Chrome plugin the data is successfully posted and I get a return message. In Postman, however, I can only use form-data and NOT x-www-form-urlencoded, which is how my Python script has the content type set up. Can I change it to form-data?
Python Code:
import os
import glob
import time
import threading
import urllib
import urllib2
os.system('modprobe w1-gpio')
os.system('modprobe w1-therm')
base_dir = '/sys/bus/w1/devices/'
device_folder = glob.glob(base_dir + '28*')[0]
device_file = device_folder + '/w1_slave'
def read_temp_raw():
    f = open(device_file, 'r')
    lines = f.readlines()
    f.close()
    return lines

def read_temp():
    lines = read_temp_raw()
    while lines[0].strip()[-3:] != 'YES':
        time.sleep(0.2)
        lines = read_temp_raw()
    equals_pos = lines[1].find('t=')
    if equals_pos != -1:
        temp_string = lines[1][equals_pos+2:]
        temp_c = float(temp_string) / 1000.0
        temp_f = temp_c * 9.0 / 5.0 + 32.0
        temperature = {'tempf': temp_f, 'tempc': temp_c}
        return temperature

def post():
    threading.Timer(1800.0, post).start()
    temperature = read_temp()
    data = temperature
    data['room'] = 'Server Room'
    print(data)
    data = urllib.urlencode(data)
    path = 'http://client.pathtophppage'  # the url you want to POST to
    req = urllib2.Request(path, data)
    req.add_header("Content-type", "application/x-www-form-urlencoded")
    page = urllib2.urlopen(req).read()

post()
And the Error:
pi@raspberrypi ~/Documents $ python Temperature.py
{'tempc': 22.0, 'tempf': 71.6, 'room': 'Server Room'}
Traceback (most recent call last):
File "Temperature.py", line 49, in <module>
post()
File "Temperature.py", line 45, in post
page=urllib2.urlopen(req).read()
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 407, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 520, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 439, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 379, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 626, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 407, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 520, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 445, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 379, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 528, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 500: Internal Server Error
Save your time and use the requests library for HTTP requests.
A simple example:
import requests
url = 'http://url.com'
query = {'field': value}
res = requests.post(url, data=query)
print(res.text)
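Applied to the temperature script above, a minimal sketch (the URL is the placeholder from the question; sending each field as multipart form-data through the files argument is one way requests supports the format Postman calls "form-data"):

import requests

data = {'tempc': 22.0, 'tempf': 71.6, 'room': 'Server Room'}
path = 'http://client.pathtophppage'  # placeholder URL from the question

# default: application/x-www-form-urlencoded body
res = requests.post(path, data=data)

# multipart/form-data body; (None, value) tuples mark plain fields rather than file uploads
res = requests.post(path, files={k: (None, str(v)) for k, v in data.items()})
print(res.text)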