When using pandas-datareader with Yahoo, if I set start and end to the same date I get no data back for that date. If I ask a day later it works, but I want today's close today.
import sys
from sqlalchemy import *
import os
import datetime
import pandas_datareader.data as web

end = datetime.datetime(2015, 10, 15)
start = datetime.datetime(2015, 10, 15)

path = 'c:\\python34\\myprojects\\msis\\'
try:
    os.mkdir(path)
except:
    pass

fname = path + 'test.txt'
fhand = open(fname, 'w')

engine = create_engine('mysql+mysqlconnector://root:#localhost /stockinfo')
connection = engine.connect()
result1 = engine.execute("select symbol from equities where daily = 'Y'")

for sqlrow in result1:
    try:
        info = web.DataReader(sqlrow[0], 'yahoo', start, end)
        print (info)
        close = info['Close'].ix['2015-10-14']
        print ("=========================" + str(round(close,4)))
        answer = "Closing price for " + sqlrow[0] + " is " + str(round(close,4)) + "\n"
    except:
        answer = "No success for " + sqlrow[0] + "\n"
    fhand.write(answer)
    # result2 = engine.execute("update holdings set lasrprice = " + round(close,4) + " where symbol = '" + sqlrow[0] + "'")
    # result2.close()

result1.close()
fhand.close()
The code takes the second "except" route.
What am I doing wrong/what is happening?
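One workaround worth trying (a sketch, not a confirmed explanation: the Yahoo endpoint appears to treat the end date as exclusive when start and end are equal): widen the window by a day and take the last row that comes back. The 'AAPL' symbol below is only a placeholder.

import datetime
import pandas_datareader.data as web

start = datetime.datetime(2015, 10, 15)
end = start + datetime.timedelta(days=1)    # widen the window by one day

info = web.DataReader('AAPL', 'yahoo', start, end)   # 'AAPL' is just an example symbol
if not info.empty:
    close = info['Close'].iloc[-1]          # last row = most recent close returned
    print(round(close, 4))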
I am trying to add multiprocessing to my web crawler. What I usually see online is passing URLs as the args to map, map_async, or apply_async. The data I am crawling is in a table, so I extract it with two BeautifulSoup find_all calls (rows, then columns). Since the data I am crawling is sometimes all on one page, which only needs one URL, I tried using the list returned by find_all as the args for map_async, but I get the error "Fatal Python error: Cannot recover from stack overflow."
The error occurred on the following line
return_list = pool.map_async(func, Species_all_recorded_data_List)
How can I fix this, and where would the multiprocessing be better placed?
The second problem is that if I put code above the function crawl_all_data_mp, it all runs again when pool = Pool() is executed. I worked around this by simply moving the other code below that function, but that may not be right, since I still can't really run the code because of the first error.
Looking for your advice
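A sketch of the two changes that usually address those symptoms (hedged, this is not the original crawler): map over plain strings pulled out of the soup instead of over bs4 Tag objects (a Tag drags the whole parse tree with it and can overflow the pickling recursion), and create the Pool only under an if __name__ == '__main__': guard so that worker processes re-importing the module do not re-run the top-level code. The names parse_row and crawl_page below are hypothetical.

import multiprocessing
from functools import partial
from bs4 import BeautifulSoup

def parse_row(page, row_texts):
    # row_texts is a plain list of strings, which pickles cheaply;
    # a bs4 Tag object would drag the whole parse tree with it
    return {'page': page, 'id': row_texts[0], 'cells': row_texts[1:]}

def crawl_page(html, page):
    soup = BeautifulSoup(html, 'html.parser')
    rows = [[td.text for td in tr.find_all('td')] for tr in soup.find_all(id='theRow')]
    with multiprocessing.Pool(4) as pool:
        return pool.map(partial(parse_row, page), rows)

if __name__ == '__main__':
    # everything that should run exactly once (login, reading the old CSV,
    # the page loop) belongs under this guard, so child processes that
    # import the module do not re-execute it
    sample = "<table><tr id='theRow'><td>1</td><td>x</td></tr></table>"
    print(crawl_page(sample, page=0))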
My code:
(1) Function to call for web crawling
from tkinter import filedialog
from tkinter import *
import csv
import os.path
from os import path
from Index import *
from Dragonfly import *
import codecs
import multiprocessing              # needed for multiprocessing.Pool below
from functools import partial       # needed for partial below
from multiprocessing import Process, Value

# multiprocessing version
def multiprocessing_row_data(Web_rawl_Species_family_name, Web_rawl_Species_name, Total_num, Limit_CNT, expecting_CNT, oldID, page, Species_all_record_data_Data_Set):
    global DataCNT, stop_crawl_all_data_mp
    tmp_List = Species_all_record_data_Data_Set.find_all('td')
    # End conditions:
    # 1. no data in the next page
    # 2. for updates, stop once the old data is reached (checked by its ID)
    # 3. the count goes over the limit count
    id = tmp_List[0].text
    if (len(id) == 0) or (DataCNT >= expecting_CNT) or (DataCNT >= Limit_CNT):
        print(' --Finish crawl--' + ' crawl to page: ' + str(page) + ", ID: " + id + ", count: " + str(DataCNT))
        stop_crawl_all_data_mp = True
        raise StopIteration
    # access the same value in memory when doing multiprocessing
    with DataCNT.get_lock():
        DataCNT.value += 1
    response_DetailedInfo = session.post(general_url + Detailed_discriptions_url + id, headers=headers)
    soup2 = BeautifulSoup(response_DetailedInfo.text, 'html.parser')
    print("Current finished datas >> " + str(DataCNT.value) + " /" + str(Total_num) + " (" + str(DataCNT.value * 100 / Total_num) + "%)", end='\r')
    return DetailedTableInfo(tmp_List[0].text, tmp_List[1].text, tmp_List[2].text, tmp_List[3].text, tmp_List[4].text, tmp_List[5].text, tmp_List[7].text, tmp_List[6].text,
                             soup2.find(id='R_LAT').get('value'),
                             soup2.find(id='R_LNG').get('value'),
                             Web_rawl_Species_family_name,
                             Web_rawl_Species_name,
                             soup2.find(id='R_MEMO').get('value'))
def crawl_all_data_mp(Web_rawl_Species_family_name, Web_rawl_Species_name, Total_num, Limit_CNT, expecting_CNT, oldID):
    page = 0
    DataList = []
    while not stop_crawl_all_data_mp:
        pool = multiprocessing.Pool(10)
        Species_all_recorded_data = session.post(general_url +
                                                 species_all_record_data_first_url +
                                                 species_all_record_data_page_url + str(page) +
                                                 species_all_record_data_species_url +
                                                 Species_class_key[Web_rawl_Species_family_name] +
                                                 Species_key[Web_rawl_Species_name],
                                                 headers=headers)
        soup = BeautifulSoup(Species_all_recorded_data.text, 'html.parser')
        Species_all_recorded_data_List = soup.find_all(id='theRow')
        func = partial(multiprocessing_row_data, Web_rawl_Species_family_name, Web_rawl_Species_name, Total_num, Limit_CNT, expecting_CNT, oldID, page)
        return_list = pool.map_async(func, Species_all_recorded_data_List)
        DataList.append(list(filter(None, return_list.get())))
        page += 1
    # make sure that when main is finished, the subprocesses still keep rolling on
    pool.close()
    pool.join()
    return [DataList, page]
(2) main
It goes wrong on the following line, where the function above is called:
[datatmpList, page] = crawl_all_data_mp(Input_species_famliy, Input_species, Total_num, limit_cnt, expecting_CNT, oldID)
The main code:
# --main--
if __name__ == '__main__':
    # settings
    Input_species_famliy = "細蟌科"
    Input_species = "四斑細蟌"
    limit_cnt = 6000
    folder = 'Crawl_Data\\' + Species_class_key[Input_species_famliy]
    File_name = folder + "\\" + Species_class_key[Input_species_famliy] + Species_key[Input_species] + '.csv'
    oldID = 0
    oldData_len = 0
    print("--Start crawl-- " + Input_species_famliy + " " + Input_species)
    print("[folder]: " + folder)
    stop_crawl_all_data_mp = False

    # check whether the file exists or not
    file_check = path.exists(current_path + "\\" + File_name)

    # get the old ID
    if file_check:
        file_size = os.stat(current_path + "\\" + File_name).st_size
        if not file_size == 0:
            with open(File_name, newline='', errors="ignore") as F:
                R = csv.reader(F)
                oldData = [line for line in R]
                oldID = oldData[0][0]
                oldData_len = len(oldData) - 1

    # login
    Login_Web(myaccount, mypassword)

    # find the total number for the input species (expected to run one time)
    Species_total_num_Dict = Find_species_total_data()

    # get the data
    Total_num = int(Species_total_num_Dict[Input_species])
    #[datatmpList, page] = crawl_all_data(Input_species_famliy, Input_species, Total_num, limit_cnt, oldID)
    expecting_CNT = Total_num - oldData_len  # the number of records that need to be updated or crawled
    [datatmpList, page] = crawl_all_data_mp(Input_species_famliy, Input_species, Total_num, limit_cnt, expecting_CNT, oldID)

    Data = []
    for Data_tmp in datatmpList:
        Data.append([Data_tmp.SpeciesFamily,
                     Data_tmp.Species,
                     Data_tmp.IdNumber,
                     Data_tmp.Dates,
                     Data_tmp.Times,
                     Data_tmp.User,
                     Data_tmp.City,
                     Data_tmp.Dictrict,
                     Data_tmp.Place,
                     Data_tmp.Altitude,
                     Data_tmp.Latitude,
                     Data_tmp.Longitude,
                     Data_tmp.Description
                     ])

    # automatically make the directories
    newDir = current_path + "\\" + folder
    if not os.path.isdir(newDir):
        os.mkdir(newDir)

    # 'a' stands for append, so new data is appended to the old file
    with open(File_name, mode='a', newline='', errors="ignore") as employee_file:
        employee_writer = csv.writer(employee_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        # init: the file does not exist yet, or it is empty
        if (not file_check) or (file_size == 0):
            employee_writer.writerow(CSV_Head)
            employee_writer.writerows(Data)
        # otherwise insert the new data in front of the old data
        else:
            for i in range(0, len(Data)):
                oldData.insert(i, Data[i])
            employee_writer.writerows(oldData)
Using Python, how do you read Outlook's Shared Calendar events and, ideally, also apply a time filter?
Here is a relevant post, but to answer this fully:
import win32com.client  # for outlook
import datetime

"""This code reads shared calendars."""

# set variables
days = 3
begin = datetime.date.today()
end = begin + datetime.timedelta(days=days)
events = []  # to write results from calendar loop

# begin importing calendar
Outlook = win32com.client.Dispatch("Outlook.Application")
ns = Outlook.GetNamespace("MAPI")

# turn this into a list to read more calendars
recipient = ns.CreateRecipient("username")  # cmd whoami to find this
resolved = recipient.Resolve()  # checks for username in address book

# olFolderCalendar = 9
# appointments = ns.GetDefaultFolder(9).Items  # for personal calendar
appointments = ns.GetSharedDefaultFolder(recipient, 9).Items

# filtering criteria
# https://learn.microsoft.com/en-us/office/vba/api/outlook.items.includerecurrences
appointments.Sort("[Start]")  # suspect problem
appointments.IncludeRecurrences = "True"
restriction = "[Start] >= '" + begin.strftime("%m/%d/%Y") \
              + "' AND [End] <= '" + end.strftime("%m/%d/%Y") + "'"

# list of appointments
restrictedItems = appointments.Restrict(restriction)

# loop through all calendar events, and add elements to list
count = 0
for app in restrictedItems:
    count += 1  # no len(range(restrictedItems)) allowed

    # display values
    print()
    print("item: " + str(count))
    print("start: \t\t" + str(app.Start))
    print("subject: \t" + app.Subject)
    print("end: \t\t" + str(app.End))
    print("recurring: \t" + str(app.IsRecurring))
    print("status: \t" + str(app.MeetingStatus))

    # collect values
    app_instance = [app.Subject,
                    app.Start,
                    app.End,
                    app.BusyStatus]
    events.append(app_instance)
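If a tabular view of the collected results is useful, the events list built above maps directly onto a DataFrame; a minimal follow-on sketch, assuming pandas is installed:

import pandas as pd

# column order matches how app_instance is built in the loop above
calendar_df = pd.DataFrame(events, columns=["Subject", "Start", "End", "BusyStatus"])
print(calendar_df.head())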
I am trying to download multiple netcdf4 files from GES DISC, but I seem to be having trouble with the Authorization.
'fpath' is the location of the netcdf4 file. If I paste it into the address bar, a pop-up box appears for 'https://urs.earthdata.nasa.gov' asking for a username and password; if they are entered successfully, the file downloads. However, using 'fpath' in requests.get() does not work.
requests.get() successfully connects if I use 'https://urs.earthdata.nasa.gov' instead of fpath, but then I cannot download the netcdf4 file.
I've tried the solution mentioned here, but no luck.
Any help would be appreciated.
Code example below
import requests
from requests.auth import HTTPBasicAuth
from datetime import timedelta, date

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = date(2016, 1, 1)
end_date = date(2016, 1, 2)

for single_date in daterange(start_date, end_date):
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")
    fpath1 = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/' + YYYY + '/' + MM + '/'
    fpath2 = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc?'
    fpath3 = 'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],' \
             'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],' \
             'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],' \
             'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],' \
             'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],' \
             'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],' \
             'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],' \
             'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],' \
             'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
    fpath = fpath1 + fpath2 + fpath3
    print(fpath)

    # This successfully connects
    # response = requests.get('https://urs.earthdata.nasa.gov', auth=HTTPBasicAuth('username', 'password'))
    # print(response)

    # This one does not
    response = requests.get(fpath, auth=HTTPBasicAuth('username', 'password'))
    print(response)
Note: anyone can create a free account to access this data by going to this website.
Thank you @Stovfl for pointing me in the right direction.
The guidance led me to this website, which contained information on how to set up a session for Earthdata.
The updated complete code is below:
import requests
from datetime import timedelta, date

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = date(2016, 1, 1)
end_date = date(2019, 7, 31)

# ***********************
# overriding requests.Session.rebuild_auth to maintain headers when redirected
# ***********************
class SessionWithHeaderRedirection(requests.Session):
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        super().__init__()
        self.auth = (username, password)

    # Overrides from the library to keep headers when redirected to or from the NASA auth host.
    def rebuild_auth(self, prepared_request, response):
        headers = prepared_request.headers
        url = prepared_request.url
        if 'Authorization' in headers:
            original_parsed = requests.utils.urlparse(response.request.url)
            redirect_parsed = requests.utils.urlparse(url)
            if (original_parsed.hostname != redirect_parsed.hostname) and \
                    redirect_parsed.hostname != self.AUTH_HOST and \
                    original_parsed.hostname != self.AUTH_HOST:
                del headers['Authorization']
        return

# create session with the user credentials that will be used to authenticate access to the data
username = "USERNAME"
password = "PASSWORD"
session = SessionWithHeaderRedirection(username, password)

# ***********************
# Loop through Files
# ***********************
for single_date in daterange(start_date, end_date):
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")
    fpath1 = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/' + YYYY + '/' + MM + '/'
    fpath2 = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc?'
    fpath3 = 'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],' \
             'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],' \
             'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],' \
             'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],' \
             'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],' \
             'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],' \
             'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],' \
             'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],' \
             'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
    url = fpath1 + fpath2 + fpath3
    # print(url)

    # extract the filename from the url to be used when saving the file
    filename = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc'
    print(filename)

    try:
        # submit the request using the session
        response = session.get(url, stream=True)
        print(response.status_code)
        # raise an exception in case of http errors
        response.raise_for_status()

        # save the file
        with open(filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                fd.write(chunk)
    except requests.exceptions.HTTPError as e:
        # handle any errors here
        print(e)
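As an aside (an assumption on my part, not something from the original post): the credentials can also be kept out of the script in a ~/.netrc file and read with the standard netrc module, with the session class above reused unchanged.

# Hypothetical variant: keep the credentials in ~/.netrc instead of the script.
# The ~/.netrc entry would look like:
#     machine urs.earthdata.nasa.gov login USERNAME password PASSWORD
import netrc

creds = netrc.netrc()
login, _, pw = creds.authenticators("urs.earthdata.nasa.gov")
session = SessionWithHeaderRedirection(login, pw)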
I am currently having trouble inserting a float into my SQL query. The value is pulled from an API and is normally a very precise number, and I cannot get Python to insert it into my database; I have even tried rounding it down, but it still throws errors. Here are the errors as well as the code:
Error
Traceback (most recent call last):
  File "main.py", line 12, in <module>
    ic.main()
  File "/home/sage/Dev/market_Mon/interface_Controller.py", line 32, in main
    ccr.config_Load_Cur()
  File "/home/sage/Dev/market_Mon/config_Controller_Rebuild.py", line 41, in config_Load_Cur
    dbc.database_Insert(str(arr[0]), str(arr[0]), str(arr[1]), arr[2], time.time())
  File "/home/sage/Dev/market_Mon/database_Controller.py", line 14, in database_Insert
    query = "INSERT INTO " + '`'+ table + '`' + " VALUES('`id` INTEGER PRIMARY KEY','" + ticker + "', '" + currency + "', '" + exchange + "', '" + date + "')"
TypeError: coercing to Unicode: need string or buffer, float found
Code:
arr[2] is the value that comes back as a float. Note that I've removed all my attempts at resolving this issue because they just get in the way of seeing exactly how it works.
from decimal import Decimal, ROUND_HALF_UP
import ConfigParser
import io
import json_Controller as jsc
import database_Controller as dbc
import time
from colorama import init, Fore, Back, Style
import threading
import re
import json
from sys import stdout

#Function used to add new currency tickers into config.ini
def config_Add_Cur(currency):
    conf = open('config.ini', 'w')
    config = ConfigParser.ConfigParser()
    config.add('currency', currency, True)
    print (currency + 'added to config.ini')

#Function used to load all currency data from config.ini and to pull current
#market data related to the relative tickers
def config_Load_Cur():
    api_Key = '967OAAEVKJ9WT8F1'
    arr = []
    whilel = 1
    var = 1
    with open('config.ini') as f:
        conf = f.read()
    config = ConfigParser.RawConfigParser(allow_no_value=True)
    config.readfp(io.BytesIO(conf))
    while whilel == 1:
        for (each_key, each_val) in config.items('currency'):
            try:
                currency = each_key
                data = jsc.json_Import_Cur(currency, api_Key)
                arr.insert(0, data['Realtime Currency Exchange Rate']['3. To_Currency Code'])
                arr.insert(1, data['Realtime Currency Exchange Rate']['4. To_Currency Name'])
                arr.insert(2, data['Realtime Currency Exchange Rate']['5. Exchange Rate'])
                arr.insert(3, data['Realtime Currency Exchange Rate']['6. Last Refreshed'])
                dbc.database_Insert(str(arr[0]), str(arr[0]), str(arr[1]), arr[2], time.time())
                print (Fore.GREEN + "-------------------------------------------------------------------------------------------")
                print (Back.WHITE + Style.DIM + Fore.CYAN + arr[0] + Style.NORMAL + Fore.YELLOW + arr[1] + Fore.MAGENTA + Back.WHITE + arr[2] + Fore.RED + Back.WHITE + arr[3] + Fore.BLUE + Back.WHITE + " inserted into database" + Style.RESET_ALL)
                if var == 4:
                    print(Fore.GREEN + "-------------------------------------------------------------------------------------------")
                    print (Back.WHITE + Fore.RED + "Sleeping 60 Seconds" + Style.RESET_ALL)
                    time.sleep(60)
                    var = 1
                else:
                    var = var + 1
            except(KeyError):
                pass
Database Controller:
import sqlite3
from colorama import init

#Initializes database connection and returns connection to function caller
def database_Connect():
    connection = sqlite3.connect('cur_Monitor.db')
    return connection

#Inserts new rows into a table, this is created to ensure uniformity between rows
def database_Insert(table, ticker, currency, exchange, date):
    try:
        sql = database_Connect()
        query = "INSERT INTO " + '`'+ table + '`' + " VALUES('`id` INTEGER PRIMARY KEY','" + ticker + "', '" + currency + "', '" + exchange + "', '" + date + "')"
        sql.execute(query)
        sql.commit()
    except():
        pass
        print "Error occurred databasing!"

#Queries the table, and returns all values which either match or include the
#name string in their overall value
def database_Read(table, currency):
    sql = database_Connect()
    query = "SELECT * FROM " + table + " WHERE currency LIKE '" + currency + "%'"
    for row in sql.execute(query):
        return row

#Creates new tables into the database, this will always use the same format
#to ensure uniformity between tables
def database_Table(name):
    sql = database_Connect()
    query = "CREATE TABLE `" + name + "` (`id` INTEGER PRIMARY KEY,`ticker` text , `currency` text, `exchange` float, `date` timestamp)"
    sql.execute(query)
    sql.commit()
    print name + " New table created!"

#Allows the ordering of table read outs to help allow users to view data better
def database_Order(table, order):
    sql = database_Connect()
    query = "SELECT * FROM " + table + " ORDER BY " + order
    for row in sql.execute(query):
        print row
It looks as though you have not cast your exchange rate to a string before concatenating it with the rest of your SQL query. You do this for the other values passed to dbc.database_Insert, but not for the exchange rate.
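A minimal sketch of that idea, switched to sqlite3 parameter placeholders so the driver handles the types and the quoting (the USD table here is only for illustration, and an in-memory database is used so the snippet is self-contained):

import sqlite3
import time

conn = sqlite3.connect(':memory:')  # throwaway in-memory DB for the sketch
conn.execute("CREATE TABLE `USD` (`id` INTEGER PRIMARY KEY, `ticker` text, "
             "`currency` text, `exchange` float, `date` timestamp)")

def database_Insert(conn, table, ticker, currency, exchange, date):
    # the table name cannot be a placeholder, but every value can be one,
    # so the float exchange rate no longer needs a manual str() cast
    query = "INSERT INTO `" + table + "` (ticker, currency, exchange, date) VALUES (?, ?, ?, ?)"
    conn.execute(query, (ticker, currency, exchange, date))
    conn.commit()

database_Insert(conn, 'USD', 'USD', 'United States Dollar', 0.0123456789, time.time())
print(conn.execute("SELECT * FROM `USD`").fetchall())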
I don't get why it keeps giving me a "list index out of range" error for sys.argv[1]. From my understanding, I am passing data to users_database. Help please.
import sys, MySQLdb

def PrintFields(database, table):
    host = 'localhost'
    user = 'root'
    password = 'boysnblue1'
    conn = MySQLdb.Connection(db=parking_report, host=localhost, user=root, passwd=boysnblue1)
    mysql = conn.cursor()
    sql = """ SHOW COLUMNS FROM %s """ % table
    mysql.execute("select id, date, time, status, from report_table ")
    fields = mysql.fetchall()
    print '<table border="0"><tr><th>order</th><th>name</th><th>type</th><th>description</th></tr>'
    print '<tbody>'
    counter = 0
    for field in fields:
        counter = counter + 1
        id = field[0]
        date = field[1]
        time = field[2]
        status = field[3]
        print '<tr><td>' + str(counter) + '</td><td>' + id + '</td><td>' + date + '</td><td>' + time + '</td><td>' + status + ' </td></tr>'
    print '</tbody>'
    print '</table>'
    mysql.close()
    conn.close()

users_database = sys.argv[1]
users_table = sys.argv[2]

print "Wikified HTML for " + users_database + "." + users_table
print "========================"
PrintFields(users_database, users_table)
sys.argv is a list containing the name of the program's file and all of the arguments it was passed on the command line.
If you run python script2.py, the contents of sys.argv will be ['script2.py'].
If you run python script2.py database_name table_name, the contents of sys.argv will be ['script2.py', 'database_name', 'table_name'], which is what your program is currently configured to expect:
users_database = sys.argv[1]
users_table = sys.argv[2]
Since you are calling it the first way, sys.argv[1] does not exist, and you get your error that the index (1) is out of range (it only goes to 0).
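A small guard at the top of the script turns that crash into a readable usage message; a minimal sketch:

import sys

if len(sys.argv) < 3:
    # not enough arguments were supplied on the command line
    print("usage: python script2.py <database> <table>")
    sys.exit(1)

users_database = sys.argv[1]
users_table = sys.argv[2]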