I'm trying to get Yahoo prices into SQLite. I have the code below, but I can't get the data into the ipull list and then into SQLite.
from urllib import urlopen
import win32com.client as win32
import sqlite3

RANGE = range(3, 8)
COLS = ('TICKER', 'PRICE', 'Vol')
URL = 'http://quote.yahoo.com/d/quotes.csv?s=%s&f=sl1v'
TICKS = ('GGP', 'JPM', 'AIG', 'AMZN')
ipull = []

def excel():
    app = 'Excel'
    xl = win32.gencache.EnsureDispatch('%s.Application' % app)
    ss = xl.Workbooks.Add()
    sh = ss.ActiveSheet
    xl.Visible = True

    for x in range(3):
        sh.Cells(5, x+1).Value = COLS[x]

    row = 6
    u = urlopen(URL % ','.join(TICKS))
    for data in u:
        tick, price, per = data.split(',')
        sh.Cells(row, 1).Value = eval(tick)
        sh.Cells(row, 2).Value = ('%.2f' % float(price))
        sh.Cells(row, 3).Value = eval(per.rstrip())
        row += 1
    u.close()

    con = sqlite3.connect('/py/data/db2')
    c = con.cursor()
    c.execute('INSERT INTO prices VALUES (?,?,?)', ipull)
    con.commit()
    c.close()

if __name__ == '__main__':
    excel()
You declare ipull = [], but you never append anything to it, so the INSERT at the end runs with an empty list.
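A minimal sketch of the fix (assuming the prices table has exactly three columns, and using strip('"') in place of the eval calls): append each parsed row to ipull inside the download loop, then insert everything at once with executemany:

for data in u:
    tick, price, per = data.split(',')
    # Collect the row for SQLite as well as writing it to Excel
    ipull.append((tick.strip('"'), float(price), int(per.rstrip())))
u.close()

con = sqlite3.connect('/py/data/db2')
c = con.cursor()
c.executemany('INSERT INTO prices VALUES (?,?,?)', ipull)  # one row per tuple
con.commit()
c.close()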
I am trying to split up a JSON file from Alpha Vantage's API into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with, but it gives me "TypeError: 'list' object is not callable". I'm fairly new to Python and pandas, so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json

symbol = "MSFT"
symbol_list = symbol.split(",")

def num_el(list):
    count = 0
    for element in list:
        count += 1
    return count

def csv_make(sy, dar, dat):
    csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
    csv_file.write(dat)
    csv_file.close()

i = 0
x = -1
n = num_el(symbol_list)
while i < n:
    namesym = symbol_list[x]
    ticker = namesym
    api_key = 'APIKEYHERE'
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&outputsize=full&interval=1min&apikey={api_key}'
    data = requests.get(url)
    dsf = data.json()
    daf = pd.DataFrame(dsf['Time Series (1min)'])
    dxf: DataFrame = daf.T
    dxf.index.name = 'time'
    dxf.reset_index(inplace=True)
    dxf['time'] = pd.to_datetime(dxf['time'])
    dxf['minute'] = dxf['time'].dt.time
    dxf['day'] = dxf['time'].dt.day
    dxf['date'] = dxf['time'].dt.date
    agg = dxf.groupby([dxf['day']])
    length1 = dxf.groupby([dxf['day']]).size()
    length = pd.DataFrame(length1)
    length.index.name = 'day'
    length.reset_index(inplace=True)
    length_sum = length[0].sum()
    v = 0
    d = length_sum
    b = len(length)
    x2 = length_sum
    while v < b:
        a = length[0][v]
        x2 -= length[0][v]
        xd = agg.get_group(length['day'][v])
        date = xd['date'][x2]
        max_dt = parser.parse(str(max(xd['minute'])))
        min_dt = parser.parse(str(min(xd['minute'])))
        dt_range = []
        while min_dt <= max_dt:
            dt_range.append(min_dt.strftime("%H:%M:%S"))
            min_dt += timedelta(seconds=60)
        complete_df = pd.DataFrame({'minute': dt_range})
        xy = complete_df.astype('str')
        yx = xd.astype('str')
        dasf = xy.merge(yx, how='left', on='minute')
        dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
        time = []
        open = []
        high = []
        low = []
        close = []
        volume = []
        empty_value = []
        for ib in range(len(dasf)):
            time.append(dasf['minute'][ib])
            open.append(dasf['1. open'][ib])
            high.append(dasf['2. high'][ib])
            low.append(dasf['3. low'][ib])
            close.append(dasf['4. close'][ib])
            volume.append(dasf['5. volume'][ib])
            empty_value.append(dasf['ev'][ib])
        time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
        open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
        high_df = pd.DataFrame(high).rename(columns={0: 'High'})
        low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
        close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
        volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
        empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
        frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
        df = pd.concat(frames, axis=1, join='inner')
        df = df.set_index('Time')
        ad = df.to_csv()
        csv_make(namesym, date, ad)
        v += 1
    i += 1
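For what it's worth, a likely culprit for that TypeError: open = [] rebinds the built-in open at module level, so when csv_make later calls open(...) it finds your list instead of the function (the list parameter of num_el shadows a built-in the same way). A minimal sketch of the rename:

open_prices = []                        # was: open = []
for ib in range(len(dasf)):
    open_prices.append(dasf['1. open'][ib])
open_df = pd.DataFrame(open_prices).rename(columns={0: 'Open'})
# csv_make() can now reach the real built-in open()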
When trying to scrape multiple pages of this website, I get no content in return. I usually check to make sure all the lists I'm creating are of equal length, but all are coming back as len = 0.
I've used similar code to scrape other websites, so why does this code not work correctly?
Some solutions I've tried that haven't worked for my purposes: the requests.Session() approach suggested in this answer, and .json as suggested here.
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
from time import sleep
from random import randint
from googletrans import Translator

translator = Translator()

rg = []
ctr_n = []
ctr = []
yr = []
mn = []
sub = []
cst_n = []
cst = []
mag = []
pty_n = []
pty = []
can = []
pev1 = []
vot1 = []
vv1 = []
ivv1 = []
to1 = []
cv1 = []
cvs1 = []
pv1 = []
pvs1 = []
pev2 = []
vot2 = []
vv2 = []
ivv2 = []
to2 = []
cv2 = []
cvs2 = []
pv2 = []
pvs2 = []
seat = []
no_info = []
manual = []

START_PAGE = 1
END_PAGE = 42

for page in range(START_PAGE, END_PAGE + 1):
    page = requests.get("https://sejmsenat2019.pkw.gov.pl/sejmsenat2019/en/wyniki/sejm/okr/" + str(page))
    page.encoding = page.apparent_encoding
    if not page:
        pass
    else:
        soup = BeautifulSoup(page.text, 'html.parser')
        tbody = soup.find_all('table', class_='table table-borderd table-striped table-hover dataTable no-footer clickable right2 right4')
        sleep(randint(2, 10))
        for container in tbody:
            col1 = container.find_all('tr', {'data-id': '26079'})
            for info in col1:
                col_1 = info.find_all('td')
                for data in col_1:
                    party = data[0]
                    party_trans = translator.translate(party)
                    pty_n.append(party_trans)
                    pvotes = data[1]
                    pv1.append(pvotes)
                    pshare = data[2]
                    pvs1.append(pshare)
                    mandates = data[3]
                    seat.append(mandates)
            col2 = container.find_all('tr', {'data-id': '26075'})
            for info in col2:
                col_2 = info.find_all('td')
                for data in col_2:
                    party2 = data[0]
                    party_trans2 = translator.translate(party2)
                    pty_n.append(party_trans2)
                    pvotes2 = data[1]
                    pv1.append(pvotes2)
                    pshare2 = data[2]
                    pvs1.append(pshare2)
                    mandates2 = data[3]
                    seat.append(mandates2)
            col3 = container.find_all('tr', {'data-id': '26063'})
            for info in col3:
                col_3 = info.find_all('td')
                for data in col_3:
                    party3 = data[0].text
                    party_trans3 = translator.translate(party3)
                    pty_n.extend(party_trans3)
                    pvotes3 = data[1].text
                    pv1.extend(pvotes3)
                    pshare3 = data[2].text
                    pvs1.extend(pshare3)
                    mandates3 = data[3].text
                    seat.extend(mandates3)
            col4 = container.find_all('tr', {'data-id': '26091'})
            for info in col4:
                col_4 = info.find_all('td', recursive=True)
                for data in col_4:
                    party4 = data[0]
                    party_trans4 = translator.translate(party4)
                    pty_n.extend(party_trans4)
                    pvotes4 = data[1]
                    pv1.extend(pvotes4)
                    pshare4 = data[2]
                    pvs1.extend(pshare4)
                    mandates4 = data[3]
                    seat.extend(mandates4)
            col5 = container.find_all('tr', {'data-id': '26073'})
            for info in col5:
                col_5 = info.find_all('td')
                for data in col_5:
                    party5 = data[0]
                    party_trans5 = translator.translate(party5)
                    pty_n.extend(party_trans5)
                    pvotes5 = data[1]
                    pv1.extend(pvotes5)
                    pshare5 = data[2]
                    pvs1.extend(pshare5)
                    mandates5 = data[3]
                    seat.extend(mandates5)
            col6 = container.find_all('tr', {'data-id': '26080'})
            for info in col6:
                col_6 = info.find_all('td')
                for data in col_6:
                    party6 = data[0]
                    party_trans6 = translator.translate(party6)
                    pty_n.extend(party_trans6)
                    pvotes6 = data[1]
                    pv1.extend(pvotes6)
                    pshare6 = data[2]
                    pvs1.extend(pshare6)
                    mandates6 = data[3]
                    seat.extend(mandates6)

        #### TOTAL VOTES ####
        tfoot = soup.find_all('tfoot')
        for data in tfoot:
            fvote = data.find_all('td')
            for info in fvote:
                votefinal = info.find(text=True).get_text()
                fvoteindiv = [votefinal]
                fvotelist = fvoteindiv * (len(pty_n) - len(vot1))
                vot1.extend(fvotelist)

        #### CONSTITUENCY NAMES ####
        constit = soup.find_all('a', class_='btn btn-link last')
        for data in constit:
            names = data.get_text()
            names_clean = names.replace("Sejum Constituency no.", "")
            names_clean2 = names_clean.replace("[", "")
            names_clean3 = names_clean2.replace("]", "")
            namesfinal = names_clean3.split()[1]
            constitindiv = [namesfinal]
            constitlist = constitindiv * (len(pty_n) - len(cst_n))
            cst_n.extend(constitlist)

        #### UNSCRAPABLE INFO ####
        region = 'Europe'
        reg2 = [region]
        reglist = reg2 * (len(pty_n) - len(rg))
        rg.extend(reglist)
        country = 'Poland'
        ctr2 = [country]
        ctrlist = ctr2 * (len(pty_n) - len(ctr_n))
        ctr_n.extend(ctrlist)
        year = '2019'
        yr2 = [year]
        yrlist = yr2 * (len(pty_n) - len(yr))
        yr.extend(yrlist)
        month = '10'
        mo2 = [month]
        molist = mo2 * (len(pty_n) - len(mn))
        mn.extend(molist)
        codes = ''
        codes2 = [codes]
        codeslist = codes2 * (len(pty_n) - len(manual))
        manual.extend(codeslist)
        noinfo = '-990'
        noinfo2 = [noinfo]
        noinfolist = noinfo2 * (len(pty_n) - len(no_info))
        no_info.extend(noinfolist)

print(len(rg), len(pty_n), len(pv1), len(pvs1), len(no_info), len(vot1), len(cst_n))

poland19 = pd.DataFrame({
    'rg': rg,
    'ctr_n': ctr_n,
    'ctr': manual,
    'yr': yr,
    'mn': mn,
    'sub': manual,
    'cst_n': cst_n,
    'cst': manual,
    'mag': manual,
    'pty_n': pty_n,
    'pty': manual,
    'can': can,
    'pev1': no_info,
    'vot1': vot1,
    'vv1': vot1,
    'ivv1': no_info,
    'to1': no_info,
    'cv1': no_info,
    'cvs1': no_info,
    'pv1': cv1,
    'pvs1': cvs1,
    'pev2': no_info,
    'vot2': no_info,
    'vv2': no_info,
    'ivv2': no_info,
    'to2': no_info,
    'cv2': no_info,
    'cvs2': no_info,
    'pv2': no_info,
    'pvs2': no_info,
    'seat': manual
})

print(poland19)
poland19.to_csv('poland_19.csv')
As commented, you probably need to use Selenium. You could drop the requests lib and replace the request statements with something like this:
from selenium import webdriver
wd = webdriver.Chrome('pathToChromeDriver') # or any other Browser driver
wd.get(url) # instead of requests.get()
soup = BeautifulSoup(wd.page_source, 'html.parser')
You need to follow the instructions at https://selenium-python.readthedocs.io/ to install and set up the selenium lib.
Note: I tested your code with selenium and I was able to get the table you were looking for, but selecting it with class_=... did not work for some reason.
Browsing the scraped data instead, I found that the table has an id attribute. So maybe try this instead:
tbody = soup.find_all('table', id="DataTables_Table_0")
And again, do the GET requests with the selenium lib.
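Putting the two pieces together, a rough sketch of the page loop (the driver path and the DataTables_Table_0 id are assumptions; check them against your setup and the live page):

from selenium import webdriver
from bs4 import BeautifulSoup
from time import sleep
from random import randint

wd = webdriver.Chrome('pathToChromeDriver')  # assumption: adjust to your driver location
for page_no in range(START_PAGE, END_PAGE + 1):
    wd.get("https://sejmsenat2019.pkw.gov.pl/sejmsenat2019/en/wyniki/sejm/okr/" + str(page_no))
    sleep(randint(2, 10))                    # give the JavaScript-rendered table time to appear
    soup = BeautifulSoup(wd.page_source, 'html.parser')
    tbody = soup.find_all('table', id="DataTables_Table_0")
    # ... parse tbody exactly as in the original loop ...
wd.quit()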
Hope that was helpful :)
Cheers
The code I wrote in Python for an infinite loop works fine the first time through. However, it gives the following message on the second pass:
Traceback (most recent call last):
File "C:/Users/dell/PycharmProjects/pythonProject/main.py", line 27, in
symbol = str(symbol)
TypeError: 'tuple' object is not callable
Any ideas why I only get this message on the second run?
from xlrd import open_workbook
import win32com.client as win32
from oandapyV20.contrib.requests import MarketOrderRequest
from oandapyV20.contrib.requests import TakeProfitDetails, StopLossDetails
import oandapyV20.endpoints.orders as orders
import oandapyV20
from oandapyV20 import API
import oandapyV20.endpoints.accounts as accounts
import oandapyV20.endpoints.pricing as pricing
import oandapyV20.endpoints.positions as positions
import easygui
import tkinter as tk
import time

while True:
    time.sleep(5)
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    for wb in excel.Workbooks:
        if wb.Name == 'forex2.xlsx':
            wb.Save()
    wb = open_workbook('C:/Users/dell/Documents/forex2.xlsx')
    xl_sheet = wb.sheet_by_index(0)
    marginrate = xl_sheet.cell(1, 2)
    symbol = xl_sheet.cell(1, 1)
    symbol = str(symbol)
    marginrate = str(marginrate)
    symbol = symbol.replace("text:", "")
    marginrate = 20
    symbol = symbol.replace("'", "")
    print("Symbol:", symbol)
    print("Margin Rate:", marginrate)
    access_token = "XXXX"
    accountID = "XXXX"
    client = API(access_token=access_token)
    r = accounts.AccountDetails(accountID)
    client.request(r)
    dict = r.response
    params = {"instruments": symbol}
    r2 = pricing.PricingInfo(accountID=accountID, params=params)
    rv2 = client.request(r2)
    a = list(rv2.items())[1][1][0]
    ask = float(a['closeoutAsk'])
    print("Starting Ask:", ask)
    a = list(dict.items())[0][1]
    marginUsed = float(list(a.items())[25][1])
    marginAvailable = float(list(a.items())[26][1])
    balance = float(list(a.items())[9][1])
    print("Margin Available:", marginAvailable)
    print("Balance:", balance)
    print("Margin Used + Margin Available:", balance)
    STOP_LOSS = .001
    TAKE_PROFIT = 100000
    units0 = round((marginrate * marginAvailable) / ask * .95)
    print("Order Units:", units0)
    mktOrder = MarketOrderRequest(
        instrument=symbol,
        units=units0,
        takeProfitOnFill=TakeProfitDetails(price=TAKE_PROFIT).data,
        stopLossOnFill=StopLossDetails(price=STOP_LOSS).data)
    r = orders.OrderCreate(accountID, data=mktOrder.data)
    try:
        rv = client.request(r)
    except oandapyV20.exceptions.V20Error as err:
        print("")
        print("UNITS_INVALID")
    else:
        print("")
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    for wb in excel.Workbooks:
        if wb.Name == 'forex2.xlsx':
            wb.Save()
    book = open_workbook('C:/Users/dell/Documents/forex2.xlsx')
    r = positions.PositionList(accountID=accountID)
    client.request(r)
    dict = r.response
    a = list(dict.items())[0][1]
    for i, element in enumerate(a):
        long = a[i]
        long2 = long['long']
        symbol = long['instrument']
        try:
            averagePrice = long2['averagePrice']
        except:
            pass
        else:
            window = tk.Tk()
            frame_a = tk.Frame()
            label_a = tk.Label(master=frame_a, text="Hello")
            label_a.pack()
            frame_a.pack()
            for sheet in book.sheets():
                for rowidx in range(sheet.nrows):
                    row = sheet.row(rowidx)
                    for colidx, cell in enumerate(row):
                        if cell.value == symbol:
                            row = rowidx + 3
            current_bid = sheet.cell(1, row)
            current_bid = str(current_bid)
            current_bid = float(current_bid.replace("number:", ""))
            str = "Beginning Balance:", balance, "Current Bid:", current_bid, "Average Price:", averagePrice, "Current Profit:", round(
                (current_bid - float(averagePrice)) * units0, 2)
            print(str)
            data = {"longUnits": "ALL"}
            r = positions.PositionClose(accountID=accountID, instrument=symbol, data=data)
            client.request(r)
The problem is that you're using

str = "Beginning Balance:", balance, "Current Bid:", current_bid, "Average Price:", averagePrice, "Current Profit:", round((current_bid - float(averagePrice)) * units0, 2)

which rebinds the name str to a tuple, shadowing the built-in str function. On the next pass through the while loop, symbol = str(symbol) then tries to call that tuple, hence the TypeError. Rename this variable and it should work fine.
I have some code that puts data into a list. How can I import the data from my list into a database?
import psycopg2
import random
import string
import time

conn = psycopg2.connect(host="localhost", database="postgres", user="postgres", password="potatona1")
cursor = conn.cursor()

FullChar = 'CEFLMPRTVWXYK0123456789#'
total = 4
count = 10
count = int(count)

for i in range(1000):
    for x in range(total):
        unique_code = ''.join(random.sample(FullChar, count - 1)) + '#'
        unique_code = ''.join(random.sample(unique_code, len(unique_code)))
        list(unique_code)
        postgres_insert_query = """ INSERT INTO employees (id_employee, name) VALUES (%s,%s)"""
        record_to_insert = (1, unique_code)
        cursor.execute(postgres_insert_query, record_to_insert)
        conn.commit()

count = cursor.rowcount
print(count, "Record inserted successfully into mobile table")
I want to import 1000 rows into PostgreSQL with Python.
I just tried this, and it works:
conn = psycopg2.connect(host="192.168.13.10", database="postgres", port="5432", user="postgres", password="potatona1")
cursor = conn.cursor()

FullChar = 'CEFLMPRTVWXYK0123456789'
total = 1000
count = 10
count = int(count)
entries = []
bcd = ""
flg = ""
rll = ""

def inputDatabase(data):
    postgres_insert_query = """INSERT INTO unique_code(unique_code, barcode, flag, roll) VALUES (%s,%s,%s,%s)"""
    cursor.executemany(postgres_insert_query, data)
    conn.commit()

for i in range(5):
    for x in range(total):  # number of codes to print
        unique_code = ''.join(random.sample(FullChar, count - 1))
        unique_code = ''.join(random.sample(unique_code, len(unique_code)))
        entry = (unique_code, bcd, flg, rll)
        entries.append(entry)
    inputDatabase(entries)
    print(i)

count = cursor.rowcount
print(count, "Record inserted successfully into mobile table")
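The speedup here comes from batching: executemany sends the accumulated entries once per outer pass instead of committing row by row. As an optional further step, psycopg2's execute_values folds the whole batch into a single multi-row INSERT; a sketch against the same unique_code table:

from psycopg2.extras import execute_values

def input_database(data):
    # One multi-row INSERT instead of many single-row statements
    execute_values(cursor,
                   "INSERT INTO unique_code (unique_code, barcode, flag, roll) VALUES %s",
                   data)
    conn.commit()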
I need to calculate server prices, so I made a function that retrieves the default prices of server components and calculates the price of each server in my DB. But when I try to run this function, I get an error:
function.py
import MySQLdb

def calculations_metric(param):
    db = MySQLdb.connect("localhost", "root", "aqw", "PFE_Project")
    cursor = db.cursor()
    sql = "SELECT * FROM examples_calculationsmetric"
    cursor.execute(sql)
    results = cursor.fetchall()
    for row in results:
        RAM_prices = int(row[1])
        Core_prices = int(row[2])
        HHD_SATA_prices = int(row[3])
        HHD_SSD_prices = int(row[4])
        CPU_priority = int(row[5])
        Avaibility = int(row[6])
    db.close()
    db1 = MySQLdb.connect("localhost", "root", "aqw", "PFE_Project")
    cursor1 = db1.cursor()
    sql1 = "SELECT * FROM examples_servercomponents WHERE id ='%d'" % (param)
    cursor1.execute(sql1)
    results1 = cursor1.fetchall()
    for row in results1:
        if row[6] == 'SATA':
            Core_price = int(row[2]) * Core_prices  # the error is here
            Priority_price = int(row[3]) * CPU_priority
            RAM_price = int(row[4]) * RAM_prices
            HDD_price = int(row[5]) * HHD_SATA_prices
            Availibility_price = int(row[7]) * Avaibility
        elif row[6] == 'SSD':
            Core_price = int(row[2]) * Core_prices
            Priority_price = int(row[3]) * CPU_priority
            RAM_price = int(row[4]) * RAM_prices
            HDD_price = int(row[5]) * HHD_SSD_prices
            Availibility_price = int(row[7]) * Avaibility
    price = Core_price + Priority_price + RAM_price + HDD_price + Availibility_price
    db1.close()
    return price
I don't get what the error is, so if anyone can help I will be grateful.
When your SELECT * FROM examples_calculationsmetric doesn't return any results, Core_prices is never set (nor are the other variables in that loop).
Python names do not exist until assigned to, so if results is an empty list, the names inside the for loop never get assigned to and thus do not exist by the time you loop over results1 later on.
You could set default values for those names as a work-around:
RAM_prices = 0
Core_prices = 0
HHD_SATA_prices = 0
HHD_SSD_prices = 0
CPU_priority = 0
Avaibility = 0
to at least ensure that they are defined.
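If defaulting to 0 could silently produce wrong prices, an alternative sketch is to fail fast when the metrics query returns nothing:

results = cursor.fetchall()
if not results:
    # Fail fast instead of silently pricing everything at zero
    raise ValueError("examples_calculationsmetric returned no rows")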