import requests
import pandas as pd
import os
import io
import time
import csv
# Method 1: pre-enter the list of stocks in stock_list.
# stock_list = ['QQQ', 'AAPL', 'TSLA', 'AMZN', 'GOOG',
#               'MSFT', 'META', 'BA', 'PFE', 'MRNA', 'BAC']
stock_list = ['TSLA', 'XLE']

path = 'C:/Users/bean/Desktop'
API = 'APIKEY'

# Make sure each symbol has its own months folder.
for stock in stock_list:
    os.chdir(path)
    symbol = stock
    if not os.path.exists(os.path.join(path, symbol)):
        os.makedirs(os.path.join(symbol, 'months'))

# Slice names for the API calls: 'year1month1' through 'year2month12'.
month_slices = [f'year{y}month{m}' for y in (1, 2) for m in range(1, 13)]

# Get all URL links.
urls = []
for stock in stock_list:
    for slice in month_slices:
        url = (f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY_EXTENDED'
               f'&symbol={stock}&interval=1min&slice={slice}&apikey={API}')
        urls.append(url)
        print(url)

# Download and append the data.
data = []
counter = 0
for url in urls:
    response = requests.get(url)
    df = pd.read_csv(io.BytesIO(response.content))
    df.to_csv(f'C:/Users/bean/Desktop/{stock}/months/{stock}_{slice}.csv', index=False)
    data.append(df)
    counter += 1
    if counter % 5 == 0:
        print(f'counter is: {counter} for symbol: {stock}.')
        print('Sleeping one minute. API allows 5 calls per minute; 500 total daily.')
        time.sleep(60)
        counter = 0

# Combine and save sheets to your destination.
months_df = pd.concat(data)
months_df.to_csv(f'C:/Users/bean/Desktop/{stock}/combined_{stock}_data.csv', index=False)
print(f'finished: {months_df}')
Essentially, I am using Alpha Vantage to try to get minute data for the market. Can anyone help me with this code? I made a long version that works, but here I am trying to make it more concise using a loop and a counter. Since the free version of the API only allows 5 calls per minute, I need to make the program sleep. The problem is that, with the counter and the loop, when the program sleeps and comes back it only makes one API call instead of the next 5 like it should. Then it stops again for 60 seconds and proceeds again, one call at a time.
I am not sure why it wouldn't just repeat. I liked the idea of checking the remainder (% 5 == 0) because, if I have a lot of symbols in the list, it can keep going.
Does this have to do with the indentation of the counter?
Thanks
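For reference, a minimal sketch of the throttling pattern the question is aiming for, with a hypothetical placeholder urls list; the key point is that the increment and the remainder check both sit inside the download loop, at the same depth as the request itself:
import time

urls = ['url-1', 'url-2', 'url-3']  # hypothetical placeholder list
for counter, url in enumerate(urls, start=1):
    # ... make the API call and save the result here ...
    if counter % 5 == 0:  # True after every 5th call, however long the list is
        print(f'{counter} calls made; sleeping one minute.')
        time.sleep(60)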
I am trying to pull data for multiple tickers from the yfinance API and save it to a CSV file (in total I have 1000 tickers I need the data for, that data being the entire table of date, open, high, low, close, volume, etc.). So far I am able to successfully get data for one ticker using the following Python code:
import yfinance as yf

def yfinance(ticker_symbol):
    ticker_data = yf.Ticker(ticker_symbol)
    tickerDF = ticker_data.history(period='1d', start='2020-09-30', end='2020-10-31')
    print(tickerDF)

yfinance('000001.SS')
However, this doesn't work if I try multiple tickers. The yfinance docs say that for multiple tickers you should use:
tickers = yf.Tickers('msft aapl goog')
# ^ returns a named tuple of Ticker objects
# access each ticker using (example)
tickers.tickers.MSFT.info
tickers.tickers.AAPL.history(period="1mo")
tickers.tickers.GOOG.actions
I have a couple of issues here. The docs use strings such as 'aapl', but my tickers are all in a digit format like '000001.SS', and the '.SS' part is proving to be an issue when passing it into the code:
tickers.tickers.000001.SS.history(period="1mo")
# Clearly this won't work, for a start
The next issue I am having is that, even if I pass in, for example, 3 tickers to my function like so:
yfinance('000001.SS 000050.KS 00006.KS')
# similar to yfinance docs of tickers = yf.Tickers('msft aapl goog')
I get errors like:
AttributeError: 'Tickers' object has no attribute '000001.SS'
(I have also tried running these in a for loop and passing each one to the Tickers object, but I get the same error.)
I'm stuck now. I don't know how to pass multiple tickers to yfinance and get back the data I want, and the docs aren't very helpful.
Is anyone able to help me with this?
Could you not just store them in an array, specifying the type as dtype object, then use that to pull the data from?
import yfinance as yf
import numpy as np

tickers = ['msft', 'aapl', 'goog']
totalPortfolio = np.empty([len(tickers)], dtype=object)
num = 0
for ticker in tickers:
    totalPortfolio[num] = yf.download(ticker, start='2020-09-30', end='2020-10-31', interval="1d")
    num = num + 1
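As a side note (not part of the original answer), yf.download can also take several symbols in a single call, which avoids the array bookkeeping entirely; with more than one symbol the result is a single DataFrame whose columns are a (field, ticker) MultiIndex:
import yfinance as yf

# one call for all symbols; columns become a (field, ticker) MultiIndex
data = yf.download(['msft', 'aapl', 'goog'],
                   start='2020-09-30', end='2020-10-31', interval='1d')
print(data['Close'])  # closing prices, one column per ticker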
Take a look at the code below:
test = yf.Tickers("A B C")
# creates test as a yf.tickers object
test_dict = test.tickers
# creates a dict object containing the individual tickers. Can be checked with type()
You are trying to use tickers.tickers.MSFT.info to retrieve the ticker data from your dictionary tickers.tickers, but as your error message says, a dict object has no attributes named after your specific ticker names. That is in general not how you access elements in a dictionary.
Instead you should use the code as below (like with all dict objects):
#old code from above
test = yf.Tickers("A B C")
test_dict = test.tickers
#new code accessing the dict correctly
a_data = test_dict["A"]
a_data = test.tickers["A"] #does the same as the line above
b_data = test.tickers["B"] #and so on for the other tickers
In a loop this could look something like this:
ticker_list = ["A", "B", "C"]  # add tickers as needed
tickers_data = {}
tickers_history = {}
for ticker in ticker_list:
    tickers_data[ticker] = yf.Ticker(ticker)
    # store each history under its own ticker key
    tickers_history[ticker] = tickers_data[ticker].history(period='1d', start='2020-09-30', end='2020-10-31')
# access the dicts as needed using tickers_data["your ticker name"]
Alternatively, you can also use the yf.Tickers function to retrieve multiple tickers at once, but because you save the history separately I don't think this will necessarily improve your code much.
You should pay attention, however: yf.Ticker() and yf.Tickers() are different functions with differing syntax and are not interchangeable.
You did mix that up when you tried accessing multiple tickers with your custom yfinance() function, which was previously defined with yf.Ticker() and thus only accepts one symbol at a time.
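A minimal sketch of that alternative, using the question's digit-format symbols (assuming they are valid on Yahoo Finance):
import yfinance as yf

tickers = yf.Tickers('000001.SS 000050.KS')  # one space-separated string
for symbol, ticker in tickers.tickers.items():  # tickers.tickers is a dict
    history = ticker.history(period='1d', start='2020-09-30', end='2020-10-31')
    print(symbol, history.shape)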
I am new to Python and currently working on a project that requires me to extract data from hundreds of websites that contain JSON data. I managed to scrape data from one website but have no idea how to scrape all the websites at once. Below is my code.
import openpyxl  # used by pandas as the engine for DataFrame.to_excel
import requests
import pandas as pd

url = "https://ws-public.interpol.int/notices/v1/red?ageMin=45&ageMax=60&arrestWarrantCountryId=US&resultPerPage=20&page=1"
response = requests.get(url)
response.raise_for_status()
data = response.json()['_embedded']['notices']

results = []  # renamed from "list" to avoid shadowing the builtin
for item in data:
    result = {
        "forename": item["forename"],
        "date_of_birth": item["date_of_birth"],
        "nationalities": item["nationalities"],
        "name": item["name"],
    }
    results.append(result)
# print(results)

df = pd.DataFrame(results)
df.to_excel("test.xlsx")
Example of other websites:
https://ws-public.interpol.int/notices/v1/red?arrestWarrantCountryId=BA&resultPerPage=20&page=5, https://ws-public.interpol.int/notices/v1/red?arrestWarrantCountryId=BA&resultPerPage=20&page=1,
I think this will work for you. You'll have to either add the URLs manually or write some logic to get them. I also noticed the JSON response includes the URL for the next page, so you could keep a list of all the first pages and use those to crawl through the rest, unless you can just get all the results in one JSON response. I also don't have Excel installed, so I used CSV instead, but it should work the same:
import requests
import pandas as pd

urls = [
    'https://ws-public.interpol.int/notices/v1/red?ageMin=45&ageMax=60&arrestWarrantCountryId=US&resultPerPage=20&page=1',
    'https://ws-public.interpol.int/notices/v1/red?arrestWarrantCountryId=BA&resultPerPage=20&page=5',
    'https://ws-public.interpol.int/notices/v1/red?arrestWarrantCountryId=BA&resultPerPage=20&page=1',
    # add more urls here, you could also use a file to store these
    # you could also write some logic to get the urls but you'd need to specify that logic
]

def get_data(url):
    data = requests.get(url).json()['_embedded']['notices']
    # filter the returned fields
    return [{k: v for k, v in row.items()
             if k in ['forename', 'date_of_birth', 'nationalities', 'name']}
            for row in data]

# collect the rows from each url, then build one DataFrame
# (DataFrame.append was removed in pandas 2.0, so concatenate instead)
frames = []
for url in urls:
    print(f'Processing {url}')
    frames.append(pd.DataFrame(get_data(url)))
df = pd.concat(frames, ignore_index=True)

# output to csv or whatever (I don't have excel installed so I did csv)
df.to_csv('data.csv')
# df.to_excel('data.xlsx')
Output (data.csv):
,forename,date_of_birth,nationalities,name
0,CARLOS LEOPOLDO,1971/10/31,['US'],ALVAREZ
1,MOHAMED ABDIAZIZ,1974/01/01,"['SO', 'ET']",KEROW
2,SEUXIS PAUCIS,1966/07/30,['CO'],HERNANDEZ-SOLARTE
3,JOHN G.,1966/10/20,"['PH', 'US']",PANALIGAN
4,SOFYAN ISKANDAR,1968/04/04,['ID'],NUGROHO
5,SOLOMON ANTHONY,1965/02/05,['TZ'],BANDIHO
6,ROLAND,1969/07/21,"['US', 'DE']",AGUILAR
7,FERNANDO,1972/07/25,['MX'],RODRIGUEZ
8,RAUL,1966/12/08,['US'],ORTEGA
9,DANIEL,1962/08/30,['US'],LEIJA
10,FRANCISCO,1961/10/23,['EC'],MARTINEZ
11,HORACIO CARLOS,1963/09/10,"['US', 'MX']",TERAN
12,FREDIS RENTERIA,1965/07/07,['CO'],TRUJILLO
13,JUAN EXEQUIEL,1968/08/18,['AR'],HEINZ
14,JIMMY JULIUS,1971/05/03,"['IL', 'US']",KAROW
15,JOHN,1959/10/28,['LY'],LOWRY
16,FIDEL,1959/07/25,['CO'],CASTRO MURILLO
17,EUDES,1968/12/20,['CO'],OJEDA OVANDO
18,BEJARNI,1968/07/12,"['US', 'NI']",RIVAS
19,DAVID,1973/12/02,['GT'],ALDANA
20,SLOBODAN,1952/10/02,['BA'],RIS
21,ALEN,1978/05/27,['BA'],DEMIROVIC
22,DRAGAN,1987/02/09,['ME'],GAJIC
23,JOZO,1968/03/03,"['HR', 'BA']",BRICO
24,ZHIYIN,1962/07/01,['CN'],XU
25,NOVAK,1955/04/10,['BA'],DUKIC
26,NEBOJSA,1973/01/08,['BA'],MILANOVIC
27,MURADIF,1960/04/12,['BA'],HAMZABEGOVIC
28,BOSKO,1940/11/25,"['RS', 'BA']",LUKIC
29,RATKO,1967/05/16,['BA'],SAMAC
30,BOGDAN,1973/04/05,['BA'],BOZIC
31,ZELJKO,1965/10/21,"['BA', 'HR']",RODIN
32,SASA,1973/04/19,['RS'],DUNOVIC
33,OBRAD,1964/03/10,['BA'],OZEGOVIC
34,SENAD,1981/03/01,['BA'],KAJTEZOVIC
35,MLADEN,1973/04/29,"['HR', 'BA']",MARKOVIC
36,PERO,1972/01/29,"['BA', 'HR']",MAJIC
37,MARCO,1968/04/12,"['BA', 'HR']",VIDOVIC
38,MIRSAD,1964/07/27,['HR'],SMAJIC
39,NIJAZ,1961/11/20,,SMAJIC
40,GOJKO,1959/10/08,['BA'],BORJAN
41,DUSAN,1954/06/25,"['RS', 'BA']",SPASOJEVIC
42,MIRSAD,1991/04/20,['BA'],CERIMOVIC
43,GORAN,1962/01/24,['BA'],TESIC
44,IZET,1970/09/18,"['RS', 'BA']",REDZOVIC
45,DRAGAN,1973/09/30,['BA'],STOJIC
46,MILOJKO,1962/05/19,"['BA', 'RS']",KOVACEVIC
47,DRAGAN,1971/11/07,"['RS', 'BA']",MARJANOVIC
48,ALEKSANDAR,1979/09/22,"['AT', 'BA']",RUZIC
49,MIRKO,1992/04/29,['BA'],ATELJEVIC
50,SLAVOJKA,1967/01/13,['BA'],MARINKOVIC
51,SLADAN,1968/03/09,"['BA', 'RS']",TASIC
52,ESED,1963/01/12,['BA'],ABDAGIC
53,DRAGOMIR,1954/01/29,"['RS', 'BA']",KEZUNOVIC
54,NEDZAD,1961/01/01,['BA'],KAHRIMANOVIC
55,NEVEN,1980/10/08,"['BA', 'SI']",STANIC
56,VISNJA,1972/04/12,"['RS', 'BA']",ACIMOVIC
57,MLADEN,1974/08/05,"['HR', 'DE', 'BA']",DZIDIC
58,IVICA,1964/12/23,"['BA', 'HR']",KOLOBARA
59,ZORAN,1963/11/08,"['BA', 'RS']",ADAMOVIC
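If you'd rather crawl through the pages than list every URL by hand, here is a minimal sketch of the next-page idea mentioned above; it assumes each payload exposes the following page's URL under _links['next']['href'], which you should verify against an actual response:
import requests

def crawl(first_page_url):
    """Follow next-page links starting from first_page_url, collecting notices."""
    rows, url = [], first_page_url
    while url:
        payload = requests.get(url).json()
        rows.extend(payload['_embedded']['notices'])
        # assumed location of the next-page link; the lookup returns None
        # on the last page, which stops the loop
        url = payload.get('_links', {}).get('next', {}).get('href')
    return rows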
I have an array of strings (stock ticker symbols) that I have scraped from Twitter. I scrape the symbols from one person's feed; however, the feed sometimes has multiple tweets about the same ticker, which then repeats multiple times in my array. How do I stop a stock ticker from repeating in my array?
Here is my code
import csv
import urllib.request
from bs4 import BeautifulSoup
twiturl = "https://twitter.com/ACInvestorBlog"
twitpage = urllib.request.urlopen(twiturl)
soup = BeautifulSoup(twitpage,"html.parser")
tweets = [i.text for i in soup.select('a.twitter-cashtag.pretty-link.js-nav b')]
print(tweets)
Here is what prints out:
['AYTU', 'AYTU', 'AYTU', 'AYTU', 'INDU', 'JPM', 'BAC', 'INPX', 'MSFT', 'SPX', 'HMNY', 'YTEN', 'INPX', 'MACK', 'KDMN', 'AMBA', 'KDMN', 'KDMN', 'MACK']
Use a set comprehension instead of the list comprehension that you're using:
tweets = {i.text for i in soup.select('a.twitter-cashtag.pretty-link.js-nav b')}
You can transform the set back into a list if you need to (note that sets are unordered, so the original order of the tweets is not preserved):
tweets = list(tweets)
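If keeping the original order matters, a one-line alternative (not part of the original answer) is dict.fromkeys, since dicts preserve insertion order in Python 3.7+; this reuses soup from the question:
tweets = list(dict.fromkeys(
    i.text for i in soup.select('a.twitter-cashtag.pretty-link.js-nav b')))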
You can use an empty dictionary.
In the loop, you can perform a check:
if the dictionary does not contain the current element as a key, insert the element into both tweets and the dictionary.
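A minimal sketch of that idea, with a hypothetical input list standing in for the scraped elements:
seen = {}
tweets = []
for text in ['AYTU', 'AYTU', 'JPM', 'BAC', 'JPM']:  # hypothetical scraped values
    if text not in seen:  # skip tickers we've already recorded
        seen[text] = True
        tweets.append(text)
print(tweets)  # ['AYTU', 'JPM', 'BAC']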
You can do a simple check on each iteration of the for loop:
tweets = []
for i in soup.select('a.twitter-cashtag.pretty-link.js-nav b'):
    if i.text not in tweets:
        tweets.append(i.text)
print(tweets)