Does anybody know how to get yahoo finance csv directly into python?
The problem is that when i try to get the data with this (example) link:
http://real-chart.finance.yahoo.com/table.csv?s=WU&a=4&b=20&c=2015&d=05&e=21&f=2016&g=d&ignore=.csv'
It gives a pop-up asking if I want to download the CSV file. This causes an error when I try to read it into Python. My script is:
today = datetime.date.today()


def get_url(stock='GOOG',
            START_date=(str(today.year - 1) + '-' + str(today.month - 1)
                        + '-' + str(today.day - 1)),
            END_date=str(today)):
    """Build the Yahoo Finance historical-prices CSV URL for ``stock``.

    Args:
        stock: Ticker symbol placed in the ``s=`` query parameter.
        START_date: Start of the range as a ``'YYYY-M-D'`` string. The three
            dash-separated fields are inserted verbatim as ``c`` (year),
            ``a`` (month) and ``b`` (day).
        END_date: End of the range in the same format, inserted verbatim as
            ``f`` (year), ``d`` (month) and ``e`` (day).

    Returns:
        The complete URL as a string.

    Note:
        The defaults are evaluated once, when the function is defined, from
        the module-level ``today``. The default start is built by subtracting
        one from each of year, month and day, so it can contain a ``0`` month
        or day (e.g. on the 1st of a month or in January).
    """
    base_url = 'http://real-chart.finance.yahoo.com/table.csv?'
    start_parts = START_date.split('-')
    end_parts = END_date.split('-')

    # Use the caller's ticker; it was previously replaced by a hard-coded 'WU'.
    symbol_param = 's=' + stock
    from_param = ('&a=' + start_parts[1] + '&b=' + start_parts[2]
                  + '&c=' + start_parts[0])
    to_param = ('&d=' + end_parts[1] + '&e=' + end_parts[2]
                + '&f=' + end_parts[0])
    return base_url + symbol_param + from_param + to_param + '&g=d&ignore=.csv'
# Rows of the CSV, one list of column strings per row.
rawdata = []
# NOTE: open() expects a local filesystem path; passing it the URL returned by
# get_url() is the behaviour this question is asking about.
with open(get_url()) as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    for row in reader:
        rawdata.append(row)
If I download the CSV first, I can read it into Python, but I want to access the CSV file directly without having to download it first. Is this possible? Alternatively, could the CSV be kept as a temporary file?
Thanks!
I would recommend that you use pandas. Here is a link.
import datetime

# pandas.io.data was split out of pandas into the pandas-datareader package.
import pandas_datareader.data as web

# Date range to request.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)
# Request ticker "F" from the 'yahoo' data source for the range above.
f = web.DataReader("F", 'yahoo', start, end)
# Label-based row lookup; .loc replaces the removed .ix indexer.
f.loc['2010-01-04']
Out[6]:
Open 1.017000e+01
High 1.028000e+01
Low 1.005000e+01
Close 1.028000e+01
Volume 6.085580e+07
Adj Close 8.755953e+00
Name: 2010-01-04 00:00:00, dtype: float64
Try it this way.
"""
In this file, "C:/Users/your_path/Desktop/symbols/tickers.txt",
you have the following tickers:
ibm
sbux
msft
"""
import json
import re
import urllib.request

# Directory holding tickers.txt (one symbol per line) and the per-symbol output.
SYMBOLS_DIR = "C:/Users/rshuell001/Desktop/symbols/"

with open(SYMBOLS_DIR + "tickers.txt") as tickers_file:
    symbolslist = tickers_file.read().split("\n")

for symbol in symbolslist:
    # A trailing newline in tickers.txt produces an empty entry; skip it
    # rather than requesting data for an empty symbol.
    if not symbol.strip():
        continue

    url = "http://www.bloomberg.com/markets/chart/data/1D/" + symbol + ":US"
    with urllib.request.urlopen(url) as response:
        data = json.load(response)
    datapoints = data["data_values"]

    # One "<symbol>,<point[0]>,<point[1]>" line per data point. Opening with
    # "w" replaces any previous file, as the old truncate-then-append did.
    with open(SYMBOLS_DIR + symbol + ".txt", "w") as myfile:
        for point in datapoints:
            myfile.write(symbol + "," + str(point[0]) + "," + str(point[1]) + "\n")
That should give you what you want.
Related
I need to get the date from a file name in Python code. I found many solutions, but they all assume a fixed name and date. I don't know in advance what the file will be called, because the date changes. How can I do that?
I have code that works for a known file name (the current date); the file is called micro20230125.txt
import re
import os
from datetime import datetime
# Fixed first line written to every .zrxp output file.
header = """#SANR0000013003;*;#CNR0010;*;#RINVAL-777.0;*;"""

# The input file name is derived from today's date, e.g. micro20230125.txt.
current_timestamp = datetime.today().strftime('%Y%m%d')
input_file = "micro" + current_timestamp + ".txt"
# Same base name as the input, with a .zrxp extension.
output_file = os.path.splitext(input_file)[0] + ".zrxp"

with open(input_file, "r") as f:
    first_line = f.readline().strip('\n')

# Keep the first run of six digits from the first line. The pattern is a raw
# string so that \d is not treated as a string escape sequence.
match = re.search(r'(\d{6})', first_line)
if match is None:
    raise ValueError("no 6-digit value on the first line of " + input_file)
text = header + "\n" + match.group(1) + "\n"

with open(output_file, "w") as f:
    f.write(text)
print(text)
`
But I don't need the current date. I will get a file with an arbitrary date, so how can I extract the unknown date from the file name? How should I change the variable current_timestamp?
I tried to use a regex, but I got something wrong.
EDIT: DIFF CODE, SIMILAR PROBLEM:
I was working on this code and then realized: Python doesn't know what the numbers in the name represent, so why treat them as a date and complicate things? They are just numbers. In fact, I need those numbers as well as the full file name. So I came up with different code.
import re
import os
# Return the first run of consecutive digits found in `filename`.
def get_numbers_from_filename(filename):
return re.search(r'\d+', filename).group(0) #returns only numbers
# Print the digit run for every entry listed in "my path".
for filename in os.listdir("my path"):
print (get_numbers_from_filename(filename))
# Redefinition: replaces the function above. The pattern now matches one or
# more arbitrary characters, so the whole match is returned instead of only
# the digits.
def get_numbers_from_filename(filename):
return re.search(r"(.)+", filename).group(0) #returns all name
# Print the matched name for every entry listed in "my path".
for filename in os.listdir("my path"):
print(get_numbers_from_filename(filename))
file was: micro20230104.txt
and result is:
result
Now I want to use that result rather than just print it.
Whatever I try, I get an error.
import re
import os
# Return the text of `filename` matched by r"(.)+" (one or more characters).
def get_numbers_from_filename(filename):
return re.search(r"(.)+", filename).group(0)
# os.listdir yields entry names only, without the directory prefix.
for filename in os.listdir("my path"):
print(get_numbers_from_filename(filename))
m = get_numbers_from_filename(filename)
# Output name: same base name as `m`, with a ".zrxp" extension.
output_file = os.path.splitext(m)[0] + ".zrxp"
# NOTE(review): `m` is a bare entry name, so open() resolves it against the
# current working directory rather than "my path" - confirm this is the
# source of the "no such file" error reported below.
with open(m, "r") as f:
first_line = f.readline().strip('\n')
# Keep the first run of six digits from the first line.
text = re.search('(\d{6})', first_line).group(1)
# NOTE(review): `header` is not defined in this snippet - presumably the same
# constant as in the earlier script; verify.
text = header + "\n" + text + "\n"
with open(output_file, "w") as f:
f.write(text)
print(text)
But it reports an error:
error: there is no such file
What should I do? What am I doing wrong?
Well, in case all the files have the format 'micro[YearMonthDay].txt', you can try this solution:
import os
import re
from datetime import datetime

# Fixed first line written to every .zrxp output file.
header = """#SANR0000013003;*;#CNR0010;*;#RINVAL-777.0;*;"""
# Change the variable folder_path for your actual directory path.
folder_path = "\\path_files\\"

for filename in os.listdir(folder_path):
    input_path = os.path.join(folder_path, filename)
    # Skip sub-directories and anything else that is not a regular file.
    if not os.path.isfile(input_path):
        continue

    try:
        # First solution: strip the fixed prefix/suffix, then parse the digits.
        date = datetime.strptime(
            filename.replace('micro', '').replace('.txt', ''), "%Y%m%d")
        # Second solution: parse the whole file name in one step.
        # date = datetime.strptime(filename, "micro%Y%m%d.txt")
    except ValueError:
        # Not a 'micro<YYYYMMDD>.txt' file; leave it untouched.
        continue
    print(date.strftime("%Y/%m/%d"))

    # Open the file that was actually listed (joined with folder_path) instead
    # of rebuilding its name from the parsed date.
    with open(input_path, "r") as f:
        first_line = f.readline().strip('\n')

    # Keep the first run of six digits from the first line.
    match = re.search(r'(\d{6})', first_line)
    if match is None:
        raise ValueError("no 6-digit value on the first line of " + input_path)
    text = header + "\n" + match.group(1) + "\n"

    # Write next to the input file: same base name, .zrxp extension.
    output_file = os.path.splitext(input_path)[0] + ".zrxp"
    with open(output_file, "w") as f:
        f.write(text)
    print(text)
Use the solution you prefer and comment the other one.
Testing:
Text files for test
Code
Result
I hope I could help! :D
I'm on Windows and I have a script called csv.py. I recently installed pandas and created anotherscript.py. The only code in anotherscript.py is import pandas.
When I run py anotherscript.py, all it does is run csv.py. I have renamed csv.py to something else and it still gets called.
If I remove import pandas, it works. If I move anotherscript.py to a different folder, it works fine. It looks like something is cached.
What am I missing?
anotherscript.py
import pandas
cmd call and output
C:\Users\*****>py anotherscript.py
0 634
1 Saturday, January 8, 2022
2 15:00 EST
.
.
.
<cal file created and uploaded>
csv.py
This script scrapes a webpage and creates a calendar file
import openpyxl
from openpyxl import load_workbook
from ics import Calendar, Event
from datetime import datetime
import pytz
from ftplib import FTP
import ftplib
import urllib.request
import requests
# Download the spreadsheet and store it locally so openpyxl can load it.
response = requests.post("urlRetrated")
with open('u7.xlsx', 'wb') as s:
    s.write(response.content)

wb_obj = openpyxl.load_workbook('u7.xlsx')
worksheet = wb_obj.active
data = []
c = Calendar()
EST = pytz.timezone('US/Eastern')

# One calendar event per worksheet row; row 1 is skipped (min_row=2).
for row_cells in worksheet.iter_rows(min_row=2, values_only=True):
    data = list(row_cells)

    # Columns used below: 1 = date text, 2 = time text, 3 and 5 = team names,
    # 6 = location. strip('EST ') trims any of the characters E, S, T and
    # space from both ends of the time text.
    date_and_time = data[1] + " " + data[2].strip('EST ')
    game_datetime = datetime.strptime(date_and_time, '%A, %B %d, %Y %H:%M')

    if 'SoccerTeam' in data[3]:
        data[3] = 'William'
    if 'SoccerTeam' in data[5]:
        data[5] = 'William'

    e = Event()
    e.name = data[3] + " Vs " + data[5]
    # pytz zones must be attached with localize(); replace(tzinfo=...) picks
    # the zone's first historical (LMT) offset instead of EST/EDT.
    e.begin = EST.localize(game_datetime)
    e.location = data[6]
    e.created = datetime.today()
    c.events.add(e)

    for index, value in enumerate(data):
        print(index, value)

# Write the calendar once, after all events were added. The with-block closes
# the file, so no explicit close() is needed.
with open('marcos.ics', 'w', newline='') as f:
    f.write(str(c))

data = []

user = '****'
pas = '*****'
# NOTE(review): the calendar is written to 'marcos.ics' but 'will.ics' is
# uploaded - confirm which file name is intended.
try:
    ftp = ftplib.FTP('*****', user, pas)
    print(ftp.getwelcome())
    ftp.cwd('public_html')
    with open('will.ics', 'rb') as upload_file:
        ftp.storbinary('STOR will.ics', upload_file)
    ftp.quit()
except ftplib.error_perm:
    print('Login Failed')
Thanks
That's the clearest I could make my title.
I have some code that reads in two CSV files. One CSV file has the data, and the other has information about this data... let's call it config.
data_jan2018.csv
data_feb2018.csv
config.csv
Now, config has columns for which dates I want to read in. I'm reading these in as follows:
# Load the config CSV; `loc` and `data_config_name` are defined elsewhere.
data_config = pd.read_csv(loc + data_config_name)
# Read in dates from config file
# Keep one row per distinct Start_date, then only the two date columns.
dates = data_config.drop_duplicates('Start_date')
dates = dates[['Start_date','End_date']]
print(dates)
Start_date = dates['Start_date'].tolist()
End_date = dates['End_date'].tolist()
# Concatenate the list entries into one string each.
# NOTE(review): this only yields a parseable date when each list holds a
# single value - confirm the config has one distinct Start_date.
StartDate = ''.join(Start_date)
EndDate = ''.join(End_date)
print(StartDate)
print(EndDate)
# Parse strings shaped like '01Jan2018' (day, abbreviated month, year).
date1 = datetime.strptime(StartDate, '%d%b%Y')
date2 = datetime.strptime(EndDate, '%d%b%Y')
# Loop across months
for dt in rrule.rrule(rrule.MONTHLY, dtstart=date1, until=date2):
print(dt)
reporting_date = dt.strftime('%d%b%Y')
# `date_format` is defined elsewhere (set to '%b%Y' at the top of the script).
reporting_date_fmt = dt.strftime(date_format)
print('Formatted reporting_date is ' + reporting_date_fmt)
# `source_data_name` is expected to contain reporting_date_fmt; it is defined
# outside this snippet.
source_data = pd.read_csv(loc + source_data_name)
source_data.columns = source_data.columns.str.lower()
As you can see, I want to read in a CSV file called source_data_name. However, this file name contains my formatted reporting_date_fmt. I want the programmer to be able to edit the file name at the beginning of the code, so I have these lines right at the top:
date_format = '%b%Y'
source_data_name = 'g4_RWA_sample_' + reporting_date_fmt + '.csv'
But of course this raises an error telling me that reporting_date_fmt hasn't been defined yet. Is there a workaround for this?
Define data name separately at the top of the file, then append the format and extension after the format has been defined.
# Fixed prefix of the source file name; set once at the top of the script.
data_name = 'g4_RWA_sample_'
...
# Build the full name only after reporting_date_fmt has been assigned.
source_data_name = data_name + reporting_date_fmt + '.csv'
import pandas_datareader.data as web
import datetime
# Date range passed to DataReader below.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)
# Request "ugaz" from the 'yahoo' data source for the range above.
f = web.DataReader("ugaz", 'yahoo', start, end)
# Bare expression: an interactive prompt echoes its value, but nothing is
# printed when the file is run as a script.
f.ix['2010-01-04']
The above is the code I am currently trying to use to get data from Yahoo Finance. When I run it, I get this in the Python 3.5.2 shell window:
==================== RESTART: C:/Users/Zac/Desktop/ll.py ====================
That's all I get. I am using Python 3.5 on Windows 10.
The first datapoint is
f.ix['2012-02-08']
Open 48.360002
High 48.360002
Low 48.360002
Close 48.360002
Volume 0.000000
Adj Close 6045.000287
Name: 2012-02-08 00:00:00, dtype: float64
There are so many ways to download financial data, or any kind of data, from the web. The script below downloads stock prices and saves everything to a CSV file.
import urllib.request

listOfStocks = ["AAPL", "MSFT", "GOOG", "FB", "AMZN"]
# One monthly-history CSV URL per ticker, in the same order as listOfStocks.
urls = [
    'http://real-chart.finance.yahoo.com/table.csv?s=' + company
    + '&d=6&e=28&f=2015&g=m&a=11&b=12&c=1980&ignore=.csv'
    for company in listOfStocks
]

# Output lines are collected in a list and joined once at the end.
output_lines = []
for company, url in zip(listOfStocks, urls):
    # Each URL is fetched exactly once; the header row is taken from the same
    # response instead of downloading the first URL a second time.
    with urllib.request.urlopen(url) as response:
        rows = response.read().decode('utf-8').splitlines(True)
    if not rows:
        continue
    if not output_lines:
        # The header is written once, prefixed with the extra "Company" column.
        output_lines.append("Company," + rows[0])
    # Skip each file's own header row and prefix every data row with the ticker.
    output_lines.extend(company + ',' + row for row in rows[1:])

New_Format_Data = ''.join(output_lines)
with open('C:/Users/your_path/Historical_Prices.csv', 'w') as Output_File:
    Output_File.write(New_Format_Data)
The script below will download multiple stock tickers into one folder.
import json
import re
import urllib.request

# Single directory for tickers.txt (one symbol per line) and the per-symbol
# output, so a file is never created in one folder and filled in another.
SYMBOLS_DIR = "C:/Users/your_path/Desktop/symbols/"

with open(SYMBOLS_DIR + "tickers.txt") as tickers_file:
    symbolslist = tickers_file.read().split("\n")

for symbol in symbolslist:
    # A trailing newline in tickers.txt produces an empty entry; skip it.
    if not symbol.strip():
        continue

    url = "http://www.bloomberg.com/markets/chart/data/1D/" + symbol + ":US"
    with urllib.request.urlopen(url) as response:
        data = json.load(response)
    datapoints = data["data_values"]

    # One "<symbol>,<point[0]>,<point[1]>" line per data point; "w" replaces
    # any previous file for the symbol.
    with open(SYMBOLS_DIR + symbol + ".txt", "w") as myfile:
        for point in datapoints:
            myfile.write(symbol + "," + str(point[0]) + "," + str(point[1]) + "\n")
Finally...this will download prices for multiple stock tickers...
import re
import urllib.request

# 'amex.txt' holds one ticker symbol per line; place it at the path below.
with open("C:/Users/your_path/Desktop/symbols/amex.txt") as symbolfile:
    newsymbolslist = symbolfile.read().split("\n")

for symbol in newsymbolslist:
    # A trailing newline in the file produces an empty entry; skip it.
    if not symbol:
        continue

    url = "http://finance.yahoo.com/q?s=" + symbol + "&ql=1"
    with urllib.request.urlopen(url) as htmlfile:
        htmltext = htmlfile.read().decode("utf-8", errors="replace")

    # The symbol is escaped so that characters such as '.' match literally.
    pattern = re.compile(
        '<span id="yfs_l84_' + re.escape(symbol) + '">(.+?)</span>')
    price = pattern.findall(htmltext)
    if price:
        print("the price of ", symbol, "is", price[0])
    else:
        # No matching <span> in the page; report it instead of raising IndexError.
        print("no price found for", symbol)
I wrote a book about these kinds of things, and lots of other stuff. You can find it using the URL below.
https://www.amazon.com/Automating-Business-Processes-Reducing-Increasing-ebook/dp/B01DJJKVZC/ref=sr_1_1?
I'm working with the Python console of QGIS 2.8.1. I want to convert many XLS files into CSV format using Python. My input directory is D:\PATRICIA\TESTE\XLS and my output directory is D:\PATRICIA\TESTE\CSV2. I wrote this code based on your suggestions (Converting xls file into csv/txt file in Python) and adapted it to several files with different dates, from 1999/01/2 until 1999/01/31, named RR_1999_1_2.xls, RR_1999_1_3.xls, ... RR_1999_1_31.xls
I don't know why my script doesn't work. Nothing happens when I run it!
My script is:
import xlrd
import csv
import datetime as dt
from datetime import timedelta
#initial and final dates
# First and last dates of the range, as YYYYMMDD strings.
data1='19990102'
data2='19990131'
# Split each string into integer year / month / day.
anoi = int(data1[:4])
mesi = int(data1[4:6])
diai = int(data1[6:8])
anof = int(data2[:4])
mesf = int(data2[4:6])
diaf = int(data2[6:8])
start_date = dt.datetime(anoi, mesi, diai)
end_date = dt.datetime(anof, mesf, diaf)
# Number of days in the range, counting both end points.
total_days = (end_date - start_date).days + 1
for day in xrange(0, total_days):
current_date = (start_date + dt.timedelta(days = day)).date()
# Year_month_day without zero padding, e.g. 1999_1_2.
file_date = str(current_date.year)+'_'+str(current_date.month)+'_'+str(current_date.day)
srt1='D:/PATRICIA/TESTE/XLS/RR_'+file_date+'.xls'
srt2='D:/PATRICIA/TESTE/CSV2/RR_'+file_date+'.csv'
# NOTE(review): nothing in this snippet calls xls_to_csv, and it reads `str1`,
# `str2` and `sh`, while the names assigned above are `srt1`, `srt2` and `x1`.
def xls_to_csv():
x = xlrd.open_workbook(str1)
x1 = x.sheet_by_name('Sheet1')
csvfile = open(str2, 'wb')
writecsv = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
for rownum in xrange(sh.nrows):
writecsv.writerow(x1.row_values(rownum))
csvfile.close()
Any help?
Thanks.
Unless I've missed an important thing, you declare the function xls_to_csv in a loop, but never call it. The general structure of your script should be:
#initializations
data1='19990102'
...
total_days = (end_date - start_date).days + 1
# function definition:
def xls_to_csv(str1, str2):
    """Write sheet 'Sheet1' of the workbook at ``str1`` to the CSV file ``str2``.

    Args:
        str1: Path of the .xls workbook to read.
        str2: Path of the CSV file to create (overwritten if it exists).

    Every field is quoted (``csv.QUOTE_ALL``). The output is opened in binary
    mode, which is what the Python 2 ``csv`` module expects; this function
    targets Python 2, as its use of ``xrange`` shows.
    """
    workbook = xlrd.open_workbook(str1)
    sheet = workbook.sheet_by_name('Sheet1')
    # 'with' closes the output file even if writing a row fails.
    with open(str2, 'wb') as csvfile:
        writecsv = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
        # Iterate over the rows of the sheet that was opened above; the row
        # count previously came from an undefined name `sh`.
        for rownum in xrange(sheet.nrows):
            writecsv.writerow(sheet.row_values(rownum))
# loop
# Convert one workbook per calendar day in the range.
for day in xrange(total_days):
    current_date = (start_date + dt.timedelta(days=day)).date()
    # Year_month_day without zero padding, e.g. 1999_1_2.
    date_fields = (current_date.year, current_date.month, current_date.day)
    file_date = '_'.join(str(field) for field in date_fields)
    srt1 = 'D:/PATRICIA/TESTE/XLS/RR_' + file_date + '.xls'
    srt2 = 'D:/PATRICIA/TESTE/CSV2/RR_' + file_date + '.csv'
    xls_to_csv(srt1, srt2)  # function call