Unable to get help on retrieving data from Nasdaq in Python - python

I am planning to do some financial research and learning using data from the NASDAQ.
I want to retrieve data from Nasdaq such that the header has the following:
Stock Symbol
Company Name
Last Sale
Market Capitalization
IPO
Year
Sector
Industry
Last Update
And I used Python code to get the "list of companies and ticker names" using:
import pandas as pd
import json
# Metadata embedded in the generated datapackage.json (see _create_datapackage).
PACKAGE_NAME = 'nasdaq-listings'
PACKAGE_TITLE = 'Nasdaq Listings'
# Pipe-delimited symbol directory published on the Nasdaq Trader FTP site.
nasdaq_listing = 'ftp://ftp.nasdaqtrader.com/symboldirectory/nasdaqlisted.txt'# Nasdaq only
def process():
    """Download the Nasdaq listing file, clean it, export CSVs, and write datapackage.json."""
    listings = pd.read_csv(nasdaq_listing, sep='|')
    listings = _clean_data(listings)
    # Narrow two-column view: ticker plus company name.
    symbols_only = listings[['Symbol', 'Company Name']]
    # (dataframe, filename) pairs; each becomes data/<filename>.csv and a schema resource.
    datasets = [(listings, 'nasdaq-listed'), (symbols_only, 'nasdaq-listed-symbols')]
    for frame, stem in datasets:
        frame.to_csv('data/' + stem + '.csv', index=False)
    with open("datapackage.json", "w") as outfile:
        json.dump(_create_datapackage(datasets), outfile, indent=4, sort_keys=True)
def _clean_data(df):
# TODO: do I want to save the file creation time (last row)
df = df.copy()
# Remove test listings
df = df[df['Test Issue'] == 'N']
# Create New Column w/ Just Company Name
df['Company Name'] = df['Security Name'].apply(lambda x: x.split('-')[0]) #nasdaq file uses - to separate stock type
#df['Company Name'] = TODO, remove stock type for otherlisted file (no separator)
# Move Company Name to 2nd Col
cols = list(df.columns)
cols.insert(1, cols.pop(-1))
df = df.loc[:, cols]
return df
def _create_file_schema(df, filename):
fields = []
for name, dtype in zip(df.columns,df.dtypes):
if str(dtype) == 'object' or str(dtype) == 'boolean': # does datapackage.json use boolean type?
dtype = 'string'
else:
dtype = 'number'
fields.append({'name':name, 'description':'', 'type':dtype})
return {
'name': filename,
'path': 'data/' + filename + '.csv',
'format':'csv',
'mediatype': 'text/csv',
'schema':{'fields':fields}
}
def _create_datapackage(datasets):
    """Assemble the top-level datapackage dict for the given (df, filename) pairs."""
    return {
        'name': PACKAGE_NAME,
        'title': PACKAGE_TITLE,
        'license': '',
        'resources': [_create_file_schema(frame, stem) for frame, stem in datasets],
    }
# Run the whole pipeline on execution (downloads from the Nasdaq FTP site
# and writes files under data/ plus datapackage.json in the working directory).
process()
Now for each of these symbols, I want to get the other data (as in above).
Is there any way I could do this?

Have you taken a look at pandas-datareader? You may be able to get the other data from there. It has multiple data sources, such as Google and Yahoo Finance.
http://pandas-datareader.readthedocs.io/en/latest/remote_data.html#remote-data-google

Related

How to setup Pandas groupby into subplots of tables?

So I currently have what is above.
I've managed to separate them into categories using groupby but now I would like to put them in a subplot of tables.
##open comma separated file and the columns Name, In Stock, committed, reorder point
inventory_df = pd.read_csv('Katana/InventoryItems-2022-01-06-09_10.csv',
                           usecols=['Name', 'In stock', 'Committed', 'Reorder point', 'Category'])
##quantities arrive as floats; truncate each count column to int in place.
##(replaces five hand-rolled list round-trips; the old name `file` also
##shadowed the builtin, and bare `DataFrame` was undefined — only pd is imported)
for qty_col in ['In stock', 'Committed', 'Reorder point']:
    inventory_df[qty_col] = inventory_df[qty_col].astype(float).astype(int)
##group the table by category so each category can be rendered as its own table
frame = inventory_df
grouped = frame.groupby(frame.Category)
df_elec = grouped.get_group('Electronics')
df_bedp = grouped.get_group('Bed Packaging')
df_fil = grouped.get_group('Filament')
df_fast = grouped.get_group('Fasteners')
df_kit = grouped.get_group('Kit Packaging')
df_pap = grouped.get_group('Paper')
Try something along the lines of:
import matplotlib.pyplot as plt
# One row of subplots per category frame (6 frames -> 6 stacked axes).
fig,axs = plt.subplots(nrows=6,ncols=1)
# table=True renders the frame's data as a table attached to each plot.
# NOTE(review): assumes df_elec..df_pap from the groupby above are in scope.
for ax,data in zip(axs,[df_elec,df_bedp,df_fil,df_fast,df_kit,df_pap]):
data.plot(ax=ax,table=True)

Read json data from covid19 api using python

I was trying to import timeseries data from link Covid_data to get the daily historical and 7 day moving average data.But my code doesn't work. I am new to this so maybe my key value pair is not correct. The structure of the file is given here json_structure_link.
My Code
import requests
import pandas as pd

# Fetch the per-state timeseries. Shape of the payload:
#   {state: {"dates": {"YYYY-MM-DD": {"delta": {...}, "total": {...}}, ...}}}
# i.e. "delta" lives under each DATE key, not directly under "dates" —
# indexing ["dates"]["delta"] is what raised the KeyError.
response = requests.get("https://api.covid19india.org/v4/min/timeseries.min.json")
if response.status_code == 200:
    historical_day_numbers = response.json()
    rows = []
    for state, state_data in historical_day_numbers.items():
        for date, day_data in state_data.get("dates", {}).items():
            # Not every date carries every metric; .get() yields None for gaps.
            delta = day_data.get("delta", {})
            rows.append({
                "STATE/UT": state,
                "DATE": date,
                "TOTAL_CASES": delta.get("confirmed"),
                "RECOVERED": delta.get("recovered"),
                "DECEASED": delta.get("deceased"),
                "TESTED": delta.get("tested"),
                "VACCINATED": delta.get("vaccinated"),
            })
    Covid19_historical_data = pd.DataFrame(rows)
    #print(Covid19_historical_data.head())
else:
    # Original format string had one placeholder for two arguments, so the
    # reason was silently dropped.
    print("Error while calling API: {} {}".format(response.status_code, response.reason))
The error I am getting
KeyError: 'delta'
But I see the delta present.
historical_day_numbers[state]['dates'].keys()
Output: dict_keys(['2020-04-06', '2020-04-07', '2020-04-08', '2020-04-09', '2020-04-10', '2020-04-11', '2020-04-12', '2020-04-13', '2020-04-14', '2020-04-15', '2020-04-16', '2020-04-17', '2020-04-18', '2020-04-19', '2020-04-20', '2020-04-21',...])
When you inspect the keys, you will see that there is a key for each date, and there is no key called 'delta' at this level.
If you edit your code as follows, you will not get this error.
historical_day_numbers[state]['dates']['2021-07-25']['delta']

How do I create a loop such that I get all the queries into one csv in through python?

I have created a function that fetches price, rating, etc after it hits an API:
def is_priced(business_id):
    """Return the price indicator for business_id, or the string 'None'.

    The string 'None' (not the None object) is kept for backward
    compatibility with the CSV output downstream.
    """
    try:
        business = get_business(API_KEY, business_id)
        return business['price']
    except Exception:
        # Was a bare `except:`; Exception still covers a missing 'price' key
        # and API failures without trapping KeyboardInterrupt/SystemExit.
        return 'None'
# Example call: b_id must already hold a business id.
priced_ind = is_priced(b_id)
print(priced_ind)
Similar for rating
def is_rated(business_id):
    """Return the rating for business_id, or the string 'None'.

    Mirrors is_priced: the string 'None' is kept so CSV output is unchanged.
    """
    try:
        business = get_business(API_KEY, business_id)
        return business['rating']
    except Exception:
        # Was a bare `except:`; narrowed for the same reason as is_priced.
        return 'None'
However, I want my function to loop through the business names I have in my CSV file and catch all this data and export it to a new csv file with these two parameters beside the names of the business.
The CSV file has info on the name of the business along with its address,city,state,zip and country
Eg:
Name address city state zip country
XYZ(The) 5* WE 223899th St. New York NY 19921 US
My output:
Querying https://api.xyz.com/v3/businesses/matches ...
True
Querying https://api.xyz.com/v3/businesses/matches ...
4.0
Querying https://api.xyz.com/v3/businesses/matches ...
$$
Querying https://api.xyz.com/v3/businesses/matches ...
Querying https://api.xyz.com/v3/businesses/matches ...
The real issue is that my output only returns the business id in the CSV, and the rating etc., as you see, is just printed to the console. How do I set up a loop such that it returns the info I desire for all the businesses into a single CSV?
The csv module is useful for this sort of thing e.g.
import csv

# Stream input rows, append price and rating, write each augmented row out.
# newline='' is the documented way to open files for the csv module
# (avoids blank lines on Windows).
with open('f.csv', 'r', newline='') as csvfile, open('tmp.csv', 'w', newline='') as output:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    writer = csv.writer(output)
    for row in reader:
        business_id = row[0]
        # Use the question's helpers; the original snippet referenced the
        # undefined names get_price_index/get_rate_index.
        row.append(is_priced(business_id))
        row.append(is_rated(business_id))
        writer.writerow(row)
You can read the business names from the CSV file, iterate over them using a for loop, hit the API and store the results, and write to a new CSV file.
import csv

data = []
with open('businesses.csv', newline='') as fp:
    reader = csv.reader(fp)
    # skip header line
    next(reader)
    for row in reader:
        b_name = row[0]  # was reader[0]: a csv.reader is not indexable
        # not sure how you get the business ID:
        b_id = get_business_id(b_name)
        p = is_priced(b_id)
        r = is_rated(b_id)
        data.append((b_name, p, r))  # was out.append: `out` was never defined

# write out the results
with open('business_data.csv', 'w', newline='') as fp:
    writer = csv.writer(fp)
    writer.writerow(['name', 'price', 'rating'])
    writer.writerows(data)
You can do this easily using pandas:
import pandas as pd

names_df = pd.read_csv('your_csv.csv', usecols=['business_name'])  # since you only need the name
# Keep the results in new columns: the original reassigned the whole frame
# (`csv = csv.apply(...)`), discarding the names and making the second
# apply() operate on a Series instead of the DataFrame.
names_df['price'] = names_df.apply(is_priced, axis=1)
names_df['rating'] = names_df.apply(is_rated, axis=1)
names_df.to_csv('result.csv', index=False)
All you have to do in your functions is:
# Sketch only: with apply(axis=1) each helper receives a DataFrame row,
# so the id must be derived from the name (?? is a placeholder, not code).
def is_priced(row):
business_name = row['business_name']
business_id = ??
...

save two list in one json file

I'm getting data into two lists and I want to save both of them in one single JSON file. Can someone help me?
I'm using selenium
def get_name(self):
    """Collect item names and prices from the page and return them as two lists.

    The original version reassigned its loop variables and returned nothing,
    so the extracted text was discarded.
    """
    names = [el.text for el in self.find_elements_by_class_name('item-desc')]
    prices = [el.text for el in self.find_elements_by_class_name('item-goodPrice')]
    return names, prices
I would create a dictionary and then JSON dumps
An example could be:
import json
def get_name(self):
    """Scrape item names and prices and dump both lists into a single JSON file."""
    name_elements = self.find_elements_by_class_name('item-desc')
    price_elements = self.find_elements_by_class_name('item-goodPrice')
    payload = {
        'names': [el.text for el in name_elements],
        'prices': [el.text for el in price_elements],
    }
    with open('output-file-name.json', 'w') as f:
        f.write(json.dumps(payload))
EDIT: In the first version of the answer I was only creating the JSON, if you want to create a file as well, you should include what suggested by #Andersson comment

In Python, trying to convert geocoded tsv file into geojson format

I'm trying to convert a geocoded TSV file into GeoJSON format, but I'm having trouble with it. Here's the code:
import geojson
import csv
def create_map(datafile):
    """Convert a geocoded TSV file into a GeoJSON FeatureCollection on disk.

    Writes the result to thedamngeojson.geojson in the working directory.
    """
    import json  # GeoJSON is plain JSON; no geojson package needed

    features = []
    # csv.reader yields plain lists, so line['Movie Title'] raised
    # "list indices must be integers, not str"; DictReader keys each row by
    # the header, and needs a real file object plus the tab delimiter.
    with open(datafile, 'r') as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for i, line in enumerate(reader):
            features.append({
                'type': 'Feature',
                'id': i,
                'properties': {'title': line['Movie Title'],
                               'description': line['Amenities'],
                               'date': line['Date']},
                # was {line['Location']}: a one-element set, which JSON
                # serializers reject
                'name': line['Location'],
                # GeoJSON positions are [longitude, latitude], as numbers
                'geometry': {'type': 'Point',
                             'coordinates': (float(line['Lng']), float(line['Lat']))},
            })
    geo_map = {"type": "FeatureCollection", "features": features}
    with open("thedamngeojson.geojson", 'w') as f:
        f.write(json.dumps(geo_map))

if __name__ == '__main__':
    # Guarded so importing the module doesn't require the data file.
    create_map('MovieParksGeocode2.tsv')
I'm getting a TypeError:list indices must be integers, not str on the data['properties'] line but I don't understand, isn't that how I set values to the geoJSON fields?
The file I'm reading from has values under these keys: Location Movie Title Date Amenities Lat Lng
The file is viewable here: https://github.com/yongcho822/Movies-in-the-park/blob/master/MovieParksGeocodeTest.tsv
Thanks guys, much appreciated as always.
You have a couple things going on here that need to get fixed.
1.Your TSV contains newlines with double quotes. I don't think this is intended, and will cause some problems.
Location Movie Title Date Amenities Formatted_Address Lat Lng
"
Edgebrook Park, Chicago " A League of Their Own 7-Jun "
Family friendly activities and games. Also: crying is allowed." Edgebrook Park, 6525 North Hiawatha Avenue, Chicago, IL 60646, USA 41.9998876 -87.7627672
"
2.You don't need the geojson module to dump out JSON - which is all GeoJSON is. Just import json instead.
3.You are trying to read a TSV, but you don't include the delimiter=\t option that is needed for that.
4. You are trying to read keys off the rows, but you aren't using DictReader, which does that for you. Hence the TypeError about indices you mention above.
Check out my revised code block below — you still need to fix your TSV to be a valid TSV.
import csv
import json
def create_map(datafile):
    """Read a tab-delimited geocoded movie list and write thedamngeojson.geojson."""
    features = []
    with open(datafile, 'r') as tsvfile:
        # DictReader keys each row by the header line, so line['Movie Title'] works.
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for i, line in enumerate(reader):
            # (dropped the Python-2 `print line` debug statement)
            features.append({
                'type': 'Feature',
                'id': i,
                'properties': {'title': line['Movie Title'],
                               'description': line['Amenities'],
                               'date': line['Date']},
                # was {line['Location']}: a set, which json.dumps cannot serialize
                'name': line['Location'],
                # GeoJSON positions are [longitude, latitude], as numbers (RFC 7946)
                'geometry': {'type': 'Point',
                             'coordinates': (float(line['Lng']), float(line['Lat']))},
            })
    geo_map = {"type": "FeatureCollection", "features": features}
    with open("thedamngeojson.geojson", 'w') as f:
        f.write(json.dumps(geo_map))

if __name__ == '__main__':
    # Guarded so importing the module doesn't require the data file.
    create_map('MovieParksGeocode2.tsv')

Categories

Resources