I've found some code here that works pretty well for retrieving the data I need (from: Python yahoo finance error market_cap=int(data.get_quote_yahoo(str)['marketCap']) TypeError: 'int' object is not callable):
tickers = ["AAPL", "GOOG", "RY", "HPQ"]
# Fetch the quote table once and slice columns locally: each
# get_quote_yahoo call is a separate network request for the same data.
quotes = web.get_quote_yahoo(tickers)
# Get market cap (not really necessary for you)
market_cap_data = quotes['marketCap']
# Get the P/E ratio directly
pe_data = quotes['trailingPE']
# print stock and p/e ratio
for stock, pe in zip(tickers, pe_data):
    print(stock, pe)
# More keys that can be used
['language', 'region', 'quoteType', 'triggerable', 'quoteSourceName',
'currency', 'preMarketChange', 'preMarketChangePercent',
'preMarketTime', 'preMarketPrice', 'regularMarketChange',
'regularMarketChangePercent', 'regularMarketTime', 'regularMarketPrice',
'regularMarketDayHigh', 'regularMarketDayRange', 'regularMarketDayLow',
'regularMarketVolume', 'regularMarketPreviousClose', 'bid', 'ask',
'bidSize', 'askSize', 'fullExchangeName', 'financialCurrency',
'regularMarketOpen', 'averageDailyVolume3Month',
'averageDailyVolume10Day', 'fiftyTwoWeekLowChange',
'fiftyTwoWeekLowChangePercent', 'fiftyTwoWeekRange',
'fiftyTwoWeekHighChange', 'fiftyTwoWeekHighChangePercent',
'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'dividendDate',
'earningsTimestamp', 'earningsTimestampStart', 'earningsTimestampEnd',
'trailingAnnualDividendRate', 'trailingPE',
'trailingAnnualDividendYield', 'marketState', 'epsTrailingTwelveMonths',
'epsForward', 'sharesOutstanding', 'bookValue', 'fiftyDayAverage',
'fiftyDayAverageChange', 'fiftyDayAverageChangePercent',
'twoHundredDayAverage', 'twoHundredDayAverageChange',
'twoHundredDayAverageChangePercent', 'marketCap', 'forwardPE',
'priceToBook', 'sourceInterval', 'exchangeDataDelayedBy', 'tradeable',
'firstTradeDateMilliseconds', 'priceHint', 'exchange', 'shortName',
'longName', 'messageBoardId', 'exchangeTimezoneName',
'exchangeTimezoneShortName', 'gmtOffSetMilliseconds', 'market',
'esgPopulated', 'price']
I would like to retrieve most of the commented fields at the end of the previous code, but I've done this so far:
import pandas_datareader as web

tickers = ["AAPL", "GOOG", "RY", "SAB.MC"]
# One request for the whole quote table; selecting columns from the
# returned DataFrame avoids repeating the same network call per field.
quotes = web.get_quote_yahoo(tickers)
market_cap_data = quotes['marketCap']
pe_data = quotes['trailingPE']
fiftytwo_low_data = quotes['fiftyTwoWeekLowChangePercent']
for stock, mcap, pe, fiftytwo_low in zip(tickers, market_cap_data, pe_data, fiftytwo_low_data):
    print(stock, mcap, pe, fiftytwo_low)
Obviously I could continue with my brute force, but do you know any way to make the code more elegant to retrieve the whole string of fields with column names?
['language', 'region', 'quoteType', 'triggerable', 'quoteSourceName',
'currency', 'preMarketChange', 'preMarketChangePercent',
'preMarketTime', 'preMarketPrice', 'regularMarketChange',
'regularMarketChangePercent', 'regularMarketTime', 'regularMarketPrice',
'regularMarketDayHigh', 'regularMarketDayRange', 'regularMarketDayLow',
'regularMarketVolume', 'regularMarketPreviousClose', 'bid', 'ask',
'bidSize', 'askSize', 'fullExchangeName', 'financialCurrency',
'regularMarketOpen', 'averageDailyVolume3Month',
'averageDailyVolume10Day', 'fiftyTwoWeekLowChange',
'fiftyTwoWeekLowChangePercent', 'fiftyTwoWeekRange',
'fiftyTwoWeekHighChange', 'fiftyTwoWeekHighChangePercent',
'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'dividendDate',
'earningsTimestamp', 'earningsTimestampStart', 'earningsTimestampEnd',
'trailingAnnualDividendRate', 'trailingPE',
'trailingAnnualDividendYield', 'marketState', 'epsTrailingTwelveMonths',
'epsForward', 'sharesOutstanding', 'bookValue', 'fiftyDayAverage',
'fiftyDayAverageChange', 'fiftyDayAverageChangePercent',
'twoHundredDayAverage', 'twoHundredDayAverageChange',
'twoHundredDayAverageChangePercent', 'marketCap', 'forwardPE',
'priceToBook', 'sourceInterval', 'exchangeDataDelayedBy', 'tradeable',
'firstTradeDateMilliseconds', 'priceHint', 'exchange', 'shortName',
'longName', 'messageBoardId', 'exchangeTimezoneName',
'exchangeTimezoneShortName', 'gmtOffSetMilliseconds', 'market',
'esgPopulated', 'price']
thanks
Using a set, you can collect every item name retrievable for each ticker in your initial list; taking the union across tickers then gives you the complete list of item names that have a value for the data you want to retrieve.
import pandas_datareader as web
import pandas as pd

tickers = ["AAPL", "GOOG", "RY", "SAB.MC"]

# Accumulate the union of every column name seen across the
# per-ticker quote frames (tickers can expose different fields).
names = set()
for ticker in tickers:
    quote = web.get_quote_yahoo(ticker)
    names.update(quote.columns.to_list())
names
{'ask',
'askSize',
'averageAnalystRating',
'averageDailyVolume10Day',
'averageDailyVolume3Month',
'bid',
'bidSize',
'bookValue',
'cryptoTradeable',
'currency',
'customPriceAlertConfidence',
'displayName',
...
'trailingAnnualDividendYield',
'trailingPE',
'triggerable',
'twoHundredDayAverage',
'twoHundredDayAverageChange',
'twoHundredDayAverageChangePercent',
'typeDisp'}
I know this post is pretty old, but I just came across it now. Check out the 'yfinance' library. There's all kinds of stuff available over there!!
import pandas_datareader as web
import pandas as pd

# Pull daily price history for one symbol over the requested window.
symbol = 'AAPL'
df = web.DataReader(symbol, data_source='yahoo', start='2011-01-01', end='2021-01-12')
df.head()
import yfinance as yf

# Build a Ticker handle for Apple — everything below hangs off it.
apple = yf.Ticker("AAPL")
apple

# company / stock metadata
apple.info
# full price history
hist = apple.history(period="max")
# corporate actions (dividends, splits)
apple.actions
# dividends only
apple.dividends
# splits only
apple.splits
# income statement
apple.financials
apple.quarterly_financials
# major holders
apple.major_holders
# institutional holders
apple.institutional_holders
# balance sheet
apple.balance_sheet
apple.quarterly_balance_sheet
# cash flow
apple.cashflow
apple.quarterly_cashflow
# earnings
apple.earnings
apple.quarterly_earnings
# sustainability / ESG data
apple.sustainability
# analyst recommendations
apple.recommendations
# upcoming events (earnings dates, etc.)
apple.calendar
# ISIN code - *experimental*
# ISIN = International Securities Identification Number
apple.isin
# option expiration dates
apple.options
# option chain for one expiration date
opt = apple.option_chain('YYYY-MM-DD')
Result:
{'zip': '95014',
'sector': 'Technology',
'fullTimeEmployees': 164000,
'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. It also sells various related services. In addition, the company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. Further, it provides AppleCare support and cloud services store services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts. Additionally, the company offers various services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized fitness service; Apple Music, which offers users a curated listening experience with on-demand radio stations; Apple News+, a subscription news and magazine service; Apple TV+, which offers exclusive original content; Apple Card, a co-branded credit card; and Apple Pay, a cashless payment service, as well as licenses its intellectual property. The company serves consumers, and small and mid-sized businesses; and the education, enterprise, and government markets. It distributes third-party applications for its products through the App Store. The company also sells its products through its retail and online stores, and direct sales force; and third-party cellular network carriers, wholesalers, retailers, and resellers. Apple Inc. was incorporated in 1977 and is headquartered in Cupertino, California.',
'city': 'Cupertino',
'phone': '408 996 1010',
'state': 'CA',
'country': 'United States',
'companyOfficers': [],
'website': 'https://www.apple.com',
'maxAge': 1,
'address1': 'One Apple Park Way',
'industry': 'Consumer Electronics',
'ebitdaMargins': 0.33105,
'profitMargins': 0.2531,
'grossMargins': 0.43310001,
'operatingCashflow': 122151002112,
'revenueGrowth': 0.081,
'operatingMargins': 0.30289,
'ebitda': 130541002752,
'targetLowPrice': 122,
'recommendationKey': 'buy',
'grossProfits': 170782000000,
'freeCashflow': 90215251968,
'targetMedianPrice': 180,
'currentPrice': 151.29,
'earningsGrowth': 0.048,
'currentRatio': 0.879,
'returnOnAssets': 0.21214001,
'numberOfAnalystOpinions': 41,
'targetMeanPrice': 178.15,
'debtToEquity': 261.446,
'returnOnEquity': 1.75459,
'targetHighPrice': 214,
'totalCash': 48304001024,
'totalDebt': 132480000000,
'totalRevenue': 394328014848,
'totalCashPerShare': 3.036,
'financialCurrency': 'USD',
'revenuePerShare': 24.317,
'quickRatio': 0.709,
'recommendationMean': 1.9,
'exchange': 'NMS',
'shortName': 'Apple Inc.',
'longName': 'Apple Inc.',
'exchangeTimezoneName': 'America/New_York',
'exchangeTimezoneShortName': 'EST',
'isEsgPopulated': False,
'gmtOffSetMilliseconds': '-18000000',
'quoteType': 'EQUITY',
'symbol': 'AAPL',
'messageBoardId': 'finmb_24937',
'market': 'us_market',
'annualHoldingsTurnover': None,
'enterpriseToRevenue': 6.317,
'beta3Year': None,
'enterpriseToEbitda': 19.081,
'52WeekChange': -0.06042725,
'morningStarRiskRating': None,
'forwardEps': 6.82,
'revenueQuarterlyGrowth': None,
'sharesOutstanding': 15908100096,
'fundInceptionDate': None,
'annualReportExpenseRatio': None,
'totalAssets': None,
'bookValue': 3.178,
'sharesShort': 103178670,
'sharesPercentSharesOut': 0.0064999997,
'fundFamily': None,
'lastFiscalYearEnd': 1663977600,
'heldPercentInstitutions': 0.60030997,
'netIncomeToCommon': 99802996736,
'trailingEps': 6.11,
'lastDividendValue': 0.23,
'SandP52WeekChange': -0.15323704,
'priceToBook': 47.60541,
'heldPercentInsiders': 0.00071999995,
'nextFiscalYearEnd': 1727136000,
'yield': None,
'mostRecentQuarter': 1663977600,
'shortRatio': 1.14,
'sharesShortPreviousMonthDate': 1664496000,
'floatShares': 15891414476,
'beta': 1.246644,
'enterpriseValue': 2490915094528,
'priceHint': 2,
'threeYearAverageReturn': None,
'lastSplitDate': 1598832000,
'lastSplitFactor': '4:1',
'legalType': None,
'lastDividendDate': 1667520000,
'morningStarOverallRating': None,
'earningsQuarterlyGrowth': 0.008,
'priceToSalesTrailing12Months': 6.103387,
'dateShortInterest': 1667174400,
'pegRatio': 2.71,
'ytdReturn': None,
'forwardPE': 22.183283,
'lastCapGain': None,
'shortPercentOfFloat': 0.0064999997,
'sharesShortPriorMonth': 103251184,
'impliedSharesOutstanding': 0,
'category': None,
'fiveYearAverageReturn': None,
'previousClose': 150.72,
'regularMarketOpen': 152.305,
'twoHundredDayAverage': 155.0841,
'trailingAnnualDividendYield': 0.005971337,
'payoutRatio': 0.14729999,
'volume24Hr': None,
'regularMarketDayHigh': 152.57,
'navPrice': None,
'averageDailyVolume10Day': 84360340,
'regularMarketPreviousClose': 150.72,
'fiftyDayAverage': 147.0834,
'trailingAnnualDividendRate': 0.9,
'open': 152.305,
'toCurrency': None,
'averageVolume10days': 84360340,
'expireDate': None,
'algorithm': None,
'dividendRate': 0.92,
'exDividendDate': 1667520000,
'circulatingSupply': None,
'startDate': None,
'regularMarketDayLow': 149.97,
'currency': 'USD',
'trailingPE': 24.761045,
'regularMarketVolume': 74496725,
'lastMarket': None,
'maxSupply': None,
'openInterest': None,
'marketCap': 2406736461824,
'volumeAllCurrencies': None,
'strikePrice': None,
'averageVolume': 89929545,
'dayLow': 149.97,
'ask': 150.95,
'askSize': 1000,
'volume': 74496725,
'fiftyTwoWeekHigh': 182.94,
'fromCurrency': None,
'fiveYearAvgDividendYield': 1,
'fiftyTwoWeekLow': 129.04,
'bid': 150.82,
'tradeable': False,
'dividendYield': 0.0061000003,
'bidSize': 1100,
'dayHigh': 152.57,
'coinMarketCapLink': None,
'regularMarketPrice': 151.29,
'preMarketPrice': None,
'logo_url': 'https://logo.clearbit.com/apple.com'}
Just pick/choose what you want.
I am trying to use the scrape_linkedin package. I follow the section on the github page on how to set up the package/LinkedIn li_at key (which I paste here for clarity).
Getting LI_AT
Navigate to www.linkedin.com and log in
Open browser developer tools (Ctrl-Shift-I or right click -> inspect element)
Select the appropriate tab for your browser (Application on Chrome, Storage on Firefox)
Click the Cookies dropdown on the left-hand menu, and select the www.linkedin.com option
Find and copy the li_at value
Once I collect the li_at value from my LinkedIn, I run the following code:
from scrape_linkedin import ProfileScraper

# The li_at browser cookie authenticates the scraping session.
with ProfileScraper(cookie='myVeryLong_li_at_Code_which_has_characters_like_AQEDAQNZwYQAC5_etc') as session:
    profile = session.scrape(url='https://www.linkedin.com/in/justintrudeau/')
    print(profile.to_dict())
I have two questions (I am originally an R user).
How can I input a list of profiles:
https://www.linkedin.com/in/justintrudeau/
https://www.linkedin.com/in/barackobama/
https://www.linkedin.com/in/williamhgates/
https://www.linkedin.com/in/wozniaksteve/
and scrape the profiles? (In R I would use the map function from the purrr package to apply the function to each of the LinkedIn profiles).
The output (from the original github page) is returned in a JSON style format. My second question is how I can convert this into a pandas data frame (i.e. it is returned similar to the following).
{'personal_info': {'name': 'Steve Wozniak', 'headline': 'Fellow at
Apple', 'company': None, 'school': None, 'location': 'San Francisco
Bay Area', 'summary': '', 'image': '', 'followers': '', 'email': None,
'phone': None, 'connected': None, 'websites': [],
'current_company_link': 'https://www.linkedin.com/company/sandisk/'},
'experiences': {'jobs': [{'title': 'Chief Scientist', 'company':
'Fusion-io', 'date_range': 'Jul 2014 – Present', 'location': 'Primary
Data', 'description': "I'm looking into future technologies applicable
to servers and storage, and helping this company, which I love, get
noticed and get a lead so that the world can discover the new amazing
technology they have developed. My role is principally a marketing one
at present but that will change over time.", 'li_company_url':
'https://www.linkedin.com/company/sandisk/'}, {'title': 'Fellow',
'company': 'Apple', 'date_range': 'Mar 1976 – Present', 'location': '1
Infinite Loop, Cupertino, CA 94015', 'description': 'Digital Design
engineer.', 'li_company_url': ''}, {'title': 'President & CTO',
'company': 'Wheels of Zeus', 'date_range': '2002 – 2005', 'location':
None, 'description': None, 'li_company_url':
'https://www.linkedin.com/company/wheels-of-zeus/'}, {'title':
'diagnostic programmer', 'company': 'TENET Inc.', 'date_range': '1970
– 1971', 'location': None, 'description': None, 'li_company_url':
''}], 'education': [{'name': 'University of California, Berkeley',
'degree': 'BS', 'grades': None, 'field_of_study': 'EE & CS',
'date_range': '1971 – 1986', 'activities': None}, {'name': 'University
of Colorado Boulder', 'degree': 'Honorary PhD.', 'grades': None,
'field_of_study': 'Electrical and Electronics Engineering',
'date_range': '1968 – 1969', 'activities': None}], 'volunteering':
[]}, 'skills': [], 'accomplishments': {'publications': [],
'certifications': [], 'patents': [], 'courses': [], 'projects': [],
'honors': [], 'test_scores': [], 'languages': [], 'organizations':
[]}, 'interests': ['Western Digital', 'University of Colorado
Boulder', 'Western Digital Data Center Solutions', 'NEW Homebrew
Computer Club', 'Wheels of Zeus', 'SanDisk®']}
Firstly, you can create a custom function to scrape the data and use Python's built-in map function to apply it to each profile link.
Secondly, to create a pandas dataframe using a dictionary, you can simply pass the dictionary to pd.DataFrame.
Thus to create a dataframe df, with dictionary dict, you can do like this:
# Build a DataFrame whose columns come from the dictionary's keys.
# NOTE(review): the name `dict` shadows the builtin dict type here;
# prefer a name such as `data` in real code.
df = pd.DataFrame(dict)
I'm new to the concept of generators and I'm struggling with how to apply my changes to the records within the generator object returned from the RISparser module.
I understand that a generator only reads a record at a time and doesn't actually store the data in memory but I'm having a tough time iterating over it effectively and applying my changes.
My changes will involve dropping records that have not got ['doi'] values that are contained within a list of DOIs [doi_match].
# Whitelist of DOIs to keep; records whose 'doi' value is not listed
# here are dropped during filtering.
doi_match = ['10.1002/14651858.CD008259.pub2','10.1002/14651858.CD011552','10.1002/14651858.CD011990']
Generator object returned form RISparser contains the following information, this is just the first 2 records returned of a few 100. I want to iterate over it and compare the 'doi': key from the generator with the list of DOIs.
{'type_of_reference': 'JOUR', 'title': "The CoRe Outcomes in WomeN's health (CROWN) initiative: Journal editors invite researchers to develop core outcomes in women's health", 'secondary_title': 'Neurourology and Urodynamics', 'alternate_title1': 'Neurourol. Urodyn.', 'volume': '33', 'number': '8', 'start_page': '1176', 'end_page': '1177', 'year': '2014', 'doi': '10.1002/nau.22674', 'issn': '07332467 (ISSN)', 'authors': ['Khan, K.'], 'keywords': ['Bias (epidemiology)', 'Clinical trials', 'Consensus', 'Endpoint determination/standards', 'Evidence-based medicine', 'Guidelines', 'Research design/standards', 'Systematic reviews', 'Treatment outcome', 'consensus', 'editor', 'female', 'human', 'medical literature', 'Note', 'outcomes research', 'peer review', 'randomized controlled trial (topic)', 'systematic review (topic)', "women's health", 'outcome assessment', 'personnel', 'publication', 'Female', 'Humans', 'Outcome Assessment (Health Care)', 'Periodicals as Topic', 'Research Personnel', "Women's Health"], 'publisher': 'John Wiley and Sons Inc.', 'notes': ['Export Date: 14 July 2020', 'CODEN: NEURE'], 'type_of_work': 'Note', 'name_of_database': 'Scopus', 'custom2': '25270392', 'language': 'English', 'url': 'https://www.scopus.com/inward/record.uri?eid=2-s2.0-84908368202&doi=10.1002%2fnau.22674&partnerID=40&md5=b220702e005430b637ef9d80a94dadc4'}
{'type_of_reference': 'JOUR', 'title': "The CROWN initiative: Journal editors invite researchers to develop core outcomes in women's health", 'secondary_title': 'Gynecologic Oncology', 'alternate_title1': 'Gynecol. Oncol.', 'volume': '134', 'number': '3', 'start_page': '443', 'end_page': '444', 'year': '2014', 'doi': '10.1016/j.ygyno.2014.05.005', 'issn': '00908258 (ISSN)', 'authors': ['Karlan, B.Y.'], 'author_address': 'Gynecologic Oncology and Gynecologic Oncology Reports, India', 'keywords': ['clinical trial (topic)', 'decision making', 'Editorial', 'evidence based practice', 'female infertility', 'health care personnel', 'human', 'outcome assessment', 'outcomes research', 'peer review', 'practice guideline', 'premature labor', 'priority journal', 'publication', 'systematic review (topic)', "women's health", 'editorial', 'female', 'outcome assessment', 'personnel', 'publication', 'Female', 'Humans', 'Outcome Assessment (Health Care)', 'Periodicals as Topic', 'Research Personnel', "Women's Health"], 'publisher': 'Academic Press Inc.', 'notes': ['Export Date: 14 July 2020', 'CODEN: GYNOA', 'Correspondence Address: Karlan, B.Y.; Gynecologic Oncology and Gynecologic Oncology ReportsIndia'], 'type_of_work': 'Editorial', 'name_of_database': 'Scopus', 'custom2': '25199578', 'language': 'English', 'url': 'https://www.scopus.com/inward/record.uri?eid=2-s2.0-84908351159&doi=10.1016%2fj.ygyno.2014.05.005&partnerID=40&md5=ab5a4d26d52c12d081e38364b0c79678'}
I tried iterating over the generator and applying the changes. But the records that have matches are not being placed in the match list.
# Keep only the records whose DOI appears in doi_match.
# A generator can be consumed only once, so filter in a single pass.
# .get() avoids a KeyError on records that lack a 'doi' field, and no
# explicit `del` is needed — unmatched records are simply not kept
# (deleting the loop variable only unbinds the name, it does not
# remove anything from the generator's output).
match = [entry for entry in ris_records if entry.get('doi') in doi_match]
Any advice on how to iterate over a generator correctly would be appreciated — thanks.
This question has been asked many times — but only once for this special case, and I could partially find an answer here, but it flattens everything down to every atomic object.
I have this dictionary:
{'address': {'address_line_1': 'Floor Dekk House',
'address_line_2': 'Zippora Street Providence Industrial Estate',
'country': 'Seychelles',
'locality': 'Mahe',
'premises': '1st'},
'address_snippet': '1st, Floor Dekk House, Zippora Street Providence Industrial Estate, Mahe, Seychelles',
'appointment_count': 1,
'description': 'Total number of appointments 1',
'description_identifiers': ['appointment-count'],
'kind': 'searchresults#officer',
'links': {'self': '/officers/z7s5QUnhlYpAT8GvqvJ5snKmtHE/appointments'},
'matches': {'snippet': [], 'title': [1, 8, 10, 11]},
'snippet': '',
'title': 'ASTROCOM AG '}
As you can see, "description_identifiers", "matches.snippet" and "matches.title" have a list as their value. I'd like to edit my code below to flatten my dictionary so that the json is flattened into `{key: value, key: value, key: value}` pairs — but if a value is a list of atomic objects (not a list of lists or a list of dictionaries), that value is kept as a list.
The objective is so be able to upload then this json to postgresql.
Here's some code i found online:
def flatten_json(dictionary):
    """Flatten a nested json structure into a single-level dict.

    Nested dict keys are joined with '_' into the parent key; list
    elements get their index appended ('key_0', 'key_1', ...). The
    process repeats one level at a time until no value is a dict or
    a list.
    """
    def unpack(parent_key, parent_value):
        """Yield (key, value) pairs one nesting level down."""
        if isinstance(parent_value, dict):
            for child_key, child_value in parent_value.items():
                yield parent_key + '_' + child_key, child_value
        elif isinstance(parent_value, list):
            for index, child_value in enumerate(parent_value):
                yield parent_key + '_' + str(index), child_value
        else:
            # Atomic value: pass the pair through unchanged.
            yield parent_key, parent_value

    # Repeatedly unpack one level until every value is atomic.
    while True:
        dictionary = dict(chain.from_iterable(starmap(unpack, dictionary.items())))
        if not any(isinstance(value, (dict, list)) for value in dictionary.values()):
            break
    return dictionary
Desired output:
And to test, this dict:
Should not be (which is what I get now):
{'address_address_line_1': 'Floor Dekk House',
'address_address_line_2': 'Zippora Street Providence Industrial Estate',
'address_country': 'Seychelles',
'address_locality': 'Mahe',
'address_premises': '1st',
'address_snippet': '1st, Floor Dekk House, Zippora Street Providence Industrial Estate, Mahe, Seychelles',
'appointment_count': 1,
'description': 'Total number of appointments 1',
'description_identifiers_0': 'appointment-count',
'kind': 'searchresults#officer',
'links_self': '/officers/z7s5QUnhlYpAT8GvqvJ5snKmtHE/appointments',
'matches_title_0': 1,
'matches_title_1': 8,
'matches_title_2': 10,
'matches_title_3': 11,
'snippet': '',
'title': 'ASTROCOM AG '}
But rather
{'address_address_line_1': 'Floor Dekk House',
'address_address_line_2': 'Zippora Street Providence Industrial Estate',
'address_country': 'Seychelles',
'address_locality': 'Mahe',
'address_premises': '1st',
'address_snippet': '1st, Floor Dekk House, Zippora Street Providence Industrial Estate, Mahe, Seychelles',
'appointment_count': 1,
'description': 'Total number of appointments 1',
'description_identifiers_0': 'appointment-count',
'kind': 'searchresults#officer',
'links_self': '/officers/z7s5QUnhlYpAT8GvqvJ5snKmtHE/appointments',
'matches_title': [1, 8, 10, 11]
'snippet': '',
'title': 'ASTROCOM AG '}
You are almost done, except you need a little more check on the condition:
def flatten(dict_, prefix):
    """Yield flattened (key, value) pairs from a nested dict.

    Nested dicts are merged into the key path with '_' separators.
    A single-element list is unwrapped: a lone dict is flattened in
    place, any other lone value is emitted under '<key>_0'. Lists of
    any other length are passed through untouched.
    """
    for key, value in dict_.items():
        path = prefix + key
        if isinstance(value, list) and len(value) == 1:
            only = value[0]
            if isinstance(only, dict):
                yield from flatten(only, path + "_")
            else:
                yield path + "_0", only
        elif isinstance(value, dict):
            yield from flatten(value, path + "_")
        else:
            yield path, value
Usage:
import json

# Sample input: nested dicts plus list values of length 0, 1 and 4,
# exercising every branch of flatten().
dict_ = {'address': {'address_line_1': 'Floor Dekk House',
                     'address_line_2': 'Zippora Street Providence Industrial Estate',
                     'country': 'Seychelles',
                     'locality': 'Mahe',
                     'premises': '1st'},
         'address_snippet': '1st, Floor Dekk House, Zippora Street Providence Industrial Estate, Mahe, Seychelles',
         'appointment_count': 1,
         'description': 'Total number of appointments 1',
         'description_identifiers': ['appointment-count'],
         'kind': 'searchresults#officer',
         'links': {'self': '/officers/z7s5QUnhlYpAT8GvqvJ5snKmtHE/appointments'},
         'matches': {'snippet': [], 'title': [1, 8, 10, 11]},
         'snippet': '',
         'title': 'ASTROCOM AG '}

# dict() accepts any iterable of (key, value) pairs directly; wrapping
# the generator in list() first is an unnecessary intermediate copy.
print(json.dumps(dict(flatten(dict_, "")), indent=4))
Output:
{
"address_address_line_1": "Floor Dekk House",
"address_address_line_2": "Zippora Street Providence Industrial Estate",
"address_country": "Seychelles",
"address_locality": "Mahe",
"address_premises": "1st",
"address_snippet": "1st, Floor Dekk House, Zippora Street Providence Industrial Estate, Mahe, Seychelles",
"appointment_count": 1,
"description": "Total number of appointments 1",
"description_identifiers_0": "appointment-count",
"kind": "searchresults#officer",
"links_self": "/officers/z7s5QUnhlYpAT8GvqvJ5snKmtHE/appointments",
"matches_snippet": [],
"matches_title": [
1,
8,
10,
11
],
"snippet": "",
"title": "ASTROCOM AG "
}