I found some code that works well for retrieving part of the data I need (from the question "Python yahoo finance error market_cap=int(data.get_quote_yahoo(str)['marketCap']) TypeError: 'int' object is not callable"):
tickers = ["AAPL", "GOOG", "RY", "HPQ"]
# Fetch the quote table once and select every needed column from it.
# Calling get_quote_yahoo separately per field repeats the same request and
# may return values taken from two different market snapshots.
quotes = web.get_quote_yahoo(tickers)
# Get market cap (not really necessary for you)
market_cap_data = quotes['marketCap']
# Get the P/E ratio directly
pe_data = quotes['trailingPE']
# print stock and p/e ratio
for stock, pe in zip(tickers, pe_data):
    print(stock, pe)
# More keys that can be used
['language', 'region', 'quoteType', 'triggerable', 'quoteSourceName',
'currency', 'preMarketChange', 'preMarketChangePercent',
'preMarketTime', 'preMarketPrice', 'regularMarketChange',
'regularMarketChangePercent', 'regularMarketTime', 'regularMarketPrice',
'regularMarketDayHigh', 'regularMarketDayRange', 'regularMarketDayLow',
'regularMarketVolume', 'regularMarketPreviousClose', 'bid', 'ask',
'bidSize', 'askSize', 'fullExchangeName', 'financialCurrency',
'regularMarketOpen', 'averageDailyVolume3Month',
'averageDailyVolume10Day', 'fiftyTwoWeekLowChange',
'fiftyTwoWeekLowChangePercent', 'fiftyTwoWeekRange',
'fiftyTwoWeekHighChange', 'fiftyTwoWeekHighChangePercent',
'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'dividendDate',
'earningsTimestamp', 'earningsTimestampStart', 'earningsTimestampEnd',
'trailingAnnualDividendRate', 'trailingPE',
'trailingAnnualDividendYield', 'marketState', 'epsTrailingTwelveMonths',
'epsForward', 'sharesOutstanding', 'bookValue', 'fiftyDayAverage',
'fiftyDayAverageChange', 'fiftyDayAverageChangePercent',
'twoHundredDayAverage', 'twoHundredDayAverageChange',
'twoHundredDayAverageChangePercent', 'marketCap', 'forwardPE',
'priceToBook', 'sourceInterval', 'exchangeDataDelayedBy', 'tradeable',
'firstTradeDateMilliseconds', 'priceHint', 'exchange', 'shortName',
'longName', 'messageBoardId', 'exchangeTimezoneName',
'exchangeTimezoneShortName', 'gmtOffSetMilliseconds', 'market',
'esgPopulated', 'price']
I would like to retrieve most of the commented fields at the end of the previous code, but I've done this so far:
import pandas_datareader as web
# Symbols to query.
tickers = ["AAPL", "GOOG", "RY", "SAB.MC"]
# Each field is obtained with its own get_quote_yahoo call; a single column
# is then selected from the returned table.
market_cap_data = web.get_quote_yahoo(tickers)['marketCap']
pe_data = web.get_quote_yahoo(tickers)['trailingPE']
fiftytwo_low_data = web.get_quote_yahoo(tickers)['fiftyTwoWeekLowChangePercent']
# Walk the ticker list and the three selected columns in lockstep and print
# one line per symbol.
for stock, mcap, pe, fiftytwo_low in zip(tickers, market_cap_data, pe_data, fiftytwo_low_data):
    print(stock, mcap, pe, fiftytwo_low)
Obviously I could continue with my brute force, but do you know any way to make the code more elegant to retrieve the whole string of fields with column names?
['language', 'region', 'quoteType', 'triggerable', 'quoteSourceName',
'currency', 'preMarketChange', 'preMarketChangePercent',
'preMarketTime', 'preMarketPrice', 'regularMarketChange',
'regularMarketChangePercent', 'regularMarketTime', 'regularMarketPrice',
'regularMarketDayHigh', 'regularMarketDayRange', 'regularMarketDayLow',
'regularMarketVolume', 'regularMarketPreviousClose', 'bid', 'ask',
'bidSize', 'askSize', 'fullExchangeName', 'financialCurrency',
'regularMarketOpen', 'averageDailyVolume3Month',
'averageDailyVolume10Day', 'fiftyTwoWeekLowChange',
'fiftyTwoWeekLowChangePercent', 'fiftyTwoWeekRange',
'fiftyTwoWeekHighChange', 'fiftyTwoWeekHighChangePercent',
'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'dividendDate',
'earningsTimestamp', 'earningsTimestampStart', 'earningsTimestampEnd',
'trailingAnnualDividendRate', 'trailingPE',
'trailingAnnualDividendYield', 'marketState', 'epsTrailingTwelveMonths',
'epsForward', 'sharesOutstanding', 'bookValue', 'fiftyDayAverage',
'fiftyDayAverageChange', 'fiftyDayAverageChangePercent',
'twoHundredDayAverage', 'twoHundredDayAverageChange',
'twoHundredDayAverageChangePercent', 'marketCap', 'forwardPE',
'priceToBook', 'sourceInterval', 'exchangeDataDelayedBy', 'tradeable',
'firstTradeDateMilliseconds', 'priceHint', 'exchange', 'shortName',
'longName', 'messageBoardId', 'exchangeTimezoneName',
'exchangeTimezoneShortName', 'gmtOffSetMilliseconds', 'market',
'esgPopulated', 'price']
thanks
Using a set, you can collect every field name that is returned for a ticker; by taking the union over all the tickers in your list, you get all the field names that have a value for at least one of the tickers you want to retrieve.
import pandas_datareader as web
import pandas as pd
tickers = ["AAPL", "GOOG", "RY", "SAB.MC"]
# Query each ticker on its own and merge the column names of every result
# into one set, so a field reported for only some tickers is still listed.
names = set().union(
    *(web.get_quote_yahoo(symbol).columns.to_list() for symbol in tickers)
)
# Bare expression: shows the set when run in a notebook / REPL.
names
{'ask',
'askSize',
'averageAnalystRating',
'averageDailyVolume10Day',
'averageDailyVolume3Month',
'bid',
'bidSize',
'bookValue',
'cryptoTradeable',
'currency',
'customPriceAlertConfidence',
'displayName',
...
'trailingAnnualDividendYield',
'trailingPE',
'triggerable',
'twoHundredDayAverage',
'twoHundredDayAverageChange',
'twoHundredDayAverageChangePercent',
'typeDisp'}
I know this post is pretty old, but I just came across it now. Check out the 'yfinance' library. There's all kinds of stuff available over there!!
import pandas_datareader as web
import pandas as pd
# Load AAPL data from the 'yahoo' data source for the given start/end dates
# and display the first rows of the resulting frame.
df = web.DataReader('AAPL', data_source='yahoo', start='2011-01-01', end='2021-01-12')
df.head()
import yfinance as yf
# Ticker object for AAPL; the attribute accesses below are bare expressions,
# so their values are only displayed when run in a notebook / REPL.
aapl = yf.Ticker("AAPL")
aapl
# get stock info
aapl.info
# get historical market data
hist = aapl.history(period="max")
# show actions (dividends, splits)
aapl.actions
# show dividends
aapl.dividends
# show splits
aapl.splits
# show financials
aapl.financials
aapl.quarterly_financials
# show major holders
aapl.major_holders
# show institutional holders
aapl.institutional_holders
# show balance sheet
aapl.balance_sheet
aapl.quarterly_balance_sheet
# show cashflow
aapl.cashflow
aapl.quarterly_cashflow
# show earnings
aapl.earnings
aapl.quarterly_earnings
# show sustainability
aapl.sustainability
# show analysts recommendations
aapl.recommendations
# show next event (earnings, etc)
aapl.calendar
# show ISIN code - *experimental*
# ISIN = International Securities Identification Number
aapl.isin
# show options expirations
aapl.options
# get option chain for specific expiration
# NOTE(review): 'YYYY-MM-DD' is a placeholder, not a real date; presumably it
# must be replaced with one of the expirations listed by aapl.options - confirm.
opt = aapl.option_chain('YYYY-MM-DD')
Result:
{'zip': '95014',
'sector': 'Technology',
'fullTimeEmployees': 164000,
'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. It also sells various related services. In addition, the company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. Further, it provides AppleCare support and cloud services store services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts. Additionally, the company offers various services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized fitness service; Apple Music, which offers users a curated listening experience with on-demand radio stations; Apple News+, a subscription news and magazine service; Apple TV+, which offers exclusive original content; Apple Card, a co-branded credit card; and Apple Pay, a cashless payment service, as well as licenses its intellectual property. The company serves consumers, and small and mid-sized businesses; and the education, enterprise, and government markets. It distributes third-party applications for its products through the App Store. The company also sells its products through its retail and online stores, and direct sales force; and third-party cellular network carriers, wholesalers, retailers, and resellers. Apple Inc. was incorporated in 1977 and is headquartered in Cupertino, California.',
'city': 'Cupertino',
'phone': '408 996 1010',
'state': 'CA',
'country': 'United States',
'companyOfficers': [],
'website': 'https://www.apple.com',
'maxAge': 1,
'address1': 'One Apple Park Way',
'industry': 'Consumer Electronics',
'ebitdaMargins': 0.33105,
'profitMargins': 0.2531,
'grossMargins': 0.43310001,
'operatingCashflow': 122151002112,
'revenueGrowth': 0.081,
'operatingMargins': 0.30289,
'ebitda': 130541002752,
'targetLowPrice': 122,
'recommendationKey': 'buy',
'grossProfits': 170782000000,
'freeCashflow': 90215251968,
'targetMedianPrice': 180,
'currentPrice': 151.29,
'earningsGrowth': 0.048,
'currentRatio': 0.879,
'returnOnAssets': 0.21214001,
'numberOfAnalystOpinions': 41,
'targetMeanPrice': 178.15,
'debtToEquity': 261.446,
'returnOnEquity': 1.75459,
'targetHighPrice': 214,
'totalCash': 48304001024,
'totalDebt': 132480000000,
'totalRevenue': 394328014848,
'totalCashPerShare': 3.036,
'financialCurrency': 'USD',
'revenuePerShare': 24.317,
'quickRatio': 0.709,
'recommendationMean': 1.9,
'exchange': 'NMS',
'shortName': 'Apple Inc.',
'longName': 'Apple Inc.',
'exchangeTimezoneName': 'America/New_York',
'exchangeTimezoneShortName': 'EST',
'isEsgPopulated': False,
'gmtOffSetMilliseconds': '-18000000',
'quoteType': 'EQUITY',
'symbol': 'AAPL',
'messageBoardId': 'finmb_24937',
'market': 'us_market',
'annualHoldingsTurnover': None,
'enterpriseToRevenue': 6.317,
'beta3Year': None,
'enterpriseToEbitda': 19.081,
'52WeekChange': -0.06042725,
'morningStarRiskRating': None,
'forwardEps': 6.82,
'revenueQuarterlyGrowth': None,
'sharesOutstanding': 15908100096,
'fundInceptionDate': None,
'annualReportExpenseRatio': None,
'totalAssets': None,
'bookValue': 3.178,
'sharesShort': 103178670,
'sharesPercentSharesOut': 0.0064999997,
'fundFamily': None,
'lastFiscalYearEnd': 1663977600,
'heldPercentInstitutions': 0.60030997,
'netIncomeToCommon': 99802996736,
'trailingEps': 6.11,
'lastDividendValue': 0.23,
'SandP52WeekChange': -0.15323704,
'priceToBook': 47.60541,
'heldPercentInsiders': 0.00071999995,
'nextFiscalYearEnd': 1727136000,
'yield': None,
'mostRecentQuarter': 1663977600,
'shortRatio': 1.14,
'sharesShortPreviousMonthDate': 1664496000,
'floatShares': 15891414476,
'beta': 1.246644,
'enterpriseValue': 2490915094528,
'priceHint': 2,
'threeYearAverageReturn': None,
'lastSplitDate': 1598832000,
'lastSplitFactor': '4:1',
'legalType': None,
'lastDividendDate': 1667520000,
'morningStarOverallRating': None,
'earningsQuarterlyGrowth': 0.008,
'priceToSalesTrailing12Months': 6.103387,
'dateShortInterest': 1667174400,
'pegRatio': 2.71,
'ytdReturn': None,
'forwardPE': 22.183283,
'lastCapGain': None,
'shortPercentOfFloat': 0.0064999997,
'sharesShortPriorMonth': 103251184,
'impliedSharesOutstanding': 0,
'category': None,
'fiveYearAverageReturn': None,
'previousClose': 150.72,
'regularMarketOpen': 152.305,
'twoHundredDayAverage': 155.0841,
'trailingAnnualDividendYield': 0.005971337,
'payoutRatio': 0.14729999,
'volume24Hr': None,
'regularMarketDayHigh': 152.57,
'navPrice': None,
'averageDailyVolume10Day': 84360340,
'regularMarketPreviousClose': 150.72,
'fiftyDayAverage': 147.0834,
'trailingAnnualDividendRate': 0.9,
'open': 152.305,
'toCurrency': None,
'averageVolume10days': 84360340,
'expireDate': None,
'algorithm': None,
'dividendRate': 0.92,
'exDividendDate': 1667520000,
'circulatingSupply': None,
'startDate': None,
'regularMarketDayLow': 149.97,
'currency': 'USD',
'trailingPE': 24.761045,
'regularMarketVolume': 74496725,
'lastMarket': None,
'maxSupply': None,
'openInterest': None,
'marketCap': 2406736461824,
'volumeAllCurrencies': None,
'strikePrice': None,
'averageVolume': 89929545,
'dayLow': 149.97,
'ask': 150.95,
'askSize': 1000,
'volume': 74496725,
'fiftyTwoWeekHigh': 182.94,
'fromCurrency': None,
'fiveYearAvgDividendYield': 1,
'fiftyTwoWeekLow': 129.04,
'bid': 150.82,
'tradeable': False,
'dividendYield': 0.0061000003,
'bidSize': 1100,
'dayHigh': 152.57,
'coinMarketCapLink': None,
'regularMarketPrice': 151.29,
'preMarketPrice': None,
'logo_url': 'https://logo.clearb
Just pick/choose what you want.
When I run a for loop to collect elements within a <div> tag it only returns the first from a list of all with the same class.
For example:
# Download the rankings page and parse the response body with the lxml parser.
r = requests.get("https://one-versus-one.com/en/rankings/all/statistics")
soup = BeautifulSoup(r.content, 'lxml')
# Module-level result container; every key starts out as an empty list.
data = {
    'players': [],
    'club': [],
    'rank': []
}
def getstuff(soup):
    """Read player name, club and rank from `soup`, store them in the
    module-level `data` dict and print it.

    NOTE(review): the indentation of this snippet was lost; the layout below
    (print after the loop, inside the function) is a reconstruction - confirm.
    """
    # Iterates over the <div class="rankings-table"> elements themselves.
    products = soup.find_all('div', {'class':'rankings-table'})
    for name in products:
        # find() returns only the first matching descendant of `name`.
        players = name.find('div', {'class':'player-name rankings-table__player-name'}).text
        club = name.find('span', {'class':'rankings-table__club-name'}).text
        rank = name.find('div', {'class':'rankings-table-cell value rankings-table__value'}).text.strip()
        # NOTE: plain assignment replaces the list stored under each key with
        # a single string, so earlier values are not accumulated.
        data['players'] = players
        data['club'] = club
        data['rank'] = rank
    print(data)
getstuff(soup)
This returns:
{'players': 'Lionel Messi', 'club': 'Barcelona', 'rank': '100'}
Where I expected all players, clubs and ranks to be printed within the page.
You can try this:
import requests
from bs4 import BeautifulSoup
r = requests.get("https://one-versus-one.com/en/rankings/all/statistics")
# Fail loudly on an HTTP error instead of trying to parse an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, 'lxml')
data = {'players': [], 'club': [], 'rank': []}


def getstuff(soup):
    """Append every player's name, club and rank found in `soup` to `data`.

    Looks up the first <div class="rankings-table"> element and walks each
    <a> element inside it. The three values of a row are appended to the
    module-level `data` lists, and `data` is printed once at the end.

    Rows that lack any of the three expected child elements are skipped, and
    a page without a rankings table leaves `data` unchanged; previously both
    cases raised AttributeError on the `None` returned by `find()`.

    Args:
        soup: parsed document exposing BeautifulSoup's find()/find_all().

    Returns:
        None. Results are accumulated in the module-level `data` dict.
    """
    table = soup.find('div', {'class': 'rankings-table'})
    rows = table.find_all("a") if table is not None else []
    for row in rows:
        player = row.find('div', {'class': 'player-name rankings-table__player-name'})
        club = row.find('span', {'class': 'rankings-table__club-name'})
        rank = row.find('div', {'class': 'rankings-table-cell value rankings-table__value'})
        if player is None or club is None or rank is None:
            # Not a complete player row; appending a partial record would
            # leave the three lists with different lengths.
            continue
        data['players'].append(player.text)
        data['club'].append(club.text)
        data['rank'].append(rank.text.strip())
    print(data)


getstuff(soup)
"""
{'players': ['Lionel Messi', 'Junior Neymar', 'Robert Lewandowski', 'Joao Cancelo', 'Kevin de Bruyne', 'Rodri', 'Jesse Lingard', 'Riyad Mahrez', 'Ilkay Gundogan', 'John Stones'], 'club': ['Barcelona', 'Paris Saint-Germain', 'Bayern Munich', 'Manchester City', 'Manchester City', 'Manchester City', 'West Ham United', 'Manchester City', 'Manchester City', 'Manchester City'], 'rank': ['100', '95', '93', '92', '91', '90', '90', '89', '88', '88']}
"""
You have to use .find_all("a") to get info about all players. Additionally, you are overwriting data['players'] on every iteration instead of appending the new player to it; the same applies to club and rank.
You are overwriting the variable within each loop rather than appending to a data set. Also, your products search only had one player within it.
Try
# Build one record per element matched by the selector, then turn the list
# of records into a DataFrame in a single step.
records = []
for row in soup.select('a .rankings-table-row'):
    player_tag = row.find('div', {'class': 'player-name rankings-table__player-name'})
    club_tag = row.find('span', {'class': 'rankings-table__club-name'})
    rank_tag = row.find('div', {'class': 'rankings-table-cell value rankings-table__value'})
    records.append({
        'Players': player_tag.text,
        'Club': club_tag.text,
        'Rank': rank_tag.text.strip(),
    })
data = pd.DataFrame(records)
You should try
data['players'].append(players)
It's a list so appending should work. A list can be added to only by appending so if you do
data['players'] = players
it would assign the 'players' key to only a single value. Likewise for the other keys
The answer below me also mentions that you should use 'find_all'.
I tried solving this too, but with Selenium. I even used an explicit wait (WebDriverWait) to make sure the elements load,
and still only Messi is returned, none of the other players. The elements exist as entries, but their ".text" comes back blank when accessed. Have the people above actually tried their suggested solutions?
I am trying to use the scrape_linkedin package. I follow the section on the github page on how to set up the package/LinkedIn li_at key (which I paste here for clarity).
Getting LI_AT
Navigate to www.linkedin.com and log in
Open browser developer tools (Ctrl-Shift-I or right click -> inspect element)
Select the appropriate tab for your browser (Application on Chrome, Storage on Firefox)
Click the Cookies dropdown on the left-hand menu, and select the www.linkedin.com option
Find and copy the li_at value
Once I collect the li_at value from my LinkedIn, I run the following code:
from scrape_linkedin import ProfileScraper
# Open a scraper session authenticated with the li_at cookie value copied
# from the browser (the string below is a placeholder for that value).
with ProfileScraper(cookie='myVeryLong_li_at_Code_which_has_characters_like_AQEDAQNZwYQAC5_etc') as scraper:
    # Scrape a single profile, addressed by its full URL.
    profile = scraper.scrape(url='https://www.linkedin.com/in/justintrudeau/')
# NOTE(review): indentation was lost in this paste; the print is placed after
# the `with` block here - confirm against the package README.
print(profile.to_dict())
I have two questions (I am originally an R user).
How can I input a list of profiles:
https://www.linkedin.com/in/justintrudeau/
https://www.linkedin.com/in/barackobama/
https://www.linkedin.com/in/williamhgates/
https://www.linkedin.com/in/wozniaksteve/
and scrape the profiles? (In R I would use the map function from the purrr package to apply the function to each of the LinkedIn profiles).
The output (from the original github page) is returned in a JSON style format. My second question is how I can convert this into a pandas data frame (i.e. it is returned similar to the following).
{'personal_info': {'name': 'Steve Wozniak', 'headline': 'Fellow at
Apple', 'company': None, 'school': None, 'location': 'San Francisco
Bay Area', 'summary': '', 'image': '', 'followers': '', 'email': None,
'phone': None, 'connected': None, 'websites': [],
'current_company_link': 'https://www.linkedin.com/company/sandisk/'},
'experiences': {'jobs': [{'title': 'Chief Scientist', 'company':
'Fusion-io', 'date_range': 'Jul 2014 – Present', 'location': 'Primary
Data', 'description': "I'm looking into future technologies applicable
to servers and storage, and helping this company, which I love, get
noticed and get a lead so that the world can discover the new amazing
technology they have developed. My role is principally a marketing one
at present but that will change over time.", 'li_company_url':
'https://www.linkedin.com/company/sandisk/'}, {'title': 'Fellow',
'company': 'Apple', 'date_range': 'Mar 1976 – Present', 'location': '1
Infinite Loop, Cupertino, CA 94015', 'description': 'Digital Design
engineer.', 'li_company_url': ''}, {'title': 'President & CTO',
'company': 'Wheels of Zeus', 'date_range': '2002 – 2005', 'location':
None, 'description': None, 'li_company_url':
'https://www.linkedin.com/company/wheels-of-zeus/'}, {'title':
'diagnostic programmer', 'company': 'TENET Inc.', 'date_range': '1970
– 1971', 'location': None, 'description': None, 'li_company_url':
''}], 'education': [{'name': 'University of California, Berkeley',
'degree': 'BS', 'grades': None, 'field_of_study': 'EE & CS',
'date_range': '1971 – 1986', 'activities': None}, {'name': 'University
of Colorado Boulder', 'degree': 'Honorary PhD.', 'grades': None,
'field_of_study': 'Electrical and Electronics Engineering',
'date_range': '1968 – 1969', 'activities': None}], 'volunteering':
[]}, 'skills': [], 'accomplishments': {'publications': [],
'certifications': [], 'patents': [], 'courses': [], 'projects': [],
'honors': [], 'test_scores': [], 'languages': [], 'organizations':
[]}, 'interests': ['Western Digital', 'University of Colorado
Boulder', 'Western Digital Data Center Solutions', 'NEW Homebrew
Computer Club', 'Wheels of Zeus', 'SanDisk®']}
Firstly, you can write a custom function that scrapes a single profile and use Python's built-in map function to apply it to each profile link.
Secondly, to create a pandas dataframe using a dictionary, you can simply pass the dictionary to pd.DataFrame.
Thus to create a dataframe df, with dictionary dict, you can do like this:
# `dict` stands for your own dictionary variable here.
# NOTE(review): a variable with this name shadows the builtin `dict` type;
# a different name is advisable in real code.
df = pd.DataFrame(dict)
I'm new to the concept of generators and I'm struggling with how to apply my changes to the records within the generator object returned from the RISparser module.
I understand that a generator only reads a record at a time and doesn't actually store the data in memory but I'm having a tough time iterating over it effectively and applying my changes.
My changes will involve dropping every record whose ['doi'] value is not contained in a list of DOIs [doi_match].
doi_match = ['10.1002/14651858.CD008259.pub2','10.1002/14651858.CD011552','10.1002/14651858.CD011990']
The generator object returned from RISparser contains the following information; these are just the first 2 records of a few hundred. I want to iterate over it and compare the 'doi' key of each record with the list of DOIs.
{'type_of_reference': 'JOUR', 'title': "The CoRe Outcomes in WomeN's health (CROWN) initiative: Journal editors invite researchers to develop core outcomes in women's health", 'secondary_title': 'Neurourology and Urodynamics', 'alternate_title1': 'Neurourol. Urodyn.', 'volume': '33', 'number': '8', 'start_page': '1176', 'end_page': '1177', 'year': '2014', 'doi': '10.1002/nau.22674', 'issn': '07332467 (ISSN)', 'authors': ['Khan, K.'], 'keywords': ['Bias (epidemiology)', 'Clinical trials', 'Consensus', 'Endpoint determination/standards', 'Evidence-based medicine', 'Guidelines', 'Research design/standards', 'Systematic reviews', 'Treatment outcome', 'consensus', 'editor', 'female', 'human', 'medical literature', 'Note', 'outcomes research', 'peer review', 'randomized controlled trial (topic)', 'systematic review (topic)', "women's health", 'outcome assessment', 'personnel', 'publication', 'Female', 'Humans', 'Outcome Assessment (Health Care)', 'Periodicals as Topic', 'Research Personnel', "Women's Health"], 'publisher': 'John Wiley and Sons Inc.', 'notes': ['Export Date: 14 July 2020', 'CODEN: NEURE'], 'type_of_work': 'Note', 'name_of_database': 'Scopus', 'custom2': '25270392', 'language': 'English', 'url': 'https://www.scopus.com/inward/record.uri?eid=2-s2.0-84908368202&doi=10.1002%2fnau.22674&partnerID=40&md5=b220702e005430b637ef9d80a94dadc4'}
{'type_of_reference': 'JOUR', 'title': "The CROWN initiative: Journal editors invite researchers to develop core outcomes in women's health", 'secondary_title': 'Gynecologic Oncology', 'alternate_title1': 'Gynecol. Oncol.', 'volume': '134', 'number': '3', 'start_page': '443', 'end_page': '444', 'year': '2014', 'doi': '10.1016/j.ygyno.2014.05.005', 'issn': '00908258 (ISSN)', 'authors': ['Karlan, B.Y.'], 'author_address': 'Gynecologic Oncology and Gynecologic Oncology Reports, India', 'keywords': ['clinical trial (topic)', 'decision making', 'Editorial', 'evidence based practice', 'female infertility', 'health care personnel', 'human', 'outcome assessment', 'outcomes research', 'peer review', 'practice guideline', 'premature labor', 'priority journal', 'publication', 'systematic review (topic)', "women's health", 'editorial', 'female', 'outcome assessment', 'personnel', 'publication', 'Female', 'Humans', 'Outcome Assessment (Health Care)', 'Periodicals as Topic', 'Research Personnel', "Women's Health"], 'publisher': 'Academic Press Inc.', 'notes': ['Export Date: 14 July 2020', 'CODEN: GYNOA', 'Correspondence Address: Karlan, B.Y.; Gynecologic Oncology and Gynecologic Oncology ReportsIndia'], 'type_of_work': 'Editorial', 'name_of_database': 'Scopus', 'custom2': '25199578', 'language': 'English', 'url': 'https://www.scopus.com/inward/record.uri?eid=2-s2.0-84908351159&doi=10.1016%2fj.ygyno.2014.05.005&partnerID=40&md5=ab5a4d26d52c12d081e38364b0c79678'}
I tried iterating over the generator and applying the changes. But the records that have matches are not being placed in the match list.
# Records whose DOI appears in doi_match are collected here.
match = []
# NOTE(review): if ris_records is a generator it can be consumed only once;
# presumably nothing else has iterated it before this loop - confirm.
for entry in ris_records:
    # Direct key lookup: a record without a 'doi' key would raise KeyError
    # here, assuming records are plain dicts - confirm.
    if entry['doi'] in doi_match:
        match.append(entry)
    else:
        # `del` only unbinds the loop variable for this iteration; it does
        # not remove anything from ris_records.
        del entry
Any advice on how to iterate over a generator correctly would be appreciated. Thanks.