How to save AWS API Python code to a CSV with NoneType - python

I have code that successfully pulls out data between two specific times, but it gives me an error when I try to save it as a CSV.
Here is the code:
import argparse
import boto3
import datetime
import pandas as pd
import csv
import json

parser = argparse.ArgumentParser()
parser.add_argument('--days', type=int, default=30)
args = parser.parse_args()

session = boto3.Session(profile_name='UMW')
cd = session.client('ce', 'us-west-1')

results = []
token = None
while True:
    if token:
        kwargs = {'NextPageToken': token}
    else:
        kwargs = {}
    data = cd.get_cost_and_usage(
        TimePeriod={'Start': '2020-03-11', 'End': '2020-06-10'},
        Granularity='DAILY',
        Metrics=['AmortizedCost'],
        GroupBy=[{'Type': 'DIMENSION', 'Key': 'LINKED_ACCOUNT'}],
        Filter={'Dimensions': {'Key': 'LINKED_ACCOUNT', 'Values': ['12394850028']}},
        **kwargs)
    for info in data['ResultsByTime']:
        for group in info['Groups']:
            print(group['Keys'][0], info['TimePeriod']['Start'],
                  group['Metrics']['AmortizedCost']['Amount'])
    token = data.get('NextPageToken')
    if not token:
        break

with open('test.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow([results])
I'm trying to save the results to a CSV, but this gives me a blank CSV. It runs without errors and prints the results on the command line, but the CSV comes out with no results.

You can use something like this:
def ask_date():
    year = int(input('Input year'))
    month = int(input('Input month'))
    day = int(input('Input day'))
    date = datetime.date(year, month, day)
    return date
If you use a specific date format (dd-mm-yyyy) that you know your users will always enter correctly, you can ask for the date in this format and then parse it:
def ask_date():
    res = input()
    day, month, year = res.split('-')
    date = datetime.date(int(year), int(month), int(day))
    return date

Related

How to get only tweets within an hour from Snscrape?

I am trying to scrape data from Twitter using snscrape, but I am unable to restrict the results to tweets posted within the past hour only.
import pandas as pd
import snscrape.modules.twitter as sntwitter
from datetime import datetime, time
from datetime import timedelta

now = datetime.utcnow()
since = now - timedelta(hours=1)
since_str = since.strftime('%Y-%m-%d %H:%M:%S.%f%z')
until_str = now.strftime('%Y-%m-%d %H:%M:%S.%f%z')

# Query tweets with hashtag #SOSREX in the last one hour
query = '#SOSREX Since:' + since_str + ' until:' + until_str

SOSREX_data = []
for tweet in sntwitter.TwitterSearchScraper(query).get_items():
    if len(SOSREX_data) > 100:
        break
    else:
        SOSREX_data.append([tweet.date, tweet.user.username, tweet.user.displayname,
                            tweet.content, tweet.likeCount, tweet.retweetCount,
                            tweet.sourceLabel, tweet.user.followersCount, tweet.user.location])

Tweets_data = pd.DataFrame(SOSREX_data,
                           columns=["Date_tweeted", "username", "display_name",
                                    "Tweets", "Number_of_Likes", "Number_retweets",
                                    "Source_of_Tweet",
                                    "number_of_followers", "location"])
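Since the timestamps embedded in the query string may not be honored, one approach is to filter on tweet.date in Python instead. A minimal sketch, assuming the same #SOSREX search and that get_items() yields newer tweets before older ones (Twitter search results are returned newest-first by default):
import snscrape.modules.twitter as sntwitter
from datetime import datetime, timedelta, timezone

# tweet.date is timezone-aware (UTC), so compare against an aware datetime
since = datetime.now(timezone.utc) - timedelta(hours=1)

recent = []
for tweet in sntwitter.TwitterSearchScraper('#SOSREX').get_items():
    if tweet.date < since:
        break  # older than one hour; everything after this is older still
    recent.append([tweet.date, tweet.user.username, tweet.content])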

ID = None script doesn't return ID from List

I am new to Python. I made a script for parsing Flashscore via requests. It takes data from the first page for all of tomorrow's games.
import json
import requests
from datetime import datetime

headers = {"x-fsign": "SW9D1eZo"}

def main():
    feed = 'f_1_1_1_en_1'
    url = f'https://d.flashscore.com/x/feed/{feed}'
    response = requests.get(url=url, headers=headers)
    data = response.text.split('¬')
    data_list = [{}]
    for item in data:
        key = item.split('÷')[0]
        value = item.split('÷')[-1]
        if '~' in key:
            data_list.append({key: value})
        else:
            data_list[-1].update({key: value})
    for game in data_list:
        if 'AA' in list(game.keys())[0]:
            id = game.get("AA")
            date = datetime.fromtimestamp(int(game.get("AD")))
            team_1 = game.get("AE")
            team_2 = game.get("AF")
            score = f'{game.get("AG")} : {game.get("AH")}'
            print(id, date, team_1, team_2, score, sep=' ')
    # print(json.dumps(data_list, ensure_ascii=False, indent=2))
But then I need to go into each game to get the H2H stats and the average number of goals scored in the last 5 home and away games. I can't work out how to pull out the ID of each game so that the page with the H2H stats can be opened. With Selenium I was able to do this, but with requests nothing happens.
Can you please advise me on how to do this?
Thank you all in advance for your help and participation.
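One likely reason the id comes back as None is that the event keys in the feed arrive with a leading '~' marker (e.g. '~AA'), so game.get("AA") misses them; that detail is an assumption worth checking against the raw feed. A sketch that reuses the parsing from main() but strips the marker and collects the ids so they can later drive per-game H2H requests (the H2H feed endpoint itself is not covered here):
import requests

headers = {"x-fsign": "SW9D1eZo"}

def get_game_ids():
    # same parsing as main(), but store keys without the '~' marker and collect the ids
    response = requests.get('https://d.flashscore.com/x/feed/f_1_1_1_en_1', headers=headers)
    data_list = [{}]
    for item in response.text.split('¬'):
        key, value = item.split('÷')[0], item.split('÷')[-1]
        if '~' in key:
            data_list.append({key.replace('~', ''): value})  # new game record
        else:
            data_list[-1].update({key: value})
    return [game['AA'] for game in data_list if 'AA' in game]

print(get_game_ids())  # these ids can then be plugged into per-game requests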

How to get data of a python code in pandas dataframe

The following Python code prints live data from a data feed vendor's API. I want the data in a pandas DataFrame, but it prints only the following result:
"Empty DataFrame
Columns: []
Index: []"
from truedata_ws.websocket.TD import TD
import time
import logging
import pandas as pd

username = ''
password = ''
realtime_port = 8084
url = 'push.truedata.in'
symbols = []

td_obj = TD(username, password, live_port=realtime_port, url=url,
            log_level=logging.DEBUG, log_format="%(message)s")

print('\nStarting Real Time Feed.... ')
req_ids = td_obj.start_live_data(symbols)
live_data_objs = {}
time.sleep(1)

for req_id in req_ids:
    print(f'touchlinedata -> {td_obj.touchline_data[req_id]}')

df = pd.DataFrame(live_data_objs)
print(df)

@td_obj.trade_callback
def strategy_callback(symbol_id, tick_data):
    print(f'Trade update > {tick_data}')

while True:
    time.sleep(120)
In your code, you pass an empty dictionary (live_data_objs) as the argument when creating the DataFrame; the DataFrame you get back from an empty dictionary will itself be empty.
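A minimal sketch of the general pattern the answer describes: accumulate each incoming tick as a dict in a list, then build the DataFrame from that list. The field names below are placeholders, not the vendor's actual tick fields:
import pandas as pd

rows = []

# inside whatever callback receives a tick, append one dict per tick
def on_tick(tick):
    rows.append({'symbol': tick['symbol'], 'ltp': tick['ltp'], 'timestamp': tick['timestamp']})

# example ticks; in the real script these would come from the live feed
on_tick({'symbol': 'NIFTY', 'ltp': 19500.5, 'timestamp': '2023-01-01 09:15:00'})
on_tick({'symbol': 'BANKNIFTY', 'ltp': 44100.0, 'timestamp': '2023-01-01 09:15:00'})

df = pd.DataFrame(rows)  # non-empty because rows now holds data
print(df)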

NameError: name 'all_df' is not defined

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 14 2017
Modified on Wed Aug 16 2017
Author: Yanfei Wu
Get the past 500 S&P 500 stocks data
"""
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import pandas as pd
import pandas_datareader.data as web


def get_ticker_and_sector(url='https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'):
    """
    get the s&p 500 stocks from Wikipedia:
    https://en.wikipedia.org/wiki/List_of_S%26P_500_companies
    ---
    return: a dictionary with ticker names as keys and sectors as values
    """
    r = requests.get(url)
    data = r.text
    soup = BeautifulSoup(data, 'lxml')
    # we only want to parse the first table of this wikipedia page
    table = soup.find('table')
    sp500 = {}
    # loop over the rows and get ticker symbol and sector name
    for tr in table.find_all('tr')[1:]:
        tds = tr.find_all('td')
        ticker = tds[0].text
        sector = tds[3].text
        sp500[ticker] = sector
    return sp500


def get_stock_data(ticker, start_date, end_date):
    """ get stock data from google with stock ticker, start and end dates """
    data = web.DataReader(ticker, 'google', start_date, end_date)
    return data


if __name__ == '__main__':
    """ get the stock data from the past 5 years """
    # end_date = datetime.now()
    end_date = datetime(2017, 8, 14)
    start_date = datetime(end_date.year - 5, end_date.month, end_date.day)

    sp500 = get_ticker_and_sector()
    sp500['SPY'] = 'SPY'  # also include SPY as reference
    print('Total number of tickers (including SPY): {}'.format(len(sp500)))

    bad_tickers = []
    for i, (ticker, sector) in enumerate(sp500.items()):
        try:
            stock_df = get_stock_data(ticker, start_date, end_date)
            stock_df['Name'] = ticker
            stock_df['Sector'] = sector
            if stock_df.shape[0] == 0:
                bad_tickers.append(ticker)
            #output_name = ticker + '_data.csv'
            #stock_df.to_csv(output_name)
            if i == 0:
                all_df = stock_df
            else:
                all_df = all_df.append(stock_df)
        except:
            bad_tickers.append(ticker)
    print(bad_tickers)
    all_df.to_csv('./data/all_sp500_data_2.csv')

    """ Write failed queries to a text file """
    if len(bad_tickers) > 0:
        with open('./data/failed_queries_2.txt', 'w') as outfile:
            for ticker in bad_tickers:
                outfile.write(ticker + '\n')
Your problem is in your try/except block. It is good practice to catch a specific exception rather than putting a bare except after a long block of code. The problem with a bare except, as your case demonstrates, is that if an unrelated or unexpected error occurs, you won't know about it. In this case, this is the exception I get from running your code:
NotImplementedError: data_source='google' is not implemented
I'm not sure what that means, but it looks like the pandas_datareader.data.DataReader docs have good information about how to use that DataReader correctly.
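A minimal sketch of both points: catch a specific, visible exception and collect the per-ticker frames in a list so all_df exists even if every download fails. 'stooq' is used here only as an example of a source pandas_datareader still supports, not the author's original choice:
import pandas as pd
import pandas_datareader.data as web

frames = []
bad_tickers = []
for ticker in ['AAPL', 'MSFT', 'NOT_A_TICKER']:
    try:
        # 'stooq' stands in for the removed 'google' source
        stock_df = web.DataReader(ticker, 'stooq')
        stock_df['Name'] = ticker
        frames.append(stock_df)
    except Exception as exc:  # report the failure instead of swallowing it silently
        print(f'{ticker} failed: {exc}')
        bad_tickers.append(ticker)

# concat handles the empty case explicitly instead of raising a NameError
all_df = pd.concat(frames) if frames else pd.DataFrame()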

Unable to store pandas data frame as a csv

I am following this tutorial to retrieve data from news sites.
The main function is getDailyNews. It loops over each news source, requests the API, extracts the data, dumps it into a pandas DataFrame, and then exports the result to a CSV file.
But when I run the code, I get an error.
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
from tqdm import tqdm, tqdm_notebook
from functools import reduce


def getSources():
    source_url = 'https://newsapi.org/v1/sources?language=en'
    response = requests.get(source_url).json()
    sources = []
    for source in response['sources']:
        sources.append(source['id'])
    return sources


def mapping():
    d = {}
    response = requests.get('https://newsapi.org/v1/sources?language=en')
    response = response.json()
    for s in response['sources']:
        d[s['id']] = s['category']
    return d


def category(source, m):
    try:
        return m[source]
    except:
        return 'NC'


def getDailyNews():
    sources = getSources()
    key = '96f279e1b7f845669089abc016e915cc'
    url = 'https://newsapi.org/v1/articles?source={0}&sortBy={1}&apiKey={2}'
    responses = []
    for i, source in tqdm_notebook(enumerate(sources), total=len(sources)):
        try:
            u = url.format(source, 'top', key)
        except:
            u = url.format(source, 'latest', key)
        response = requests.get(u)
        r = response.json()
        try:
            for article in r['articles']:
                article['source'] = source
            responses.append(r)
        except:
            print('Rate limit exceeded ... please wait and retry in 6 hours')
            return None

    articles = list(map(lambda r: r['articles'], responses))
    articles = list(reduce(lambda x, y: x + y, articles))

    news = pd.DataFrame(articles)
    news = news.dropna()
    news = news.drop_duplicates()
    news.reset_index(inplace=True, drop=True)

    d = mapping()
    news['category'] = news['source'].map(lambda s: category(s, d))
    news['scraping_date'] = datetime.now()

    try:
        aux = pd.read_csv('./data/news.csv')
        aux = aux.append(news)
        aux = aux.drop_duplicates('url')
        aux.reset_index(inplace=True, drop=True)
        aux.to_csv('./data/news.csv', encoding='utf-8', index=False)
    except:
        news.to_csv('./data/news.csv', index=False, encoding='utf-8')
    print('Done')


if __name__ == '__main__':
    getDailyNews()
Error:
FileNotFoundError: [Errno 2] No such file or directory: './data/news.csv'
I know that I have to give a path to pd.read_csv, but I don't know which path I should use here.
This error would make sense if there isn't already a data folder in the directory you are executing this program from; pandas will not create the missing ./data directory for you when writing the CSV. There is a similar problem in the post here.
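A minimal sketch of creating the folder before reading or writing the CSV, relative to wherever the script runs from:
import os
import pandas as pd

os.makedirs('./data', exist_ok=True)  # create the data folder if it doesn't exist

csv_path = './data/news.csv'
if os.path.exists(csv_path):
    aux = pd.read_csv(csv_path)
else:
    aux = pd.DataFrame()              # first run: start with an empty frame
# ... append the new articles here, then write back
aux.to_csv(csv_path, index=False, encoding='utf-8')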
