iterating a stock tick data with append on python - python

I am trying to combine several days of stock tick data into a single data set, keyed by date.
But it won't work. Please help.
import pandas as pd
import tushare as ts
# For each of five consecutive calendar days starting 2016-01-01, fetch one stock's ticks
# and keep the ten rows with the largest 'volume', ordered by 'time' descending.
def get_all_tick(stockID):
dates=pd.date_range('2016-01-01',periods=5,freq='D')
append_data=[]
for i in dates:
# NOTE(review): `i` is a pandas Timestamp here; presumably tushare expects a 'YYYY-MM-DD' string - confirm.
stock_tick=pd.DataFrame(ts.get_tick_data(stockID,date=i))
# NOTE(review): DataFrame.sort no longer exists in current pandas; sort_values is the replacement.
stock_tick.sort('volume',inplace=True, ascending=False)
stock_tick=stock_tick[:10]
stock_tick.sort('time',inplace=True, ascending=False)
# NOTE(review): this stores the iterrows() generator rather than the rows, and
# append_data is never combined or returned, so the caller receives None.
append_data.append(stock_tick.iterrows())
get_all_tick('300243')

I figured it out myself.
def get_all_tick(stockID, dates=None):
    """Write the ten largest-volume ticks of each day to ``tick.xlsx``.

    Args:
        stockID: Stock code passed through to ``ts.get_tick_data``.
        dates: Iterable of days to fetch (strings or timestamps). Defaults to
            the five calendar days starting 2016-01-01, the window used in
            the question, because the original snippet elided where its
            ``get_date`` came from.

    Returns:
        The combined DataFrame that was written to the workbook. It is empty
        when no day produced any ticks.
    """
    if dates is None:
        dates = pd.date_range('2016-01-01', periods=5, freq='D')

    daily_frames = []
    for i in dates:
        # NOTE(review): presumably tushare wants a 'YYYY-MM-DD' string - confirm.
        day = i if isinstance(i, str) else pd.Timestamp(i).strftime('%Y-%m-%d')
        stock_tick = ts.get_tick_data(stockID, date=day)
        # NOTE(review): assumes a day without data comes back as None or as
        # an empty frame (e.g. a non-trading day) - confirm against tushare.
        if stock_tick is None or stock_tick.empty:
            continue
        # DataFrame.sort is gone from pandas; sort_values returns a new frame.
        stock_tick = stock_tick.sort_values('volume', ascending=False).head(10)
        stock_tick = stock_tick.sort_values('time', ascending=False)
        stock_tick['Date'] = i
        daily_frames.append(stock_tick)

    # A single concat replaces the removed DataFrame.append and avoids
    # re-copying the accumulated frame on every iteration.
    df = pd.concat(daily_frames) if daily_frames else pd.DataFrame()
    df.to_excel('tick.xlsx', sheet_name='Sheet1')
    return df
get_all_tick('300243')

Related

Mismatch when filling yearly data into dataframe with daily data

I am trying to download data and add statistics and economic indicators, however my data is on a daily basis and the indicators are on a yearly basis.
I tried to store year/indicator pairs as a dictionary, go through each day in the dates column returned from yfinance, and populate a list with the GDP Deflator for each day using the dictionary. Then I convert that list to a Dataframe and add it as a row to the dataframe returned from yfinance and save it as a csv.
However, when I look at the csv file, the GDP deflator for 2004 shows up for the last day in 2003, and for the last two days in 2004 the GDP Deflator is that of 2005.
What am I doing wrong?
code below:
import pandas as pd
import yfinance as yf
import world_bank_data as wb
df = pd.DataFrame() # Empty DataFrame
# Presumably GDPD is meant to end up with one deflator value per row of df.
GDPD = []
# NOTE(review): this rebinds df, so the empty DataFrame created above is discarded.
df = yf.download(tickers = 'USDSGD=X' , period='max', interval='1d')
df.reset_index(inplace=True)
date = df['Date']
# Parallel lists: the value fetched for Year[k] is stored at GDP_Deflator[k].
SGD_def_dict = {"Year":[],"GDP_Deflator":[]}
for i in range(len(date)):
if date[i].year in SGD_def_dict['Year']:
# Year already seen: reuse the most recently stored deflator
# (the last element of the dict's last value list).
GDPD.append(list(SGD_def_dict.values())[-1][-1])
else:
SGD_def_dict["Year"].append(date[i].year)
try:
SGD_def_dict["GDP_Deflator"].append(wb.get_series('NY.GDP.DEFL.ZS', country= 'SGP', date=date[i].year, id_or_value='id', simplify_index=True))
except:
# NOTE(review): the bare except turns any failure of the lookup into NaN.
SGD_def_dict["GDP_Deflator"].append(float("nan"))
# NOTE(review): with the next line commented out, the first row of every new year
# adds nothing to GDPD. GDPD therefore falls one entry further behind df for each
# year encountered, and every later value lands on an earlier row.
#GDPD.append(list(SGD_def_dict.values())[-1][-1])
df2 = pd.DataFrame({"GDP_Deflator":GDPD})
# NOTE(review): df2 is shorter than df, so the trailing rows presumably end up as NaN - confirm.
df["GDP_Deflator"] = df2
df.to_csv(r'C:..WBTEST.csv')
You need to match the year of each day to the corresponding GDP deflator in the dictionary, and then use the same value for all days in that year.
import pandas as pd
import yfinance as yf
import world_bank_data as wb
# Daily USD/SGD quotes; reset_index() turns the 'Date' index into a column.
df = yf.download(tickers='USDSGD=X', period='max', interval='1d')
df.reset_index(inplace=True)
date = df['Date']
df['Year'] = date.dt.year

# Fetch the deflator once per distinct year instead of running a list
# membership test for every daily row.
SGD_def_dict = {"Year": [], "GDP_Deflator": []}
for year in df['Year'].unique():
    year = int(year)  # plain int, as the original passed via Timestamp.year
    try:
        deflator = wb.get_series('NY.GDP.DEFL.ZS', country='SGP', date=year,
                                 id_or_value='id', simplify_index=True)
    except Exception:
        # Best effort, as in the original: a year that cannot be fetched
        # becomes NaN. KeyboardInterrupt/SystemExit are no longer swallowed.
        deflator = float("nan")
    SGD_def_dict["Year"].append(year)
    SGD_def_dict["GDP_Deflator"].append(deflator)

# The left join keeps every daily row, in its original order, and repeats the
# yearly value on all days of that year.
df = df.merge(pd.DataFrame(SGD_def_dict), on='Year', how='left')
df.drop(['Year'], axis=1, inplace=True)
df.to_csv(r'C:..WBTEST.csv')

Unable to create a Plot using the pivoted data set : key error

I want to create a plot chart with forecasted figures for next 2 months. The below is the code I wrote.
import pandas as pd
from datetime import datetime
# Load the customer export, normalise its 'Date' and amount columns, and keep
# only the columns needed for the pivot.
df = pd.read_csv(r'C:\Users\Desktop\Customers.csv')

# Dates are tried three ways, in this order of precedence: default parsing,
# an explicit year-day-month layout, then a numeric day count.
raw_dates = df["Date"]
default_parse = pd.to_datetime(raw_dates, errors="coerce")
year_day_month_parse = pd.to_datetime(raw_dates, format="%Y-%d-%m", errors="coerce")
parsed = default_parse.fillna(year_day_month_parse)

# Numeric entries are read as a day offset from 1899-12-30
# (presumably spreadsheet serial dates - confirm against the export).
day_zero = pd.Timestamp("1899-12-30")
ordinal = pd.to_numeric(raw_dates, errors="coerce").apply(
    lambda days: day_zero + pd.Timedelta(days, unit="D")
)
df["Date"] = parsed.fillna(ordinal)

# Strip everything except digits and dots, then convert to numbers.
for money_col in ('Amount currency', 'Amount'):
    cleaned = df[money_col].str.replace(r'[^0-9\.]', '', regex=True)
    df[money_col] = pd.to_numeric(cleaned)

# Snap every date to the last day of its month.
month_start = df['Date'].dt.to_period('M').dt.to_timestamp()
df['Date'] = month_start + pd.offsets.MonthEnd()

columns = ['Date', 'Type', 'Amount']
df = df[columns]
and it is required to pivot the figures
# One row per 'Date', one column per 'Type', each cell the summed 'Amount'.
df2 = df.pivot_table(
    index='Date',
    columns='Type',
    values='Amount',
    aggfunc='sum',
)
So the final output columns are,
Date
Customer Credit Note
Payment
Sales Invoice
Based on the above code, I wanted to create a plot with 2 months of forecast
import matplotlib.pyplot as plt
import seaborn as sns
# Line chart of 'Payment' against 'Date'.
# NOTE(review): `dataset` is not defined anywhere in the snippets shown; the pivot
# result is named df2. That pivot uses index='Date', so 'Date' is the index of df2
# rather than a column, which is consistent with the reported KeyError. Passing
# df2.reset_index() would expose 'Date' as a column again.
sns.lineplot(x='Date',y='Payment',data=dataset)
plt.title("Monthly_cases")
plt.xlabel("Month end date")
plt.ylabel("Payment")
plt.show()
But the above code fails with the error `KeyError: 'Date'`.
What would be the reason for this? Can anyone help me? Also can anyone help me to modify the above code to get next two months forecasted values?
Thanks

How fill new column with businessDuration result in Dataframe Python

Please help me solve this: how do I create a new column in df that holds the duration result, computed for every row? Thanks.
import pandas as pd
from datetime import time,datetime
from itertools import repeat
# Load the data and parse both timestamp columns.
df = pd.read_csv("data.csv")
df['startdate_column'] = pd.to_datetime(df['startdate_column'])
df['enddate_column'] = pd.to_datetime(df['enddate_column'])
# Working-day window and result unit handed to businessDuration below.
start_time=time(8,0,0)
end_time=time(17,0,0)
unit='min'
# NOTE(review): the next line is not valid Python - positional arguments follow
# keyword arguments in the map(...) call. map() also takes no keyword arguments,
# and itertools.repeat has no `weekendlist` parameter. `businessDuration` is not
# imported anywhere in this snippet.
df['Duration'] = list(map(businessDuration,startdate=df['startdate_column'],enddate=df['enddate_column'],repeat(start_time),repeat(end_time),repeat(weekendlist=[6]),repeat(unit)))```
Use:
def f(x):
    """Return businessDuration for one row's start and end timestamps."""
    return businessDuration(
        startdate=x['startdate_column'],
        enddate=x['enddate_column'],
        starttime=start_time,
        endtime=end_time,
        weekendlist=[6],
        unit=unit,
    )


# axis=1 hands each row to f, so every row gets its own duration.
df['Duration'] = df.apply(f, axis=1)

Filter particular date in a DF column

I want to filter particular date in a DF column.
My code:
# Keep only the rows whose "Crawl Date" falls on 2020-03-21.
df
# .dt.date drops the time of day and leaves plain datetime.date objects in the column.
df["Crawl Date"]=pd.to_datetime(df["Crawl Date"]).dt.date
# NOTE(review): `date` is a pandas Timestamp, while the column now holds
# datetime.date objects. Recent pandas treats the two as unequal, which would
# explain the empty result; calling .date() on the Timestamp gives matching types.
date=pd.to_datetime("03-21-2020")
df=df[df["Crawl Date"]==date]
It is showing no match.
Note: the df column contains a time as well as a date, and the time part needs to be trimmed off.
Thanks in advance.
The following script assumes that the 'Crawl Dates' column contains strings:
import pandas as pd
import datetime
column_names = ["Crawl Date"]
df = pd.DataFrame(columns=column_names)
# Populate dataframe with dates
df.loc[0] = ['03-21-2020 23:45:57']
df.loc[1] = ['03-22-2020 23:12:33']

# .dt.date trims the time of day, leaving datetime.date objects in the column.
df["Crawl Date"] = pd.to_datetime(df["Crawl Date"]).dt.date

# Compare date with date: a pandas Timestamp does not compare equal to a
# datetime.date on current pandas, so the target is converted with .date().
date = pd.to_datetime("03-21-2020").date()
df = df[df["Crawl Date"] == date]
Then df returns:
Crawl Date 0 2020-03-21

Why can't I search for a row in a pandas df using a date as part of a tuple index?

I am trying to search a pandas df I made which has a tuple as an index. The first part of the tuple is a date and the second part is a forex pair. I've tried a few things but I can't seem to search using a date-formatted string as part of a tuple with .loc or .ix
My df looks like this:
Open Close
(11-01-2018, AEDAUD) 0.3470 0.3448
(11-01-2018, AEDCAD) 0.3415 0.3408
(11-01-2018, AEDCHF) 0.2663 0.2656
(11-01-2018, AEDDKK) 1.6955 1.6838
(11-01-2018, AEDEUR) 0.2277 0.2261
Here is the complete code :
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load two daily FOREX files, stack them, and index the rows by (Date, Symbol) tuples.
forex_11 = pd.read_csv('FOREX_20180111.csv', sep=',', parse_dates=['Date'])
forex_12 = pd.read_csv('FOREX_20180112.csv', sep=',', parse_dates=['Date'])
time_format = '%d-%m-%Y'
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat([forex_11, forex_12]) replaces it.
forex = forex_11.append(forex_12, ignore_index=False)
# From here on 'Date' holds strings such as '11-01-2018', not timestamps.
forex['Date'] = forex['Date'].dt.strftime(time_format)
# NOTE(review): GBP is assigned but not used again in this snippet.
GBP = forex[forex['Symbol'] == "GBPUSD"]
forex.index = list(forex[['Date', 'Symbol']].itertuples(index=False, name=None))
forex_open_close = pd.DataFrame(np.array(forex[['Open','Close']]), index=forex.index)
forex_open_close.columns = ['Open', 'Close']
print(forex_open_close.head())
# NOTE(review): the .ix indexer was removed in pandas 1.0; .loc is the label-based replacement.
print(forex_open_close.ix[('11-01-2018', 'GBPUSD')])
How do I get the row which has index ('11-01-2018', 'GBPUSD') ?
Can you try putting the tuple in a list using brackets?
Like this:
# .ix was removed in pandas 1.0; .loc is the label-based indexer. Wrapping the
# tuple in a list asks for "these row labels" and returns a DataFrame.
print(forex_open_close.loc[[('11-01-2018', 'GBPUSD')]])
I would recommend using the Pandas multiIndex. In your case you could do the following:
# Build a two-level row index from the 'Date' and 'Symbol' columns. The level
# names come from the column names, and the columns themselves stay in place.
data.index = pd.MultiIndex.from_frame(data[['Date', 'Symbol']])
# And then to index: pass the whole row key as one tuple and name the column
# axis explicitly, so the second element cannot be read as a column label.
data.loc[('2018-01-11', 'AEDCAD'), :]

Categories

Resources