df.csv is not saving actual dataframe - python

The code works fine and prints all the desired values but when I try to save as df to csv all the rows are the same.
a.csv file content
Domains
A.com
AMD.com
AMD.com
AOL.com
AOL.com
AWS.com
AWS.com
3ammagazine.com
3dmail.com
3email.com
3xl.net
444.net
4email.com
4email.net
4mg.com
4newyork.com
whdskcisail.com
wrestlingpages.com
wrexham.net
import whois
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import socket
import os
import csv
import datetime
import time
import requests
from ipwhois import IPWhois
from urllib import request
from ipwhois.utils import get_countries
import tldextract
from ipwhois.utils import get_countries
countries = get_countries(is_legacy_xml=True)
from ipwhois.experimental import bulk_lookup_rdap
from ipwhois.hr import (HR_ASN, HR_ASN_ORIGIN, HR_RDAP_COMMON, HR_RDAP, HR_WHOIS, HR_WHOIS_NIR)
countries = get_countries(is_legacy_xml=True)
import ipaddress
df = pd.read_csv('a.csv', nrows=100)
#TimeOut Setting
s = socket.socket()
s.settimeout(10)
#Date Processing Function
def check_date_type(d):
if type(d) is datetime.datetime:
return d
if type(d) is list:
return d[0]
for index,row in df.iterrows():
try:
DN = df.iloc[index]['Domains']
ip = socket.gethostbyname(DN)
ipwhois = IPWhois(ip).lookup_whois()
print(DN)
print(ip)
print(ipwhois)
df['IPcity'][index] = ipwhois['nets'][0]['city']
df['ASNumber'][index] = ipwhois['asn']
df['NetAddr'][index] = ipwhois['nets'][0]['address']
df['NetCity'][index] = ipwhois['city']
df['NetPostCode'][index] = ipwhois['nets'][0]['postal_code']
df.to_csv('a1.csv', index=False)
except Exception as e:
print(e)
df['e'] = e # want to save the errors in a differet column
df.to_csv('a2.csv', index=False)

This is happening because you're overwriting the .csv file in every iteration of your loop. If you move the df.to_csv() outside of your loop, you will get your desired dataframe:
...
for index,row in df.iterrows():
try:
DN = df.iloc[index]['Domains']
ip = socket.gethostbyname(DN)
ipwhois = IPWhois(ip).lookup_whois()
print(DN)
print(ip)
print(ipwhois)
df['IPcity'][index] = ipwhois['nets'][0]['city']
df['ASNumber'][index] = ipwhois['asn']
df['NetAddr'][index] = ipwhois['nets'][0]['address']
df['NetCity'][index] = ipwhois['city']
df['NetPostCode'][index] = ipwhois['nets'][0]['postal_code']
except Exception as e:
print(e)
df['e'] = e # want to save the errors in a differet column
df.to_csv('a1.csv', index=False)

Related

'numpy.int64' object has no attribute 'to_pydatetime' in backtrader feed

I am unable to fix the error - 'numpy.int64' object has no attribute 'to_pydatetime', I will be really grateful, if anyone could please help me out in this? I have already tried uninstalling pyfolio and itstalling it from git. Please see the complete code below
import os
import glob
import requests
import pandas as pd
from nsepy import *
from datetime import datetime
import backtrader as bt
import backtrader.feeds as btfeeds
from __future__ import (absolute_import, division, print_function,
unicode_literals)
class TestStrategy(bt.Strategy):
def log(self, txt, dt=None):
''' Logging function for this strategy'''
dt = dt or self.datas[0].datetime.date(0)
print('%s, %s' % (dt.isoformat(), txt))
def __init__(self):
# Keep a reference to the "close" line in the data[0] dataseries
self.dataclose = self.datas[0].close
def next(self):
# Simply log the closing price of the series from the reference
self.log('Close, %.2f' % self.dataclose[0])
if __name__ == '__main__':
cerebro = bt.Cerebro()
cerebro.addstrategy(TestStrategy)
#Data feed block
data_path = "/Users/kumarun/Documents/data/files"
joined_files = os.path.join(data_path, "Oct-MONTHLY-Expirydata_2020.csv")
joined_list = glob.glob(joined_files)
df = pd.concat(map(pd.read_csv, joined_list), ignore_index=True)
df.columns=['Ticker','date', 'open', 'high', 'low', 'close', 'volume','Open Interest']
filtered = df[(df['Ticker'] == 'BANKNIFTY')]
#Cerebro block
filtered.date = pd.to_datetime(filtered.date, format='%d-%m-%Y %H:%M:%S')
feed = bt.feeds.PandasData(dataname=filtered)
cerebro.adddata(feed)
cerebro.broker.setcash(100000.0)
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
cerebro.run()
print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
I feel like you might be running into issues with the column names. Try ascribing your column names using:
data = bt.feeds.PandasData(
dataname=filtered,
datetime="date",
open='open',
high='high',
low='low',
close='close',
volume='volume',
openinterest="Open Interest"
)
Also, could drop the Ticker name column, and add the name using:
ticker = 'BANKNIFTY'
cerebro.adddata(feed, name=ticker)

How to send form data to Flask App API Pandas Dataframe

I am learning API's but I have been using Pandas for data analysis for some time. Can I send data to an API from a Pandas dataframe?
For example, if I make up some time series data in a Pandas df and attempt to use df.to_json(). Ultimate goal is here to make a Flask API that returns the median value of Value in the Pandas df.
import requests
import pandas as pd
import numpy as np
from numpy.random import randint
np.random.seed(11)
rows,cols = 50000,1
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='T')
df = pd.DataFrame(data, columns=['Value'], index=tidx)
median_val = df.Value.median()
print('[INFO]')
print(median_val)
print('[INFO]')
print(df.head())
json_data = df.to_json()
print('[Sending to API!]')
url = "http://127.0.0.1:5000/api/v1.0/median_val"
print(requests.post(url, json_data).text)
Is it possible (or bad practice) to send a years worth of time series data to an API to get processed? Or how much data can be sent as FORM on an HTTP POST request?
Here is something simple in Flask on a local route shown below which errors out. This is just something I made up on the fly trying to figure it out.
import numpy as np
import pandas as pd
import time, datetime
from datetime import datetime
import json
from flask import Flask, request, jsonify
#start flask app
app = Flask(__name__)
#Simple flask route to return Value average
#app.route("/api/v1.0/median_val", methods=['POST'])
def med_val():
r = request.form.to_dict()
print(r.keys())
df = pd.json_normalize(r)
print(df)
if r.keys() == {'Date','Value'}:
try:
df = pd.json_normalize(r)
df['Date'] = datetime.fromtimestamp(df['Date'].astype(float))
df = pd.DataFrame(df,index=[0])
df = df.set_index('Date')
df['Value'] = df['Value'].astype(float)
median_val = df.Value.median()
except Exception as error:
print("Internal Sever Error {}".format(error))
error_str = str(error)
return error_str, 500
return json.dumps(median_val)
else:
print("Error on api route, rejected unable to process keys")
print("rejected unable to process keys")
return 'Bad Request', 400
if __name__ == '__main__':
print("Starting main loop")
app.run(debug=True,port=5000,host="127.0.0.1")
I dont get why the print on the flask side the prints are empty. Any tips greatly appreciated there isnt a lot of wisdom here to web server processes/design.
r = request.form.to_dict()
print(r.keys())
df = pd.json_normalize(r)
print(df)
Full trace back on the Flask side.
dict_keys([])
Empty DataFrame
Columns: []
Index: [0]
Error on api route, rejected unable to process keys
rejected unable to process keys
127.0.0.1 - - [10/Feb/2021 07:50:44] "←[31m←[1mPOST /api/v1.0/median_val HTTP/1.1←[0m" 400 -
I got the code to work :) not using df.to_json() but populating an empty Python dictionary baggage_handler = {} with the data to send to the Flask App Api route to process the data.
Also not super sure on best practices for how much data can be sent as an HTTP POST body but this appears to work on local host :)
Flask APP:
import numpy as np
import pandas as pd
import time, datetime
from datetime import datetime
import json
from flask import Flask, request, jsonify
#start flask app
app = Flask(__name__)
#Simple flask route to return Value average
#app.route("/api/v1.0/median_val", methods=['POST'])
def med_val():
r = request.form.to_dict()
df = pd.json_normalize(r)
print('incoming keys')
print(r.keys())
if r.keys() == {'Value'}:
print('keys are good')
try:
df = pd.json_normalize(r)
df['Value'] = df['Value'].astype(float)
median_val = df.Value.median()
print('median value == ',median_val)
except Exception as error:
print("Internal Sever Error {}".format(error))
error_str = str(error)
return error_str, 00
return json.dumps(median_val)
else:
print("Error on api route, rejected unable to process keys")
print("rejected unable to process keys")
return 'Bad Request', 400
if __name__ == '__main__':
print("Starting main loop")
app.run(debug=True,port=5000,host="127.0.0.1")
HTTP Request script:
import requests
import pandas as pd
import numpy as np
from numpy.random import randint
np.random.seed(11)
rows,cols = 50000,1
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='T')
df = pd.DataFrame(data, columns=['Value'], index=tidx)
median_val = df.Value.median()
print('[INFO]')
print(median_val)
print('[INFO]')
print(df.head())
#create an empty dictionary
baggage_handler = {}
print('[packaging some data!!]')
values_to_send = df.Value.tolist()
baggage_handler['Value'] = values_to_send
print('[Sending to API!]')
response = requests.post('http://127.0.0.1:5000/api/v1.0/median_val', data=baggage_handler)
print("RESPONCE TXT", response.json())
data = response.json()
print(data)

the DataFrameClinet class of python's package for influxdb uploads only the last line from the dataframe

i am trying to use python's package for influxdb to upload dataframe into the database
i am using the write_points class to write point into the database as given in the documentation(https://influxdb-python.readthedocs.io/en/latest/api-documentation.html)
every time i try to use the class it only updates the last line of the dataframe instead of the complete dataframe.
is this a usual behavior or there is some problem here?
given below is my script:
from influxdb import InfluxDBClient, DataFrameClient
import pathlib
import numpy as np
import pandas as pd
import datetime
db_client = DataFrameClient('dbserver', port, 'username', 'password', 'database',
ssl=True, verify_ssl=True)
today = datetime.datetime.now().strftime('%Y%m%d')
path = pathlib.Path('/dir1/dir/2').glob(f'pattern_to_match*/{today}.filename.csv')
for file in path:
order_start = pd.read_csv(f'{file}')
if not order_start.empty:
order_start['data_line1'] = (order_start['col1'] - \
order_start['col2'])*1000
order_start['data_line2'] = (order_start['col3'] - \
order_start['col4'])*1000
d1 = round(order_start['data_line1'].quantile(np.arange(0,1.1,0.1)), 3)
d2 = round(order_start['data_line2'].quantile(np.arange(0,1.1,0.1)), 3)
out_file = pd.DataFrame()
out_file = out_file.append(d1)
out_file = out_file.append(d2)
out_file = out_file.T
out_file.index = out_file.index.set_names(['percentile'])
out_file = out_file.reset_index()
out_file['percentile'] = out_file.percentile.apply(lambda x: f'{100*x:.0f}%')
out_file['tag_col'] = str(file).split('/')[2]
out_file['time'] = pd.to_datetime('today').strftime('%Y%m%d')
out_file = out_file.set_index('time')
out_file.index = pd.to_datetime(out_file.index)
db_client.write_points(out_file, 'measurement', database='database',
retention_policy='rp')
can anyone please help?

'NoneType' 'NoneType' object is not iterable

I am trying to loop through a list of symbols to get rates for various currencies via the mt5. I use the code below but i get TypeError
d[i] = [y.close for y in rates1]
TypeError: 'NoneType' object is not iterable
I can't see where im going wrong i would like to use this structure to loop through create multiple dataframe and then make a big multiindex of all pairs and time using same kind of loop. I've not been coding long.
sym = ['GBPUSD','USDJPY','USDCHF','AUDUSD','GBPJPY']
# Copying data to dataframe
d = pd.DataFrame()
for i in sym:
rates1 = mt5.copy_rates_from(i, mt5.TIMEFRAME_M1, 5)
d[i] = [y.close for y in rates1]
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 29 18:38:11 2020
#author: DanPc
"""
# -*- coding: utf-8 -*-
"""
"""
import pytz
import pandas as pd
import MetaTrader5 as mt5
import time
from datetime import datetime
from threading import Timer
import talib
import numpy as np
import matplotlib as plt
from multiprocessing import Process
import sys
server_name = "" ENTER DETAILS HERE
server_num =
password = ""
#------------------------------------------------------------------------------
def actualtime():
# datetime object containing current date and time
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
#print("date and time =", dt_string)
return str(dt_string)
#------------------------------------------------------------------------------
def sync_60sec(op):
info_time_new = datetime.strptime(str(actualtime()), '%d/%m/%Y %H:%M:%S')
waiting_time = 60 - info_time_new.second
t = Timer(waiting_time, op)
t.start()
print(actualtime)
#------------------------------------------------------------------------------
def program(symbol):
if not mt5.initialize(login=server_num, server=server_name, password=password):
print("initialize() failed, error code =",mt5.last_error())
quit()
timezone = pytz.timezone("Etc/UTC")
utc_from = datetime.now()
######### Change here the timeframe 525600
# Create currency watchlist for which correlation matrix is to be plotted
sym = ['GBPUSD','USDJPY','USDCHF','AUDUSD','GBPJPY']
# Copying data to dataframe
d = pd.DataFrame()
for i in sym:
rates1 = mt5.copy_rates_from(i, mt5.TIMEFRAME_M1, 5)
d[i] = [y.close for y in rates1]
print(rates1)
mt5.shutdown()
if not mt5.initialize():
print("initialize() failed, error code =",mt5.last_error())
quit()
# starting mt5
if not mt5.initialize(login=server_num, server=server_name, password=password):
print("initialize() failed, error code =",mt5.last_error())
quit()
#------------------------------------------------------------------------------
# S T A R T I N G M T 5
#------------------------------------------------------------------------------
authorized=mt5.login(server_num, password=password)
if authorized:
account_info=mt5.account_info()
if account_info!=None:
account_info_dict = mt5.account_info()._asdict()
df=pd.DataFrame(list(account_info_dict.items()),columns=['property','value'])
print("account_info() as dataframe:")
print(df)
else:
print(mt5.last_error)
mt5.shutdown()
#------------------------------------------------------------------------------
def trading_bot():
symbol_1 = 'EURUSD'
symbol_2 = 'EURCAD'
while True:
program(symbol_1)
program(symbol_2)
time.sleep(59.8) # it depends on your computer and ping
sync_60sec(trading_bot)
copy_rates_from returns None if there is an error. The documentation suggests calling last_error() to find out what that error is.
(And no, I don't know why copy_rates_from doesn't just raise an exception to indicate the error. Apparently, the module is a thin wrapper around a C library.)
I came to this solution that creates a dictionary of dataframes.
sym = ["GBPUSD","USDJPY","USDCHF","AUDUSD","GBPJPY"]
# Copying data to dataframe
utc_from = datetime.now()
for i in sym:
rates = {i:pd.DataFrame(mt5.copy_rates_from(i, mt5.TIMEFRAME_M1, utc_from , 60),
columns=['time', 'open', 'low', 'high', 'close', 'tick_volume', 'spread', 'real_volume']) for i in sym}

how to connect input and function module in main module and can we return list from a function?

I have three modules: GetInput, Main and Converter. In the GetInput file there are all the inputs values and excel data in the form of list. In the Converter file I am using those input values from Getinput file and in the main file I am connecting both these files here. I am doing this so that my code can look more organized.
GetInput.py:
import pandas as pd
import numpy as np
import time
def getInputs():
df = pd.read_excel('input.xlsx')
actual = df['actual'].values.tolist()
schedule = df['schedule'].values.tolist()
freq = df['frequency'].values.tolist()
ACP = df['acp'].values.tolist()
modelInput = {
'actual': actual, 'schedule': schedule, 'freq': freq, 'ACP': ACP,'df' : df
}
return modelInput
Converter.py
import pandas as pd
def fun(modelInput):
underdraw = []
overdraw = []
for i,j, in zip(schedule, actual):
dev = j - i
if dev < 0:
underdraw.append(dev)
else:
underdraw.append(0)
if dev > 0:
overdraw.append(dev)
else:
overdraw.append(0)
df['underdraw'] = pd.Series(underdraw)
df['overdraw'] = pd.Series(overdraw)
df.to_excel('mainfile.xlsx')
Main.py
import pandas as pd
import numpy as np
from convert import *
from GetInputs import *
def fun1():
inpu = getInputs()
con = fun(inpu)
fun1()
This whole program works when I run it in a single module but it throw errors when I try divide my code into separate modules. Basically it throw error in GetInput.py and in Converter.py (df is not defined) file. I know its a very basic thing but I don't know how to make it work. There is no desired output for this program, I am already getting an output when I run it in a single file. I just want to divide my code in this format as I mentioned above: GetIput File, Converter File and Main File.
Keep all the files in same directory or else mention the file paths at the top of main code using os module.
You have misspelled the following in the main code:
from convert import *
from GetInputs import *
It should be:
from Converter import *
from GetInput import *
I have tested this using the following:
MainModule.py
from Converter import *
from GetInputs import *
def fun1():
inpu = getInputs()
con = fun(inpu)
fun1()
Converter.py
import pandas as pd
def fun(modelInput):
print("HIE" + modelInput)
GetInputs.py
def getInputs():
return "modelInput"

Categories

Resources