print multiple output one by one from index - python

import re
import pandas as pd
import sqlite3
connection = sqlite3.connect('C:\Users\wh112\Desktop\BRAIN.sqlite')
df1 = pd.read_sql('select * from PRONOUN',connection)
df2 = pd.read_sql('select * from VERB',connection)
df3 = pd.read_sql('select * from QUESTIONS',connection)
df = pd.concat([df1,df2,df3])
def word_list(text):
return list(filter(None, re.split('\W+', text)))
session = raw_input("Test on me!")
feedback = session
print(word_list(feedback))
dff = pd.DataFrame({'Sentence':[feedback]})
dff['1'] = dff['Sentence'].astype(str).str.split().str[0]
dff['2'] = dff['Sentence'].astype(str).str.split().str[1]
dff['3'] = dff['Sentence'].astype(str).str.split().str[2]
for pts1 in dff['1']:
pts1 = df.columns[df.isin([pts1]).any()]
for pts2 in dff['2']:
pts2 = df.columns[df.isin([pts2]).any()]
for pts3 in dff['3']:
pts3 = df.columns[df.isin([pts3]).any()]
Now the Topic is on:
pts1 = df.columns[df.isin([pts1]).any()]
when I am working with this to find any match from database string; multiple match shows together on output. like this:
Index([u'auxiliary_verbfirst_person_singular_pronounnon_wh_type'])
But I want them to be one by one like:
Index([u'auxiliary_verb, first_person_singular_pronoun,non_wh_type'])
is there any way to do this, can you help me, please?

Related

Is there a way to store output dataframes and appending them to the last output in the same dataframe

I am trying to fetch data from API for 50 parcels. I want them to be in a single data frame. While running this loop the data frame is storing only the last parcel which is satisfying the loop condition. Is there any way to store all the previous outputs also in the same dataframe.
For e.g upon running this code it only returns the data frame for foreign id=50, I want the dataframe for all 1-50.
import requests
import pandas as pd
foreign=1
while (foreign <=50):
s1_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s1?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs1product_end_time%2Cs1product_ron%2Ccohvh_avg%2Ccohvv_avg%2Cvhvv_avg'
s2_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s2?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs2product_start_time%2Cs2product_ron%2Cndvi_avg'
position = 101
foreign_n=str(foreign)
s1_time_series_url_p6 = s1_time_series_url_p6[:position] + foreign_n + s1_time_series_url_p6[position+1:]
s2_time_series_url_p6 = s2_time_series_url_p6[:position] + foreign_n + s2_time_series_url_p6[position+1:]
r_s1_time_series_p6 = requests.get(s1_time_series_url_p6)
r_s2_time_series_p6 = requests.get(s2_time_series_url_p6)
json_s1_time_series_p6 = r_s1_time_series_p6.json()
json_s2_time_series_p6 = r_s2_time_series_p6.json()
df_s1_time_series_p6 = pd.DataFrame(json_s1_time_series_p6['s1_time_series'])
df_s2_time_series_p6 = pd.DataFrame(json_s2_time_series_p6['s2_time_series'])
df_s2_time_series_p6.s2product_start_time=df_s2_time_series_p6.s2product_start_time.str[0:11]
df_s1_time_series_p6.s1product_end_time=df_s1_time_series_p6.s1product_end_time.str[0:11]
dfinal_p6 = df_s1_time_series_p6.merge(df_s2_time_series_p6, how='inner', left_on='s1product_end_time', right_on='s2product_start_time')
cols_p6 = ['parcel_foreign_id_x', 's1product_ron','parcel_foreign_id_y','s2product_ron']
dfinal_p6[cols_p6] = dfinal_p6[cols_p6].apply(pd.to_numeric, errors='coerce', axis=1)
dfinal_p6
The issue is resolved by first creating an empty data frame and then appending the outputs in the dataframe within the loop.
The updated code is as follows:
column_names = ["parcel_foreign_id_x", "s1product_end_time", "s1product_ron","cohvh_avg", "cohvv_avg", "vhvv_avg","parcel_foreign_id_y", "s2product_start_time", "s2product_ron", "ndvi_avg" ]
df = pd.DataFrame(columns = column_names)
foreign=1
while (foreign <=50):
s1_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s1?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs1product_end_time%2Cs1product_ron%2Ccohvh_avg%2Ccohvv_avg%2Cvhvv_avg'
s2_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s2?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs2product_start_time%2Cs2product_ron%2Cndvi_avg'
position = 101
foreign_n=str(foreign)
s1_time_series_url_p6 = s1_time_series_url_p6[:position] + foreign_n + s1_time_series_url_p6[position+1:]
s2_time_series_url_p6 = s2_time_series_url_p6[:position] + foreign_n + s2_time_series_url_p6[position+1:]
r_s1_time_series_p6 = requests.get(s1_time_series_url_p6)
r_s2_time_series_p6 = requests.get(s2_time_series_url_p6)
json_s1_time_series_p6 = r_s1_time_series_p6.json()
json_s2_time_series_p6 = r_s2_time_series_p6.json()
df_s1_time_series_p6 = pd.DataFrame(json_s1_time_series_p6['s1_time_series'])
df_s2_time_series_p6 = pd.DataFrame(json_s2_time_series_p6['s2_time_series'])
df_s2_time_series_p6.s2product_start_time=df_s2_time_series_p6.s2product_start_time.str[0:11]
df_s1_time_series_p6.s1product_end_time=df_s1_time_series_p6.s1product_end_time.str[0:11]
dfinal_p6 = df_s1_time_series_p6.merge(df_s2_time_series_p6, how='inner', left_on='s1product_end_time', right_on='s2product_start_time')
cols_p6 = ['parcel_foreign_id_x', 's1product_ron','parcel_foreign_id_y','s2product_ron']
dfinal_p6[cols_p6] = dfinal_p6[cols_p6].apply(pd.to_numeric, errors='coerce', axis=1)
df = pd.concat([dfinal_p6,df],ignore_index = True)
foreign = foreign+1

Pandas Dataframe Only Returning first Row of JSON Data

I'm working on a web scraping project, and have all the right code that returns me the json data in the format that I want if I used the #print command below, but when I got to run the same code except through Pandas Dataframe it only returns the first row of Data that I'm looking for. Just running the print, it returns the expected 17 rows of data I'm looking for. Dataframe to CSV gives me the first row only. Totally stumped! So grateful for anyone's help!
for item in response['body']:
DepartureDate = item['legs'][0][0]['departDate']
ReturnDate = item['legs'][1][0]['departDate']
Airline = item['legs'][0][0]['airline']['code']
Origin = item['legs'][0][0]['depart']
Destination = item['legs'][0][0]['destination']
OD = (Origin + Destination)
TrueBaseFare = item['breakdown']['baseFareAmount']
YQYR = item['breakdown']['fuelSurcharge']
TAX = item['breakdown']['totalTax']
TTL = item['breakdown']['totalFareAmount']
MARKEDUPTTL = item['breakdown']['totalCalculatedFareAmount']
MARKUP = ((MARKEDUPTTL - TTL) / (TTL)*100)
FBC = item['fareBasisCode']
#print(DepartureDate,ReturnDate,Airline,OD,TrueBaseFare,YQYR,TAX,TTL,MARKEDUPTTL,MARKUP,FBC)
MI = pd.DataFrame(
{'Dept': [DepartureDate],
'Ret': [ReturnDate],
'AirlineCode': [Airline],
'Routing': [OD],
'RealFare': [TrueBaseFare],
'Fuel': [YQYR],
'Taxes': [TAX],
'RealTotal': [TTL],
'AgencyTotal': [MARKEDUPTTL],
'Margin': [MARKUP],
'FareBasis': [FBC],
})
df = pd.DataFrame(MI)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
df.to_csv('MITest7.csv')
When you print all your values after the cycle, you will see that you get only the last values. To resolve this problem you need to create lists and put there your values.
Try this:
DepartureDate = []
ReturnDate = []
Airline = []
Origin = []
Destination = []
OD = []
TrueBaseFare = []
YQYR = []
TAX = []
TTL = []
MARKEDUPTTL = []
MARKUP = []
FBC = []
for item in response['body']:
DepartureDate.append(item['legs'][0][0]['departDate'])
ReturnDate.append(item['legs'][1][0]['departDate'])
Airline.append(item['legs'][0][0]['airline']['code'])
Origin.append(item['legs'][0][0]['depart'])
Destination.append(item['legs'][0][0]['destination'])
OD.append((Origin[-1] + Destination[-1]))
TrueBaseFare.append(item['breakdown']['baseFareAmount'])
YQYR.append(item['breakdown']['fuelSurcharge'])
TAX.append(item['breakdown']['totalTax'])
TTL.append(item['breakdown']['totalFareAmount'])
MARKEDUPTTL.append(item['breakdown']['totalCalculatedFareAmount'])
MARKUP.append(((MARKEDUPTTL[-1] - TTL[-1]) / (TTL[-1])*100))
FBC.append(item['fareBasisCode'])

For loop to add column in multiple pandas dataframes

Good evening,
I will start by saying I am very early in my coding journey. Currently using a number of excel sheets from government data for a pandas project. Each of these sheets represents a year. I am attempting to add a column to each dataframe before I concat the entire list so I know which year the data came from at each point. Currently, the code looks like this:
'''
df10 = pd.read_excel(r'C:\Market research\national_m2010_dl.xls')
df11 = pd.read_excel(r'C:\Market research\national_m2011_dl.xls')
df12 = pd.read_excel(r'C:\Market research\national_m2012_dl.xls')
df13 = pd.read_excel(r'C:\Market research\national_m2013_dl.xls')
df14 = pd.read_excel(r'C:\Market research\national_m2014_dl.xlsx')
df15 = pd.read_excel(r'C:\Market research\national_m2015_dl.xlsx')
df16 = pd.read_excel(r'C:\Market research\national_m2016_dl.xlsx')
df17 = pd.read_excel(r'C:\Market research\national_m2017_dl.xlsx')
df18 = pd.read_excel(r'C:\Market research\national_m2018_dl.xlsx')
df19 = pd.read_excel(r'C:\Market research\national_m2019_dl.xlsx')
df10['Year'] = '2010'
df11['Year'] = '2011'
df12['Year'] = '2012'
df13['Year'] = '2013'
df14['Year'] = '2014'
df15['Year'] = '2015'
df16['Year'] = '2016'
df17['Year'] = '2017'
df18['Year'] = '2018'
df19['Year'] = '2019'
'''
However, I am sure there is a cleaner way to do this and was wondering if there might be a better way. I originally attempted a For loop similar to this:
'''
for num in range(10,20):
df+str(num)['Year'] = '20'+str(num)
'''
but I had no luck. Thoughts?
Try this:
df_dic = dict()
for n in range(10,20): #remember, the second number in range is exclusive
year = f"20{n}"
df = pd.read_excel(f'C:\Market research\national_m{year}_dl.xls')
df["Year"] = year
df_dic[year] = df
instead of using df10, df11,... you can use df[10], df[11],... which is make the code very easy.
for num in range(10,20):
df[num] = pd.read_excel(r'C:\Market research\national_m20'+str(num)+'_dl.xlsx')
df[num]['Year'] = '20'+str(num)

how to append a data at a list and plot it with python plotly

hi there i m still trying to get trade bot and try to plot them with their time and low price data.
i wanna get buy signals that i ve specified at if condition (when macdh turns from negative to positive). then i want to plot them at a data. but can not add them at buy_signal=[] place.
my error is
self.plotData(buy_signals = buy_signals)
IndexError: list index out of range
import requests
import json
from stockstats import StockDataFrame as Sdf
import plotly.graph_objects as go
from plotly.offline import plot
class TradingModel:
def __init__(self, symbol):
self.symbol = symbol
self.df = self.getData
def getData(self):
# define URL
base = 'https://api.binance.com'
endpoint = '/api/v3/klines'
params = '?&symbol='+self.symbol+'&interval=4h'
url = base + endpoint + params
# download data
data = requests.get(url)
dictionary = data.json()
# put in dataframe and clean-up
df = pd.DataFrame.from_dict(dictionary)
df = df.drop(range(6, 12), axis=1)
# rename columns and stockstasts
col_names = ['time', 'open', 'high', 'low', 'close', 'volume']
df.columns = col_names
stock = Sdf.retype(df)
for col in col_names:
df[col]=df[col].astype(float)
#defined macdh
df['macdh']=stock['macdh']
return (df)
def strategy(self):
df = self.df
buy_signals=[]
for i in range(1, len(df['close'])):
if df['macdh'].iloc[-1]>0 and df['macdh'].iloc[-2]<0:
buy_signals.append([df['time'][i], df['low'][i]])
self.plotData(buy_signals = buy_signals)
def plotData(self,buy_signal=False):
df=self.df
candle=go.Candlestick(
x=df['time'],
open=df['open'],
close=df['close'],
high=df['high'],
low=df['low'],
name="Candlesticks"
)
macdh=go.Scatter(
x=df['time'],
y=df['macdh'],
name="Macdh",
line = dict(color=('rgba(102, 207, 255, 50)')))
Data=[candle,macdh]
if buy_signals:
buys = go.Scatter(
x = [item[0] for item in buy_signals],
y = [item[1] for item in buy_signals],
name = "Buy Signals",
mode = "markers",
)
sells = go.Scatter(
x = [item[0] for item in buy_signals],
y = [item[1]*1.04 for item in buy_signals],
name = "Sell Signals",
mode = "markers",
)
data = [candle, macdh, buys, sells]
# style and display
layout = go.Layout(title = self.symbol)
fig = go.Figure(data = data, layout = layout)
plot(fig, filename=self.symbol)
def Main():
symbol = "BTCUSDT"
model = TradingModel(symbol)
model.strategy()
if __name__ == '__main__':
Main() ```
You need to replace :
self.plotData(buy_signals[i]) by self.plotData(buy_signals)
def plotData(self,buy_signal=False): by def plotData(self,buy_signals=None):
And it should be good to go !

Saving data from Arduino using Python - loss of data

With the help of web, i have created a code that collects the data form Arduino uno, and saves it to csv file.
The data collected are raw values of MEMS accelerometers.
The problem in code is that very often i loose a lot of data, if not all, if i terminate the Python. I noticed that at a random time, the output csv file has zero bytes.
Temporary solution is to start Arduino's "Serial monitor". This way most of the measured data is saved.
import serial
import time
import csv
import numpy as np
import pandas as pd
timeHr = []
timeT = []
mem1xD = []
mem1yD = []
mem1zD = []
#
mem2xD = []
mem2yD = []
mem2zD = []
arduinoData = serial.Serial('COM4',9600)
df = pd.DataFrame({
'timeHr':0,
'timeT':0,
'mem1xD':0,
'mem1yD':0,
'mem1zD':0,
'mem2xD':0,
'mem2yD':0,
'mem2zD':0,
},
index=[0]
)
while True:
while (arduinoData.inWaiting()==0):
pass
arduinoString = arduinoData.readline().decode("utf-8")
dataArray = arduinoString.split(",")
timehr = dataArray[0]
time = float(dataArray[1])/1000
mem1x = float(dataArray[2])
mem1y = float(dataArray[3])
mem1z = float(dataArray[4])
#
mem2x = float(dataArray[5])
mem2y = float(dataArray[6])
mem2z = float(dataArray[7])
timeHr.append(timehr)
timeT.append(time)
mem1xD.append(mem1x)
mem1yD.append(mem1y)
mem1zD.append(mem1z)
#
mem2xD.append(mem2x)
mem2yD.append(mem2y)
mem2zD.append(mem2z)
df = pd.DataFrame({
'timeHr':timeHr,
'timeT':timeT,
'mem1xD':mem1xD,
'mem1yD':mem1yD,
'mem1zD':mem1zD,
'mem2xD':mem2xD,
'mem2yD':mem2yD,
'mem2zD':mem2zD,
}
)
df.to_csv(r'time4.csv')
You need to append new data to your dataframe. Passing mode='a' in pd.Dataframe.to_csv will allow you to do that.
import time
tStart = str(time.time()).split('.')[0]
fileOut = tStart+'.csv'
while True:
while (arduinoData.inWaiting()==0):
pass
arduinoString = arduinoData.readline().decode("utf-8")
dataArray = arduinoString.split(",")
timehr = dataArray[0]
time = float(dataArray[1])/1000
mem1x = float(dataArray[2])
mem1y = float(dataArray[3])
mem1z = float(dataArray[4])
#
mem2x = float(dataArray[5])
mem2y = float(dataArray[6])
mem2z = float(dataArray[7])
timeHr.append(timehr)
timeT.append(time)
mem1xD.append(mem1x)
mem1yD.append(mem1y)
mem1zD.append(mem1z)
#
mem2xD.append(mem2x)
mem2yD.append(mem2y)
mem2zD.append(mem2z)
df = pd.DataFrame({
'timeHr':timeHr,
'timeT':timeT,
'mem1xD':mem1xD,
'mem1yD':mem1yD,
'mem1zD':mem1zD,
'mem2xD':mem2xD,
'mem2yD':mem2yD,
'mem2zD':mem2zD,
}
)
df.to_csv(fileOut,mode='a', header=False)

Categories

Resources