I am running a pandas.io.ga query and no object is being returned at all. I have copied the client_secrets.json file to two folders just to be safe, because I don't know which pandas/io folder it belongs in. I suspect the problem is that it can't find the client_secrets.json file, but with no error at all I have no clue:
sudo cp ~/Desktop/client_secrets.json /Users/atrombley/anaconda/pkgs/pandas-0.14.1-np19py27_0/lib/python2.7/site-packages/pandas/io/
sudo cp ~/Desktop/client_secrets.json /Users/atrombley/anaconda/lib/python2.7/site-packages/pandas/io/
import numpy as np
import pandas as pd
import pandas.io.ga as ga
import os
print ga.read_ga(
    account_id="private",
    property_id="private",
    metrics=['users', 'pageviews'],
    dimensions=['dayOfWeek'],
    start_date="2015-01-01",
    end_date="2015-01-02",
    index_col=0,
)
>>> ## Nothing is printed here at all: no DataFrame, no error.
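One way to take the guesswork out of the copy destination (a sketch using plain Python, not a pandas.io.ga feature): since the approach above copies the file into pandas/io, you can ask the interpreter which pandas/io directory it actually imports from, and check whether the secrets file is already visible there:

import os
import pandas.io.ga as ga

# Sketch: locate the pandas/io directory the interpreter really uses,
# so client_secrets.json can be copied next to the module that needs it.
module_dir = os.path.dirname(ga.__file__)
print module_dir
print os.path.exists(os.path.join(module_dir, 'client_secrets.json'))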
I am relatively new to Python and am struggling to figure out a way to copy and paste data from one Google Sheet to another using gspread. Does anyone know how to do this without using win32 to copy through an Excel file as a bridge? Please see the code and error message below:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np
Scope = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name(r'C:\Users\Documents\Scripts\FX Rates Query\key.json', Scope)
client = gspread.authorize(creds)
sheet = client.open("Capital").sheet1
data=sheet.get_all_records()
df = pd.DataFrame(data)
df.to_excel(r'C:\Users\Documents\Reserves_extract.xlsx')
sheet1 = client.open("Cash Duration ").sheet1
mgnt_fees = sheet1.col_values(5)
fees = pd.DataFrame(mgnt_fees)
fees1 = fees[fees!=0]
print(fees1)
update = sheet1.update('B7',fees1)
## ^^ the error message below is raised by this line
Error msg:
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type DataFrame is not JSON serializable
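For what it's worth, the TypeError says exactly what is wrong: sheet1.update() serializes its second argument to JSON, and a pandas DataFrame is not JSON-serializable. A minimal sketch of a fix, keeping the question's own names (fees1, sheet1), is to hand it a plain list of row lists instead:

# Sketch: gspread expects JSON-serializable cell values (a list of rows),
# not a DataFrame. fees1 may contain NaN where the filter masked values,
# so blank those out before sending.
values = fees1.fillna('').values.tolist()
sheet1.update('B7', values)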
From your reply of "I would like to copy a specific column from google spreadsheet A to google spreadsheet B", how about the following modification?
Modified script:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import numpy as np
Scope = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name(r'C:\Users\Documents\Scripts\FX Rates Query\key.json', Scope)
client = gspread.authorize(creds)
# I modified below script.
spreadsheetA = client.open("Capital")
spreadsheetB = client.open("Cash Duration ")
srcCol = "'" + spreadsheetA.sheet1.title + "'!A1:A" # This is the column "A" of the 1st tab of Spreadsheet A.
dstCol = "'" + spreadsheetB.sheet1.title + "'!B1:B" # This is the column "B" of the 1st tab of Spreadsheet B.
src = spreadsheetA.values_get(srcCol)
del src['range']  # values_get returns a ValueRange dict; drop 'range' so the body can be written to a different range
spreadsheetB.values_update(dstCol, params={'valueInputOption': 'USER_ENTERED'}, body=src)
In this modified script, column "A" of the 1st tab of Spreadsheet A is copied to column "B" of the 1st tab of Spreadsheet B. Please modify this for your actual situation.
References:
values_get
values_update
When I run this script, I do not get any output. It appears to succeed, since no errors are raised: when I run the notebook, a cell output appears below the 5th cell, indicating the script ran, but nothing is populated. My auth is correct, because the same credentials successfully pull tag data values in Postman. This script used to run fine and output a table in addition to a graph.
What gives? Any help would be greatly appreciated.
Sample dataset when pulling tag data values from the Azure API:
{
    "c": 100,
    "s": "opc",
    "t": "2021-06-11T16:45:55.04Z",
    "v": 80321248.5
}
#Code
import pandas as pd
from modules.services_factory import ServicesFactory
from modules.data_service import TagDataValue
from modules.model_service import ModelService
from datetime import datetime
import dateutil.parser
pd.options.plotting.backend = "plotly"
#specify tag list, start and end times here
taglist = ['c41f-ews-systemuptime']
starttime = '2021-06-10T14:00:00Z'
endtime = '2021-06-10T16:00:00Z'
# Get data and model services.
services = ServicesFactory('local.settings.production.json')
data_service = services.get_data_service()
tagvalues = []
for tag in taglist:
    for tagvalue in data_service.get_tag_data_values(tag, dateutil.parser.parse(starttime), dateutil.parser.parse(endtime)):
        tagvaluedict = tagvalue.__dict__
        tagvaluedict['tag_id'] = tag
        tagvalues.append(tagvaluedict)
df = pd.DataFrame(tagvalues)
df = df.pivot(index='t',columns='tag_id')
fig = df['v'].plot()
fig.update_traces(connectgaps=True)
fig.show()
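Since no exception is raised, the likeliest culprit is an empty result set. Note also that the sample row's timestamp (2021-06-11T16:45:55Z) falls outside the queried window (2021-06-10 14:00 to 16:00), which by itself would explain a blank plot. A debugging sketch, reusing the question's own names and meant to sit just after the loop, before the pivot:

# Sketch: confirm the query actually returned rows before pivoting/plotting.
print('rows fetched:', len(tagvalues))
if not tagvalues:
    raise SystemExit('no tag data values came back for the requested window')

df = pd.DataFrame(tagvalues)
print(df[['tag_id', 't', 'v']].head())    # spot-check the columns used by pivot/plot
print(df['t'].min(), '->', df['t'].max())  # the time range actually returned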
Updated question:
I need to get CloudFormation stacks according to the environment. Below is the code I am using for this:
#!/usr/bin/env python
import boto3
import datetime
from datetime import date
import subprocess
import re, itertools
from collections import defaultdict
regions = ['us-west-2']
env_names = ["dev", "test", "stage"]
stack_names_found = defaultdict(list)
for region in regions:
    session = boto3.session.Session(region_name=region)
    cf_client = session.resource('cloudformation')
    for i in cf_client.stacks.all():
        StackStatus = i.stack_status
        Createdtime = i.creation_time
        StackName1 = i.stack_name
        for env_name in env_names:
            if ('-' + env_name + '-') in StackName1:
                stack_names_found[env_name].append(StackName1)
output = {'StackName': stack_names_found,
          'Createdtime': Createdtime,
          'Status': StackStatus}
print(output)
The StackName entry in output looks like this:
{'StackName': defaultdict(<class 'list'>, {'test': ['customer1-test-server1', 'customer2-test-server1', 'customer3-test-server1', 'customer3-test-server1', 'customer1-test-server2']}), ...}
Instead of:
['customer1-test-server1']
['customer2-test-server1']
['customer3-test-server1']
['customer3-test-server1']
You can have a look at the following version, which uses defaultdict and builds a dictionary of stack names for each env_name:
#!/usr/bin/env python
import boto3
import csv
import datetime
from datetime import date
import subprocess
import re, itertools
from collections import defaultdict
regions = ['us-west-2']
env_names = ["dev", "test", "stage"]
stack_names_found = defaultdict(list)
for region in regions:
    session = boto3.session.Session(region_name=region)
    cf_client = session.resource('cloudformation')
    for i in cf_client.stacks.all():
        StackStatus = i.stack_status
        Createdtime = i.creation_time
        StackName1 = i.stack_name
        for env_name in env_names:
            if ('-' + env_name + '-') in StackName1:
                stack_names_found[env_name].append(StackName1)
print(stack_names_found)
Please note that I haven't run the code, so some adjustments may be needed to make it fully work.
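If the goal is the one-name-per-line output shown in the question, you can iterate over the collected dictionary afterwards, for example:

# Print each matched stack name on its own line, grouped by environment.
for env_name, names in stack_names_found.items():
    for name in names:
        print([name])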
I am trying to use Python's influxdb package to upload a dataframe into the database. I am using write_points to write points into the database, as given in the documentation (https://influxdb-python.readthedocs.io/en/latest/api-documentation.html). Every time I try it, only the last line of the dataframe is written instead of the complete dataframe. Is this usual behavior, or is there some problem here? Given below is my script:
from influxdb import InfluxDBClient, DataFrameClient
import pathlib
import numpy as np
import pandas as pd
import datetime
db_client = DataFrameClient('dbserver', port, 'username', 'password', 'database',
                            ssl=True, verify_ssl=True)
today = datetime.datetime.now().strftime('%Y%m%d')
path = pathlib.Path('/dir1/dir/2').glob(f'pattern_to_match*/{today}.filename.csv')
for file in path:
    order_start = pd.read_csv(f'{file}')
    if not order_start.empty:
        order_start['data_line1'] = (order_start['col1'] - order_start['col2']) * 1000
        order_start['data_line2'] = (order_start['col3'] - order_start['col4']) * 1000
        d1 = round(order_start['data_line1'].quantile(np.arange(0, 1.1, 0.1)), 3)
        d2 = round(order_start['data_line2'].quantile(np.arange(0, 1.1, 0.1)), 3)
        out_file = pd.DataFrame()
        out_file = out_file.append(d1)
        out_file = out_file.append(d2)
        out_file = out_file.T
        out_file.index = out_file.index.set_names(['percentile'])
        out_file = out_file.reset_index()
        out_file['percentile'] = out_file.percentile.apply(lambda x: f'{100*x:.0f}%')
        out_file['tag_col'] = str(file).split('/')[2]
        out_file['time'] = pd.to_datetime('today').strftime('%Y%m%d')
        out_file = out_file.set_index('time')
        out_file.index = pd.to_datetime(out_file.index)
        db_client.write_points(out_file, 'measurement', database='database',
                               retention_policy='rp')
Can anyone please help?
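A likely explanation, offered as an assumption since the database itself isn't visible here: InfluxDB identifies a point by measurement, tag set, and timestamp, and overwrites on collision. Every row of out_file above shares the same date-only timestamp and the same tag_col value, so each write replaces the previous point and only the last row survives. One sketch of a workaround, using the tag_columns parameter of DataFrameClient.write_points to make each percentile row a distinct series:

# Sketch: tag each row with its percentile so rows sharing a timestamp
# no longer collide into (and overwrite) the same InfluxDB point.
db_client.write_points(out_file, 'measurement',
                       tag_columns=['tag_col', 'percentile'],
                       database='database', retention_policy='rp')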
I have three modules: GetInput, Main, and Converter. The GetInput file holds all the input values and the Excel data in the form of lists. The Converter file uses those input values from the GetInput file, and the Main file ties the two together. I am doing this so that my code looks more organized.
GetInput.py:
import pandas as pd
import numpy as np
import time
def getInputs():
    df = pd.read_excel('input.xlsx')
    actual = df['actual'].values.tolist()
    schedule = df['schedule'].values.tolist()
    freq = df['frequency'].values.tolist()
    ACP = df['acp'].values.tolist()
    modelInput = {
        'actual': actual, 'schedule': schedule, 'freq': freq, 'ACP': ACP, 'df': df
    }
    return modelInput
Converter.py
import pandas as pd
def fun(modelInput):
    underdraw = []
    overdraw = []
    for i, j in zip(schedule, actual):
        dev = j - i
        if dev < 0:
            underdraw.append(dev)
        else:
            underdraw.append(0)
        if dev > 0:
            overdraw.append(dev)
        else:
            overdraw.append(0)
    df['underdraw'] = pd.Series(underdraw)
    df['overdraw'] = pd.Series(overdraw)
    df.to_excel('mainfile.xlsx')
Main.py
import pandas as pd
import numpy as np
from convert import *
from GetInputs import *
def fun1():
    inpu = getInputs()
    con = fun(inpu)
fun1()
This whole program works when I run it in a single module, but it throws errors when I try to divide my code into separate modules. Basically it throws an error in GetInput.py and in Converter.py (df is not defined). I know it's a very basic thing, but I don't know how to make it work. There is no desired output for this program; I am already getting an output when I run it in a single file. I just want to divide my code in the format I mentioned above: a GetInput file, a Converter file, and a Main file.
Keep all the files in the same directory, or else mention the file paths at the top of the main code using the os module.
You have misspelled the following in the main code:
from convert import *
from GetInputs import *
It should be:
from Converter import *
from GetInput import *
I have tested this using the following:
MainModule.py
from Converter import *
from GetInputs import *
def fun1():
    inpu = getInputs()
    con = fun(inpu)
fun1()
Converter.py
import pandas as pd
def fun(modelInput):
    print("HIE" + modelInput)
GetInputs.py
def getInputs():
    return "modelInput"