Formatting the months and dates in a subplot

Formatting the months and dates in a subplot - python

I am trying to create a time series of the sea surface temperature data over the whole year for six consecutive years and plot them using the subplots. I want to mark the x-ticks as the months. I tried using the matplotlib.dates option. However the years doesn't change on the subsequent subplots.
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays19 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2019.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays19 = np.delete(arrays19,[0,1,2,3,4,215,216,217],0)
sst14 = []
for i in arrays14:
d1 = i[0]
d2 = i[2]
sst1 = i[2]
sst14.append(sst1)
datetime1.append(d1)
datetime2.append(d2)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
for i in arrays15:
sst2 = i[2]
sst15.append(sst2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
for i in arrays16:
sst3 = i[2]
sst16.append(sst3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
for i in arrays17:
sst4 = i[2]
sst17.append(sst4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
for i in arrays18:
sst5 = i[2]
sst18.append(sst5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
np.shape(sst18)
sst19 = []
for i in arrays19:
sst6 = i[2]
sst19.append(sst6)
sst19 = np.array(sst19,dtype = np.float64)
sst19_u = np.zeros(len(sst14), dtype = np.float64)
sst19_fill = np.full([118],-9.99,dtype=np.float64)
sst19_u[0:211] = sst19[0:211]
sst19_u[211:329] = sst19_fill
sst19_u[329:365] = sst19[211:247]
sst_19_m = np.ma.masked_equal(sst19_u,-9.99)
##########Plotting
new_epoch = '2016-01-01T00:00:00'
mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot(sst_14_m)
axs[1].plot(sst_15_m)
axs[2].plot(sst_16_m)
axs[3].plot(sst_17_m)
axs[4].plot(sst_18_m)
axs[5].plot(sst_19_m)
for i in range(6):
axs[i].xaxis.set_major_locator(mdates.MonthLocator())
axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
#axs[i].grid(True)
axs[i].set_ylim(bottom=25, top=32)
axs[i].set_ylabel('SST')
plt.show()
I got an output like the following:
I would like to change the xlabels as 2016,2017,2018,2019 etc.
The data can be found in the folder - https://drive.google.com/drive/folders/1bETa7PjWKIUNS13xg3RgIMa5L7bpYn5W?usp=sharing

I love NumPy as much as the next person but this is a good use case for pandas. Pandas has the advantage of being able to label rows with more meaningful things than just positional index. For example, you can use dates. This is very convenient.
First, load your data:
import pandas as pd
import glob
dfs = []
for fname in glob.glob('./sst15n90e_dy_*.ascii'):
df = pd.read_csv(fname, skiprows=4, delimiter='\s+')
dfs.append(df)
df = pd.concat(dfs, axis=0, ignore_index=True)
Now do df.head() and you'll see this:
Let's convert that date to a 'datetime' object, and use it as the index instead of the default row numbers. We'll also deal with those -9.99 values.
import numpy as np
df['ds'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d')
df = df.set_index('ds')
df = df.sort_index()
df.loc[df['SST'] == -9.99, 'SST'] = np.nan
Now you have a dataset you can do all sorts of magic with, like df.resample('Y')['SST'].sum() shows you the annual sum of SST.
Anyway, now we can make plots in various ways. You can plot DataFrames directly, eg check out df.groupby(df.index.year)['SST'].plot(). Or you can use seaborn (check out the gallery!), which understands DataFrames. Or you can construct a plot with matplotlib in the usual way. For instance:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 8), sharey=True)
for ax, (year, group) in zip(axs.flat, df.groupby(df.index.year)):
ax.plot(group['SST'])
ax.set_title(year)
ax.grid(c='k', alpha=0.15)
date_form = DateFormatter("%b")
ax.xaxis.set_major_formatter(date_form)
plt.tight_layout()
This is close to what you wanted, but with a more useful data structure and quite a bit less code:

I did some modifications and got the results as desired:
from pickletools import float8
import os
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
from datetime import datetime
# for files in os.listdir('/home/swadhin/project/sst/daily'):
# path = (files)
# print(path)
# arrays = [np.asarray(list(map(str, line.split()))) for line in open(files)]
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays08 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2008.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays08 = np.delete(arrays08,[0,1,2,3,4,215,216,217],0)
sst14 = []
datetime1 = [] #year, month,date
#datetime2 = [] #hour,min,second
for i in arrays14:
d1 = i[0]
#d2 = i[2]
sst1 = i[2]
sst14.append(sst1)
datetime1.append(d1)
#datetime2.append(d2)
#reading the data
# datetime1 = np.array(datetime1,dtype = np.float64)
# datetime2 = np.array(datetime2,dtype = np.float64)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
datetime2 = []
for i in arrays15:
d2 = i[0]
sst2 = i[2]
sst15.append(sst2)
datetime2.append(d2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
datetime3 = []
for i in arrays16:
d3 = i[0]
sst3 = i[2]
sst16.append(sst3)
datetime3.append(d3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
datetime4 = []
for i in arrays17:
d4 = i[0]
sst4 = i[2]
sst17.append(sst4)
datetime4.append(d4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
datetime5 = []
for i in arrays18:
d5 = i[0]
sst5 = i[2]
sst18.append(sst5)
datetime5.append(d5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
sst08 = []
datetime6 = []
for i in arrays08:
d6 = i[0]
sst6 = i[2]
sst08.append(sst6)
datetime6.append(d6)
sst08 = np.array(sst08,dtype = np.float64)
# sst08_u = np.zeros(len(sst14), dtype = np.float64)
# sst08_fill = np.full([118],-9.99,dtype=np.float64)
# sst08_u[0:211] = sst08[0:211]
# sst08_u[211:329] = sst08_fill
# sst08_u[329:365] = sst08[211:247]
sst_08_m = np.ma.masked_equal(sst08,-9.99)
dt = np.asarray([datetime1,datetime2,datetime3,datetime4,datetime5,datetime6])
dt_m = []
for i in dt:
dt_m1= []
for j in i:
datetime_object = datetime.strptime(j,'%Y%m%d')
dt_m1.append(datetime_object)
dt_m.append(dt_m1)
##########Plotting
# new_epoch = '2016-01-01T00:00:00'
# mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot_date(dt_m[5],sst_08_m,'-')
axs[1].plot_date(dt_m[0],sst_14_m,'-')
axs[2].plot_date(dt_m[1],sst_15_m,'-')
axs[3].plot_date(dt_m[2],sst_16_m,'-')
axs[4].plot_date(dt_m[3],sst_17_m,'-')
axs[5].plot_date(dt_m[4],sst_18_m,'-')
for i in range(6):
axs[i].xaxis.set_major_locator(mdates.MonthLocator())
axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
axs[i].grid(True)
axs[i].set_ylim(bottom=25, top=32)
axs[i].set_ylabel('SST')
plt.show()
And it solved the issue.

Related

Split json file into multiple csv files depending on date?

I am trying to split up a json file from alpha-vantages api into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with but it gives me the TypeError: 'list' object is not callable". I'm fairly new to python and pandas so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json
symbol = "MSFT"
symbol_list = symbol.split(",")
def num_el(list):
count = 0
for element in list:
count += 1
return count
def csv_make(sy, dar, dat):
csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
csv_file.write(dat)
csv_file.close()
i = 0
x = -1
n = num_el(symbol_list)
while i < n:
namesym = symbol_list[x]
ticker = namesym
api_key = 'APIKEYHERE'
url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&outputsize=full&interval=1min&apikey={api_key}'
data = requests.get(url)
dsf = data.json()
daf = pd.DataFrame(dsf['Time Series (1min)'])
dxf: DataFrame = daf.T
dxf.index.name = 'time'
dxf.reset_index(inplace=True)
dxf['time'] = pd.to_datetime(dxf['time'])
dxf['minute'] = dxf['time'].dt.time
dxf['day'] = dxf['time'].dt.day
dxf['date'] = dxf['time'].dt.date
agg = dxf.groupby([dxf['day']])
length1 = dxf.groupby([dxf['day']]).size()
length = pd.DataFrame(length1)
length.index.name = 'day'
length.reset_index(inplace=True)
length_sum = length[0].sum()
v = 0
d = length_sum
b = len(length)
x2 = length_sum
while v < b:
a = length[0][v]
x2 -= length[0][v]
xd = agg.get_group(length['day'][v])
date = xd['date'][x2]
max_dt = parser.parse(str(max(xd['minute'])))
min_dt = parser.parse(str(min(xd['minute'])))
dt_range = []
while min_dt <= max_dt:
dt_range.append(min_dt.strftime("%H:%M:%S"))
min_dt += timedelta(seconds=60)
complete_df = pd.DataFrame({'minute': dt_range})
xy = complete_df.astype('str')
yx = xd.astype('str')
dasf = xy.merge(yx, how='left', on='minute')
dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
time = []
open = []
high = []
low = []
close = []
volume = []
empty_value = []
for ib in range(len(dasf)):
time.append(dasf['minute'][ib])
open.append(dasf['1. open'][ib])
high.append(dasf['2. high'][ib])
low.append(dasf['3. low'][ib])
close.append(dasf['4. close'][ib])
volume.append(dasf['5. volume'][ib])
empty_value.append(dasf['ev'][ib])
time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
high_df = pd.DataFrame(high).rename(columns={0: 'High'})
low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
df = pd.concat(frames, axis=1, join='inner')
df = df.set_index('Time')
ad = df.to_csv()
csv_make(namesym, date, ad)
v += 1
i += 1

Change Trendline-Colour with openPyxl

I'm trying to write a Script in Python with openpyxl, that saves data in a excel-file and also draws a scatterchart with some trendlines. The problem with openpyxl is, that trendlines in default-mode are always colored black, and so my questione is, how to color them in different colors.
from openpyxl.chart import *
from openpyxl.chart.trendline import Trendline
from openpyxl.styles import *
from openpyxl.chart.shapes import *
from openpyxl.drawing.colors import *
wb = Workbook()
ws = wb.create_sheet(name)
#writes the Headlines of the columns into excel-file
for c in range(1,5):
ws.cell(1, c).value = ueberschriften[c-1]
#writes the data into excel-file
for c in range(2, points+2):
e = c-2
for d in range(1,5):
f = d-1
b = a[e][f]
ws.cell(c, d).value = b
for row in a:
ws.append(row)
chart = ScatterChart()
chart.title = "Measuring"
chart.style = 13
chart.x_axis.title = 'Time'
chart.y_axis.title = 'Value'
xvalues = Reference(ws, min_col=1, min_row=2, max_row=points+1)
for i in range(2, 5):
values = Reference(ws, min_col=i, min_row=1, max_row=points+1)
series = Series(values, xvalues, title_from_data=True)
chart.series.append(series)
l = chart.series[0]
l.graphicalProperties.line.solidFill = "FF0000"
l.trindline = Trendline(trendlineType = 'poly', order = fit_order) #trendline with polinomial fitting
l1 = chart.series[1]
l1.graphicalProperties.line.solidFill = "0000FF"
line1.trendline = Trendline(trendlineType = 'poly', order = fit_order)
l2 = chart.series[2]
l2.graphicalProperties.line.solidFill = "00FF00"
l2.trendline = Trendline(trendlineType = 'poly', order = fit_order)
ws.add_chart(chart, "A10")
wb.save("C:\****\****\testFile.xlsx")```

Let's say you want your linear trend line to be green. Then...
from openpyxl.chart.trendline import Trendline
from openpyxl.chart.shapes import GraphicalProperties
from openpyxl.drawing.line import LineProperties
line_props = LineProperties(solidFill='00FF00')
g_props = GraphicalProperties(ln=line_props)
linear_trendline = Trendline(spPr=g_props)
my_chart.series[0].trendline = linear_trendline

ValueError: could not convert string to float: '1.csv'

I get a ValueError: could not convert string to float: '1.csv' on Python 3.6.3 with the following code:
import numpy as np
import matplotlib.pyplot as plt
from lmfit import Model
'''####### functions #####'''
def moving_average(a, n=3) :
ret = np.cumsum(a, dtype=float)
ret[n:] = ret[n:] - ret[:-n]
return ret[n - 1:] / n
# Fitting function
def gaussian(x,a,x0,w):
return a*np.exp(-2*(x-x0)**2/w**2)
'''####### file name ######'''
savfile = False
file_n = ['1','2','3','4','5','6','7','8','9','10',
'11','12','13','14','15','16','17','18','19','20','21','22','23','24']
# hole number of reference
dist = [3,5,7,9,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,27,29,32,36,38]
dist = np.array(dist,dtype=float)*1e-3*25 +1e-3*25 +1e-3*230
'''####### flags representing which point on chopper #######'''
'''####### the razor initiates the plot, as beam diverges #######'''
flag = [0,0,1,1,0,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0]
'''####### empty arrays of zero ready to be filled #######'''
w_max_mean = np.zeros((2,np.size(dist)))
w_min_mean = np.zeros((2,np.size(dist)))
w_max_stderr = np.zeros((2,np.size(dist)))
w_min_stderr = np.zeros((2,np.size(dist)))
for k in range(np.size(file_n)):#range(1):#
file_name = file_n[k] + ".csv"
file_name1 = float(file_name)
#file_name = float(file_name)
# print(file_name)
data = np.loadtxt(file_name1,comments='%',delimiter=',',usecols=[0,1],skiprows=0)
time0 = data[:,0]
volt0 = data[:,1]
n_int = 10.#+ np.abs(k-15)
t = np.zeros(np.array(np.size(t0)/n_int,dtype = int)-1);
V = np.zeros(np.array(np.size(t0)/n_int,dtype = int)-1);
for j in range(np.array(np.size(t0)/n_int,dtype = int)-1):
t[j] = np.sum(t0[j*n_int:(j+1)*n_int-1])/n_int
V[j] = np.sum(V0[j*n_int:(j+1)*n_int-1])/n_int
What causes this?

[np.loadtxt]1 takes a str as it's first argument, not a float. Why are you trying to convert the filename to a float? Just remove file_name1 = float(file_name) and replace
data = np.loadtxt(file_name1,comments='%',delimiter=',',usecols=[0,1],skiprows=0)
with
data = np.loadtxt(file_name, comments='%', delimiter=',', usecols=[0,1], skiprows=0)
and it should all work.

Add each new dictionary result in the order of the columns of a dataframe

I am new to Python, but hope to explain the issue.
dfrow - is a dictionary of a single regression summary
results - is an empty dataframe with same columns as in dfrow
I would like to save regression results for each observation in the outer loop at the same time making sure column order in the inner loop. I am getting a result for the first observations but cannot move further, error saying:
Traceback (most recent call last):
File "<stdin>", line 109, in <module>
TypeError: 'numpy.int64' object is not iterable
when I run this code
import pandas as pd
import numpy as np
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.stats import stattools as st
import statsmodels.api as sm
import collections
import datetime
import warnings
import scipy.stats
df_rent = import_rents()
df_return = import_ee_rets()
mostrecent = df_return.iloc[len(df_return) - 1]
mostrecentYYYY = mostrecent['Year']
mostrecentQ = mostrecent['Quarter']
mostrecentperiod = str(mostrecentYYYY) + "-Q" + str(mostrecentQ)
rentcols = df_rent.columns.values
colnames = []
#loop through the columns in df_rent until the column == the most recent period for which we have ee return data
for colname in rentcols:
if colname != mostrecentperiod:
colnames.append(colname)
else:
colnames.append(colname)
break
rentcols = colnames
#subset df_rent to only include columns that also have ee return data
df_rent = df_rent[rentcols]
#change dtype of metro_code / metro columns to string for matching later
df_rent['metro_code'] = df_rent['metro_code'].apply(str)
df_return['Metro'] = df_return['Metro'].apply(str)
df = pd.read_csv('//x/Project/_data/raw_data/rent_change.csv')
metros = list(np.unique(df['metro_code']))
regress_result_names = [
'metro',
'num_lag',
'num_ma',
'num_AR',
'beta_x1_retmov',
'x1_se',
'x1_tstat',
'x1_pval',
'r-squared',
'reg_fstat',
'fstat_pvalue',
'durbin-watson',
'resid_var']
regress_result_names = pd.Series(regress_result_names)
results = pd.DataFrame(columns=regress_result_names)
row = 0
for metro in metros:
for nlag in range(0, 5):
for nma in range(1, 11):
for AR in range(1, 5):
y = df_rent[df_rent['metro_code'] == str(metro)]
y = y.values.tolist()
y = y[0]
# delete first two columns of df_rent (they don't contain numeric data)
y.pop(0)
y.pop(0)
#y = rent time series data for specific metro
y = pd.Series(y)
#x1 = lagged moving average data for given params
df_return1 = df_return[df_return['Metro'] == str(metro)]
df_return1 = df_return1.reset_index(drop = True)
x1 = lagged_moving_avg(df = df_return1, metro_code = metro, nlag = nlag, nma = nma)
#y and x1 dataframe
y_label = 'y_Rent'
x_lagMA_label = 'x1_LaggedMA'
df1 = pd.DataFrame()
df1[y_label] = y
df1[x_lagMA_label] = x1
if mostrecentQ == 1:
currmonth = "01"
elif mostrecentQ == 2:
currmonth = "04"
elif mostrecentQ == 3:
currmonth = "07"
else:
currmonth = "10"
#convert index to datetime to run the regressions
currpd = pd.to_datetime((str(mostrecentYYYY) + currmonth), format='%Y%m')
df1.index = pd.date_range(*(pd.to_datetime(['1990-01', currpd]) + pd.offsets.QuarterEnd()), freq='Q')
#drop any rows that have missing observations
df1 = df1.dropna()
#df1.to_csv('//Nisfile01/x/Project - Real Estate Database/real_estate/odil/XandY.csv', index=True)
reg = ARIMA(endog = df1[y_label], order = (AR, 0,0)).fit(trend = 'nc', disp = 0, tol=1e-20)
resid_reg = reg.resid
reg2 = sm.OLS(resid_reg, df1[x_lagMA_label]).fit()
resid_reg2 = reg2.resid
dfrow = {
'metro': metro,
'num_lag': nlag,
'num_ma': nma,
'num_AR': AR,
'beta_x1_retmov': reg2.params[0],
'x1_se': reg2.bse[0],
'x1_tstat': reg2.tvalues[0],
'x1_pval': reg2.pvalues[0],
'r-squared': reg2.rsquared,
'reg_fstat':reg2.fvalue,
'fstat_pvalue': reg2.f_pvalue,
'durbin-watson': st.durbin_watson(reg2.resid),
'resid_var': resid_reg2.var(),
}
#create df for output called results
for key in dfrow.keys():
results.loc[row, key] = list(dfrow[key])
row = row + 1
Any help is very much appreciated.
P.S. Sorry for the messy code

The offending line is results.loc[row, key] = list(dfrow[key]).
You are trying to convert a single value, in this case a numpy.int64 object, to a list. I assume that what you're trying to do, and correct me if I am wrong, is create a singleton list with the int64 inside it. If that's what you want to do, you should use:
results.loc[row, key] = [dfrow[key]]

Looping same program for different data files

For the following program, I am trying to save time copying and pasting tons of code. I would like this program to plot using the data file 19_6.txt and aux.19_6, and then continue by plotting the files with 11,12,20,28,27, and 18 in 19's place with the same code and onto the same plot. Any help would be appreciated. Thanks!
from numpy import *
import matplotlib.pyplot as plt
datasim19 = loadtxt("/home/19_6.txt")
data19 = loadtxt("/home/aux.19_6")
no1=1
no2=2
no3=3
no4=4
no5=5
no7=7
no8=8
no9=9
no10=10
simrecno1inds19 = nonzero(datasim19[:,1]==no1)[0]
simrecno2inds19 = nonzero(datasim19[:,1]==no2)[0]
simrecno3inds19 = nonzero(datasim19[:,1]==no3)[0]
simrecno4inds19 = nonzero(datasim19[:,1]==no4)[0]
simrecno5inds19 = nonzero(datasim19[:,1]==no5)[0]
simrecno7inds19 = nonzero(datasim19[:,1]==no7)[0]
simrecno8inds19 = nonzero(datasim19[:,1]==no8)[0]
simrecno9inds19 = nonzero(datasim19[:,1]==no9)[0]
simrecno10inds19 = nonzero(datasim19[:,1]==no10)[0]
recno1inds19 = nonzero(data19[:,1]==no1)[0]
recno2inds19 = nonzero(data19[:,1]==no2)[0]
recno3inds19 = nonzero(data19[:,1]==no3)[0]
recno4inds19 = nonzero(data19[:,1]==no4)[0]
recno5inds19 = nonzero(data19[:,1]==no5)[0]
recno7inds19 = nonzero(data19[:,1]==no7)[0]
recno8inds19 = nonzero(data19[:,1]==no8)[0]
recno9inds19 = nonzero(data19[:,1]==no9)[0]
recno10inds19 = nonzero(data19[:,1]==no10)[0]
q1sim19 = qsim19[simrecno1inds19]
q2sim19 = qsim19[simrecno2inds19]
q3sim19 = qsim19[simrecno3inds19]
q4sim19 = qsim19[simrecno4inds19]
q5sim19 = qsim19[simrecno5inds19]
q7sim19 = qsim19[simrecno7inds19]
q8sim19 = qsim19[simrecno8inds19]
q9sim19 = qsim19[simrecno9inds19]
q10sim19 = qsim19[simrecno10inds19]
q1_19 = q19[recno1inds19]
q2_19 = q19[recno2inds19]
q3_19 = q19[recno3inds19]
q4_19 = q19[recno4inds19]
q5_19 = q19[recno5inds19]
q7_19 = q19[recno7inds19]
q8_19 = q19[recno8inds19]
q9_19 = q19[recno9inds19]
q10_19 = q19[recno10inds19]
sumq1sim19 = sum(q1sim19)
sumq2sim19 = sum(q2sim19)
sumq3sim19 = sum(q3sim19)
sumq4sim19 = sum(q4sim19)
sumq5sim19 = sum(q5sim19)
sumq7sim19 = sum(q7sim19)
sumq8sim19 = sum(q8sim19)
sumq9sim19 = sum(q9sim19)
sumq10sim19 = sum(q10sim19)
sumq1_19 = sum(q1_19)
sumq2_19 = sum(q2_19)
sumq3_19 = sum(q3_19)
sumq4_19 = sum(q4_19)
sumq5_19 = sum(q5_19)
sumq7_19 = sum(q7_19)
sumq8_19 = sum(q8_19)
sumq9_19 = sum(q9_19)
sumq10_19 = sum(q10_19)
xsim = [no1, no2, no3, no4, no5, no7, no8, no9, no10]
ysim = [sumq1sim_19, sumq2sim_19, sumq3sim_19, sumq4sim_19, sumq5sim_19, sumq7sim_19, sumq8sim_19, sumq9sim_19, sumq10sim_19]
x = [no1, no2, no3, no4, no5,no7, no8, no9, no10]
y = [sumq1_19, sumq2_19, sumq3_19, sumq4_19, sumq5_19, sumq7_19, sumq8_19, sumq9_19, sumq10_19]
plt.plot(x,log(y),'b',label='Data')
plt.plot(xsim,log(ysim),'r',label='Simulation')
plt.legend()
plt.title('Data vs. Simulation')
plt.show()

Tip: when you find yourself using lots of variables called n1, n2, n3 etc. you should probably use lists, dictionaries or other such containers, and loops instead.
For example, try replacing the following code:
simrecno1inds19 = nonzero(datasim19[:,1]==no1)[0]
simrecno2inds19 = nonzero(datasim19[:,1]==no2)[0]
simrecno3inds19 = nonzero(datasim19[:,1]==no3)[0]
simrecno4inds19 = nonzero(datasim19[:,1]==no4)[0]
simrecno5inds19 = nonzero(datasim19[:,1]==no5)[0]
simrecno7inds19 = nonzero(datasim19[:,1]==no7)[0]
simrecno8inds19 = nonzero(datasim19[:,1]==no8)[0]
simrecno9inds19 = nonzero(datasim19[:,1]==no9)[0]
simrecno10inds19 = nonzero(datasim19[:,1]==no10)[0]
With this:
simrecinds19 = [nonzero(datasim19[:,1] == i)[0] for i in range(1, 11)]
Then you can use simrecinds19[0] instead of simrecno1inds19.

You can do something like this:
nList = [19,11,12,20,28,27,18]
for n in nList:
file1 = "/home/" + str(n) + "_6.txt"
file2 = "/home/aux." + str(n) + "_6"
datasim19 = loadtxt(file1)
data19 = loadtxt(file2)
# do the rest of the plotting

You can greatly reduce the size of this script. I'm not quite sure where qsim19 and qsim come from, but take a look:
import numpy as np
import matplotlib.pyplot as plt
for index in [19, 11, 12, 20, 28, 27, 18]:
datasim = loadtxt("/home/%i_6.txt"%index)
data = loadtxt("/home/aux.%i_6"%index)
nos = range(1, 6) + range(7, 11)
simrecno = [np.nonzero(datasim[:,1] == n)[0] for n in nos]
recno = [np.nonzero(data[:,1] == n)[0] for n in nos]
qsim = [qsim[simrecno_i] for simrecno_i in simrecno]
q = [q[recno_i] for recno_i in recno]
sumqsim = [sum(qsim_i) for qsim_i in qsim]
sumq = [sum(q_i) for q_i in q]
xsim = nos
ysim = sumqsim
x = nos
y = sumq
plt.plot(x, log(y), 'b', label='Data')
plt.plot(xsim, log(ysim), 'r', label='Simulation')
plt.legend()
plt.title('Data vs. Simulation')
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Formatting the months and dates in a subplot - python

Related

Split json file into multiple csv files depending on date?

Change Trendline-Colour with openPyxl

ValueError: could not convert string to float: '1.csv'

Add each new dictionary result in the order of the columns of a dataframe

Looping same program for different data files

Categories

Resources