Looping same program for different data files - python

For the following program, I am trying to save time copying and pasting tons of code. I would like this program to plot using the data file 19_6.txt and aux.19_6, and then continue by plotting the files with 11,12,20,28,27, and 18 in 19's place with the same code and onto the same plot. Any help would be appreciated. Thanks!
from numpy import *
import matplotlib.pyplot as plt
datasim19 = loadtxt("/home/19_6.txt")
data19 = loadtxt("/home/aux.19_6")
no1=1
no2=2
no3=3
no4=4
no5=5
no7=7
no8=8
no9=9
no10=10
simrecno1inds19 = nonzero(datasim19[:,1]==no1)[0]
simrecno2inds19 = nonzero(datasim19[:,1]==no2)[0]
simrecno3inds19 = nonzero(datasim19[:,1]==no3)[0]
simrecno4inds19 = nonzero(datasim19[:,1]==no4)[0]
simrecno5inds19 = nonzero(datasim19[:,1]==no5)[0]
simrecno7inds19 = nonzero(datasim19[:,1]==no7)[0]
simrecno8inds19 = nonzero(datasim19[:,1]==no8)[0]
simrecno9inds19 = nonzero(datasim19[:,1]==no9)[0]
simrecno10inds19 = nonzero(datasim19[:,1]==no10)[0]
recno1inds19 = nonzero(data19[:,1]==no1)[0]
recno2inds19 = nonzero(data19[:,1]==no2)[0]
recno3inds19 = nonzero(data19[:,1]==no3)[0]
recno4inds19 = nonzero(data19[:,1]==no4)[0]
recno5inds19 = nonzero(data19[:,1]==no5)[0]
recno7inds19 = nonzero(data19[:,1]==no7)[0]
recno8inds19 = nonzero(data19[:,1]==no8)[0]
recno9inds19 = nonzero(data19[:,1]==no9)[0]
recno10inds19 = nonzero(data19[:,1]==no10)[0]
q1sim19 = qsim19[simrecno1inds19]
q2sim19 = qsim19[simrecno2inds19]
q3sim19 = qsim19[simrecno3inds19]
q4sim19 = qsim19[simrecno4inds19]
q5sim19 = qsim19[simrecno5inds19]
q7sim19 = qsim19[simrecno7inds19]
q8sim19 = qsim19[simrecno8inds19]
q9sim19 = qsim19[simrecno9inds19]
q10sim19 = qsim19[simrecno10inds19]
q1_19 = q19[recno1inds19]
q2_19 = q19[recno2inds19]
q3_19 = q19[recno3inds19]
q4_19 = q19[recno4inds19]
q5_19 = q19[recno5inds19]
q7_19 = q19[recno7inds19]
q8_19 = q19[recno8inds19]
q9_19 = q19[recno9inds19]
q10_19 = q19[recno10inds19]
sumq1sim19 = sum(q1sim19)
sumq2sim19 = sum(q2sim19)
sumq3sim19 = sum(q3sim19)
sumq4sim19 = sum(q4sim19)
sumq5sim19 = sum(q5sim19)
sumq7sim19 = sum(q7sim19)
sumq8sim19 = sum(q8sim19)
sumq9sim19 = sum(q9sim19)
sumq10sim19 = sum(q10sim19)
sumq1_19 = sum(q1_19)
sumq2_19 = sum(q2_19)
sumq3_19 = sum(q3_19)
sumq4_19 = sum(q4_19)
sumq5_19 = sum(q5_19)
sumq7_19 = sum(q7_19)
sumq8_19 = sum(q8_19)
sumq9_19 = sum(q9_19)
sumq10_19 = sum(q10_19)
xsim = [no1, no2, no3, no4, no5, no7, no8, no9, no10]
ysim = [sumq1sim_19, sumq2sim_19, sumq3sim_19, sumq4sim_19, sumq5sim_19, sumq7sim_19, sumq8sim_19, sumq9sim_19, sumq10sim_19]
x = [no1, no2, no3, no4, no5,no7, no8, no9, no10]
y = [sumq1_19, sumq2_19, sumq3_19, sumq4_19, sumq5_19, sumq7_19, sumq8_19, sumq9_19, sumq10_19]
plt.plot(x,log(y),'b',label='Data')
plt.plot(xsim,log(ysim),'r',label='Simulation')
plt.legend()
plt.title('Data vs. Simulation')
plt.show()

Tip: when you find yourself using lots of variables called n1, n2, n3 etc. you should probably use lists, dictionaries or other such containers, and loops instead.
For example, try replacing the following code:
simrecno1inds19 = nonzero(datasim19[:,1]==no1)[0]
simrecno2inds19 = nonzero(datasim19[:,1]==no2)[0]
simrecno3inds19 = nonzero(datasim19[:,1]==no3)[0]
simrecno4inds19 = nonzero(datasim19[:,1]==no4)[0]
simrecno5inds19 = nonzero(datasim19[:,1]==no5)[0]
simrecno7inds19 = nonzero(datasim19[:,1]==no7)[0]
simrecno8inds19 = nonzero(datasim19[:,1]==no8)[0]
simrecno9inds19 = nonzero(datasim19[:,1]==no9)[0]
simrecno10inds19 = nonzero(datasim19[:,1]==no10)[0]
With this:
simrecinds19 = [nonzero(datasim19[:,1] == i)[0] for i in range(1, 11)]
Then you can use simrecinds19[0] instead of simrecno1inds19.

You can do something like this:
nList = [19,11,12,20,28,27,18]
for n in nList:
file1 = "/home/" + str(n) + "_6.txt"
file2 = "/home/aux." + str(n) + "_6"
datasim19 = loadtxt(file1)
data19 = loadtxt(file2)
# do the rest of the plotting

You can greatly reduce the size of this script. I'm not quite sure where qsim19 and qsim come from, but take a look:
import numpy as np
import matplotlib.pyplot as plt
for index in [19, 11, 12, 20, 28, 27, 18]:
datasim = loadtxt("/home/%i_6.txt"%index)
data = loadtxt("/home/aux.%i_6"%index)
nos = range(1, 6) + range(7, 11)
simrecno = [np.nonzero(datasim[:,1] == n)[0] for n in nos]
recno = [np.nonzero(data[:,1] == n)[0] for n in nos]
qsim = [qsim[simrecno_i] for simrecno_i in simrecno]
q = [q[recno_i] for recno_i in recno]
sumqsim = [sum(qsim_i) for qsim_i in qsim]
sumq = [sum(q_i) for q_i in q]
xsim = nos
ysim = sumqsim
x = nos
y = sumq
plt.plot(x, log(y), 'b', label='Data')
plt.plot(xsim, log(ysim), 'r', label='Simulation')
plt.legend()
plt.title('Data vs. Simulation')
plt.show()

Related

Formatting the months and dates in a subplot

I am trying to create a time series of the sea surface temperature data over the whole year for six consecutive years and plot them using the subplots. I want to mark the x-ticks as the months. I tried using the matplotlib.dates option. However the years doesn't change on the subsequent subplots.
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays19 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2019.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays19 = np.delete(arrays19,[0,1,2,3,4,215,216,217],0)
sst14 = []
for i in arrays14:
d1 = i[0]
d2 = i[2]
sst1 = i[2]
sst14.append(sst1)
datetime1.append(d1)
datetime2.append(d2)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
for i in arrays15:
sst2 = i[2]
sst15.append(sst2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
for i in arrays16:
sst3 = i[2]
sst16.append(sst3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
for i in arrays17:
sst4 = i[2]
sst17.append(sst4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
for i in arrays18:
sst5 = i[2]
sst18.append(sst5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
np.shape(sst18)
sst19 = []
for i in arrays19:
sst6 = i[2]
sst19.append(sst6)
sst19 = np.array(sst19,dtype = np.float64)
sst19_u = np.zeros(len(sst14), dtype = np.float64)
sst19_fill = np.full([118],-9.99,dtype=np.float64)
sst19_u[0:211] = sst19[0:211]
sst19_u[211:329] = sst19_fill
sst19_u[329:365] = sst19[211:247]
sst_19_m = np.ma.masked_equal(sst19_u,-9.99)
##########Plotting
new_epoch = '2016-01-01T00:00:00'
mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot(sst_14_m)
axs[1].plot(sst_15_m)
axs[2].plot(sst_16_m)
axs[3].plot(sst_17_m)
axs[4].plot(sst_18_m)
axs[5].plot(sst_19_m)
for i in range(6):
axs[i].xaxis.set_major_locator(mdates.MonthLocator())
axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
#axs[i].grid(True)
axs[i].set_ylim(bottom=25, top=32)
axs[i].set_ylabel('SST')
plt.show()
I got an output like the following:
I would like to change the xlabels as 2016,2017,2018,2019 etc.
The data can be found in the folder - https://drive.google.com/drive/folders/1bETa7PjWKIUNS13xg3RgIMa5L7bpYn5W?usp=sharing
I love NumPy as much as the next person but this is a good use case for pandas. Pandas has the advantage of being able to label rows with more meaningful things than just positional index. For example, you can use dates. This is very convenient.
First, load your data:
import pandas as pd
import glob
dfs = []
for fname in glob.glob('./sst15n90e_dy_*.ascii'):
df = pd.read_csv(fname, skiprows=4, delimiter='\s+')
dfs.append(df)
df = pd.concat(dfs, axis=0, ignore_index=True)
Now do df.head() and you'll see this:
Let's convert that date to a 'datetime' object, and use it as the index instead of the default row numbers. We'll also deal with those -9.99 values.
import numpy as np
df['ds'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d')
df = df.set_index('ds')
df = df.sort_index()
df.loc[df['SST'] == -9.99, 'SST'] = np.nan
Now you have a dataset you can do all sorts of magic with, like df.resample('Y')['SST'].sum() shows you the annual sum of SST.
Anyway, now we can make plots in various ways. You can plot DataFrames directly, eg check out df.groupby(df.index.year)['SST'].plot(). Or you can use seaborn (check out the gallery!), which understands DataFrames. Or you can construct a plot with matplotlib in the usual way. For instance:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 8), sharey=True)
for ax, (year, group) in zip(axs.flat, df.groupby(df.index.year)):
ax.plot(group['SST'])
ax.set_title(year)
ax.grid(c='k', alpha=0.15)
date_form = DateFormatter("%b")
ax.xaxis.set_major_formatter(date_form)
plt.tight_layout()
This is close to what you wanted, but with a more useful data structure and quite a bit less code:
I did some modifications and got the results as desired:
from pickletools import float8
import os
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
from datetime import datetime
# for files in os.listdir('/home/swadhin/project/sst/daily'):
# path = (files)
# print(path)
# arrays = [np.asarray(list(map(str, line.split()))) for line in open(files)]
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays08 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2008.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays08 = np.delete(arrays08,[0,1,2,3,4,215,216,217],0)
sst14 = []
datetime1 = [] #year, month,date
#datetime2 = [] #hour,min,second
for i in arrays14:
d1 = i[0]
#d2 = i[2]
sst1 = i[2]
sst14.append(sst1)
datetime1.append(d1)
#datetime2.append(d2)
#reading the data
# datetime1 = np.array(datetime1,dtype = np.float64)
# datetime2 = np.array(datetime2,dtype = np.float64)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
datetime2 = []
for i in arrays15:
d2 = i[0]
sst2 = i[2]
sst15.append(sst2)
datetime2.append(d2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
datetime3 = []
for i in arrays16:
d3 = i[0]
sst3 = i[2]
sst16.append(sst3)
datetime3.append(d3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
datetime4 = []
for i in arrays17:
d4 = i[0]
sst4 = i[2]
sst17.append(sst4)
datetime4.append(d4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
datetime5 = []
for i in arrays18:
d5 = i[0]
sst5 = i[2]
sst18.append(sst5)
datetime5.append(d5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
sst08 = []
datetime6 = []
for i in arrays08:
d6 = i[0]
sst6 = i[2]
sst08.append(sst6)
datetime6.append(d6)
sst08 = np.array(sst08,dtype = np.float64)
# sst08_u = np.zeros(len(sst14), dtype = np.float64)
# sst08_fill = np.full([118],-9.99,dtype=np.float64)
# sst08_u[0:211] = sst08[0:211]
# sst08_u[211:329] = sst08_fill
# sst08_u[329:365] = sst08[211:247]
sst_08_m = np.ma.masked_equal(sst08,-9.99)
dt = np.asarray([datetime1,datetime2,datetime3,datetime4,datetime5,datetime6])
dt_m = []
for i in dt:
dt_m1= []
for j in i:
datetime_object = datetime.strptime(j,'%Y%m%d')
dt_m1.append(datetime_object)
dt_m.append(dt_m1)
##########Plotting
# new_epoch = '2016-01-01T00:00:00'
# mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot_date(dt_m[5],sst_08_m,'-')
axs[1].plot_date(dt_m[0],sst_14_m,'-')
axs[2].plot_date(dt_m[1],sst_15_m,'-')
axs[3].plot_date(dt_m[2],sst_16_m,'-')
axs[4].plot_date(dt_m[3],sst_17_m,'-')
axs[5].plot_date(dt_m[4],sst_18_m,'-')
for i in range(6):
axs[i].xaxis.set_major_locator(mdates.MonthLocator())
axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
axs[i].grid(True)
axs[i].set_ylim(bottom=25, top=32)
axs[i].set_ylabel('SST')
plt.show()
And it solved the issue.

Scipy Curve_fit gives a rather weird fit

Dear Python programmers,
I am currently working with curve_fit from scipy inorder to find out what correlation the x and y data have with echouter. However, the curve fit becomes really weird even when I fit a simple lineair formule towards it. I've tried changing the array to a numpy array at the: def func(x, a, b, c): "Fit functie" return a * np.asarray(x) + b part but it still gives me a graph that looks like a 3 year old who scratched with some red pencil.
One thing I do remember is sorting the values of massflows and rms_smote from low to high. Which you can view above the def func(x, a, b, c) bit. Since the curve_fit was giving me a fit. Yet also kinda scratched out as if you're sketching when the values ware unsorted. I don't know if curve_fit considers data differently if it's sorted or not.
If you need any more information, let me know :) Any suggestion is welcome!
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import linregress
from scipy.optimize import curve_fit
data_15 = pd.read_csv(r"C:\Users\Thomas\Documents\Pythondata\2022-01-15_SMOTERapport.csv", header= 0, sep=';', decimal=',')
data_06 = pd.read_csv(r"C:\Users\Thomas\Documents\Pythondata\2022-02-06_SMOTERapport.csv", header= 0, sep=';', decimal=',')
data_10 = pd.read_csv(r"C:\Users\Thomas\Documents\Pythondata\2022-02-10_SMOTERapport.csv", header= 0, sep=';', decimal=',')
speed_15 = data_15['SPEED_ACT']
speed_06 = data_06['SPEED_ACT']
speed_10 = data_10['SPEED_ACT']
"Data filter 01_15"
filter = [i for i, e in enumerate(speed_15) if e >= 80]
s_15 = pd.DataFrame(data_15)
speed15 = s_15.filter(items = filter, axis=0)
speed15.reset_index(drop=True, inplace=True)
temp15 = speed15['TP_SMOTE']
foutmetingen2 = [i for i, e in enumerate(temp15) if e < 180]
speed15 = speed15.drop(foutmetingen2)
tp_strip15 = speed15['TP_AMBIENT']
tp_target15 = speed15['TP_TARGET']
tp_smote15 = speed15['TP_SMOTE']
v_15 = speed15['SPEED_ACT']
width15 = speed15['STRIP_WIDTH']
thickness15 = speed15['STRIP_THICKNESS']
power15 = speed15['POWER_INVERTER_PRE']
voltage15 = speed15['VOLTAGE_INVERTER_PRE']
"Data filter 02_06"
filter = [i for i, e in enumerate(speed_06) if e >= 80]
s_06 = pd.DataFrame(data_06)
speed06 = s_06.filter(items = filter, axis=0)
speed06.reset_index(drop=True, inplace=True)
temp06 = speed06['TP_SMOTE']
foutmetingen2 = [i for i, e in enumerate(temp06) if e < 180]
speed06 = speed06.drop(foutmetingen2)
tp_strip06 = speed06['TP_AMBIENT']
tp_target06 = speed06['TP_TARGET']
tp_smote06 = speed06['TP_SMOTE']
v_06 = speed06['SPEED_ACT']
width06 = speed06['STRIP_WIDTH']
thickness06 = speed06['STRIP_THICKNESS']
power06 = speed06['POWER_INVERTER_PRE']
voltage06 = speed06['VOLTAGE_INVERTER_PRE']
"Data filter 02_10"
filter = [i for i, e in enumerate(speed_10) if e >= 80]
s_10 = pd.DataFrame(data_10)
speed10 = s_10.filter(items = filter, axis=0)
speed10.reset_index(drop=True, inplace=True)
temp_01 = speed10['TP_SMOTE']
foutmetingen2 = [i for i, e in enumerate(temp_01) if e < 180]
speed10 = speed10.drop(foutmetingen2)
tp_strip10 = speed10['TP_AMBIENT']
tp_target10 = speed10['TP_TARGET']
tp_smote10 = speed10['TP_SMOTE']
v_10 = speed10['SPEED_ACT']
width10 = speed10['STRIP_WIDTH']
thickness10 = speed10['STRIP_THICKNESS']
power10 = speed10['POWER_INVERTER_PRE']
voltage10 = speed10['VOLTAGE_INVERTER_PRE']
"Constanten"
widthmax = 1253
Kra = 0.002033636
Kosc = 0.073086272
Pnominal = 2200
meting_15 = np.arange(0, len(speed15), 1)
meting_06 = np.arange(0, len(speed06), 1)
meting_10 = np.arange(0, len(speed10), 1)
cp = 480
rho = 7850
"---------------------------------------------------------------------"
def temp(power, speed, width, thickness, tp_strip, tp_target, tp_smote,
voltage):
"Berekende temperatuur vergelijken met target temperatuur"
massflow = (speed/60)*width*10**-3*thickness*10**-3*rho
LossesRA = Kra*Pnominal*(width/widthmax)
LossesOSC = Kosc*Pnominal*(voltage/100)**2
Plosses = (LossesRA + LossesOSC)
power_nl = (power/100)*Pnominal - Plosses
temp_c = ((power_nl*1000)/(massflow*cp)) + tp_strip
verschil_t = (temp_c/tp_target)*100-100
verschil_smote = (temp_c/tp_smote)*100-100
return temp_c, verschil_t, verschil_smote, massflow
temp_15 = temp(power15, v_15, width15, thickness15, tp_strip15, tp_target15,
tp_smote15, voltage15)
temp_06 = temp(power06, v_06, width06, thickness06, tp_strip06, tp_target06,
tp_smote06, voltage06)
temp_10 = temp(power10, v_10, width10, thickness10, tp_strip10, tp_target10,
tp_smote10, voltage10)
"---------------------------------------------------------------------"
def rms(Temperatuurberekend, TemperatuurGemeten):
"De Root Mean Square berekenen tussen berekend en gemeten data"
rootmeansquare = (TemperatuurGemeten - Temperatuurberekend)
rootmeansquare_totaal = np.sum(rootmeansquare)
rootmeansquare_gem = rootmeansquare_totaal/len(rootmeansquare)
return rootmeansquare, rootmeansquare_totaal, rootmeansquare_gem
rms_tp_smote15 = (rms(temp_15[0], tp_smote15))
rms_tp_smote06 = (rms(temp_06[0], tp_smote06))
rms_tp_smote10 = (rms(temp_10[0], tp_smote10))
"----------------------------------------------------------------------"
massflows = [np.sum(temp_06[3])/len(temp_06[3]), np.sum(temp_15[3])/
len(temp_15[3]), np.sum(temp_10[3])/len(temp_10[3])]
rms_smote = [rms_tp_smote06[2], rms_tp_smote10[2], rms_tp_smote15[2]]
rms_tp_smote_pre = np.append(rms_tp_smote15[0].tolist(),
rms_tp_smote06[0].tolist())
rms_tp_smote = np.append(rms_tp_smote_pre, rms_tp_smote10[0].tolist())
massflow_pre = np.append(temp_15[3].tolist(), temp_06[3].tolist())
massflow = np.append(massflow_pre, temp_10[3].tolist())
massflow_sort = np.sort(massflow)
rms_tp_smote_sort = [x for _, x in sorted(zip(massflow, rms_tp_smote))]
a,b,r,p, s_a= linregress (massflows,rms_smote)
print('RC: ' ,a ,'\n','std: ', s_a , '\n', 'Offset: ', b)
def func(x, a, b, c):
"Fit functie"
return a * np.asarray(x) + b
popt, pcov = curve_fit(func, massflow_sort, rms_tp_smote_sort)
popt
functie = func(massflow_sort, *popt)
sns.set_theme(style='whitegrid')
fig, axs = plt.subplots(2, figsize=(10, 10))
axs[0].plot(massflows, rms_smote, label='Temp afwijking als f(massflow)')
axs[0].plot ([massflows[0] ,massflows[len (massflows) -1]] ,
[a*massflows [0]+b,a*massflows[len (massflows) -1]+b] ,
label ='trendlijn')
axs[0].set(xlabel='Mass flow ($kg/s$)',
ylabel='Temperatuur afwijking gem ($\u00b0C$)', title='Met Verliezen')
axs[0].legend(loc='upper right')
axs[1].plot(massflow_sort, rms_tp_smote_sort, 'o', label='Temp/Massflow 01-15')
#axs[1].plot(temp_06[3], rms_tp_smote06[0], 'o', label='Temp/Massflow 02-06')
#axs[1].plot(temp_10[3], rms_tp_smote10[0], 'o', label='Temp/Massflow 02-10')
axs[1].plot(massflow, func(massflow_sort, *popt), 'r-',
label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))
axs[1].set(xlabel='Mass flow ($kg/s$)',
ylabel='Temperatuur afwijking gem ($\u00b0C$)')
axs[1].legend(loc='upper right')
print("Gemiddelde verschil temperatuur smote: ", rms_tp_smote15[1])
print("Gemiddelde uitwijking temperatuur smote: ", rms_tp_smote15[2])

Problematic for loop conversion from MATLAB to Python

I am converting some code from MATLAB to Python, and I have encountered an issue I can't resolve. When iterating over the For loop in the section of code, my for loop is spitting out repeated values, that are also incorrect. I believe this has to do with my definition of "x" and "z", but I am not quite Here is my Python script and the matrices D2A1 and D2A2 are giving the repeated blocks of incorrect values.
import sys
import numpy as np
import scipy as sp
import scipy.special as scl
import numpy.matlib as mat
###
#np.set_printoptions(threshold = sys.maxsize)
##
###Constants and Parameters
w = np.array([.09,.089])
a = np.array([0,3])
coup = np.array([w[0],0])/10
dE12 = -2*w[0]
gs = np.array([0,0])
ws = w**2
alpha = a[0]*ws[0]/a[1]/ws[1]
dEp = (dE12+a[0]**2*ws[0]/2+a[1]**2*ws[1]/2)/a[1]/ws[1]
ac = np.array([0,0],dtype = 'float')
ac[0] = alpha*dEp*ws[1]/(ws[0]+alpha**2*ws[1])
ac[1] = dEp - alpha*ac[0]
iS = 0 ## starting state
z0c = gs[1]
x0c = gs[0]
Mx = 128*2
Mz = 128*2
N = 2
dt = 0.05
#Now we need grid lengths L[1x1]
Lx = 10
Lz = 10
LxT = Lx*2
LzT = Lz*2
#x0-z0 = z0[1XM] = Grod of M points from 0 to L
x0 = np.array([np.linspace(-Lx,Lx,Mx)])
z0 = np.array([np.linspace(-Lz,Lz, Mz)])
x0op = np.transpose(np.matlib.repmat(x0,Mz,1))
z0op = np.matlib.repmat(z0,Mx,1)
## For loop over matricies
VDI = np.zeros((2,2),dtype = 'complex')
D2A1 = np.zeros(((2,Mx*Mz)),dtype = 'complex')
D2A2 = D2A1
V1 = D2A1
V2 = V1
VP1 = V1
VP2 = V1
for ig in range(Mz):
for jg in range(Mx):
z = z0[0,ig]
x = x0[0,jg]
###Diabtic Matrix###
VDI[0,0] = (w[1]*z)**2/2+(w[0]*x)**2/2
VDI[1,1] = (w[1]*(z-a[1]))**2/2+(w[0]*(x-a[0]))**2/2+dE12
VDI[0,1] = coup[1]*(z+ac[1])+coup[0]*(x+ac[0])
VDI[1,0] = VDI[0,1]
###Adiabatdization###
[VDt, U] = np.linalg.eigh(VDI)
VDt = np.array(VDt).reshape(2,1)
VDt = np.diagflat(VDt)
UUdVP = np.array([U#sp.linalg.expm(-1.j*dt*VDt)#U.T])
V = U#VDt#U.T
ixz = jg+(ig-1)*Mx
D2A1[:, ixz] = np.conj((U[:,0]))
D2A2[:, ixz] = np.conj((U[:,1]))
print(D2A1)
Below is the MATLAB loop I am trying to recreate.
VDI=zeros(2,2);
D2A1=zeros(2,Mx*Mz); D2A2=D2A1; V1=D2A1; V2=V1; VP1=V1; VP2=V1;
for ig=1:Mz,
for jg=1:Mx,
z = z0(ig); x = x0(jg);
% diabatic matrix
VDI(1,1) = (w(2)*z)^2/2+(w(1)*x)^2/2;
VDI(2,2) = (w(2)*(z-a(2)))^2/2+(w(2)*(x-a(1)))^2/2+dE12;
VDI(1,2) = coup(2)*(z+ac(2))+coup(1)*(x+ac(1)); VDI(2,1)=VDI(1,2);
% adiabatization
[U,VDt]=eig(VDI) ;
[VDt Ind]=sort(diag(VDt)); U=U(:,Ind);
UUdVP=U*diag(exp(-1i*dt*VDt))*U';
V=U*diag(VDt)*U';
ixz = jg + (ig-1)*Mx;
D2A1(:,ixz) = conj(U(:,1)); D2A2(:,ixz) = conj(U(:,2));
end; end;
Any help would be greatly appreciated. Thanks!
Fixed. Error was in the definition of matrices to be generated. From what I gather in Python you must specifically define each array, while in MATLAB you can set matrix equivalences and run them through a for-loop.

Splittig data in python dataframe and getting the array values automatically

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv('D:\ history/segment.csv')
data = pd.DataFrame(data)
data = data.sort_values(['Prob_score'], ascending=[False])
one = len(data)
actualpaid_overall = len(data.loc[data['paidstatus'] == 1])
data_split = np.array_split(data, 10)
data1 = data_split[0]
actualpaid_ten = len(data1.loc[data1['paidstatus'] == 1])
percent_ten = actualpaid_ten/actualpaid_overall
data2 = data_split[1]
actualpaid_twenty = len(data2.loc[data2['paidstatus'] == 1])
percent_twenty = (actualpaid_twenty/actualpaid_overall) + percent_ten
data3 = data_split[2]
actualpaid_thirty = len(data3.loc[data3['paidstatus'] == 1])
percent_thirty = (actualpaid_thirty/actualpaid_overall) + percent_twenty
data4 = data_split[3]
actualpaid_forty = len(data4.loc[data4['paidstatus'] == 1])
percent_forty = (actualpaid_forty/actualpaid_overall) + percent_thirty
data5 = data_split[4]
actualpaid_fifty = len(data5.loc[data5['paidstatus'] == 1])
percent_fifty = (actualpaid_fifty/actualpaid_overall) + percent_forty
data6 = data_split[5]
actualpaid_sixty = len(data6.loc[data6['paidstatus'] == 1])
percent_sixty = (actualpaid_sixty/actualpaid_overall) + percent_fifty
data7 = data_split[6]
actualpaid_seventy = len(data7.loc[data7['paidstatus'] == 1])
percent_seventy = (actualpaid_seventy/actualpaid_overall) + percent_sixty
data8 = data_split[7]
actualpaid_eighty = len(data8.loc[data8['paidstatus'] == 1])
percent_eighty = (actualpaid_eighty/actualpaid_overall) + percent_seventy
data9 = data_split[8]
actualpaid_ninenty = len(data9.loc[data9['paidstatus'] == 1])
percent_ninenty = (actualpaid_ninenty/actualpaid_overall) + percent_eighty
data10 = data_split[9]
actualpaid_hundred = len(data10.loc[data10['paidstatus'] == 1])
percent_hundred = (actualpaid_hundred/actualpaid_overall) + percent_ninenty
array_x = [10,20,30,40,50,60,70,80,90,100]
array_y = [ percent_ten, percent_twenty, percent_thirty, percent_forty,percent_fifty, percent_sixty, percent_seventy, percent_eighty, percent_ninenty, percent_hundred]
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x,array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid( which='both', axis = 'both', linewidth=0.5,color='0.75')
The above is my python code i have splitted my dataframe into 10 equal sections and plotted the graph but I'm not satisfied with this i have two concerns:
array_x = [10,20,30,40,50,60,70,80,90,100] in this line of code i have manually taken the x values, is there any possible way to process automatically as i have taken split(data,10) it should show 10 array values
As we can see the whole data1,2,3,4...10 is being repeated again and again is there a solution to write this in a function or loop.
Any help with codes will be appreciated. Thanks
I believe you need list comprehension and for count is possible use simplier way - sum of boolean mask, True values are processes like 1, then convert list to numpy array and use numpy.cumsum:
data = pd.read_csv('D:\ history/segment.csv')
data = data.sort_values('Prob_score', ascending=False)
one = len(data)
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
x = [len(x) for x in data_split]
y = [(x['paidstatus'] == 1).sum()/actualpaid_overall for x in data_split]
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x,array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid( which='both', axis = 'both', linewidth=0.5,color='0.75')
Sample:
np.random.seed(2019)
N = 1000
data = pd.DataFrame({'paidstatus':np.random.randint(3, size=N),
'Prob_score':np.random.randint(100, size=N)})
#print (data)
data = data.sort_values(['Prob_score'], ascending=[False])
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
x = [len(x) for x in data_split]
y = [(x['paidstatus'] == 1).sum()/actualpaid_overall for x in data_split]
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
print (array_x)
[ 100 200 300 400 500 600 700 800 900 1000]
print (array_y)
[0.09118541 0.18844985 0.27963526 0.38601824 0.49848024 0.61702128
0.72036474 0.81155015 0.9331307 1. ]

Making linear regression more compact (python)

Im trying to make a linear expression for a dataset. I have plotted the data and plottet the regression, but my code is not very efficient. Is there any way to make it more compact?
import numpy as np
import matplotlib.pyplot as plt
temp1, tid0 = np.genfromtxt("forsok1.txt", dtype=float, skip_header=41, usecols = (1,2)).T
tid1 = tid0 - 200
temp2, tid2 = np.genfromtxt("forsok2.txt", dtype=float, skip_header=1, usecols = (1,2)).T
temp3, tid3 = np.genfromtxt("forsok3.txt", dtype=float, skip_header=1, usecols = (1,2)).T
tempreg1_1 = np.zeros(88)
tidreg1_1 = np.zeros(88)
for i in range(0, 88):
tempreg1_1[i] = temp1[i]
tidreg1_1[i] = tid1[i]
tempreg2_1 = np.zeros(65)
tidreg2_1 = np.zeros(65)
tempreg3_1 = np.zeros(65)
tidreg3_1 = np.zeros(65)
for i in range(0, 65):
tempreg2_1[i] = temp2[i]
tidreg2_1[i] = tid2[i]
tempreg3_1[i] = temp3[i]
tidreg3_1[i] = tid3[i]
tempreg1_2 = np.zeros(59)
tidreg1_2 = np.zeros(59)
for i in range(0, 59):
tempreg1_2[i] = temp1[i+112]
tidreg1_2[i] = tid1[i+112]
tempreg2_2 = np.zeros(76)
tidreg2_2 = np.zeros(76)
for i in range(0, 76):
tempreg2_2[i] = temp2[i+93]
tidreg2_2[i] = tid2[i+93]
tempreg3_2 = np.zeros(55)
tidreg3_2 = np.zeros(55)
for i in range(0,55):
tempreg3_2[i] = temp3[i+100]
tidreg3_2[i] = tid3[i+100]
tempreg1_3 = np.zeros(76)
tidreg1_3 = np.zeros(76)
for i in range(0, 76):
tempreg1_3[i] = temp1[i+210]
tidreg1_3[i] = tid1[i+210]
tempreg2_3 = np.zeros(80)
tidreg2_3 = np.zeros(80)
for i in range(0, 80):
tempreg2_3[i] = temp2[i+207]
tidreg2_3[i] = tid2[i+207]
tempreg3_3 = np.zeros(91)
tidreg3_3 = np.zeros(91)
for i in range(0,91):
tempreg3_3[i] = temp3[i+181]
tidreg3_3[i] = tid3[i+181]
R1_1, b1_1 = np.polyfit(tidreg1_1, tempreg1_1, 1)
R2_1, b2_1 = np.polyfit(tidreg2_1, tempreg2_1, 1)
R3_1, b3_1 = np.polyfit(tidreg3_1, tempreg3_1, 1)
R1_2, b1_2 = np.polyfit(tidreg1_2, tempreg1_2, 1)
R2_2, b2_2 = np.polyfit(tidreg2_2, tempreg2_2, 1)
R3_2, b3_2 = np.polyfit(tidreg3_2, tempreg3_2, 1)
R1_3, b1_3 = np.polyfit(tidreg1_3, tempreg1_3, 1)
R2_3, b2_3 = np.polyfit(tidreg2_3, tempreg2_3, 1)
R3_3, b3_3 = np.polyfit(tidreg3_3, tempreg3_3, 1)
tempreg1_1[0] = b1_1
tempreg2_1[0] = b2_1
tempreg3_1[0] = b3_1
for j in range(1, 88):
tempreg1_1[j] = tempreg1_1[j-1] + 5*R1_1
for j in range(1, 65):
tempreg2_1[j] = tempreg2_1[j-1] + 5*R2_1
tempreg3_1[j] = tempreg3_1[j-1] + 5*R3_1
tempreg1_2[0] = b1_2 + 560*R1_2
tempreg2_2[0] = b2_2 + 465*R2_2
tempreg3_2[0] = b3_2 + 500*R3_2
for j in range(1, 59):
tempreg1_2[j] = tempreg1_2[j-1] + 5*R1_2
for j in range(1, 76):
tempreg2_2[j] = tempreg2_2[j-1] + 5*R2_2
for j in range(1, 55):
tempreg3_2[j] = tempreg3_2[j-1] + 5*R3_2
tempreg1_3[0] = b1_3 + 1050*R1_3
tempreg2_3[0] = b2_3 + 1035*R2_3
tempreg3_3[0] = b3_3 + 905*R3_3
for j in range(1, 76):
tempreg1_3[j] = tempreg1_3[j-1] + 5*R1_3
for j in range(1, 80):
tempreg2_3[j] = tempreg2_3[j-1] + 5*R2_3
for j in range(1, 91):
tempreg3_3[j] = tempreg3_3[j-1] + 5*R3_3
plt.figure()
ax1 = plt.subplot(311)
ax2 = plt.subplot(312)
ax3 = plt.subplot(313)
ax1.plot(tid1, temp1, ':', color="g")
ax1.plot(tidreg1_1, tempreg1_1, '-.',color="b")
ax1.plot(tidreg1_2, tempreg1_2, '-.',color="b")
ax1.plot(tidreg1_3, tempreg1_3, '-.',color="b")
ax2.plot(tid2, temp2, ':', color="g")
ax2.plot(tidreg2_1, tempreg2_1, '-.',color="b")
ax2.plot(tidreg2_2, tempreg2_2, '-.',color="b")
ax2.plot(tidreg2_3, tempreg2_3, '-.',color="b")
ax3.plot(tid3, temp3, ':', color="g")
ax3.plot(tidreg3_1, tempreg3_1, '-.',color="b")
ax3.plot(tidreg3_2, tempreg3_2, '-.',color="b")
ax3.plot(tidreg3_3, tempreg3_3, '-.',color="b")
The code i have used is making arrays from small parts of the dataset, then making a linear regression from those arrays. The regression is then made into another array, whitch is plotted in the subplots. This is done for three different dataplots.
I have tried to make it more compact but havent foud a function to use. Thanks for the help and sorry for bad english.
This:
tempreg1_1 = np.zeros(88)
tidreg1_1 = np.zeros(88)
for i in range(0, 88):
tempreg1_1[i] = temp1[i]
tidreg1_1[i] = tid1[i]
Is the same as this:
tempreg1_1 = temp1[:88]
tidreg1_1 = tid1[:88]
So you may not even need make those arrays, since you can potentially just use the 'slices' directly.
In general, you rarely need to pre-create an empty array then fill it with a loop. If you find yourself doing this in NumPy, there's almost certainly a better way.
You don't have to do all of this explicitly, you can iterate through these almost-all-the-same works. Here's a simplified case, sorry your variables is a bit too much, so I use some easy names:
#read data
plt.figure()
ax1 = plt.subplot(311)
ax2 = plt.subplot(312)
ax3 = plt.subplot(313)
plots = [ax1, ax2, ax3]
for subplot in plots:
#operating tidreg and tempreg here
xCordinate = #should be your tidreg
y1 = tempreg1
y2 = tempreg2
regression1 = np.poly1d(np.polyfit(xCordinate , y1, 1))
regression2 = np.poly1d(np.polyfit(xCordinate , y2, 1))
subplot.plot(xCordinate, regression1(xCordinate), 'b-')
subplot.plot(xCordinate, regression2(xCordinate), 'b-')
plt.show()
Each for loop corresponds to a subplot, you need only operating data that would be used in that subplot. During each loop, the variable is renewed, so you also don't have to create so many variables. theoretically, that could cut down two third of the work and save a lot of memory.
For indexing or slicing arrays, you can refer this question and this numpy manual

Categories

Resources