# Clinic as posted in the question (the listing has lost its indentation).
class Clinic:
# NOTE(review): `medicationList` is a required argument but is never used below.
def __init__(self, medicationList):
# The store is created as an empty dict ...
self._medicationList = {}
def addMedication(self, medication):
# ... but `append` is a list method; a dict has no `append`, so this call
# raises AttributeError.
self._medicationList.append(medication)
# Driver from the question: builds three sample medication tuples and tries to
# register one of them with a Clinic.
def main():
m1 = ('CP12', 'Chloro-6 pheniramine-X', 0.08, 4.0, 3)
m2 = ('DM01', 'Dex-2 trimethorphan-0', 0.25, 15.0, 2)
m3 = ('LH03', 'Lyso-X Hydrochloride', 1.00, 10.0, 1)
# NOTE(review): Clinic.__init__ above declares a required `medicationList`
# parameter, so calling Clinic() with no argument raises TypeError.
cl = Clinic()
# NOTE(review): addMedication() accepts a single `medication`, but five
# positional values are passed here; m1, m2 and m3 are never used.
cl.addMedication('CP12', 'Chloro-6 pheniramine-X', 0.08, 4.0, 3)
# NOTE(review): `c1` (digit one) is undefined; the instance is named `cl`.
print(c1)
main()
I am trying to add each medication to `_medicationList`. How do I go about doing it? The resulting dictionary should look something like this:
{m1 : ['CP12', 'Chloro-6 pheniramine-X', 0.08, 4.0, 3 ] , m2 : ['DM01', 'Dex-2 trimethorphan-0', 0.25, 15.0, 2]}
It looks like you want to be using a list instead of a dictionary. Maybe something like this?
from typing import List, Tuple
# One medication record: two strings, two floats and an int, in that order.
Medication = Tuple[str, str, float, float, int]


class Clinic:
    """A clinic that keeps its medications in a list, in insertion order."""

    def __init__(self) -> None:
        # Every entry is one Medication tuple, stored in the order it was added.
        self._medications: List[Medication] = []

    def add_medication(self, medication: Medication) -> None:
        """Append *medication* to the clinic's list."""
        self._medications.append(medication)

    def __str__(self) -> str:
        """Return the string form of the underlying list of medications."""
        return str(self._medications)
def main() -> None:
    """Add three sample medications to a Clinic and print it."""
    m1 = ('CP12', 'Chloro-6 pheniramine-X', 0.08, 4.0, 3)
    m2 = ('DM01', 'Dex-2 trimethorphan-0', 0.25, 15.0, 2)
    m3 = ('LH03', 'Lyso-X Hydrochloride', 1.00, 10.0, 1)

    cl = Clinic()
    for m in (m1, m2, m3):
        cl.add_medication(m)
    print(cl)


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
If you want it to be a dict instead of a list, maybe something more like this?
from typing import Dict, Tuple
# One medication record: two strings, two floats and an int, in that order.
Medication = Tuple[str, str, float, float, int]


class Clinic:
    """A clinic that keeps its medications in a dict keyed by name."""

    def __init__(self) -> None:
        # Maps the caller-chosen name to its Medication tuple.
        self._medications: Dict[str, Medication] = {}

    def add_medication(self, name: str, medication: Medication) -> None:
        """Store *medication* under *name*, replacing any earlier entry."""
        self._medications[name] = medication

    def __str__(self) -> str:
        """Return the string form of the underlying dict of medications."""
        return str(self._medications)
def main() -> None:
    """Register three sample medications under "m1".."m3" and print the clinic."""
    m1 = ('CP12', 'Chloro-6 pheniramine-X', 0.08, 4.0, 3)
    m2 = ('DM01', 'Dex-2 trimethorphan-0', 0.25, 15.0, 2)
    m3 = ('LH03', 'Lyso-X Hydrochloride', 1.00, 10.0, 1)

    cl = Clinic()
    cl.add_medication("m1", m1)
    cl.add_medication("m2", m2)
    cl.add_medication("m3", m3)
    print(cl)


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
class Clinic:
    """A clinic that stores medications in a dict keyed by each record's first item."""

    def __init__(self):
        # Maps medication[0] to the remaining items of the record.
        self._medication_list = {}

    def add_medication(self, medication):
        """Store *medication* using its first item as the key.

        The value is ``medication[1:]``, so it has the same sequence type as
        the record passed in (a tuple stays a tuple).
        """
        self._medication_list[medication[0]] = medication[1:]

    def get_medication_list(self):
        """Return the underlying dict (the live object, not a copy)."""
        return self._medication_list
def main():
    """Add three sample records to a Clinic and print the resulting dict."""
    # The first item of each record ('m1', 'm2', 'm3') becomes its dict key.
    m1 = ('m1', 'CP12', 'Chloro-6 pheniramine-X', 0.08, 4.0, 3)
    m2 = ('m2', 'DM01', 'Dex-2 trimethorphan-0', 0.25, 15.0, 2)
    m3 = ('m3', 'LH03', 'Lyso-X Hydrochloride', 1.00, 10.0, 1)
    lst = [m1, m2, m3]

    cl = Clinic()
    for m in lst:
        cl.add_medication(m)
    print(cl.get_medication_list())


main()
is this what you wanted?
1. Your `self._medicationList` is not a list in the first place (you assign an empty dictionary to it), so it has no `append` method.
2. Your `addMedication()` method takes only one parameter besides `self`, but you call it with five arguments (six counting `self`); you can collect the extra values with `*`.
Here is code that works; try running it:
class Clinic:
    """A clinic that stores medications in a dict of lists keyed by name."""

    def __init__(self):
        # Maps each name to the list of remaining values given for it.
        self.medicationList = dict()

    def addMedication(self, name, *medication):
        """Store the extra positional values as a list under *name*.

        Any earlier entry for *name* is replaced. Calling with only a name
        stores an empty list.
        """
        self.medicationList[name] = list(medication)
def main():
    """Add one sample medication to a Clinic and print the resulting dict."""
    m1 = ('CP12', 'Chloro-6 pheniramine-X', 0.08, 4.0, 3)

    cl = Clinic()
    # Unpack the tuple: its first item becomes the key, the rest the value list.
    cl.addMedication(*m1)
    print(cl.medicationList)


main()
Related
I am trying to build an app with Streamlit. The script includes a preprocessing step that uses scikit-learn's MinMaxScaler, but after the transformation every value comes back as zero. What's wrong with my code?
Here is part of the script:
# Excerpt from the question's Streamlit script (indentation lost in this
# listing): it turns the form selections into a one-row feature frame and
# scales it. `st`, `pd`, `MinMaxScaler`, `FC`, `SL`, `FA` and `area` are
# defined outside this excerpt.
contract = ['Proyek dibawah 100M','Proyek 100M-150M','Proyek 150M-500M','Proyek diatas 500M']
project_contract = st.selectbox("Select your project contract", contract)
# One row holding the three numeric inputs.
input_spec = pd.DataFrame(columns=['FC','SL','FA'], data=[[FC, SL, FA]])
# Default: an empty frame with the six area columns, replaced by one of the
# branches below when `area` matches.
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'])
# Each branch builds a one-row frame with a single 1 in the column for the
# chosen area; 'Bali & Nusa Tenggara' is encoded as all zeros.
if area == 'Jakarta':
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'], data=[[1,0,0,0,0,0]])
elif area == 'Jawa':
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'], data=[[0,1,0,0,0,0]])
elif area == 'Kalimantan':
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'], data=[[0,0,1,0,0,0]])
elif area == 'Papua':
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'], data=[[0,0,0,1,0,0]])
elif area == 'Sulawesi':
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'], data=[[0,0,0,0,1,0]])
elif area == 'Sumatera':
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'], data=[[0,0,0,0,0,1]])
elif area == 'Bali & Nusa Tenggara':
input_area = pd.DataFrame(columns=['area_JAKARTA', 'area_JAWA', 'area_KALIMANTAN',
'area_PAPUA', 'area_SULAWESI', 'area_SUMATERA'], data=[[0,0,0,0,0,0]])
# Same scheme for the contract size: three indicator columns, with
# 'Proyek 100M-150M' encoded as all zeros.
if project_contract == 'Proyek dibawah 100M':
input_project = pd.DataFrame(columns=['project_contract_150M-500M', 'project_contract_above 500M',
'project_contract_below 100M'], data =[[0,0,1]])
elif project_contract == 'Proyek 150M-500M':
input_project = pd.DataFrame(columns=['project_contract_150M-500M', 'project_contract_above 500M',
'project_contract_below 100M'], data =[[1,0,0]])
elif project_contract == 'Proyek diatas 500M':
input_project = pd.DataFrame(columns=['project_contract_150M-500M', 'project_contract_above 500M',
'project_contract_below 100M'], data =[[0,1,0]])
elif project_contract == 'Proyek 100M-150M':
input_project = pd.DataFrame(columns=['project_contract_150M-500M', 'project_contract_above 500M',
'project_contract_below 100M'], data =[[0,0,0]])
# Cast every indicator column to float.
for i in input_area.columns:
input_area[i] = input_area[i].astype('float')
for j in input_project.columns:
input_project[j] = input_project[j].astype('float')
# Join the three one-row frames side by side into a single feature row.
input_submit = pd.concat([input_spec, input_area, input_project], axis=1)
st.dataframe(input_submit)
# NOTE(review): the scaler is fitted on this single row, so every column's
# minimum equals its maximum; that is presumably why the scaled frame is all
# zeros, as the question reports. It should be fitted on the training data and
# only used to transform this row -- confirm against the training pipeline.
scaler = MinMaxScaler()
input_submit_scaled = pd.DataFrame(scaler.fit_transform(input_submit.values), columns=input_submit.columns)
st.dataframe(input_submit_scaled)
The input_submit dataframe
The input_submit_scaled dataframe
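Your issue has nothing to do with Streamlit; it comes from how the scaler is used. After you instantiate the scaler with:
scaler = MinMaxScaler()
fit it on the training data only. When you have a test sample, reuse the same fitted scaler to transform it, but do not fit it again. Fitting on a single row makes every column's minimum equal to its maximum, which is why all the scaled values come out as zero.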
Here is a demo.
Code
def demo():
    """Fit a MinMaxScaler on training rows, then scale one unseen row with it.

    Prints the raw and scaled training data followed by the raw and scaled
    test row. The scaler is fitted once, on the training data only.
    """
    train_data = [[5.0, 16.0, 7.0, 4.0, 1.0, 0.0, 0.0, 0.0],
                  [8.0, 7.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0],
                  [5.0, 9.0, 9.0, 0.0, 1.0, 0.0, 1.0, 1.0]]
    test_data = [[25.0, 12.0, 15.0, 0.0, 1.0, 0.0, 0.0, 0.0]]  # input_submit.values

    # Scale, fit and transform the train data.
    min_max_scaler = MinMaxScaler()
    train_data_minmax = min_max_scaler.fit_transform(train_data)

    # Optional: persist the fitted scaler so the app can reuse it later.
    # (Only unpickle files you created yourself.)
    # import pickle
    # scaler_fn = 'scaler_project.pkl'
    # with open(scaler_fn, 'wb') as handle:
    #     pickle.dump(min_max_scaler, handle)

    # Scale new data using the scaler loaded from disk.
    # with open(scaler_fn, 'rb') as handle:
    #     loaded_scaler = pickle.load(handle)
    # test_data_loaded_minmax = loaded_scaler.transform(test_data)

    # Scale the single test or input data: transform only, do not fit again.
    test_data_minmax = min_max_scaler.transform(test_data)

    print(f'train_data:\n{train_data}')
    print(f'train_data_minmax:\n{train_data_minmax}\n')
    print(f'test_data:\n{test_data}')
    print(f'test_data_minmax:\n{test_data_minmax}')


demo()
Output
train_data:
[[5.0, 16.0, 7.0, 4.0, 1.0, 0.0, 0.0, 0.0], [8.0, 7.0, 8.0, 1.0, 1.0, 0.0, 0.0, 0.0], [5.0, 9.0, 9.0, 0.0, 1.0, 0.0, 1.0, 1.0]]
train_data_minmax:
[[0. 1. 0. 1. 0. 0.
0. 0. ]
[1. 0. 0.5 0.25 0. 0.
0. 0. ]
[0. 0.22222222 1. 0. 0. 0.
1. 1. ]]
test_data:
[[25.0, 12.0, 15.0, 0.0, 1.0, 0.0, 0.0, 0.0]]
test_data_minmax:
[[6.66666667 0.55555556 4. 0. 0. 0.
0. 0. ]]
Not all scaled input data values are zero.
Reference
https://scikit-learn.org/stable/modules/preprocessing.html#scaling-features-to-a-range
I am trying to solve the convex problem below:
In my opinion, the objective is convex.
My code is:
import cvxpy as cp
import numpy as np
# Problem data from the question. Every array below has five entries, one per
# index of the decision variable `pp`.
ppMax = 1
Tmax = np.array([1.19, 1.99, 4.16, 1.98, 2.53])
d = np.array([2648000, 5552000, 4744000, 4056000, 6168000])
p = np.array([0.19952623149688797, 0.00018021843172751523, 0.0020210434604112652, 0.001602417432034276, 0.003647501823979989])
r = np.array([8574212.020483451, 6619470.077787987, 7521159.373986546, 7135440.631765847, 6832684.423897811])
c = np.array([430000000.0, 700000000.0, 400000000.0, 220000000.0, 170000000.0])
fc = np.array([40000000000, 40000000000, 40000000000, 40000000000, 40000000000])
ff = np.array([4000000000, 4000000000, 4000000000, 4000000000, 4000000000])
W = np.array([0.7, 0.2, 0.3, 0.7, 0.5])
wt = np.array([0.609, 0.04000000000000001, 0.255, 0.308, 0.43])
we = np.array([4.336742687028045, 10.647756980938421, 8.263103073749088, 7.675258157093112, 6.322105707432189])
# Decision variable: one value per entry of Tmax.
pp = cp.Variable(len(Tmax))
# Constraints: the entries of pp sum to at most ppMax, and an elementwise
# upper bound by Tmax.
# NOTE(review): dividing by W*cp.log(1+pp) (here and in the objective) may not
# satisfy cvxpy's DCP rules -- confirm, or reformulate with a DCP atom.
cons = [cp.sum(pp) <= ppMax, d/r+c/fc+d/(W*cp.log(1+pp)) <= Tmax]
# NOTE: the name `object` shadows the Python builtin of the same name.
# NOTE(review): wt*(...) + we*(...) looks like an elementwise (length-5)
# expression rather than a scalar; confirm whether a cp.sum(...) is intended,
# since cp.Minimize expects a scalar objective.
object = cp.Minimize(wt*(2*d/r+c/ff+c/fc+d/(W*cp.log(1+pp)))+we*(2*p*(d/r)+pp*(d/(W*cp.log(1+pp)))))
prob = cp.Problem(object, cons)
prob.solve()
print(prob.value, pp.value)
I've been trying for a while to figure out how to connect two shader nodes for a material I'm making in Blender. I've been googling all over, but I can't seem to wrap my head around how to connect them; the last two lines of the code below are my best attempts. Hopefully someone can see through this object madness, as I cannot.
# Operator from the question (indentation lost in this listing): adds an
# ico sphere, gives it a "planet material" and creates several shader nodes.
class WM_OT_textOpBasic(bpy.types.Operator):
"""Creates the Base Planet"""
bl_idname = "wm.textopbasic"
bl_label = " Text Tool Operator"
# NOTE(review): no return statement is visible in execute(); Blender operators
# normally return a status set such as {'FINISHED'} -- confirm.
def execute(self, context):
bpy.ops.mesh.primitive_ico_sphere_add(subdivisions=6, radius=1.0, calc_uvs=True, enter_editmode=False, align='WORLD', location=(0.0, 0.0, 0.0), rotation=(0.0, 0.0, 0.0), scale=(1.0, 1.0, 1.0))
# The first selected object is taken to be the sphere that was just added.
planet = bpy.context.selected_objects[0]
planet.name = "Planet"
# Look the material up by name and create it only if it does not exist yet.
planet_material = bpy.data.materials.get("planet material")
if planet_material is None:
# create material
planet_material = bpy.data.materials.new(name="planet material")
planet.data.materials.append(planet_material)
planet_material.use_nodes = True
nodes = planet_material.node_tree.nodes
# Three colour ramps, two noise textures and a bump node, placed by hand.
ColorRamp1 = nodes.new('ShaderNodeValToRGB')
ColorRamp1.location = -400,100
ColorRamp2 = nodes.new('ShaderNodeValToRGB')
ColorRamp2.location = -700,100
ColorRamp3 = nodes.new('ShaderNodeValToRGB')
ColorRamp3.location = -1000,100
Noise1 = nodes.new('ShaderNodeTexNoise')
Noise1.location = -1100,300
Noise2 = nodes.new('ShaderNodeTexNoise')
Noise2.location = -900,300
Bump = nodes.new('ShaderNodeBump')
Bump.location = -150,-150
# Attempt 1: links.new(output_socket, input_socket) on the material's node tree.
planet.active_material.node_tree.links.new(Noise1.outputs[0],Noise2.inputs[1])
# Attempt 2: this calls the `links` collection itself rather than its `new`
# method. NOTE(review): presumably this is the line that fails -- confirm.
planet_material.node_tree.links(Noise1.outputs[0],Noise2.inputs[1])
Your question doesn't specify the exact configuration of the nodes, but this code shows the basics of how linking works:
import bpy
# Add the sphere that becomes the planet, then take the first selected object.
bpy.ops.mesh.primitive_ico_sphere_add(subdivisions=6, radius=1.0, calc_uvs=True, enter_editmode=False, align='WORLD', location=(0.0, 0.0, 0.0), rotation=(0.0, 0.0, 0.0), scale=(1.0, 1.0, 1.0))
planet = bpy.context.selected_objects[0]
planet.name = "Planet"

# Reuse the material if it already exists, otherwise create it.
planet_material = bpy.data.materials.get("planet material")
if planet_material is None:
    planet_material = bpy.data.materials.new(name="planet material")

# Assign the material to the new sphere exactly once (the original listing
# appended it a second time at the end of the script).
planet.data.materials.append(planet_material)
planet_material.use_nodes = True

# Start from an empty node tree so that repeated runs do not pile up nodes.
if planet_material.node_tree:
    planet_material.node_tree.links.clear()
    planet_material.node_tree.nodes.clear()

nodes = planet_material.node_tree.nodes
links = planet_material.node_tree.links

ColorRamp1 = nodes.new('ShaderNodeValToRGB')
ColorRamp1.location = -400, 100
Noise1 = nodes.new('ShaderNodeTexNoise')
Noise1.location = -1100, 300
# An output node is needed for the material to display anything.
output = nodes.new(type='ShaderNodeOutputMaterial')

# links.new(from_socket, to_socket): sockets are addressed by index, so
# Noise1.outputs[0] is the noise node's first output and ColorRamp1.inputs[0]
# is the colour ramp's first input.
links.new(Noise1.outputs[0], ColorRamp1.inputs[0])
# NOTE(review): both links below target output.inputs[1]; presumably the
# second replaces the first on that socket -- confirm which one is intended.
links.new(Noise1.outputs[0], output.inputs[1])
links.new(ColorRamp1.outputs[0], output.inputs[1])
I have the code below, which converts an implied volatility into a piecewise-constant volatility. It raises this error:
for j, _vol in enumerate(_boot_vol,2):
TypeError: 'numpy.float64' object is not iterable
But neither _vol nor _boot_vol is a NumPy array. I would appreciate your help in resolving this, please.
Code:
# Time grid: 0.0 to 5.0 in steps of 0.5 (11 points).
termstruct = np.array(
    [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]
)
# Ten forward-curve values.
forwardcurve = np.array(
    [0.0112, 0.0118, 0.0123, 0.0127, 0.0132,
     0.0137, 0.0145, 0.0154, 0.0163, 0.0174]
)
# Nine caplet volatilities.
capletvols = np.array(
    [0.2366, 0.2487, 0.2573, 0.2564, 0.2476,
     0.2376, 0.2252, 0.2246, 0.2223]
)
num_times = len(termstruct)
# Spacing between consecutive grid points.
tau = np.diff(termstruct)
# Calibration class as posted in the question (indentation lost in this
# listing). It reads the module-level arrays termstruct, forwardcurve, tau and
# capletvols.
class computevol:
# NOTE(review): declared without `self`, so it can only be called on the
# class itself, not on an instance.
def _caliberatevol():
global termstruct
global forwardcurve
global tau
global capletvols
_vols = np.zeros((len(forwardcurve),len(termstruct)))
# Starts out as an empty list ...
_boot_vol = []
for i , _capvol in enumerate(capletvols,2):
# ... but is rebound here to the result of a NumPy scalar product, so it
# is no longer a list.
_boot_vol = _capvol**2 * termstruct[i-1]
# Iterating that scalar is what raises
# "TypeError: 'numpy.float64' object is not iterable".
for j, _vol in enumerate(_boot_vol,2):
_boot_vol -= _vol**2*tau[j-1]
# NOTE(review): list.append takes one argument, and `tau(0)` calls the
# array instead of indexing it (`tau[0]`).
_boot_vol.append(_boot_vol,np.sqrt(_boot_vol/tau(0)))
_vols[1:,1] = _boot_vol
for i in range(2,len(termstruct)):
_vols[i:,i] = _boot_vol[:-i+1]
return _vols
You need a temporary variable for the running total, so that `_boot_vol` stays a list:
class computevol:
    """Namespace for the piecewise-constant volatility calibration."""

    @staticmethod
    def _caliberatevol():
        """Bootstrap volatilities from the module-level caplet data.

        Reads the module-level arrays ``termstruct``, ``forwardcurve``,
        ``tau`` and ``capletvols`` (read-only, so no ``global`` declaration
        is needed).

        Returns:
            A 2-D array of shape ``(len(forwardcurve), len(termstruct))``.
            Column 1 (from row 1 down) holds the bootstrapped values; each
            later column ``i`` holds the same values shifted down to start at
            row ``i``. Everything else is zero.
        """
        _vols = np.zeros((len(forwardcurve), len(termstruct)))
        _boot_vol = []
        for i, _capvol in enumerate(capletvols, 2):
            # Accumulate in a temporary so that _boot_vol stays a list.
            _temp = _capvol**2 * termstruct[i-1]
            # Subtract the contribution of every value bootstrapped so far.
            for j, _vol in enumerate(_boot_vol, 2):
                _temp -= _vol**2 * tau[j-1]
            _boot_vol.append(np.sqrt(_temp / tau[0]))

        _vols[1:, 1] = _boot_vol
        for i in range(2, len(termstruct)):
            # Column i has i-1 fewer free rows, so drop that many trailing values.
            _vols[i:, i] = _boot_vol[:-i+1]
        return _vols
I am trying to implement the code on this website to estimate what value of K I should use for my K means clustering.
https://datasciencelab.wordpress.com/2014/01/21/selection-of-k-in-k-means-clustering-reloaded/
However, I am not having any success. In particular, I am trying to produce the graph of f(k) against the number of clusters k, which I can use to pick the ideal value of k.
My data format is as follows:
Each data point has 5 dimensions/variables, i.e. the points live in a five-dimensional space.
The coordinates are listed below, one list per variable; for example, the first data point has coordinates (35.38361202590826, -24.022420305129415, 0.9608968122051765, -11.700331772145386, -9.4393980963685).
Variable1 = [35.38361202590826, 3.0, 10.0, 10.04987562112089, 5.385164807134505, 24.35159132377184, 10.77032961426901, 10.816653826391967, 18.384776310850235, 14.317821063276353, 24.18677324489565, 3.0, 24.33105012119288, 8.94427190999916, 2.82842712474619, 4.123105625617661, 4.47213595499958, 13.453624047073712, 12.529964086141668, 19.4164878389476, 5.385164807134505, 5.0, 24.041630560342618, 30.083217912982647, 15.132745950421555, 1.414213562373095, 21.470910553583888, 12.649110640673516, 9.0, 9.055385138137416, 16.124515496597102, 18.027756377319946, 7.615773105863908, 4.47213595499958, 5.0, 16.124515496597102, 8.246211251235321, 3.0, 23.02172886644268, 2.23606797749979, 10.0, 13.416407864998737, 14.7648230602334, 12.649110640673516, 2.82842712474619, 9.899494936611665, 12.806248474865697, 13.0, 10.19803902718557, 10.440306508910549]
Variable2 = [-24.022420305129415, -40.0, -21.0, -36.020346285601605, -14.298541039632994, -10.225204451297113, -7.242118188905023, -10.816653826391967, -16.263455967290593, -0.9079593845004517, -5.70559779110359, -1.0, -17.426292654367874, -0.4472135954999579, -12.727922061357855, -38.32062875574061, -15.205262246998569, -13.89960053482201, -6.943355894868313, -18.43793805396085, -14.298541039632994, -8.0, -9.899494936611665, -10.537436550735357, -9.251460406371256, -1.414213562373095, -0.23287321641631115, -4.743416490252569, -10.0, -25.951408627588936, -5.457528321925173, -11.648704120729812, -15.231546211727816, -9.838699100999074, -2.2, 4.713319914389921, -3.395498750508662, -32.0, -16.59301967354925, -4.47213595499958, -3.4, -13.416407864998737, 4.944183868793753, -3.478505426185217, -21.213203435596423, -18.384776310850235, -6.871645523098667, -21.0, -5.491251783869154, -8.620436566990362]
Variable3 = [0.9608968122051765, 22.0, 21.0, 18.507691737905798, 15.412713068695306, -8.08982038917884, -0.7427813527082074, -7.211102550927978, -14.849242404917499, -0.4190581774617469, -10.170848236315095, -7.0, 1.150792911137501, -5.366563145999495, -12.727922061357855, 4.85071250072666, 9.838699100999074, -8.473553267217696, 6.065460321953928, -10.249021432229634, 4.642383454426297, -9.0, 9.899494936611665, 4.354587344310195, -8.854969246098202, -8.48528137423857, -10.292996165600954, -11.067971810589327, -30.0, -10.932721081409808, -14.6360986815266, -22.188007849009164, 0.0, -7.155417527999327, -5.4, -12.279438724331637, 19.40285000290664, -7.0, 18.938629784469825, 8.94427190999916, 3.8, -8.94427190999916, -43.549455173073746, -8.538149682454623, -11.31370849898476, 1.4142135623730951, -10.619815808425212, 12.0, 7.060180864974626, -7.854175538813441]
Variable4 = [-11.700331772145386, -8.0, -5.0, -2.9851115706299676, -10.398938937914904, -8.459406092237773, -7.242118188905023, -10.539303728279352, -21.920310216782973, -8.03194840135015, -10.791021909261136, -10.0, -9.69954025101608, -2.6832815729997477, -23.33452377915607, -7.761140001162655, -17.44133022449836, -4.980070779856015, -2.7134954071899156, -6.48933015307002, -12.441587657862476, -5.2, -18.384776310850235, -10.603918800266811, -14.604091070057484, -4.949747468305833, -1.3506646552146047, -7.905694150420948, -14.0, -29.706080514133717, -2.4806946917841692, -23.574758339572238, -3.2826608214930637, -5.813776741499453, -13.4, -4.9613893835683385, -11.884245626780316, -19.0, -5.473090258814675, -2.23606797749979, -2.0, -2.6832815729997477, -6.163297699455227, -12.01665510863984, -12.727922061357855, -12.020815280171307, -8.589556903873333, -18.53846153846154, -5.491251783869154, -4.789131426105757]
Variable5 = [-9.4393980963685, -4.0, -2.0, -0.29851115706299675, -9.84185292338375, 6.118696639531204, -6.127946159842712, -2.218800784900916, 10.606601717798213, 0.6984302957695782, 0.7442084075352507, -0.0, 3.452378733412503, 1.3416407864998738, -6.363961030678928, 6.305926250944657, -5.813776741499453, -0.4459764877482998, -0.7980868844676221, 7.673890419106611, -1.4855627054164149, 1.4, -2.8284271247461903, -2.925218979383948, 3.9649116027305387, 0.7071067811865475, 0.4191717895493601, 1.5811388300841895, -4.0, 4.748555621218401, 4.341215710622296, 4.714951667914447, -5.120950881529179, 4.919349550499537, 6.2, 0.6201736729460423, -6.305926250944657, -9.0, -6.168085847235585, 0.0, -1.0, 1.3416407864998738, 3.3186987612451224, 4.427188724235731, 4.242640687119285, 4.949747468305833, 5.9346029517670305, 2.3076923076923075, -3.1378581622109447, 1.436739427831727]
I am able to use scikit-learn to create clusters from these coordinates. However, I am interested in finding the optimal value of k, and scikit-learn does not have a feature for estimating it with this technique (or with any technique, as far as I am aware).
You can try the code in the last comment by Monte Shaffer.
Here's a simplified version:
import numpy as np
import random
from numpy import zeros
class KMeansFK():
    """K-means clustering that also computes the f(K) statistic for choosing K.

    ``X`` is a sequence of points, one row per point; the points must support
    subtraction and ``np.linalg.norm`` (e.g. rows of a 2-D NumPy array).
    After ``run(maxk)``, ``self.fs[k-1]`` holds f(K) for K = 1 .. maxk-1, and
    ``self.mu`` / ``self.clusters`` hold the centres and clusters of the last
    K that was tried.
    """

    def __init__(self, K, X):
        self.K = K
        self.X = X
        self.N = len(X)
        self.mu = None        # current list of centres
        self.clusters = None  # dict: centre index -> list of points
        self.method = None    # initialisation method used by find_centers

    def _cluster_points(self):
        """Assign every point to the index of its nearest current centre."""
        mu = self.mu
        clusters = {}
        for x in self.X:
            # Ties go to the lowest centre index.
            bestmukey = min(range(len(mu)),
                            key=lambda i: np.linalg.norm(x - mu[i]))
            clusters.setdefault(bestmukey, []).append(x)
        self.clusters = clusters

    def _reevaluate_centers(self):
        """Replace the centres by the mean of each non-empty cluster."""
        clusters = self.clusters
        # Centres are rebuilt in sorted key order; empty clusters drop out.
        self.mu = [np.mean(clusters[k], axis=0) for k in sorted(clusters)]

    def _has_converged(self):
        """Return True when the centres did not change in the last update."""
        K = len(self.oldmu)
        current = {tuple(a) for a in self.mu}
        previous = {tuple(a) for a in self.oldmu}
        return current == previous and len(current) == K

    def find_centers(self, K, method='random'):
        """Run K-means iterations until the centres stop changing.

        Args:
            K: number of clusters. (The original ignored this argument and
                always used ``self.K``; it is now honoured.)
            method: ``'++'`` starts from the centres already in ``self.mu``
                (as produced by ``init_centers``); any other value starts
                from ``K`` points sampled at random from ``X``.
        """
        self.method = method
        self.K = K
        if method != '++':
            # Initialize to K random centers
            self.mu = random.sample(list(self.X), K)
        elif self.mu is None or len(self.mu) != K:
            # No usable seeds yet: create them instead of failing later.
            self.init_centers(K)
        # Always perform at least one assignment/update step. The original
        # compared against a random sample first and, when that happened to
        # equal the seeds, skipped the loop and left self.clusters unset.
        while True:
            self.oldmu = self.mu
            # Assign all points in X to clusters
            self._cluster_points()
            # Reevaluate centers
            self._reevaluate_centers()
            if self._has_converged():
                break

    def _dist_from_centers(self):
        """Store each point's squared distance to its nearest centre in D2."""
        cent = self.mu
        self.D2 = np.array([min(np.linalg.norm(x - c) ** 2 for c in cent)
                            for x in self.X])

    def _choose_next_center(self):
        """Pick a point with probability proportional to its D2 value."""
        total = self.D2.sum()
        if total == 0:
            # Every point coincides with an existing centre, so the weights
            # are undefined; fall back to a uniform choice.
            return self.X[random.randrange(self.N)]
        self.probs = self.D2 / total
        self.cumprobs = self.probs.cumsum()
        r = random.random()
        # First index whose cumulative probability reaches r. The clamp
        # protects against cumprobs[-1] landing just below r by round-off.
        ind = min(int(np.searchsorted(self.cumprobs, r)), self.N - 1)
        return self.X[ind]

    def init_centers(self, K):
        """Seed ``self.mu`` with K centres using D2-weighted sampling.

        The first centre is drawn uniformly from X; each further centre is
        drawn by ``_choose_next_center``.
        """
        self.K = K
        self.mu = random.sample(list(self.X), 1)
        while len(self.mu) < self.K:
            self._dist_from_centers()
            self.mu.append(self._choose_next_center())

    def get_ak(self, k, Nd):
        """Return the weight a_k used by f(K) for ``k >= 2`` and ``Nd`` dimensions.

        a_2 = 1 - 3 / (4 * Nd) and a_k = a_(k-1) + (1 - a_(k-1)) / 6.

        Raises:
            ValueError: if ``k < 2`` (the recurrence has no such term).
        """
        if k < 2:
            raise ValueError("get_ak is only defined for k >= 2")
        a = 1 - 3.0 / (4.0 * Nd)
        for _ in range(k - 2):
            a = a + (1.0 - a) / 6.0
        return a

    def fK(self, thisk, Skm1=0):
        """Cluster with ``thisk`` centres and return ``(f(thisk), S_k)``.

        Args:
            thisk: number of clusters to fit.
            Skm1: the S value obtained for ``thisk - 1`` clusters.

        Returns:
            ``(fs, Sk)`` where ``Sk`` is the sum of squared distances from
            each point to its cluster centre, and ``fs`` is 1 when
            ``thisk == 1`` or ``Skm1 == 0``, otherwise
            ``Sk / (a_k * Skm1)``.
        """
        X = self.X
        Nd = len(X[0])
        self.find_centers(thisk, method='++')
        mu, clusters = self.mu, self.clusters
        # Pair centres with clusters the same way _reevaluate_centers builds
        # mu (sorted keys), so a dropped empty cluster cannot raise KeyError.
        Sk = sum(np.linalg.norm(center - point) ** 2
                 for key, center in zip(sorted(clusters), mu)
                 for point in clusters[key])
        if thisk == 1 or Skm1 == 0:
            fs = 1
        else:
            fs = Sk / (self.get_ak(thisk, Nd) * Skm1)
        return fs, Sk

    def run(self, maxk):
        """Compute f(K) for K = 1 .. maxk-1 and store the values in ``self.fs``.

        Raises:
            ValueError: if ``maxk < 2`` (there would be no K to evaluate).
        """
        if maxk < 2:
            raise ValueError("maxk must be at least 2")
        ks = range(1, maxk)
        fs = zeros(len(ks))
        # Special case K=1
        self.init_centers(1)
        fs[0], Sk = self.fK(1)
        # Rest of Ks
        for k in ks[1:]:
            self.init_centers(k)
            fs[k-1], Sk = self.fK(k, Skm1=Sk)
        self.fs = fs
And then run it on your data:
# Stack the five variables as columns so that X has one row per data point
# (shape (n_points, 5)). np.array([Variable1, ...]) alone would give one row
# per variable, i.e. 5 "points" with 50 coordinates each.
X = np.column_stack([Variable1, Variable2, Variable3, Variable4, Variable5])
# Assumes the class above was saved as kmeans.py and imported with `import kmeans`.
km = kmeans.KMeansFK(2, X)
km.run(5)
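Now km.fs holds f(K) for K = 1 to 4 (plot it against K and look for the K with the smallest value), and km.clusters holds the clusters from the last K that was tried.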