Plotting with subplots in a loop - Python

import matplotlib.pyplot as plt

z = {'A': [0.3618426, 0.36146951], 'B': [1.8908799, 1.904695], 'C': [2.1813462e+08, 2.1833622e+08], 'D': [0.89925492, 0.89953589], 'E': [2.6356747, 2.6317911], 'F': [2.2250445e+08, 2.2501808e+08], 'G': [2.0806053e+08, 2.0691238e+08], 'H': [0.37242803, 0.37611806]}
k = [1, 2]
for key in z:
    plt.subplot(4, 4, 1)
    plt.plot(k, [z[key][0], z[key][1]], 'ro-')
plt.show()
I will try to be clear. z is a dictionary that varies in size. I would like to plot the dictionary quantities in, say, 4 columns, with the number of rows growing with the number of plots; for example, if there are 16 keys to plot, I should end up with a 4-row, 4-column figure. How can I do this?

The basic approach to drawing multiple graphs is the following. As a prerequisite, you only need to decide on the number of columns, e.g. cols = 3. The rest of the looping is driven by the number of dictionary entries.
import matplotlib.pyplot as plt
import math

z = {'A': [0.3618426, 0.36146951], 'B': [1.8908799, 1.904695], 'C': [2.1813462e+08, 2.1833622e+08], 'D': [0.89925492, 0.89953589], 'E': [2.6356747, 2.6317911], 'F': [2.2250445e+08, 2.2501808e+08], 'G': [2.0806053e+08, 2.0691238e+08], 'H': [0.37242803, 0.37611806]}
k = [1, 2]

cols = 3
rows = math.ceil(len(z) / cols)
fig, axes = plt.subplots(rows, cols, figsize=(16, 12))
dict_keys = list(z.keys())
l = 0
for i in range(rows):
    for j in range(cols):
        if len(z) == l:
            break
        else:
            key = dict_keys[i+j]
            axes[i][j].plot(k, [z[key][0], z[key][1]], 'ro-')
            l += 1
plt.show()

r-beginners' answer would be perfect if it didn't duplicate some subplots (the index i+j repeats values). I made two minor modifications:
import matplotlib.pyplot as plt
import math

z = {'A': [0.3618426, 0.36146951], 'B': [1.8908799, 1.904695], 'C': [2.1813462e+08, 2.1833622e+08], 'D': [0.89925492, 0.89953589],
     'E': [2.6356747, 2.6317911], 'F': [2.2250445e+08, 2.2501808e+08], 'G': [2.0806053e+08, 2.0691238e+08], 'H': [0.37242803, 0.37611806]}
k = [1, 2]

cols = 3
rows = math.ceil(len(z) / cols)
fig, axes = plt.subplots(rows, cols, figsize=(16, 12))
dict_keys = list(z.keys())
l = 0
for i in range(rows):
    for j in range(cols):
        if len(z) == l:
            break
        else:
            key = dict_keys[l]  # modified: index by the running counter, not i+j
            axes[i][j].plot(k, [z[key][0], z[key][1]], 'ro-')
            l += 1  # modified
plt.show()

Here is some code which may help you:
import matplotlib.pyplot as plt
from math import ceil

# 9 elements in my dict (2*4 + 1)
z = {'A': [0.3618426, 0.36146951], 'B': [1.8908799, 1.904695], 'C': [2.1813462e+08, 2.1833622e+08], 'D': [0.89925492, 0.89953589], 'E': [2.6356747, 2.6317911], 'F': [2.2250445e+08, 2.2501808e+08], 'G': [2.0806053e+08, 2.0691238e+08], 'H': [0.37242803, 0.37611806], 'X': [0.37242803, 0.37611806]}
k = [1, 2]
plt.subplots(figsize=(16, 8))  # optional
# fixed number of columns
cols = 4
# number of rows, based on cols
rows = ceil(len(z) / cols)
# iterate through indices and keys
for index, key in enumerate(z):
    # new subplot at the (index + 1)-th position on the grid
    plt.subplot(rows, cols, index + 1)
    # draw the plot
    plt.plot(k, [z[key][0], z[key][1]], 'ro-')
# render everything
plt.show()
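As a side note, a minimal sketch of my own variation (assuming the same z and k as above): with fig, axes = plt.subplots(...) you can iterate over axes.flat and hide any leftover axes, which avoids the manual row/column bookkeeping entirely.
import math
import matplotlib.pyplot as plt

cols = 4
rows = math.ceil(len(z) / cols)
fig, axes = plt.subplots(rows, cols, figsize=(16, 8))
# pair each key with one axis; axes.flat walks the grid row by row
for ax, key in zip(axes.flat, z):
    ax.plot(k, z[key], 'ro-')
    ax.set_title(key)
# hide the axes that received no data
for ax in axes.flat[len(z):]:
    ax.set_visible(False)
plt.show()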

Two constraints setting together in optimization problem

I am working on an optimization problem and am having difficulty setting up two constraints together in Python. Below, I simplify my problem to the calculation of area and volume. Only length can be changed; the other parameters must remain the same.
Constraint 1: the maximum area should be 40000 m2.
Constraint 2: the minimum volume should be 50000 m3.
I can set values in the dataframe following each constraint one-by-one; how can I modify the code so that both constraints (1 & 2) are met at the same time?
Many thanks for your time and support!
import pandas as pd
import numpy as np

df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D'],
                   'Length': [1000, 2000, 3000, 5000],
                   'Width': [5, 12, 14, 16],
                   'Depth': [15, 10, 15, 18]})
area = df['Length'] * df['Width']
volume = df['Length'] * df['Width'] * df['Depth']
print(area)
print(volume)

# Width and Depth are constants, only Length can change
# Constraint 1: maximum area should be 40000 m2
# Length implied by the maximum area, given the other parameters
Constraint_length_a = 40000 / df['Width']
# Constraint 2: minimum volume should be 50000 m3
# Length implied by the minimum volume, given the other parameters
Constraint_length_v = 50000 / (df['Width'] * df['Depth'])
# Setting Length values considering constraint 1
df.at[0, 'Length'] = Constraint_length_a[0]
df.at[1, 'Length'] = Constraint_length_a[1]
df.at[2, 'Length'] = Constraint_length_a[2]
df.at[3, 'Length'] = Constraint_length_a[3]
# Setting Length values considering constraint 2
df.at[0, 'Length'] = Constraint_length_v[0]
df.at[1, 'Length'] = Constraint_length_v[1]
df.at[2, 'Length'] = Constraint_length_v[2]
df.at[3, 'Length'] = Constraint_length_v[3]
I believe the code below solves the problem you are currently facing.
If I can help any further, let me know.
import pandas as pd
import numpy as np

df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D'],
                   'Length': [1000, 2000, 3000, 5000],
                   'Width': [5, 12, 14, 16],
                   'Depth': [15, 10, 15, 18]})
area = df['Length'] * df['Width']
volume = df['Length'] * df['Width'] * df['Depth']

def constraint1(df, col, n):
    # work on a copy so the original Length column survives for the other calls
    df = df.copy()
    df.loc[:n, 'Length'] = 40000 / df.loc[:n, col]
    return df

def constraint2(df, col, col1, n):
    df = df.copy()
    df.loc[:n, 'Length'] = 50000 / (df.loc[:n, col] * df.loc[:n, col1])
    return df

If you want to apply it across the whole column, then:
def constraint1a(df, col):
    df = df.copy()
    df['Length'] = 40000 / df[col]
    return df

def constraint2a(df, col, col1):
    df = df.copy()
    df['Length'] = 50000 / (df[col] * df[col1])
    return df

df1 = constraint1(df, 'Width', 3)
df2 = constraint2(df, 'Width', 'Depth', 3)
df3 = constraint1a(df, 'Width')
df4 = constraint2a(df, 'Width', 'Depth')
Adding the conditions I left out the first time:
def constraint1(df, col, col1):
    # cap Length so that Length * Width never exceeds the maximum area
    l = []
    for x, w in zip(df[col], df[col1]):
        if x * w > 40000:
            l.append(40000 / w)
        else:
            l.append(x)
    df[col] = l
    return df

def constraint2(df, col, col1, col2):
    # raise Length so that Length * Width * Depth reaches the minimum volume
    l = []
    for x, w, d in zip(df[col], df[col1], df[col2]):
        if x * w * d < 50000:
            l.append(50000 / (w * d))
        else:
            l.append(x)
    df[col] = l
    return df

df1 = constraint1(df, 'Length', 'Width')
df2 = constraint2(df, 'Length', 'Width', 'Depth')
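If both constraints must hold at once, here is a minimal vectorized sketch (my own addition, not from the answer above): compute the maximum length allowed by the area bound and the minimum length required by the volume bound, then clip Length into that band wherever it is feasible (i.e. wherever the minimum does not exceed the maximum).
import pandas as pd

df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D'],
                   'Length': [1000, 2000, 3000, 5000],
                   'Width': [5, 12, 14, 16],
                   'Depth': [15, 10, 15, 18]})

max_len = 40000 / df['Width']                  # from area <= 40000 m2
min_len = 50000 / (df['Width'] * df['Depth'])  # from volume >= 50000 m3
feasible = min_len <= max_len                  # rows where both bounds can hold
# clip Length into [min_len, max_len] on the feasible rows
df.loc[feasible, 'Length'] = df.loc[feasible, 'Length'].clip(
    lower=min_len[feasible], upper=max_len[feasible])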

Weighted network between cluster centroids - Python

The following plots vectors derived from two sets of data points. I also measure and plot the centroids of these points using k-means clustering.
I'm hoping to derive some form of adjacency matrix and plot the network between the clusters, weighted by the number of vectors running between them.
I was thinking the diagonal values of the adjacency matrix could indicate the number of vectors within the same cluster, while the off-diagonal values could indicate the number of vectors between different clusters, taking direction into account.
I'm hoping to produce an output like the one below, where the nodes are the cluster centroids, the diameter of a node indicates the number of vectors within that cluster, and the line thickness is the number of vectors between two clusters.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans

fig, ax = plt.subplots(figsize=(6, 6))
df = pd.DataFrame(np.random.randint(-80, 80, size=(500, 4)), columns=list('ABCD'))
A = df['A']
B = df['B']
C = df['C']
D = df['D']
Y_sklearn = df[['A', 'B', 'C', 'D']].values
ax.quiver(A, B, (C-A), (D-B), angles='xy', scale_units='xy', scale=1, alpha=0.5)

model = KMeans(n_clusters=20)
model.fit(Y_sklearn)
cluster_centers = model.cluster_centers_

plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1],
            color='black', s=100, alpha=0.7, zorder=2)
plt.scatter(Y_sklearn[:, 0], Y_sklearn[:, 1], color='blue', alpha=0.2)
plt.scatter(Y_sklearn[:, 2], Y_sklearn[:, 3], color='red', alpha=0.2)
Edit 2:
If I fix the data to get the intended network below, the following plots a total of 12 vectors. Two groups of 5 vectors overlap, while two are unique.
df = pd.DataFrame({
    'A': [5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 5, 4],
    'B': [5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 2],
    'C': [7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 7],
    'D': [7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 7],
})
fig, ax = plt.subplots()
ax.set_xlim(0, 10)
ax.set_ylim(0, 10)
A = df['A']
B = df['B']
C = df['C']
D = df['D']
ax.quiver(A, B, (C-A), (D-B), angles='xy', scale_units='xy', scale=1, alpha=0.5)
If I just plot the scatter with cluster centroids, it should look like the following:
Y_sklearn = df[['A', 'B', 'C', 'D']].values
model = KMeans(n_clusters=4)
model.fit(Y_sklearn)
cluster_centers = model.cluster_centers_
plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1],
            color='black', s=100, alpha=0.7, zorder=2)
plt.scatter(cluster_centers[:, 2], cluster_centers[:, 3],
            color='black', s=100, alpha=0.7, zorder=2)
This all works fine. The next step is where I'm having trouble. If I plot the network manually, it should look something like this. The thicker lines display 5 vectors between centroids, while the thinner lines display 1 vector.
The updated code below produces the following network. The line from (5,5) to (7,7) is correct, but I'm not getting the other lines needed to replicate the network above.
import networkx as nx
from collections import Counter

fig, ax = plt.subplots()
ax.set_xlim(0, 10)
ax.set_ylim(0, 10)

def kmeans(arr, num_clusters):
    model = KMeans(n_clusters=num_clusters)
    model.fit(arr)
    cluster_centers = model.cluster_centers_
    all_labels = model.labels_
    mem_count = Counter(all_labels)
    return cluster_centers, all_labels, mem_count

nclusters_1, nclusters_2 = 2, 2
points = df[['A', 'B', 'C', 'D']].values
cluster_one = kmeans(points[:, :2], nclusters_1)
cluster_two = kmeans(points[:, 2:], nclusters_2)

# find connections between clusters
all_combs = [[n1, n2] for n1 in range(nclusters_1) for n2 in range(nclusters_2)]
num_connections = {}
for item in all_combs:
    l1, l2 = cluster_one[1], cluster_two[1]
    mask1 = np.where(l1 == item[0])[0]
    mask2 = np.where(l2 == item[1])[0]
    num_common = len(set(mask1).intersection(mask2))
    num_connections[(item[0], item[1] + nclusters_1)] = num_common

G = nx.Graph()
node_sizes = {}
node_colors = {}
for k, v in num_connections.items():
    # the number of points in the two clusters
    s1, s2 = cluster_one[2][k[0]], cluster_two[2][k[1] - nclusters_1]
    G.add_node(k[0], pos=points[:, :2][k[0]])
    G.add_node(k[1], pos=points[:, 2:][k[1]])
    G.add_edge(k[0], k[1], color='k', weight=v/3)
    node_sizes[k[0]] = s1; node_sizes[k[1]] = s2
    node_colors[k[0]] = 'k'; node_colors[k[1]] = 'k'

edges = G.edges()
d = dict(G.degree)
pos = nx.get_node_attributes(G, 'pos')
weights = [G[u][v]['weight'] for u, v in edges]
nx.draw(G, pos, edgelist=edges,
        node_color=[node_colors[v] for v in d.keys()],
        nodelist=d.keys(),
        width=weights,
        node_size=[node_sizes[v]*20 for v in d.keys()])
Before solving the problem, I suggest the KMeans clustering should be performed on the first two columns of the df and on the other two columns separately. If you apply KMeans clustering directly to the entire df, the connections between the (A,B) clusters and the (C,D) clusters will be trivial.
If it is possible to use networkx in your project, here is how you can achieve what you are looking for. First, prepare the data:
import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

def kmeans(arr, num_clusters):
    model = KMeans(n_clusters=num_clusters)
    model.fit(arr)
    cluster_centers = model.cluster_centers_
    all_labels = model.labels_
    mem_count = Counter(all_labels)
    return cluster_centers, all_labels, mem_count

df = pd.DataFrame(np.random.randint(-80, 80, size=(200, 4)), columns=list('ABCD'))
# tail
A, B = df['A'], df['B']
# end
C, D = df['C'], df['D']

nclusters_1, nclusters_2 = 20, 20
points = df[['A', 'B', 'C', 'D']].values
cluster_one = kmeans(points[:, :2], nclusters_1)
cluster_two = kmeans(points[:, 2:], nclusters_2)

# find connections between clusters
all_combs = [[n1, n2] for n1 in range(nclusters_1) for n2 in range(nclusters_2)]
num_connections = {}
for item in all_combs:
    l1, l2 = cluster_one[1], cluster_two[1]
    mask1 = np.where(l1 == item[0])[0]
    mask2 = np.where(l2 == item[1])[0]
    num_common = len(set(mask1).intersection(mask2))
    num_connections[(item[0], item[1] + nclusters_1)] = num_common
As you can see in the code above, I performed KMeans clustering on (A,B) and (C,D) separately, and you can use different numbers of clusters for the two sets of points via nclusters_1 and nclusters_2. After the data is prepared, we can now visualize it using networkx:
# plot the graph
import networkx as nx

G = nx.Graph()
node_sizes = {}
node_colors = {}
for k, v in num_connections.items():
    # the number of points in the two clusters
    s1, s2 = cluster_one[2][k[0]], cluster_two[2][k[1] - nclusters_1]
    G.add_node(k[0], pos=cluster_one[0][k[0]])
    G.add_node(k[1], pos=cluster_two[0][k[1] - nclusters_1])
    G.add_edge(k[0], k[1], color='k', weight=v/3)
    node_sizes[k[0]] = s1; node_sizes[k[1]] = s2
    node_colors[k[0]] = 'navy'; node_colors[k[1]] = 'darkviolet'

edges = G.edges()
d = dict(G.degree)
pos = nx.get_node_attributes(G, 'pos')
weights = [G[u][v]['weight'] for u, v in edges]
nx.draw(G, pos, edgelist=edges,
        node_color=[node_colors[v] for v in d.keys()],
        nodelist=d.keys(),
        width=weights,
        node_size=[node_sizes[v]*20 for v in d.keys()])
The output figure looks like this:
In this figure, the actual centroid positions are used for plotting; the (A,B) clusters are colored navy and the (C,D) clusters are colored dark violet. If you want to scale up the node size, just use a number other than 20 in this line:
node_size=[node_sizes[v]*20 for v in d.keys()]
In order to adjust the width of the edges, you can use a number other than 3 in this line:
G.add_edge(k[0], k[1], color='k', weight=v/3)
UPDATE
In the script above, the keys of num_connections represent pairs of graph nodes, and the corresponding values represent the numbers of connections. To extract an adjacency matrix from num_connections, you can try:
adj_mat = np.zeros((nclusters_1, nclusters_2))
for k, v in num_connections.items():
    entry = (k[0], k[1] - nclusters_1)
    adj_mat[entry] = v
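As a quick sanity check (my own sketch, not part of the original answer): by construction, row i of adj_mat sums to the number of points whose tail lies in tail-cluster i, and column j sums to the size of head-cluster j.
# each point has exactly one head label, so row sums give tail-cluster sizes
assert all(adj_mat.sum(axis=1)[i] == cluster_one[2][i] for i in range(nclusters_1))
# and column sums give head-cluster sizes
assert all(adj_mat.sum(axis=0)[j] == cluster_two[2][j] for j in range(nclusters_2))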
UPDATE 2
To resolve the issue in the OP's updated post:
add pos=nx.get_node_attributes(G,'pos') to fix the node positions;
change G.add_node(k[0],pos=points[:,:2][k[0]]) and G.add_node(k[1],pos=points[:,2:][k[1]]) to G.add_node(k[0],pos=cluster_one[0][k[0]]) and G.add_node(k[1],pos=cluster_two[0][k[1]-nclusters_1]).
With these two modifications, the full script becomes:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.cluster import KMeans

df = pd.DataFrame({
    'A': [5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 5, 4],
    'B': [5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 5, 2],
    'C': [7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 7],
    'D': [7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 7],
})
fig, ax = plt.subplots()
ax.set_xlim(0, 10)
ax.set_ylim(0, 10)
A = df['A']
B = df['B']
C = df['C']
D = df['D']

def kmeans(arr, num_clusters):
    model = KMeans(n_clusters=num_clusters)
    model.fit(arr)
    cluster_centers = model.cluster_centers_
    all_labels = model.labels_
    mem_count = Counter(all_labels)
    return cluster_centers, all_labels, mem_count

nclusters_1, nclusters_2 = 2, 2
points = df[['A', 'B', 'C', 'D']].values
cluster_one = kmeans(points[:, :2], nclusters_1)
cluster_two = kmeans(points[:, 2:], nclusters_2)

# find connections between clusters
all_combs = [[n1, n2] for n1 in range(nclusters_1) for n2 in range(nclusters_2)]
num_connections = {}
for item in all_combs:
    l1, l2 = cluster_one[1], cluster_two[1]
    mask1 = np.where(l1 == item[0])[0]
    mask2 = np.where(l2 == item[1])[0]
    num_common = len(set(mask1).intersection(mask2))
    num_connections[(item[0], item[1] + nclusters_1)] = num_common

G = nx.Graph()
node_sizes = {}
node_colors = {}
for k, v in num_connections.items():
    # the number of points in the two clusters
    s1, s2 = cluster_one[2][k[0]], cluster_two[2][k[1] - nclusters_1]
    G.add_node(k[0], pos=cluster_one[0][k[0]])
    G.add_node(k[1], pos=cluster_two[0][k[1] - nclusters_1])
    G.add_edge(k[0], k[1], color='k', weight=v/3)
    node_sizes[k[0]] = s1; node_sizes[k[1]] = s2
    node_colors[k[0]] = 'k'; node_colors[k[1]] = 'k'

edges = G.edges()
d = dict(G.degree)
pos = nx.get_node_attributes(G, 'pos')
weights = [G[u][v]['weight'] for u, v in edges]
nx.draw(G, pos, edgelist=edges,
        node_color=[node_colors[v] for v in d.keys()],
        nodelist=d.keys(),
        width=weights,
        node_size=[node_sizes[v]*20 for v in d.keys()])

Verbose output for networkx pagerank

Suppose I create the following directed graph using networkx and run the PageRank algorithm on it:
import networkx as nx

adj_lists = {
    'A': 'B C'.split(' '),
    'B': 'C',
    'C': 'A',
    'D': 'C',
}
G = nx.DiGraph()
for k in adj_lists.keys():
    G.add_node(k)
for k in adj_lists.keys():
    G.add_edges_from([(k, t) for t in adj_lists[k]])
nx.pagerank(G, alpha=1)
Is it possible to get a verbose output telling me the development of each node's value, or even to generate a list which shows their progress? I am thinking about something like this:
[
    {'A': 0.25, 'B': 0.25, 'C': 0.25, 'D': 0.25},
    {'A': 0.25, 'B': 0.125, 'C': 0.625, 'D': 0},
    {'A': 0.625, 'B': 0.3125, 'C': 0.4375, 'D': 0},
    ...
]
I've made a direct modification of the networkx pagerank algorithm to store the values of each iteration in a list.
import networkx as nx

def verbose_pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = {k: v / s for k, v in nstart.items()}
    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = {k: v / s for k, v in personalization.items()}
    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = {k: v / s for k, v in dangling.items()}
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # power iteration: make up to max_iter iterations
    iterprogress = []
    for i in range(max_iter):
        xlast = x
        iterprogress.append(x)
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T = xlast^T * W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
        # check convergence, l1 norm
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            iterprogress.append(x)
            return iterprogress
    raise nx.PowerIterationFailedConvergence(max_iter)
Then use the function verbose_pagerank the same way you did with nx.pagerank:
adj_lists = {
    'A': 'B C'.split(' '),
    'B': 'C',
    'C': 'A',
    'D': 'C',
}
G = nx.DiGraph()
for k in adj_lists.keys():
    G.add_node(k)
for k in adj_lists.keys():
    G.add_edges_from([(k, t) for t in adj_lists[k]])
pr = verbose_pagerank(G, alpha=1)
for i in pr:
    print(i)
Output:
{'A': 0.25, 'B': 0.25, 'C': 0.25, 'D': 0.25}
{'A': 0.25, 'B': 0.125, 'C': 0.625, 'D': 0.0}
{'A': 0.625, 'B': 0.125, 'C': 0.25, 'D': 0.0}
...
{'A': 0.40000057220458984, 'B': 0.20000028610229492, 'C': 0.39999914169311523, 'D': 0.0}
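If you would rather visualize the progress than print it, here is a small follow-up sketch (my own addition, assuming the pr list returned above): plot each node's value per iteration.
import matplotlib.pyplot as plt

# pr is the list of per-iteration {node: value} dicts from verbose_pagerank
for node in pr[0]:
    plt.plot(range(len(pr)), [step[node] for step in pr], label=node)
plt.xlabel('iteration')
plt.ylabel('PageRank value')
plt.legend()
plt.show()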

How can I make a dataset of matrix elements in a dataframe?

I have a dataset of 3 parameters 'A', 'B', 'C' in a .TXT file. After printing them as 24x20 matrices, I need to collect the 1st elements of 'A', 'B', 'C' and put them in long arrays in a pandas dataframe, then the 2nd elements of each, then the 3rd, and so on up to the 480th elements.
My data looks like this in the text file:
id_set: 000
A: -2.46882615679
B: -2.26408246559
C: -325.004619528
I have already made a pandas dataframe that includes the 3 columns 'A', 'B', 'C' and an index, and defined functions to print a 24x20 matrix the right way. A simple example via 2x2 matrices:
1st cycle:  A = [1,2,    B = [4,5,    C = [8,9,
                 3,4]         6,7]         10,11]
2nd cycle:  A = [0,8,    B = [1,9,    C = [10,1,
                 2,5]         4,8]          2,7]
Reshape to this form:
A(1,1), B(1,1), C(1,1), A(1,2), B(1,2), C(1,2), ...
Result = [1,4,8,2,5,9,3,6,10,4,7,11]   # 1st cycle
         [0,1,10,8,9,1,2,4,2,5,8,7]    # 2nd cycle
My scripts are following:
import numpy as np
import pandas as pd
import os
def normalize(value, min_value, max_value, min_norm, max_norm):
new_value = ((max_norm - min_norm)*((value - min_value)/(max_value - min_value))) + min_norm
return new_value
dft = pd.read_csv('D:\mc25.TXT', header=None)
id_set = dft[dft.index % 4 == 0].astype('int').values
A = dft[dft.index % 4 == 1].values
B = dft[dft.index % 4 == 2].values
C = dft[dft.index % 4 == 3].values
data = {'A': A[:,0], 'B': B[:,0], 'C': C[:,0]}
df = pd.DataFrame(data, columns=['A','B','C'], index = id_set[:,0])
#next iteration create all plots, change the number of cycles
cycles = int(len(df)/480)
print(cycles)
for cycle in range(0,10):
count = '{:04}'.format(cycle)
j = cycle * 480
for i in df:
try:
os.mkdir(i)
except:
pass
min_val = df[i].min()
min_nor = -1
max_val = df[i].max()
max_nor = 1
ordered_data = mkdf(df.iloc[j:j+480][i])
csv = print_df(ordered_data)
#Print .csv files contains matrix of each parameters by name of cycles respectively
csv.to_csv(f'{i}/{i}{count}.csv', header=None, index=None)
if 'C' in i:
min_nor = -40
max_nor = 150
#Applying normalization for C between [-40,+150]
new_value3 = normalize(df['C'].iloc[j:j+480], min_val, max_val, -40, 150)
df3 = print_df(mkdf(new_value3))
df3.to_csv(f'{i}/norm{i}{count}.csv', header=None, index=None)
else:
#Applying normalization for A,B between [-1,+1]
new_value1 = normalize(df['A'].iloc[j:j+480], min_val, max_val, -1, 1)
new_value2 = normalize(df['B'].iloc[j:j+480], min_val, max_val, -1, 1)
df1 = print_df(mkdf(new_value1))
df2 = print_df(mkdf(new_value2))
df1.to_csv(f'{i}/norm{i}{count}.csv', header=None, index=None)
df2.to_csv(f'{i}/norm{i}{count}.csv', header=None, index=None)
Note 2: I provided a dataset in a text file for 3 cycles:
Text dataset
I am not sure if I understood your question fully, but here is a solution:
Convert your dataframe to a 2d numpy array with to_numpy() (as_matrix() in older pandas versions), then use ravel() to get a vector of size 480 * 3. Then cycle over your cycles and use vstack to stack the result rows over each other. Here is the code with your example data:
import numpy as np
import pandas as pd

A = [[1, 2, 3, 4], [10, 20, 30, 40]]
B = [[4, 5, 6, 7], [40, 50, 60, 70]]
C = [[8, 9, 10, 11], [80, 90, 100, 110]]
cycles = 2
for cycle in range(cycles):
    data = {'A': A[cycle], 'B': B[cycle], 'C': C[cycle]}
    df = pd.DataFrame(data)
    D = df.to_numpy().ravel()  # row-major: A, B, C interleaved per element
    if cycle == 0:
        Results = np.array(D)
    else:
        Results = np.vstack((Results, D))
# Output: Results = array([[  1,   4,   8,   2,   5,   9,   3,   6,  10,   4,   7,  11],
#                          [ 10,  40,  80,  20,  50,  90,  30,  60, 100,  40,  70, 110]], dtype=int64)
np.savetxt("Results.csv", Results, delimiter=",")
Is this what you wanted?
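A vectorized alternative (my own sketch, using the same per-cycle A, B, C lists as above): numpy can interleave the three parameters without the Python loop by stacking them on a trailing axis and reshaping per cycle.
import numpy as np

A = [[1, 2, 3, 4], [10, 20, 30, 40]]
B = [[4, 5, 6, 7], [40, 50, 60, 70]]
C = [[8, 9, 10, 11], [80, 90, 100, 110]]
# shape (cycles, elements, 3); row-major reshape interleaves A, B, C per element
stacked = np.stack([A, B, C], axis=-1)
Results = stacked.reshape(len(A), -1)
print(Results)
# [[  1   4   8   2   5   9   3   6  10   4   7  11]
#  [ 10  40  80  20  50  90  30  60 100  40  70 110]]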

Plotting a dictionary with multiple values per key

I have a dictionary that looks like this:
{1: ['4026', '4024', '1940', '2912', '2916'], 2: ['3139', '2464'], 3: ['212'], ...}
For a few hundred keys, I'd like to plot each key as the y value for its set of x values. I tried this bit of code, which gives the error underneath:
for rank, structs in b.items():
    y = b.keys()
    x = b.values()
    ax.plot(x, y, 'ro')
plt.show()
ValueError: setting an array element with a sequence
I'm at a bit of a loss on how to proceed so any help would be greatly appreciated!
You need to construct your list of Xs and Ys manually:
In [258]: b={1: ['4026', '4024', '1940', '2912', '2916'], 2: ['3139', '2464'], 3:['212']}
In [259]: xs, ys=zip(*((int(x), k) for k in b for x in b[k]))
In [260]: xs, ys
Out[260]: ((4026, 4024, 1940, 2912, 2916, 3139, 2464, 212), (1, 1, 1, 1, 1, 2, 2, 3))
In [261]: plt.plot(xs, ys, 'ro')
...: plt.show()
resulting in a single scatter plot of all x values against their keys.
1) Repeat your x values
plot expects a list of x values and a list of y values of the same length. That's why you have to repeat the rank value several times; itertools.repeat() can do that for you.
2) Change your iterator
items() already yields (key, value) tuples, so you don't have to use keys() and values() separately.
Here's the code:
import itertools
for rank, structs in b.items():
    x = list(itertools.repeat(rank, len(structs)))
    plt.plot(x, structs, 'ro')
3) Combine the plots
Using your code, you'd produce one plot per item in the dictionary. I guess you rather want to plot them all within a single graph. If so, change your code accordingly:
import itertools
x = []
y = []
for rank, structs in b.items():
    x.extend(itertools.repeat(rank, len(structs)))
    y.extend(structs)
plt.plot(x, y, 'ro')
4) Example
Here's an example using your data:
import itertools
import matplotlib.pyplot as plt

d = {1: ['4026', '4024', '1940', '2912', '2916'], 2: ['3139', '2464'], 3: ['212']}
x = []
y = []
for k, v in d.items():
    x.extend(itertools.repeat(k, len(v)))
    y.extend(v)
plt.xlim(0, 5)
plt.plot(x, y, 'ro')
plt.show()
This is because you mismatched your data entries.
Currently you have
1: ['4026', '4024', '1940', '2912', '2916'],
2: ['3139', '2464'],
...
hence
x = [1, 2, ...]
y = [['4026', '4024', '1940', '2912', '2916'], ['3139', '2464'], ...]
when you really need
x = [1, 1, 1, 1, 1, 2, 2, ...]
y = ['4026', '4024', '1940', '2912', '2916', '3139', '2464', ...]
Try
# match each key with one copy per value and flatten the list:
# produces x = [1, 1, 1, 1, 1, 2, 2, ...]
x = [key for (key, values) in b.items() for _ in range(len(values))]
# flatten the values list:
# produces y = ['4026', '4024', '1940', '2912', '2916', '3139', '2464', ...]
y = [val for subl in b.values() for val in subl]
ax.plot(x, y, 'ro')
plt.show()
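One caveat worth adding (my own note, not from the answers above): the y values here are strings, and recent matplotlib versions treat strings as unordered categorical labels rather than numbers. Converting them to integers first keeps the y axis numeric:
import matplotlib.pyplot as plt

b = {1: ['4026', '4024', '1940', '2912', '2916'], 2: ['3139', '2464'], 3: ['212']}
x = [key for key, values in b.items() for _ in values]
y = [int(val) for values in b.values() for val in values]
plt.plot(x, y, 'ro')
plt.show()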
