ISO Calendar week number - python

I am trying to generate the week numbers for the next 12 weeks starting from today, one per week, but week 53 appears to be missing from the result.
For example, the ISO calendar week for 2022-Jan-1 comes out as week 52.
from datetime import datetime, timedelta

current_dateTime = datetime.now()
temp_n = 12


def week_num_gen():
    """Collect the ISO week numbers of ``temp_n`` consecutive weekly dates.

    The first date is 1 January of ``current_dateTime.year``; every later
    date is seven days after the previous one. The result is stored in the
    module-level list ``wknum_gen`` and is also returned.

    Note: under ISO 8601, 1 January belongs to week 52 or 53 of the
    *previous* year whenever it falls on a Friday, Saturday or Sunday, so
    the list does not necessarily start at 1.
    """
    global wknum_gen
    first_day = datetime(current_dateTime.year, 1, 1)
    wknum_gen = [
        (first_day + timedelta(weeks=offset)).isocalendar().week
        for offset in range(temp_n)
    ]
    return wknum_gen


week_num_gen()
wklist_1 = list(wknum_gen)
wklist_1
result is below
[52, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
The expected result is below:
[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12]
and if i try to generate it from today for 12 weeks
Weeknum7=datetime(current_dateTime.year, 11,2)
result is below with error
[44, 45, 46, 47, 48, 49, 50, 51, 52, 1, 2, 3]
next 12 weeks
expected result is [ 45, 46, 47, 48, 49, 50, 51, 52, 53,1, 2,3]

Related

How to estimate similarity between sensor data based on the number of occurrence?

Following is my sample data:
data = {850.0: 6, -852.0: 5, 992.0: 29, -993.0: 25, 990.0: 27, -992.0: 28,
965.0: 127, 988.0: 37, -994.0: 24, 996.0: 14, -996.0: 19, -998.0: 19, 995.0: 17, 954.0: 71, -953.0: 64, 983.0: 48, 805.0: 20, 960.0: 97, 811.0: 23, 957.0: 98, 818.0: 9, -805.0: 10, -962.0: 128, 822.0: 5, 970.0: 115, 823.0: 6, 977.0: 86, 815.0: 11, 972.0: 118, -809.0: 3, -982.0: 77, 963.0: 129, 816.0: 15, 969.0: 131, 809.0: 13, -973.0: 115, 967.0: 141, 964.0: 110, 966.0: 141, -801.0: 11, -990.0: 33, 819.0: 8, 973.0: 113, -981.0: 71, 820.0: 16, 821.0: 10, -988.0: 42, 833.0: 7, 958.0: 92, -980.0: 98, 968.0: 138, -808.0: 5, -984.0: 57, 976.0: 108, 828.0: 3, -807.0: 6, 971.0: 134, -814.0: 3, 817.0: 13, -975.0: 112, 814.0: 12, 825.0: 6, 974.0: 90, -974.0: 125, -824.0: 2, -966.0: 131, -822.0: 4, 962.0: 108, -967.0: 121, -810.0: 3, 810.0: 11, 826.0: 7, 953.0: 74, -970.0: 140, -804.0: 6, -813.0: 2, 812.0: 18, 961.0: 126, -965.0: 159, -806.0: 5, 955.0: 74, -958.0: 93, -818.0: 6, 813.0: 18, 824.0: 6, 937.0: 25, -946.0: 51, -802.0: 8, 950.0: 48, -957.0: 91, 808.0: 11, 959.0: 116, -821.0: 3, -959.0: 108, 827.0: 4, -817.0: 4, 944.0: 47, -971.0: 126, -972.0: 104, -977.0: 96, 956.0: 92, 807.0: 10, 806.0: 21, 952.0: 60, 948.0: 51, 951.0: 67, 945.0: 47, -986.0: 37, 892.0: 13, 910.0: 23, 876.0: 6, -912.0: 18, 891.0: 8, 911.0: 22, -913.0: 13, 894.0: 7, 895.0: 12, 925.0: 15, 887.0: 6, 915.0: 16, 877.0: 7, 905.0: 14, 889.0: 7, -899.0: 10, 916.0: 17, -907.0: 11, -919.0: 17, 900.0: 20, 898.0: 9, 918.0: 16, 914.0: 18, 906.0: 18, 908.0: 17, -889.0: 7, 903.0: 16, 888.0: 5, -905.0: 9, -911.0: 19, 904.0: 20, -908.0: 12, 840.0: 2, -906.0: 16, 896.0: 11, -910.0: 17, -863.0: 3, 907.0: 27, -904.0: 10, -898.0: 13, 909.0: 19, -916.0: 20, 924.0: 24, 919.0: 20, -887.0: 6, 920.0: 12, 921.0: 12, 922.0: 15, 899.0: 14, -902.0: 9, -917.0: 12, 902.0: 14, 942.0: 46, 931.0: 23, 901.0: 22, -923.0: 14, -927.0: 15, 913.0: 18, -918.0: 16, 929.0: 22, 928.0: 13, -922.0: 7, -921.0: 16, 933.0: 22, 926.0: 13, 917.0: 18, 923.0: 16, 936.0: 24, 803.0: 30, -930.0: 10, 939.0: 33, -939.0: 24, 893.0: 8, 830.0: 5, 897.0: 8, 
886.0: 8, -897.0: 4, -903.0: 12, -920.0: 9, -894.0: 3, -934.0: 14, 932.0: 23, -928.0: 16, 943.0: 40, 946.0: 45,
801.0: 17, -944.0: 35, 935.0: 23, 941.0: 30, -926.0: 11, -940.0: 38, 802.0: 16, 940.0: 43, -943.0: 38, -935.0: 24, 804.0: 23, -933.0: 9, -945.0: 36, 949.0: 56, 858.0: 2, -839.0: 3, -964.0: 108, -969.0: 111, -815.0: 2, 881.0: 3, -955.0: 74, -803.0: 3, 947.0: 50, -948.0: 57, -950.0: 58, -961.0: 133, -947.0: 43, -949.0: 54, -936.0: 20, 980.0: 75, -848.0: 3, -941.0: 27, -827.0: 5, -816.0: 7, -942.0: 37, 938.0:
29, -956.0: 81, -951.0: 59, -932.0: 11, -954.0: 71, -952.0: 64,
-811.0: 3, 979.0: 89, -963.0: 128, -892.0: 4, -960.0: 109, 871.0: 4, 978.0: 85, -968.0: 136, 865.0: 1, -856.0: 3, 930.0: 11, 843.0: 5, -844.0: 1, -929.0: 24, -925.0: 19, -931.0: 11, 981.0: 65, 912.0: 19, 927.0: 10, -924.0: 8, -938.0: 25, 989.0: 31, -819.0: 4, 934.0: 16, -976.0: 92, -915.0: 14, 975.0: 92, 869.0: 5, 998.0: 9, 870.0: 1, -826.0: 2, 834.0: 2, 882.0: 5, 839.0: 4, 829.0: 3, 846.0: 2, -978.0: 117, -991.0: 39, -983.0: 59, -989.0: 48, 832.0: 4, 860.0: 5, -937.0:
25, 859.0: 1, 842.0: 5, -857.0: 4, -891.0: 8, 837.0: 4, -868.0: 3,
-884.0: 4, 851.0: 4, 874.0: 8, 852.0: 6, 997.0: 14, -888.0: 3, 866.0: 6, -893.0: 6, -890.0: 6, 982.0: 45, 863.0: 2, 835.0: 3, -834.0: 3,
-979.0: 73, 853.0: 3, 984.0: 44, -985.0: 30, 985.0: 36, 991.0: 25, 986.0: 35, -987.0: 29, 994.0: 24, 993.0: 29, -995.0: 16, -997.0: 17, -880.0: 4, -830.0: 3, 847.0: 1, 884.0: 4, -877.0: 5, -840.0: 1, -846.0: 2, -896.0: 8, -866.0: 2, -851.0: 2, -871.0: 2, -885.0: 3, -832.0: 3, -878.0: 1, 890.0: 6, 987.0: 22, -847.0: 2, 878.0: 5, 879.0: 3, 885.0: 5, 848.0: 2, 841.0: 5, 856.0: 3, 857.0: 4, 864.0: 1, 831.0:
5, 849.0: 3, 844.0: 3, 875.0: 3, 836.0: 3, 999.0: 6, -999.0: 6,
-900.0: 7, 845.0: 2, 862.0: 1, 880.0: 4, 855.0: 2, -876.0: 1, -882.0: 2, -835.0: 2, -831.0: 5, -812.0: 1, -825.0: 2, -860.0: 3, -914.0: 12,
-855.0: 5, -870.0: 5, -881.0: 4, -823.0: 3, -901.0: 5, -909.0: 15, -886.0: 2, 873.0: 3, -879.0: 1, -869.0: 4, -883.0: 4, -895.0: 8, 868.0: 3, -836.0: 2, 883.0: 4, -861.0: 2, -859.0: 2, -837.0: 1, -864.0: 2, -829.0: 2, -875.0: 4, -858.0: 2, -843.0: 1, -862.0: 1, -872.0: 2, 854.0: 2, -842.0: 1, -845.0: 3, -833.0: 1, -853.0: 3, 861.0: 3, -820.0: 2, -850.0: 2, -867.0: 2, -854.0: 1, -841.0: 3, 867.0: 1, -865.0: 3, -849.0: 2, 838.0: 1, -838.0: 1, -873.0: 1}
It is the Key/Value of a dictionary in Python. The Keys are the sensors data and the Values are the number of occurrences. I need to find if the two Key/Value match as in the following example:
959.0: 116 and -959.0: 108
Here, the sensor data 959.0 and -959.0 are repeated (occurred) 116 and 108 times, respectively. In my system, I can assume that 959.0 is good data. But it's not always the ideal case. The sensor data can be 958, -955, 952, etc with their respective occurrence number. I need to find the good sensor data from my DB such that each data has similar opposite value and close number of occurrences are present.
My attempts:
At this moment, I'm solving it manually by plotting the data (x being the sensor data and y being the number of occurrence) and filtering it horizontally and vertically. For example:
# Pair filter: keep a reading only when its sign-flipped counterpart is also
# present and the two occurrence counts differ by less than 2.
# Iterate over a copy of the keys because entries are deleted in the loop.
for key in list(data.keys()):
    if (-1 * key) in data and abs(data[key] - data[-1 * key]) < 2:
        continue
    del data[key]
# print(data)

# Horizontal filter (based on number of occurrences): keep a reading only
# when it occurred more than 20 times or its magnitude exceeds 1000.
for key in list(data.keys()):
    if data[key] > 20 or abs(key) > 1000:
        continue
    del data[key]

lists = sorted(data.items())  # sorted by key, returns a list of tuples
x, y = zip(*lists)  # unpack a list of pairs into two tuples
plt.plot(x, y, marker="*")
plt.grid()
plt.show()
Is there any better statistical way to solve my problem in Python? Thank you.
If I understand correctly, you want to compare these two time series from the sensors and then run some analysis on them.
But it's not always the ideal case. The sensor data can be 958, -955, 952, etc with their respective occurrence number.
And this sentence shows that there may be statistical errors in the data.
Plotting these time series at first could help you choose a good method.
The negative data is shown in orange and the positive is in blue.
# Plot smoothed occurrence counts of the positive and the negative readings
# on one axis so the two can be compared visually.
from scipy.signal import savgol_filter
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
# Two-column integer array: column 0 holds the dict keys, column 1 the values.
# NOTE(review): data_dict is presumably the {reading: occurrences} dict from
# the question -- confirm.
data = np.array(list(data_dict.items()), dtype=int)
# One slot per integer magnitude from 0 up to the largest |key|.
positive = np.zeros((np.abs(data[:, 0]).max() + 1), dtype=int)
negative = np.zeros_like(positive)
# Store each count at the index equal to the magnitude of its key.
positive[data[data[:, 0] > 0, 0]] = data[data[:, 0] > 0, 1]
negative[-data[data[:, 0] < 0, 0]] = data[data[:, 0] < 0, 1]
# Savitzky-Golay smoothing with window length 11 and polynomial order 3.
sns.lineplot(x=np.arange(len(positive)), y=savgol_filter(positive, 11, 3))
# The negative-side counts are negated before smoothing and plotting.
sns.lineplot(x=np.arange(len(positive)), y=savgol_filter(-negative, 11, 3))
plt.show()
As the difference shows, the statistical error depends on the value.
We could add a filter such as a Gaussian filter, but here I prefer the Savitzky-Golay (savgol) filter.
You can use it from SciPy.
from scipy.signal import savgol_filter
savgol_filter(negative, 11, 3)
And here is the diff.
You can use the apply() method of a pandas DataFrame to compute the values needed to filter the desired sensors with varying degrees of precision. Setting axis=1 in this method lets you define a function that operates on each row.
For example you could use an approach similar to the one you are doing by hand:
Fix a threshold for sensors similarity
Fix a threshold for occurrences similarity
Fix a threshold for the number of similar sensor + occurrences that a single data point must have to be considered valid
For example, the first step can be performed as follow:
# The data variable is the one provided in the example
# Prepare Pandas dataframe: one row per reading, with its occurrence count
data_dict = {"sensor": list(data.keys()), "occ": list(data.values())}
df = pd.DataFrame(data_dict)
# Chose sensor similarity threshold
threshold = 2
# Add support column for filtering: for every row, the number of readings
# with opposite sign and similar magnitude
df["ct"] = df.apply(lambda x: get_sensor_count(x, threshold, df), axis=1)
Where the function get_sensor_count() is implemented as follows:
# Get the count of "similar" sensors
def get_sensor_count(row, threshold, df):
    """Count the readings in ``df`` that mirror the reading in ``row``.

    A reading mirrors ``row`` when it has the opposite sign and its
    magnitude differs from ``row["sensor"]`` by less than ``threshold``.

    Args:
        row: mapping/Series with a numeric ``"sensor"`` entry.
        threshold: exclusive upper bound on the magnitude difference.
        df: DataFrame with a numeric ``"sensor"`` column.

    Returns:
        The number of matching rows of ``df``.
    """
    # First check if sensors have a similar value...
    magnitude_gap = (df["sensor"].abs() - abs(row["sensor"])).abs()
    # ...then if they have opposite signs (negative product).
    opposite_sign = df["sensor"] * row["sensor"] < 0
    return df.loc[(magnitude_gap < threshold) & opposite_sign, "sensor"].count()
In this way you can set the threshold for sensor similarity and obtain the count of similar sensor. To filter the sensor that do not have similar opposite values you can do the following:
# If at least one silimar sensor, keep it
df_good_sensors = df[df["ct"] > 0]
After that you can add arbitrary filter on this dataset, such as the one in your example:
# Filter occurrences: keep rows seen more than 20 times or whose magnitude
# exceeds 1000. The mask is built from df_good_sensors itself so that it is
# aligned with the frame being indexed; .copy() makes later column
# assignments on the result safe.
df_good_occ = df_good_sensors[
    (df_good_sensors["occ"] > 20) | (df_good_sensors["sensor"].abs() > 1000)
].copy()
Now you can check what are the sensors that measured similar occurrences by setting a new threshold for this part of the data:
# Chose occurrences similarity threshold
o_threshold = 5
df_good_occ["ct"] = pd.NaT
df_good_occ["ct"] = df_good_occ.apply(lambda x: get_occ_count(x, threshold, o_threshold, df_good_occ), axis=1)
Where the get_occ_count() function is implemented as follows:
def get_occ_count(row, s_threshold, o_threshold, df):
    """Count mirrored readings whose occurrence count is close to ``row``'s.

    A row of ``df`` is counted when it has the opposite sign to
    ``row["sensor"]``, its magnitude differs by less than ``s_threshold``,
    and its ``"occ"`` value differs from ``row["occ"]`` by less than
    ``o_threshold``.

    Args:
        row: mapping/Series with numeric ``"sensor"`` and ``"occ"`` entries.
        s_threshold: exclusive bound on the sensor magnitude difference.
        o_threshold: exclusive bound on the occurrence difference.
        df: DataFrame with numeric ``"sensor"`` and ``"occ"`` columns.

    Returns:
        The number of matching rows of ``df``.
    """
    # Get similar sensors using the previous sensor threshold
    magnitude_gap = (df["sensor"].abs() - abs(row["sensor"])).abs()
    to_check = df[(magnitude_gap < s_threshold) & (df["sensor"] * row["sensor"] < 0)]
    # Count only the occurrence values similar to the current row's; the
    # comparison must be against row["occ"], not the candidates themselves.
    occ_gap = (to_check["occ"] - row["occ"]).abs()
    return to_check.loc[occ_gap < o_threshold, "sensor"].count()
Now for each sensor you have the number of opposite values that have a similar occurrences number. As a final filter, you can set how many similar data points each final point must have to be considered:
# Chose number of similar sensors to chose how many to keep
count_threshold = 2
df_final = df_good_occ[df_good_occ["ct"] > count_threshold]
# Drop support column (drop() returns a new frame, so keep the result)
df_final = df_final.drop(columns=["ct"])
With this approach you have 3 possible parameters to set:
the sensor threshold
the occurrence threshold
the number of similar data points
You can mix these 3 variables and see what gives you the better results. To test this, you can follow a process like the following:
generate the 3 variables
use a dataset in which you already know what data points must be kept
see the % of data points that have been correctly kept

How to generate sequential subsets of integers?

I have the following start and end values:
start = 0
end = 54
I need to generate subsets of 4 sequential integers starting from start until end with a space of 20 between each subset. The result should be this one:
0, 1, 2, 3, 24, 25, 26, 27, 48, 49, 50, 51
In this example, we obtained 3 subsets:
0, 1, 2, 3
24, 25, 26, 27
48, 49, 50, 51
How can I do it using numpy or pandas?
If I do r = [i for i in range(0,54,4)], I get [0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52].
This should get you what you want:
j = 20  # gap between two consecutive runs
k = 4   # length of each run
result = []
# Runs start every j + k integers; each contributes k consecutive values.
for run_start in range(0, 55, j + k):
    result.extend(range(run_start, run_start + k))
print(result)
Output:
[0, 1, 2, 3, 24, 25, 26, 27, 48, 49, 50, 51]
Maybe something like this:
r = [j for i in range(0, 54, 24) for j in range(i, i + 4)]
print(r)
[0, 1, 2, 3, 24, 25, 26, 27, 48, 49, 50, 51]
you can use numpy.arange which returns an ndarray object containing evenly spaced values within a given range
import numpy as np
r = np.arange(0, 54, 4)
print(r)
Result
[0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52]
Numpy approach
You can use np.arange to generate number with a step value of 20 + 4, where 20 is for space between each interval and 4 for each sequential sub array.
start = 0
end = 54
size = 4   # length of each run of consecutive integers
gap = 20   # integers skipped between two runs
# Starting point of each run, spaced size + gap apart:
# array([ 0, 24, 48])
out = np.arange(start, end, size + gap)
# One row of offsets per run
step = np.tile(np.arange(size), (len(out), 1))
# [[0 1 2 3]
# [0 1 2 3]
# [0 1 2 3]]
# Add each starting point (as a column) to its row of offsets
res = out[:, None] + step
# array([[ 0, 1, 2, 3],
# [24, 25, 26, 27],
# [48, 49, 50, 51]])
This can be done with plain Python:
rangeStart = 0
rangeStop = 54
setLen = 4    # size of each subset
step = 20     # gap between subsets
stepTot = setLen + step
# One sublist of setLen consecutive integers per starting point.
a = [
    list(range(first, first + setLen))
    for first in range(rangeStart, rangeStop, stepTot)
]
In this case you will get the subsets as sublists in the array.
I don't think you need numpy or pandas to do what you want. I achieved it with a simple while loop:
num = 0
end = 54
sequence = []
while num <= end:
    sequence.append(num)
    num += 1
    # Count collected values rather than testing num itself, so the grouping
    # does not depend on the gap being a multiple of four.
    if len(sequence) % 4 == 0:  # four numbers have been added
        num += 20
# output: [0, 1, 2, 3, 24, 25, 26, 27, 48, 49, 50, 51]

how to iterate over hours in timestamp in python

I'm trying to iterate over the number of hours between two timestamps. for example:
a = 2018-01-19 12:35:00
b = 2018-01-19 18:50:00
for hour in range(a.hour, b.hour +1):
print(hour)
This will result in: 12, 13, 14, 15, 16, 17, 18
Later on I want to use the 'hour' variable, so I need it to count how many hours have elapsed since the start, not the hours of the day themselves.
The result I want is: 0, 1, 2, 3, 4, 5, 6
There another issue when getting timestamps like those:
c = 2018-01-16 17:59:00
d = 2018-01-17 00:14:00
because the hour in: 00:14:00 is 0.
in this case I want to get: 0, 1, 2, 3, 4, 5, 6, 7
I don't know how to do this.. can anyone help please?
The object you want is a "timedelta" object- it represents the duration between 2 timestamps. Say you wanted to start at a date object, and then do something every one hour after that. Don't try to figure out the interval logic yourself, use the built in stuff.
>>> a = datetime.now()
>>> a
datetime.datetime(2020, 8, 17, 6, 33, 25, 529995)
>>> a + timedelta(hours=1)
datetime.datetime(2020, 8, 17, 7, 33, 25, 529995)
>>> a + timedelta(hours=1)
datetime.datetime(2020, 8, 17, 7, 33, 25, 529995)
>>> a + timedelta(hours=2)
datetime.datetime(2020, 8, 17, 8, 33, 25, 529995)
Try this
from datetime import datetime


def date_range(x, y):
    """Return ``[0, 1, ..., n]`` for the clock hours spanned by two timestamps.

    ``n`` is the difference between the hour in which ``y`` falls and the
    hour in which ``x`` falls, counted across day boundaries; minutes and
    seconds are ignored.

    Args:
        x: start timestamp, formatted ``YYYY-MM-DD HH:MM:SS``.
        y: end timestamp, same format.

    Returns:
        ``list(range(n + 1))``; an empty list when ``y`` lies in an earlier
        hour than ``x``.

    Raises:
        ValueError: if either string does not match the format.
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    # Truncate both timestamps to the start of their hour before subtracting.
    first = datetime.strptime(x, fmt).replace(minute=0, second=0)
    last = datetime.strptime(y, fmt).replace(minute=0, second=0)
    hours = int((last - first).total_seconds() // 3600)
    return list(range(hours + 1))


a = '2018-01-19 12:35:00'
b = '2018-01-19 18:50:00'
c = '2018-01-16 17:59:00'
d = '2018-01-17 00:14:00'
print(date_range(a, b))
print(date_range(c, d))
Output:
[0, 1, 2, 3, 4, 5, 6]
[0, 1, 2, 3, 4, 5, 6, 7]

Generate random list of timestamps within multiple time intervals in python

Is there any efficient way to generate a list of N random timeframes which do not intersect each other given the initial lower and upper bounds as well as the time intervals that these time periods should have. For example in the following case I want 10 timestamps between 09:00-17:00:
Initial start time: {datetime} YYYY-MM-DD 09:00:00
Initial end time: {datetime} YYYY-MM-DD 17:00:00
Timestamp intervals (in minutes): [32 24 4 20 40 8 27 18 3 4]
where the first time period 32 minutes long, the next 24 and so on.
The way I am doing it at the moment is by using more or less the following code snippet:
import random
from datetime import datetime, timedelta

import numpy as np


def random_time(start, end, timeframe=None):
    """Return ``start`` plus a random whole number of seconds, at most ``end``.

    ``timeframe`` is accepted for backward compatibility and is not used.
    """
    sec_diff = int((end - start).total_seconds())
    secs_to_add = random.randint(0, sec_diff)
    return start + timedelta(seconds=secs_to_add)


def in_datetimes_range(x, starts, ends):
    """Return True if any ``x`` lies in a matching ``[starts, ends]`` range.

    Bounds are inclusive. Arguments may be single datetimes or numpy arrays
    of datetimes; they are compared element-wise.
    """
    return np.any((starts <= x) & (x <= ends))


n = 10
dadate = datetime.now()
year = dadate.year
month = dadate.month
day = dadate.day
start = datetime(year, month, day, 9, 0, 0)
end = datetime(year, month, day, 17, 0, 0)
timeframe = [32, 24, 4, 20, 40, 8, 27, 18, 3, 4]
startTimes = []
endTimes = []
for i in range(n):
    # Rejection sampling: draw candidates until one fits.
    while True:
        startTime = random_time(start, end)
        endTime = startTime + timedelta(minutes=int(timeframe[i]))
        if endTime > end:
            # The period would run past the upper bound.
            continue
        if startTimes:
            startTimesAsNpArray = np.array(startTimes)
            endTimesAsNpArray = np.array(endTimes)
            # check if new time period falls inside existing timeframes or if
            # existing timeframes fall within new time period
            inner_bound = np.logical_or(
                in_datetimes_range(startTime, startTimesAsNpArray, endTimesAsNpArray),
                in_datetimes_range(endTime, startTimesAsNpArray, endTimesAsNpArray),
            )
            outer_bound = np.logical_or(
                in_datetimes_range(startTimesAsNpArray, startTime, endTime),
                in_datetimes_range(endTimesAsNpArray, startTime, endTime),
            )
            if inner_bound or outer_bound:
                continue
        # The first period has nothing to collide with and is always accepted.
        startTimes.append(startTime)
        endTimes.append(endTime)
        break
but this is really inefficient and I was looking for something more reliable if possible.
Here is a way to do it: the idea is that if we remove the total duration of the periods from the time available, generate start times in the period that is left, and then postpone them with the cumulated periods before them, we are sure that the intervals won't overlap.
from datetime import datetime, timedelta
import random


def generate_periods(start, end, durations):
    """Place non-overlapping periods of the given lengths between two datetimes.

    The free time (window length minus the sum of all durations) is sampled
    for distinct whole-second delays; each period then starts at its delay
    shifted by the total length of the periods before it, so periods cannot
    overlap and keep the order of ``durations``.

    Args:
        start: lower bound of the window (datetime).
        end: upper bound of the window (datetime).
        durations: period lengths in minutes, in the order they should occur.

    Returns:
        A list of ``(period_start, period_end)`` datetime tuples.

    Raises:
        ValueError: if the window is too short to hold all periods with a
            distinct start delay each.
    """
    durations = [timedelta(minutes=m) for m in durations]
    total_duration = sum(durations, timedelta())
    nb_periods = len(durations)
    open_duration = (end - start) - total_duration
    open_seconds = max(int(open_duration.total_seconds()), 0)
    if nb_periods > open_seconds:
        raise ValueError(
            "not enough free time between start and end to place "
            f"{nb_periods} periods"
        )
    delays = sorted(
        timedelta(seconds=s)
        for s in random.sample(range(open_seconds), nb_periods)
    )
    periods = []
    periods_before = timedelta()
    for delay, duration in zip(delays, durations):
        period_start = start + delay + periods_before
        periods.append((period_start, period_start + duration))
        periods_before += duration
    return periods
Sample run:
durations = [32, 24, 4, 20, 40, 8, 27, 18, 3, 4]
start_time = datetime(2019, 9, 2, 9, 0, 0)
end_time = datetime(2019, 9, 2, 17, 0, 0)
generate_periods(start_time, end_time, durations)
# [(datetime.datetime(2019, 9, 2, 9, 16, 1),
# datetime.datetime(2019, 9, 2, 9, 48, 1)),
# (datetime.datetime(2019, 9, 2, 9, 58, 57),
# datetime.datetime(2019, 9, 2, 10, 22, 57)),
# (datetime.datetime(2019, 9, 2, 10, 56, 41),
# datetime.datetime(2019, 9, 2, 11, 0, 41)),
# (datetime.datetime(2019, 9, 2, 11, 2, 37),
# datetime.datetime(2019, 9, 2, 11, 22, 37)),
# (datetime.datetime(2019, 9, 2, 11, 48, 17),
# datetime.datetime(2019, 9, 2, 12, 28, 17)),
# (datetime.datetime(2019, 9, 2, 13, 4, 28),
# datetime.datetime(2019, 9, 2, 13, 12, 28)),
# (datetime.datetime(2019, 9, 2, 15, 13, 3),
# datetime.datetime(2019, 9, 2, 15, 40, 3)),
# (datetime.datetime(2019, 9, 2, 16, 6, 44),
# datetime.datetime(2019, 9, 2, 16, 24, 44)),
# (datetime.datetime(2019, 9, 2, 16, 37, 42),
# datetime.datetime(2019, 9, 2, 16, 40, 42)),
# (datetime.datetime(2019, 9, 2, 16, 42, 50),
# datetime.datetime(2019, 9, 2, 16, 46, 50))]
Like this?
import pandas as pd
from datetime import datetime

date = datetime.now()
start = datetime(date.year, date.month, date.day, 9, 0, 0)
end = datetime(date.year, date.month, date.day, 17, 0, 0)
interval = 32
# date_range expects an integer count: the number of whole `interval`-minute
# slots that fit between start and end.
periods = int((end - start).total_seconds() // (60 * interval))
times = pd.date_range(start, periods=periods, freq=f"{interval}min")
or like this
# =============================================================================
# or if you want the results as a dataframe
# =============================================================================
def xyz(interval):
    """Return today's timestamps from 09:00, spaced ``interval`` minutes apart.

    The number of timestamps equals the number of whole ``interval``-minute
    slots that fit between 09:00 and 17:00 of the current day.

    Args:
        interval: spacing between consecutive timestamps, in minutes.

    Returns:
        A ``pandas.DatetimeIndex``.
    """
    date = datetime.now()
    start = datetime(date.year, date.month, date.day, 9, 0, 0)
    end = datetime(date.year, date.month, date.day, 17, 0, 0)
    # date_range expects an integer count, so floor the slot count.
    periods = int((end - start).total_seconds() // (60 * interval))
    return pd.date_range(start, periods=periods, freq=f"{interval}min")
timeframes = [32,24,4,20,40,8,27,18,3,4]
df_output=pd.DataFrame(index=timeframes, data=[xyz(x) for x in timeframes])

Find largest value from multiple colums in each group of row index in Python, arrange those values diagonally in matrix, and find determinant

I am new to Python. I want to find the largest values from all the columns for repetitive row indexes (i.e. 5 to 130), and also show its row and column index label in output.The largest values should be absolute. (Irrespective of + or - sign). There should not be duplicates for row indexes in different groups.
After finding largest from each group,I want to arrange those values diagonally in square matrix. Then fill the remaining array with the corresponding values of indexes for each group from the main dataframe and find its Determinant.
df=pd.DataFrame(
{'0_deg': [43, 50, 45, -17, 5, 19, 11, 32, 36, 41, 19, 11, 32, 36, 1, 19, 7, 1, 36, 10],
'10_deg': [47, 41, 46, -18, 4, 16, 12, 34, -52, 31, 16, 12, 34, -71, 2, 9, 52, 34, -6, 9],
'20_deg': [46, 43, -56, 29, 6, 14, 13, 33, 43, 6, 14, 13, 37, 43, 3, 14, 13, 25, 40, 8],
'30_deg': [-46, 16, -40, -11, 9, 15, 33, -39, -22, 21, 15, 63, -39, -22, 4, 6, 25, -39, -22, 7]
}, index=[5, 10, 12, 101, 130, 5, 10, 12, 101, 130, 5, 10, 12, 101, 130, 5, 10, 12, 101, 130]
)
Data set :
Expected Output:
My code is showing only till output 1.
Actual Output:
Code:
df = pd.read_csv ('Matrixfile.csv')
df = df.set_index('Number')
def f(x):
    """Return the single entry of ``x`` with the largest absolute value.

    ``x`` is stacked into a Series indexed by (row label, column label); the
    result is a one-element Series that keeps the signed value and its
    labels. When several entries tie, the first one in stacked order wins.
    """
    stacked = x.stack()
    # A stable sort on the negated magnitudes makes tie-breaking deterministic.
    order = np.argsort(-stacked.abs().to_numpy(), kind="stable")
    return stacked.iloc[order[:1]]
groups = (df.index == 5).cumsum()
df1 = df.groupby(groups).apply(f).reset_index(level=[1,2])
df1.columns = ['Number','Angle','Value']
print (df1)
df1.to_csv('Matrix_OP.csv', encoding='utf-8', index=True)
I am not sure about @piRSquared's output, given what I understood from your question. There might be some errors in it; for instance, in group 2, max(abs(values)) = 52 (underlined in red in the picture) but 41 is displayed on the left...
Here is a less elegant way of doing it but maybe easier for you to understand :
import numpy as np

# INPUT
data_dict = {
    '0_deg': [43, 50, 45, -17, 5, 19, 11, 32, 36, 41, 19, 11, 32, 36, 1, 19, 7, 1, 36, 10],
    '10_deg': [47, 41, 46, -18, 4, 16, 12, 34, -52, 31, 16, 12, 34, -71, 2, 9, 52, 34, -6, 9],
    '20_deg': [46, 43, -56, 29, 6, 14, 13, 33, 43, 6, 14, 13, 37, 43, 3, 14, 13, 25, 40, 8],
    '30_deg': [-46, 16, -40, -11, 9, 15, 33, -39, -22, 21, 15, 63, -39, -22, 4, 6, 25, -39, -22, 7],
}
# Row idx of a group in this list
idx = [5, 10, 12, 101, 130]

# Getting some dimensions and sorting the data
row_idx_length = len(idx)                 # rows per group
group_length = len(data_dict['0_deg'])    # total number of rows
# Number of groups comes from the row count, not from the number of columns.
number_of_groups = group_length // row_idx_length
idx = idx * number_of_groups
keys = sorted(data_dict)                  # column labels, in column order
data_arr = np.zeros((group_length, len(keys)), dtype=np.int32)
for col, key in enumerate(keys):
    data_arr[:, col] = data_dict[key]


def get_extrema_value_group(arr):
    """Return the signed element of 2-D ``arr`` with the largest magnitude.

    Rows are scanned in order and a later row replaces the current result
    only when its magnitude is strictly larger. When a row's minimum and
    maximum tie in magnitude, the maximum (positive value) is used.
    Returns 0 when ``arr`` has no rows.
    """
    extrema = 0
    for row in arr:
        max_value = max(row)
        min_value = min(row)
        # The row's candidate is whichever end of its range is further from 0.
        candidate = min_value if abs(min_value) > max_value else max_value
        if abs(candidate) > abs(extrema):
            extrema = candidate
    return extrema


# For output 1
max_values = []
for i in range(0, row_idx_length * number_of_groups, row_idx_length):
    group = data_arr[i:i + row_idx_length]
    # get the max value for the current group
    value = get_extrema_value_group(group)
    # get the row and column idx associated with the max value; compare the
    # signed values, since value itself may be negative
    idx_angle_number = np.nonzero(group == value)
    print('Group number : ' + str(i//row_idx_length+1))
    print('Number : '+ str(idx[idx_angle_number[0][0]]))
    print('Angle : '+ keys[idx_angle_number[1][0]])
    print('Absolute extrema value : ' + str(value))
    print('------')
    max_values.append(value)

# Arrange those values diagonally in square matrix for output 2
A = np.diag(max_values)
print('A = ' + str(A))

# Fill A with desired values
for i in range(0, number_of_groups, 1):
    A[i, 0] = data_arr[i*row_idx_length+2, 2]    # 20 deg 12
    A[i, 1:3] = data_arr[i*row_idx_length+3, 1]  # x2 : 10 deg 101
    A[i, 3] = data_arr[i*row_idx_length+1, 1]    # 10 deg 10

# Final output
# replace the diagonal of A with max values
# get the idx of diag
A_di = np.diag_indices(number_of_groups)
# replace with max values
A[A_di] = max_values
print ('A = ' + str(A))

# Compute determinant of A
det_A = np.linalg.det(A)
print ('det(A) = '+str(det_A))
Output 1:
Group number : 1
Number : 12
Angle : 20_deg
Absolute extrema value : -56
------
Group number : 2
Number : 101
Angle : 10_deg
Absolute extrema value : -52
------
Group number : 3
Number : 101
Angle : 10_deg
Absolute extrema value : -71
------
Group number : 4
Number : 10
Angle : 10_deg
Absolute extrema value : 52
------
Output 2 :
A = [[-56 0 0 0]
[ 0 -52 0 0]
[ 0 0 -71 0]
[ 0 0 0 52]]
Output 3 :
A = [[-56 -18 -18 41]
[ 33 -52 -52 12]
[ 37 -71 -71 12]
[ 25 -6 -6 52]]
det(A) = -5.4731330578761246e-11

Categories

Resources