How to exchange dates from loop in to an array in python? - python

How can I collect the dates generated in a loop into an array (list) in Python?
I need an array of irregular, random dates with times, so I prepared this solution:
import datetime
import radar
# Draw nine random datetimes between the two dates and keep them in a tuple.
r2 = tuple(
    radar.random_datetime(start='1985-05-01', stop='1985-05-04')
    for _ in range(1, 10)
)
# Same values as a list, then show them.
r3 = [*r2]
print(r3)
As the result I get a list like:
[datetime.datetime(1985, 5, 3, 17, 59, 13), datetime.datetime(1985, 5, 2, 15, 58, 30), datetime.datetime(1985, 5, 2, 9, 46, 35), datetime.datetime(1985, 5, 3, 10, 5, 45), datetime.datetime(1985, 5, 2, 4, 34, 43), datetime.datetime(1985, 5, 3, 9, 52, 51), datetime.datetime(1985, 5, 2, 22, 7, 17), datetime.datetime(1985, 5, 1, 15, 28, 14), datetime.datetime(1985, 5, 3, 13, 33, 56)]
But I need strings in the list like:
list2 = ['1985-05-02 08:48:46','1985-05-02 10:47:56','1985-05-03 22:07:11', '1985-05-03 22:07:11','1985-05-01 03:23:43']

You can convert the datetime to a string with str() like:
Code:
str(radar.random_datetime(start='1985-05-01', stop='1985-05-04'))
Test Code:
import radar
# str() turns each random datetime into 'YYYY-MM-DD HH:MM:SS' text
# (fractional seconds would be appended if the value had any).
r2 = tuple(
    str(radar.random_datetime(start='1985-05-01', stop='1985-05-04'))
    for _ in range(1, 10)
)
r3 = list(r2)
print(r3)
Results:
['1985-05-01 21:06:29', '1985-05-01 04:43:11', '1985-05-02 13:51:03',
'1985-05-03 03:20:44', '1985-05-03 19:59:14', '1985-05-02 21:50:34',
'1985-05-01 04:13:50', '1985-05-03 23:28:36', '1985-05-02 15:56:23']

Use strftime to convert the date generated by radar before adding it to the list.
e.g.
import datetime
import radar
# Collect nine random dates, formatted as 'YYYY-MM-DD HH:MM:SS' strings.
r2 = ()
for a in range(1, 10):
    # radar.random_datetime() already returns a datetime object, so it can be
    # formatted directly; wrapping it in datetime.datetime(...) raises
    # TypeError because that constructor expects integer fields.
    t = radar.random_datetime(start='1985-05-01', stop='1985-05-04')
    r2 = r2 + (t.strftime('%Y-%m-%d %H:%M:%S'),)
r3 = list(r2)
print(r3)

Related

Can the tuples be changed in rtc.datetime()?

import network, ntptime, time
from machine import RTC
# Dictionary that maps date-field names to their indexes in the RTC's
# datetime tuple.
# NOTE(review): the meaning of the last field (index 7) may differ between
# boards — confirm against the port's RTC documentation.
DATETIME_ELEMENTS = {
    "year": 0,
    "month": 1,
    "day": 2,
    "day_of_week": 3,
    "hour": 4,
    "minute": 5,
    "second": 6,
    "millisecond": 7,
}
def connect_to_wifi(wlan, ssid, password):
    """Join the network `ssid` unless `wlan` already reports a connection.

    When a connection attempt is started, the call busy-waits until
    wlan.isconnected() returns True; there is no timeout.
    """
    if wlan.isconnected():
        return
    print("Connecting to network...")
    wlan.connect(ssid, password)
    # Spin until the interface reports that it is connected.
    while not wlan.isconnected():
        continue
# Replace a single field of the RTC's datetime with a new value.
def set_datetime_element(rtc, datetime_element, value):
    """Overwrite the field named `datetime_element` of rtc's datetime.

    The current value is read with rtc.datetime(), the slot looked up in
    DATETIME_ELEMENTS is set to `value`, and the result is written back
    (as a list) with rtc.datetime(...).
    """
    fields = list(rtc.datetime())
    slot = DATETIME_ELEMENTS[datetime_element]
    fields[slot] = value
    rtc.datetime(fields)
# Bring up the Wi-Fi interface (STA_IF) and join the network.
wlan = network.WLAN(network.STA_IF)
wlan.active(True)
connect_to_wifi(wlan, "SSID", "Password")
rtc = RTC()
# Set the clock via ntptime, then override only the hour field.
ntptime.settime()
set_datetime_element(rtc, "hour", 8) # I call this to change the hour to 8am for me
print(rtc.datetime()) # print the updated RTC time
Prints results:
(2022, 4, 28, 3, 18, 50, 27, 0)
(2022, 4, 28, 3, 8, 50, 27, 0)
I'm trying to get:
(2022, 4, 28, 8, 50, 27)
I don't want the day or microseconds. Any suggestions?
If you only want to print a subset of the fields in the tuple, you can use Python's slicing operator (see e.g. the examples here) to select only those fields:
>>> now=(2022, 4, 28, 3, 18, 50, 27, 0)
>>> print(now)
(2022, 4, 28, 3, 18, 50, 27, 0)
>>> print(now[:3] + now[5:7])
(2022, 4, 28, 50, 27)

How to estimate similarity between sensor data based on the number of occurrence?

Following is my sample data:
data = {850.0: 6, -852.0: 5, 992.0: 29, -993.0: 25, 990.0: 27, -992.0: 28,
965.0: 127, 988.0: 37, -994.0: 24, 996.0: 14, -996.0: 19, -998.0: 19, 995.0: 17, 954.0: 71, -953.0: 64, 983.0: 48, 805.0: 20, 960.0: 97, 811.0: 23, 957.0: 98, 818.0: 9, -805.0: 10, -962.0: 128, 822.0: 5, 970.0: 115, 823.0: 6, 977.0: 86, 815.0: 11, 972.0: 118, -809.0: 3, -982.0: 77, 963.0: 129, 816.0: 15, 969.0: 131, 809.0: 13, -973.0: 115, 967.0: 141, 964.0: 110, 966.0: 141, -801.0: 11, -990.0: 33, 819.0: 8, 973.0: 113, -981.0: 71, 820.0: 16, 821.0: 10, -988.0: 42, 833.0: 7, 958.0: 92, -980.0: 98, 968.0: 138, -808.0: 5, -984.0: 57, 976.0: 108, 828.0: 3, -807.0: 6, 971.0: 134, -814.0: 3, 817.0: 13, -975.0: 112, 814.0: 12, 825.0: 6, 974.0: 90, -974.0: 125, -824.0: 2, -966.0: 131, -822.0: 4, 962.0: 108, -967.0: 121, -810.0: 3, 810.0: 11, 826.0: 7, 953.0: 74, -970.0: 140, -804.0: 6, -813.0: 2, 812.0: 18, 961.0: 126, -965.0: 159, -806.0: 5, 955.0: 74, -958.0: 93, -818.0: 6, 813.0: 18, 824.0: 6, 937.0: 25, -946.0: 51, -802.0: 8, 950.0: 48, -957.0: 91, 808.0: 11, 959.0: 116, -821.0: 3, -959.0: 108, 827.0: 4, -817.0: 4, 944.0: 47, -971.0: 126, -972.0: 104, -977.0: 96, 956.0: 92, 807.0: 10, 806.0: 21, 952.0: 60, 948.0: 51, 951.0: 67, 945.0: 47, -986.0: 37, 892.0: 13, 910.0: 23, 876.0: 6, -912.0: 18, 891.0: 8, 911.0: 22, -913.0: 13, 894.0: 7, 895.0: 12, 925.0: 15, 887.0: 6, 915.0: 16, 877.0: 7, 905.0: 14, 889.0: 7, -899.0: 10, 916.0: 17, -907.0: 11, -919.0: 17, 900.0: 20, 898.0: 9, 918.0: 16, 914.0: 18, 906.0: 18, 908.0: 17, -889.0: 7, 903.0: 16, 888.0: 5, -905.0: 9, -911.0: 19, 904.0: 20, -908.0: 12, 840.0: 2, -906.0: 16, 896.0: 11, -910.0: 17, -863.0: 3, 907.0: 27, -904.0: 10, -898.0: 13, 909.0: 19, -916.0: 20, 924.0: 24, 919.0: 20, -887.0: 6, 920.0: 12, 921.0: 12, 922.0: 15, 899.0: 14, -902.0: 9, -917.0: 12, 902.0: 14, 942.0: 46, 931.0: 23, 901.0: 22, -923.0: 14, -927.0: 15, 913.0: 18, -918.0: 16, 929.0: 22, 928.0: 13, -922.0: 7, -921.0: 16, 933.0: 22, 926.0: 13, 917.0: 18, 923.0: 16, 936.0: 24, 803.0: 30, -930.0: 10, 939.0: 33, -939.0: 24, 893.0: 8, 830.0: 5, 897.0: 8, 
886.0: 8, -897.0: 4, -903.0: 12, -920.0: 9, -894.0: 3, -934.0: 14, 932.0: 23, -928.0: 16, 943.0: 40, 946.0: 45,
801.0: 17, -944.0: 35, 935.0: 23, 941.0: 30, -926.0: 11, -940.0: 38, 802.0: 16, 940.0: 43, -943.0: 38, -935.0: 24, 804.0: 23, -933.0: 9, -945.0: 36, 949.0: 56, 858.0: 2, -839.0: 3, -964.0: 108, -969.0: 111, -815.0: 2, 881.0: 3, -955.0: 74, -803.0: 3, 947.0: 50, -948.0: 57, -950.0: 58, -961.0: 133, -947.0: 43, -949.0: 54, -936.0: 20, 980.0: 75, -848.0: 3, -941.0: 27, -827.0: 5, -816.0: 7, -942.0: 37, 938.0:
29, -956.0: 81, -951.0: 59, -932.0: 11, -954.0: 71, -952.0: 64,
-811.0: 3, 979.0: 89, -963.0: 128, -892.0: 4, -960.0: 109, 871.0: 4, 978.0: 85, -968.0: 136, 865.0: 1, -856.0: 3, 930.0: 11, 843.0: 5, -844.0: 1, -929.0: 24, -925.0: 19, -931.0: 11, 981.0: 65, 912.0: 19, 927.0: 10, -924.0: 8, -938.0: 25, 989.0: 31, -819.0: 4, 934.0: 16, -976.0: 92, -915.0: 14, 975.0: 92, 869.0: 5, 998.0: 9, 870.0: 1, -826.0: 2, 834.0: 2, 882.0: 5, 839.0: 4, 829.0: 3, 846.0: 2, -978.0: 117, -991.0: 39, -983.0: 59, -989.0: 48, 832.0: 4, 860.0: 5, -937.0:
25, 859.0: 1, 842.0: 5, -857.0: 4, -891.0: 8, 837.0: 4, -868.0: 3,
-884.0: 4, 851.0: 4, 874.0: 8, 852.0: 6, 997.0: 14, -888.0: 3, 866.0: 6, -893.0: 6, -890.0: 6, 982.0: 45, 863.0: 2, 835.0: 3, -834.0: 3,
-979.0: 73, 853.0: 3, 984.0: 44, -985.0: 30, 985.0: 36, 991.0: 25, 986.0: 35, -987.0: 29, 994.0: 24, 993.0: 29, -995.0: 16, -997.0: 17, -880.0: 4, -830.0: 3, 847.0: 1, 884.0: 4, -877.0: 5, -840.0: 1, -846.0: 2, -896.0: 8, -866.0: 2, -851.0: 2, -871.0: 2, -885.0: 3, -832.0: 3, -878.0: 1, 890.0: 6, 987.0: 22, -847.0: 2, 878.0: 5, 879.0: 3, 885.0: 5, 848.0: 2, 841.0: 5, 856.0: 3, 857.0: 4, 864.0: 1, 831.0:
5, 849.0: 3, 844.0: 3, 875.0: 3, 836.0: 3, 999.0: 6, -999.0: 6,
-900.0: 7, 845.0: 2, 862.0: 1, 880.0: 4, 855.0: 2, -876.0: 1, -882.0: 2, -835.0: 2, -831.0: 5, -812.0: 1, -825.0: 2, -860.0: 3, -914.0: 12,
-855.0: 5, -870.0: 5, -881.0: 4, -823.0: 3, -901.0: 5, -909.0: 15, -886.0: 2, 873.0: 3, -879.0: 1, -869.0: 4, -883.0: 4, -895.0: 8, 868.0: 3, -836.0: 2, 883.0: 4, -861.0: 2, -859.0: 2, -837.0: 1, -864.0: 2, -829.0: 2, -875.0: 4, -858.0: 2, -843.0: 1, -862.0: 1, -872.0: 2, 854.0: 2, -842.0: 1, -845.0: 3, -833.0: 1, -853.0: 3, 861.0: 3, -820.0: 2, -850.0: 2, -867.0: 2, -854.0: 1, -841.0: 3, 867.0: 1, -865.0: 3, -849.0: 2, 838.0: 1, -838.0: 1, -873.0: 1}
It is the Key/Value of a dictionary in Python. The Keys are the sensors data and the Values are the number of occurrences. I need to find if the two Key/Value match as in the following example:
959.0: 116 and -959.0: 108
Here, the sensor data 959.0 and -959.0 are repeated (occurred) 116 and 108 times, respectively. In my system, I can assume that 959.0 is good data. But it's not always the ideal case. The sensor data can be 958, -955, 952, etc with their respective occurrence number. I need to find the good sensor data from my DB such that each data has similar opposite value and close number of occurrences are present.
My attempts:
At this moment, I'm solving it manually by plotting the data (x being the sensor data and y being the number of occurrence) and filtering it horizontally and vertically. For example:
# Filter 1: keep a reading only when its mirrored (negated) reading exists
# and the two occurrence counts differ by less than 2.
for key in list(data):
    mirrored = -1 * key
    if mirrored not in data or abs(data[key] - data[mirrored]) >= 2:
        del data[key]
# Filter 2 (horizontal, based on number of occurrences): keep a reading when
# it occurred more than 20 times or its magnitude is above 1000.
for key in list(data):
    if data[key] <= 20 and abs(key) <= 1000:
        del data[key]
# Sort the surviving (reading, count) pairs by reading and plot them.
lists = sorted(data.items())
x, y = zip(*lists)
plt.plot(x, y, marker="*")
plt.grid()
plt.show()
Is there any better statistical way to solve my problem in Python? Thank you.
If I understand correctly. You want to compare these two time-series data from sensors and do some analysis after that.
But it's not always the ideal case. The sensor data can be 958, -955, 952, etc with their respective occurrence number.
And this sentence shows that there may be statistical errors in the data.
Plotting these time series at first could help you choose a good method.
The negative data is shown in orange and the positive is in blue.
from scipy.signal import savgol_filter
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
# NOTE(review): data_dict is not defined in this snippet — presumably it is the
# question's {sensor value: occurrences} dictionary; confirm.
# Two-column integer array: column 0 holds the keys, column 1 the values.
data = np.array(list(data_dict.items()), dtype=int)
# One slot per absolute sensor value, from 0 up to the largest magnitude.
positive = np.zeros((np.abs(data[:, 0]).max() + 1), dtype=int)
negative = np.zeros_like(positive)
# Store each count at the index given by the magnitude of its sensor value,
# with positive and negative readings kept in separate arrays.
positive[data[data[:, 0] > 0, 0]] = data[data[:, 0] > 0, 1]
negative[-data[data[:, 0] < 0, 0]] = data[data[:, 0] < 0, 1]
# Smooth both curves with a Savitzky-Golay filter (window length 11,
# polynomial order 3); the negative counts are negated before plotting.
sns.lineplot(x=np.arange(len(positive)), y=savgol_filter(positive, 11, 3))
sns.lineplot(x=np.arange(len(positive)), y=savgol_filter(-negative, 11, 3))
plt.show()
As the difference shows, the statistical error depends on the value.
We can try to add a filter such as a Gaussian filter, but here I prefer the Savitzky-Golay (Savgol) filter.
You can use it from scipy.
from scipy.signal import savgol_filter
savgol_filter(negative, 11, 3)
And here is the diff.
You can use the apply() method of a pandas dataframe to calculate the data useful to filter the desired sensor with different degree of precision. Setting axis=1 in this method allows to define a function that operates on each row.
For example you could use an approach similar to the one you are doing by hand:
Fix a threshold for sensors similarity
Fix a threshold for occurrences similarity
Fix a threshold for the number of similar sensor + occurrences that a single data point must have to be considered valid
For example, the first step can be performed as follow:
# `data` is the dictionary provided in the question.
# Build the frame: one row per (sensor value, occurrences) pair.
data_dict = {
    "sensor": list(data),
    "occ": list(data.values()),
}
df = pd.DataFrame(data_dict)
# Placeholder for the support column used when filtering
df["ct"] = pd.NaT
# Sensor similarity threshold
threshold = 2


# For every row, count the "similar" opposite-sign sensors
def _count_for(row):
    return get_sensor_count(row, threshold, df)


df["ct"] = df.apply(_count_for, axis=1)
Where the function get_sensor_count() is implemented as follows:
# Count the "similar" sensors for one row
def get_sensor_count(row, threshold, df):
    """Return how many rows of `df` mirror the sensor value of `row`.

    A row of `df` is counted when the absolute value of its "sensor" entry is
    within `threshold` (exclusive) of abs(row["sensor"]) and the two sensor
    values have opposite signs.
    """
    sensors = df["sensor"]
    reference = row["sensor"]
    # Similar magnitude first, then opposite sign
    close_in_magnitude = abs(abs(sensors) - abs(reference)) < threshold
    opposite_sign = sensors * reference < 0
    return df[close_in_magnitude & opposite_sign]["sensor"].count()
In this way you can set the threshold for sensor similarity and obtain the count of similar sensor. To filter the sensor that do not have similar opposite values you can do the following:
# Keep a sensor only if it has at least one similar opposite-sign sensor
df_good_sensors = df[df["ct"] > 0]
After that you can add arbitrary filter on this dataset, such as the one in your example:
# Filter occurrences: keep rows that occurred more than 20 times, or whose
# sensor magnitude exceeds 1000.
# The mask is built from df_good_sensors itself (not df) so its index matches
# the frame being filtered, and abs() is applied to the sensor value before the
# comparison; abs(df["sensor"] > 1000) only wrapped the boolean result and
# therefore ignored negative sensor values.
df_good_occ = df_good_sensors[
    (df_good_sensors["occ"] > 20) | (abs(df_good_sensors["sensor"]) > 1000)
]
Now you can check what are the sensors that measured similar occurrences by setting a new threshold for this part of the data:
# Choose the occurrence-similarity threshold
o_threshold = 5
# df_good_occ was produced by boolean indexing; work on an explicit copy so
# the column assignment below does not trigger SettingWithCopyWarning.
df_good_occ = df_good_occ.copy()
# For every row, count the similar opposite-sign sensors whose occurrence
# count is close to that row's own.
df_good_occ["ct"] = df_good_occ.apply(
    lambda x: get_occ_count(x, threshold, o_threshold, df_good_occ), axis=1
)
Where the get_occ_count() function is implemented as follows:
def get_occ_count(row, s_threshold, o_threshold, df):
    """Count opposite-sign, similar sensors with a similar occurrence count.

    Parameters:
        row: the row being evaluated; its "sensor" and "occ" entries are read.
        s_threshold: exclusive bound on the difference of absolute sensor values.
        o_threshold: exclusive bound on the difference of occurrence counts.
        df: frame with "sensor" and "occ" columns to search.

    Returns the number of rows of `df` that satisfy both conditions.
    """
    # Get similar sensors using the previous sensor threshold
    to_check = df[(abs(abs(df["sensor"]) - abs(row["sensor"])) < s_threshold) & (df["sensor"] * row["sensor"] < 0)]
    # Compare each candidate's occurrences with the current row's occurrences
    # (subtracting the column from itself always gave 0 and filtered nothing).
    return to_check[abs(to_check["occ"] - row["occ"]) < o_threshold]["sensor"].count()
Now for each sensor you have the number of opposite values that have a similar occurrences number. As a final filter, you can set how many similar data points each final point must have to be considered:
# Choose how many similar data points a row needs in order to be kept
count_threshold = 2
df_final = df_good_occ[df_good_occ["ct"] > count_threshold]
# Drop the support column; drop() returns a new frame, so keep the result
df_final = df_final.drop(["ct"], axis=1)
With this approach you have 3 possible parameters to set:
the sensor threshold
the occurrence threshold
the number of similar data points
You can mix these 3 variables and see what gives you the better results. To test this, you can follow a process like the following:
generate the 3 variables
use a dataset in which you already know what data points must be kept
see the % of data points that have been correctly kept

Check which value from my list is not in my dataframe column

I need to check if any of the values in my list is missing in my df column. I used this:
data_xls['date'].isin([datetime(2015, 7, 20, 11,7),datetime(2015, 7, 20, 11,13),datetime(2015, 7, 20, 11,14),datetime(2015, 7, 20, 11,16)])
But I also want to know which one amongst my list is missing. How can I do that?
You need the ~ symbol to index the dates that are not in that list:
lst = [datetime(2015, 7, 20, 11,7),datetime(2015, 7, 20, 11,13),datetime(2015, 7, 20, 11,14),datetime(2015, 7, 20, 11,16)]
data_xls['date'][~data_xls['date'].isin(lst)]
But since you want the dates in your list missing in data_xls, you can find that by:
set(lst).difference(data_xls['date'])
If need difference between dates and data_xls['date'] columns use:
data_xls = pd.DataFrame({'date': pd.date_range(datetime(2015, 7, 20, 11,11),
freq='1Min', periods=5)})
print (data_xls)
date
0 2015-07-20 11:11:00
1 2015-07-20 11:12:00
2 2015-07-20 11:13:00
3 2015-07-20 11:14:00
4 2015-07-20 11:15:00
dates = [datetime(2015, 7, 20, 11,7),datetime(2015, 7, 20, 11,13),
datetime(2015, 7, 20, 11,14),datetime(2015, 7, 20, 11,16)]
# Build the lookup set once instead of once per element of `dates`.
present = set(data_xls['date'])
missing = [x for x in dates if x not in present]
print (missing)
[datetime.datetime(2015, 7, 20, 11, 7), datetime.datetime(2015, 7, 20, 11, 16)]
missing = list(set(dates) - set(data_xls['date']))
print (missing)
[datetime.datetime(2015, 7, 20, 11, 7), datetime.datetime(2015, 7, 20, 11, 16)]

How to create a nested list conditioned on a parameter in python

I have generated a day-wise nested list and want to calculate total duration between login and logout sessions and store that value individually in a duration nested list, organized by the day in which the login happened.
My python script is:
import datetime
import itertools
# Session start times, in chronological order.
Logintime = [
    datetime.datetime(2021, 1, 1, 8, 10, 10),
    datetime.datetime(2021, 1, 1, 10, 25, 19),
    datetime.datetime(2021, 1, 2, 8, 15, 10),
    datetime.datetime(2021, 1, 2, 9, 35, 10),
]
# Matching session end times.
Logouttime = [
    datetime.datetime(2021, 1, 1, 10, 10, 11),
    datetime.datetime(2021, 1, 1, 17, 0, 10),
    datetime.datetime(2021, 1, 2, 9, 30, 10),
    datetime.datetime(2021, 1, 2, 17, 30, 12),
]
# Split each list into runs of consecutive entries that fall on the same day.
Logintimedaywise = [
    list(same_day)
    for _, same_day in itertools.groupby(Logintime, key=lambda stamp: stamp.toordinal())
]
Logouttimedaywise = [
    list(same_day)
    for _, same_day in itertools.groupby(Logouttime, key=lambda stamp: stamp.toordinal())
]
print(Logintimedaywise)
print(Logouttimedaywise)
# calculate total duration (hours); every distinct value goes into the single
# shared list `temp`, which is appended to `l` once after both loops
temp = []
l = []
for logins, logouts in zip(Logintimedaywise, Logouttimedaywise):
    for start, end in zip(logins, logouts):
        hours = int((end - start).total_seconds()) / 3600
        if hours in temp:
            continue
        temp.append(hours)
l.append(temp)
print(l)
This script generates the following output (the durations in variable l come out as a flat list inside a singleton list):
[[datetime.datetime(2021, 1, 1, 8, 10, 10), datetime.datetime(2021, 1, 1, 10, 25, 19)], [datetime.datetime(2021, 1, 2, 8, 15, 10), datetime.datetime(2021, 1, 2, 9, 35, 10)]]
[[datetime.datetime(2021, 1, 1, 10, 10, 11), datetime.datetime(2021, 1, 1, 17, 0, 10)], [datetime.datetime(2021, 1, 2, 9, 30, 10), datetime.datetime(2021, 1, 2, 17, 30, 12)]]
[[2.000277777777778, 6.5808333333333335, 1.25, 7.917222222222223]]
But my desired output format is the following nested list of durations (each item in the list should be the list of durations for a given login day):
[[2.000277777777778, 6.5808333333333335] , [1.25, 7.917222222222223]]
anyone can help how can i store total duration as a nested list according to the login day?
thanks in advance.
Try changing this piece of code:
# calculate total duration
temp = []
l = []
for p,q in zip(Logintimedaywise,Logouttimedaywise):
for a,b in zip(p, q):
tdelta = (b-a)
diff = int(tdelta.total_seconds()) / 3600
if diff not in temp:
temp.append(diff)
l.append(temp)
print(l)
To:
# calculate total duration: one inner list of hours per login day
l = []
for p, q in zip(Logintimedaywise, Logouttimedaywise):
    day_hours = []
    l.append(day_hours)
    for a, b in zip(p, q):
        diff = int((b - a).total_seconds()) / 3600
        # each distinct duration is stored once per day
        if diff in day_hours:
            continue
        day_hours.append(diff)
print(l)
Then the output would be:
[[datetime.datetime(2021, 1, 1, 8, 10, 10), datetime.datetime(2021, 1, 1, 10, 25, 19)], [datetime.datetime(2021, 1, 2, 8, 15, 10), datetime.datetime(2021, 1, 2, 9, 35, 10)]]
[[datetime.datetime(2021, 1, 1, 10, 10, 11), datetime.datetime(2021, 1, 1, 17, 0, 10)], [datetime.datetime(2021, 1, 2, 9, 30, 10), datetime.datetime(2021, 1, 2, 17, 30, 12)]]
[[2.000277777777778, 6.5808333333333335], [1.25, 7.917222222222223]]
I add a new sublist for every iteration.
Your solution and the answer by @U11-Forward will break if the login and logout for the same session happen on different days, since the inner lists in Logintimedaywise and Logouttimedaywise will then have different numbers of elements.
To avoid that, a way simpler solution is if you first calculate the duration for all pairs of login, logout, then you create the nested lists based only on the login day (or logout day if you wish), like this:
import datetime
import itertools
import numpy
# define the login and logout times (paired by position)
Logintime = [datetime.datetime(2021,1,1,8,10,10),datetime.datetime(2021,1,1,10,25,19),datetime.datetime(2021,1,2,8,15,10),datetime.datetime(2021,1,2,9,35,10)]
Logouttime = [datetime.datetime(2021,1,1,10,10,11),datetime.datetime(2021,1,1,17,0,10), datetime.datetime(2021,1,2,9,30,10),datetime.datetime(2021,1,2,17,30,12) ]
# calculate the duration (in hours) of every login/logout pair
duration = [ int((logout - login).total_seconds())/3600 for login,logout in zip(Logintime,Logouttime) ]
# unique login dates, in chronological order; the full calendar date is used
# rather than the day-of-month number, so that e.g. 1 January and 1 February
# do not end up in the same inner list
login_days = sorted({login.date() for login in Logintime})
# create the nested lists
# each inner list corresponds to a unique login date
Logintimedaywise = [[ login for login in Logintime if login.date() == day ] for day in login_days ]
Logouttimedaywise = [[ logout for login,logout in zip(Logintime,Logouttime) if login.date() == day ] for day in login_days ]
duration_daywise = [[ d for d,login in zip(duration,Logintime) if login.date() == day ] for day in login_days ]
# check
print(Logintimedaywise)
print(Logouttimedaywise)
print(duration_daywise)
Outputs
[[datetime.datetime(2021, 1, 1, 8, 10, 10), datetime.datetime(2021, 1, 1, 10, 25, 19)], [datetime.datetime(2021, 1, 2, 8, 15, 10), datetime.datetime(2021, 1, 2, 9, 35, 10)]]
[[datetime.datetime(2021, 1, 1, 10, 10, 11), datetime.datetime(2021, 1, 1, 17, 0, 10)], [datetime.datetime(2021, 1, 2, 9, 30, 10), datetime.datetime(2021, 1, 2, 17, 30, 12)]]
[[2.000277777777778, 6.5808333333333335], [1.25, 7.917222222222223]]

Python f-string and append()

I hope y'all doing fine!
So I want to make 5 groups of 6 people randomly chosen from a list and then append those 6 chosen names to the special group.
Example: If a, b, c, d, e, f, are the first six chosen names -> append those names to group1;
after the group1 contains 6 names, then the next 6 names -> append to group2; and so and so till I have 5 groups of 6 people.
I hope you understand me and that you can help :)
My code:
import random
names = [30 names i dont wanna share]
# Five target lists; only group1 is ever filled by the code below.
group1 = list()
group2 = list()
group3 = list()
group4 = list()
group5 = list()
def choosegroup():
    # Pick six names at random, move them from `names` into group1, print it.
    def chooserandom():
        return(random.choice(names))
    def creategroup():
        for i in range(1,7):
            chosed = chooserandom()
            # Removing the name from the pool prevents it being drawn again.
            names.remove(chosed)
            #while(chosed in group1):
                #print('Ups')
                #print(chosed + ' already chosed')
            #   chosed = chooserandom()
            #print(chosed)
            group1.append(chosed)
        #print('Group 1:' + '\n' + str(group1) + '\n')
    # creategroup() has no return statement, so createdgroup is None.
    createdgroup = creategroup()
    print(group1)
# Build and print five groups in turn, reusing (and emptying) group1 each time.
for i in range(1,6):
    print(f'Group {i}')
    choosegroup()
    group1.clear()
random.shuffle(names)
groups = [ names[i:i+6] for i in range(0, len(names), 6) ]
Now groups[0], groups[1] etc. are your 6-person groups.
Once you have your list of names, to split them into random groups, I would instead use numpy
import numpy as np
# Copy the names into an array, shuffle it in place, then cut it into
# 5 rows (groups) of 6 names each; this requires exactly 30 names.
groups = np.array(names)
# The shuffle function lives in np.random; there is no np.shuffle.
np.random.shuffle(groups)
groups = np.reshape(groups, (5, 6))
As an example with numbers instead of names
>>> names = np.arange(30)
>>> names
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
>>> np.random.shuffle(names)
>>> names
array([ 8, 18, 23, 7, 25, 14, 11, 20, 13, 24, 15, 26, 19, 21, 12, 17, 0,
6, 3, 10, 29, 9, 16, 28, 22, 5, 1, 4, 27, 2])
>>> np.reshape(names, (5,6))
array([[ 8, 18, 23, 7, 25, 14],
[11, 20, 13, 24, 15, 26],
[19, 21, 12, 17, 0, 6],
[ 3, 10, 29, 9, 16, 28],
[22, 5, 1, 4, 27, 2]])
You can access them from globals as such:
globals()[f"group{i}"]
though storing and retrieving them from a dictionary is preferable.
You can rewrite your code as follows:
import random
from collections import defaultdict
names = [30 names i dont wanna share]
groups = defaultdict(list)
def choosegroup(group_name):
    """Move six randomly chosen names into groups[group_name] and print them.

    Each chosen name is removed from the module-level `names` list, so it
    cannot be drawn again. random.choice raises IndexError if `names` runs
    out before six names have been drawn.
    """
    def chooserandom():
        return random.choice(names)

    def creategroup(group_name):
        for _ in range(6):
            chosed = chooserandom()
            names.remove(chosed)
            groups[group_name].append(chosed)

    # Pass the group name through (the call previously had no argument) and
    # read from `groups`, the dictionary defined above (`group` is undefined).
    creategroup(group_name)
    print(group_name, "\n", groups[group_name])
# Build the five groups, stored under the keys "group1" ... "group5".
for i in range(1, 6):
    print(f'Group {i}')
    group_name = f"group{i}"
    choosegroup(group_name)
    # groups[group_name] is deliberately not cleared here: the dictionary is
    # what keeps every finished group available after the loop.

Categories

Resources