I'm new to Python, and have some problems with creating random lists.
I'm using random.sample(range(x, x), y).
I want to get 4 lists with unique numbers, from 1-4, so I have been using this
a = random.sample(range(1, 5), 4)
b = random.sample(range(1, 5), 4)
c = random.sample(range(1, 5), 4)
d = random.sample(range(1, 5), 4)
So I get for example
a = 1, 3, 2, 4
b = 1, 4, 3, 2
c = 2, 3, 1, 4
d = 4, 2, 3, 1
How can I make it that the column are also unique?
Absent a clear mathematical theory, I distrust anything other than a somewhat hit-and-miss approach. In particular, backtracking approaches can introduce a subtle bias:
from random import shuffle
def isLatin(square):
#assumes that square is an nxn list
#where each row is a permutation of 1..n
n = len(square[0])
return all(len(set(col)) == n for col in zip(*square))
def randSquare(n):
row = [i for i in range(1,1+n)]
square = []
for i in range(n):
shuffle(row)
square.append(row[:])
return square
def randLatin(n):
#uses a hit and miss approach
while True:
square = randSquare(n)
if isLatin(square): return square
Typical output:
>>> s = randLatin(4)
>>> for r in s: print(r)
[4, 1, 3, 2]
[2, 3, 4, 1]
[1, 4, 2, 3]
[3, 2, 1, 4]
Totally random then:
def gen_matrix():
first_row = random.sample(range(1, 5), 4)
tmp = first_row + first_row
rows = []
for i in range(4):
rows.append(tmp[i:i+4])
return random.sample(rows, 4)
Create a list of all the elements, and as will filling the line, remove the used element.
import random
def fill_line(length):
my_list = list(range(length))
to_return = []
for i in range(length):
x = random.choice(my_list)
to_return.append(x)
my_list.remove(x)
return to_return
x = [fill_line(4)
for i in range(4)]
print(x)
Probably the simplest way is to create a valid matrix, and then shuffle the rows, and then shuffle the columns:
import random
def random_square(U):
U = list(U)
rows = [U[i:] + U[:i] for i in range(len(U))]
random.shuffle(rows)
rows_t = [list(i) for i in zip(*rows)]
random.shuffle(rows_t)
return rows_t
Usage:
>>> random_square(range(1, 1+4))
[[2, 3, 4, 1], [4, 1, 2, 3], [3, 4, 1, 2], [1, 2, 3, 4]]
This should be able to create any valid matrix with equal probability. After doing some reading it seems that this still has bias, although I don't fully comprehend why yet.
I would build a random latin square by 1) start with a single random permutation, 2) populate the rows with rotations 3) shuffle the rows 4) transpose the square 5) shuffle the rows again:
from collections import deque
from random import shuffle
def random_latin_square(elements):
elements = list(elements)
shuffle(elements)
square = []
for i in range(len(elements)):
square.append(list(elements))
elements = elements[1:] + [elements[0]]
shuffle(square)
square[:] = zip(*square)
shuffle(square)
return square
if __name__ == '__main__':
from pprint import pprint
square = random_latin_square('ABCD')
pprint(square)
Related
I am trying to make a function that counts the number of cycles within a permutated list.
I do sometimes get the right answer when running the code, but most times I receive an error message - and I am unable to figure out why.
My code is as follows:
def count_cycles(n):
cycle_count = 0
copy_list = []
for element in n:
copy_list.append(element)
while len(copy_list) != 0:
ran_num = random.choice(copy_list)
while True:
if n[ran_num] == ran_num:
cycle_count = circle_count + 1
if int(ran_num) in copy_list:
copy_list.remove(ran_num)
break
else:
n.insert(ran_num, ran_num)
print(n, ran_num, copy_list)
ran_num = n[ran_num + 1]
print(ran_num)
copy_list.remove(ran_num)
n.remove(ran_num)
continue
return print(cycle_count, n)
What I use is that I test with this permutated list with 3 cycles [2, 6, 0, 3, 1, 4, 5].
Picture of output from a correct and incorrect run
I used print(n, ran_num, copy_list) to assess the output as per the picture.
Here is one possibility:
p = [2, 6, 0, 3, 1, 4, 5]
cycles = set()
elts = set(range(len(p)))
while elts:
cycle = []
x0 = elts.pop()
cycle.append(x0)
x = p[x0]
while x != x0:
cycle.append(x)
x = p[x]
elts -= set(cycle)
cycles.add(tuple(cycle))
print(cycles)
It gives:
{(0, 2), (1, 6, 5, 4), (3,)}
Then to get the number of cycles you can use len(cycles).
In addition to the existing answer, sympy provides some functionality to work with permutations. In this case, you could use the following:
from sympy.combinatorics import Permutation
p = Permutation([2, 6, 0, 3, 1, 4, 5])
num_cycles = p.cycles # 3
Given a list of data, I'm trying to create a new list in which the value at position i is the length of the longest run starting from position i in the original list. For instance, given
x_list = [1, 1, 2, 3, 3, 3]
Should return:
run_list = [2, 1, 1, 3, 2, 1]
My solution:
freq_list = []
current = x_list[0]
count = 0
for num in x_list:
if num == current:
count += 1
else:
freq_list.append((current,count))
current = num
count = 1
freq_list.append((current,count))
run_list = []
for i in freq_list:
z = i[1]
while z > 0:
run_list.append(z)
z -= 1
Firstly I create a list freq_list of tuples, where every tuple's first element is the element from x_list, and where the second element is the number of the total run.
In this case:
freq_list = [(1, 2), (2, 1), (3, 3)]
Having this, I create a new list and append appropriate values.
However, I was wondering if there is a shorter way/another way to do this?
Here's a simple solution that iterates over the list backwards and increments a counter each time a number is repeated:
last_num = None
result = []
for num in reversed(x_list):
if num != last_num:
# if the number changed, reset the counter to 1
counter = 1
last_num = num
else:
# if the number is the same, increment the counter
counter += 1
result.append(counter)
# reverse the result
result = list(reversed(result))
Result:
[2, 1, 1, 3, 2, 1]
This is possible using itertools:
from itertools import groupby, chain
x_list = [1, 1, 2, 3, 3, 3]
gen = (range(len(list(j)), 0, -1) for _, j in groupby(x_list))
res = list(chain.from_iterable(gen))
Result
[2, 1, 1, 3, 2, 1]
Explanation
First use itertools.groupby to group identical items in your list.
For each item in your groupby, create a range object which counts backwards from the length of the number of consecutive items to 1.
Turn this all into a generator to avoid building a list of lists.
Use itertools.chain to chain the ranges from the generator.
Performance note
Performance will be inferior to #Aran-Fey's solution. Although itertools.groupby is O(n), it makes heavy use of expensive __next__ calls. These do not scale as well as iteration in simple for loops. See itertools docs for groupby pseudo-code.
If performance is your main concern, stick with the for loop.
You are performing a reverse cumulative count on contiguous groups. We can create a Numpy cumulative count function with
import numpy as np
def cumcount(a):
a = np.asarray(a)
b = np.append(False, a[:-1] != a[1:])
c = b.cumsum()
r = np.arange(len(a))
return r - np.append(0, np.flatnonzero(b))[c] + 1
and then generate our result with
a = np.array(x_list)
cumcount(a[::-1])[::-1]
array([2, 1, 1, 3, 2, 1])
I would use a generator for this kind of task because it avoids building the resulting list incrementally and can be used lazily if one wanted:
def gen(iterable): # you have to think about a better name :-)
iterable = iter(iterable)
# Get the first element, in case that fails
# we can stop right now.
try:
last_seen = next(iterable)
except StopIteration:
return
count = 1
# Go through the remaining items
for item in iterable:
if item == last_seen:
count += 1
else:
# The consecutive run finished, return the
# desired values for the run and then reset
# counter and the new item for the next run.
yield from range(count, 0, -1)
count = 1
last_seen = item
# Return the result for the last run
yield from range(count, 0, -1)
This will also work if the input cannot be reversed (certain generators/iterators cannot be reversed):
>>> x_list = (i for i in range(10)) # it's a generator despite the variable name :-)
>>> ... arans solution ...
TypeError: 'generator' object is not reversible
>>> list(gen((i for i in range(10))))
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
And it works for your input:
>>> x_list = [1, 1, 2, 3, 3, 3]
>>> list(gen(x_list))
[2, 1, 1, 3, 2, 1]
This can actually be made simpler by using itertools.groupby:
import itertools
def gen(iterable):
for _, group in itertools.groupby(iterable):
length = sum(1 for _ in group) # or len(list(group))
yield from range(length, 0, -1)
>>> x_list = [1, 1, 2, 3, 3, 3]
>>> list(gen(x_list))
[2, 1, 1, 3, 2, 1]
I also did some benchmarks and according to these Aran-Feys solution is the fastest except for long lists where piRSquareds solution wins:
This was my benchmarking setup if you want to confirm the results:
from itertools import groupby, chain
import numpy as np
def gen1(iterable):
iterable = iter(iterable)
try:
last_seen = next(iterable)
except StopIteration:
return
count = 1
for item in iterable:
if item == last_seen:
count += 1
else:
yield from range(count, 0, -1)
count = 1
last_seen = item
yield from range(count, 0, -1)
def gen2(iterable):
for _, group in groupby(iterable):
length = sum(1 for _ in group)
yield from range(length, 0, -1)
def mseifert1(iterable):
return list(gen1(iterable))
def mseifert2(iterable):
return list(gen2(iterable))
def aran(x_list):
last_num = None
result = []
for num in reversed(x_list):
if num != last_num:
counter = 1
last_num = num
else:
counter += 1
result.append(counter)
return list(reversed(result))
def jpp(x_list):
gen = (range(len(list(j)), 0, -1) for _, j in groupby(x_list))
res = list(chain.from_iterable(gen))
return res
def cumcount(a):
a = np.asarray(a)
b = np.append(False, a[:-1] != a[1:])
c = b.cumsum()
r = np.arange(len(a))
return r - np.append(0, np.flatnonzero(b))[c] + 1
def pirsquared(x_list):
a = np.array(x_list)
return cumcount(a[::-1])[::-1]
from simple_benchmark import benchmark
import random
funcs = [mseifert1, mseifert2, aran, jpp, pirsquared]
args = {2**i: [random.randint(0, 5) for _ in range(2**i)] for i in range(1, 20)}
bench = benchmark(funcs, args, "list size")
%matplotlib notebook
bench.plot()
Python 3.6.5, NumPy 1.14
Here's a simple iterative approach to achieve it using collections.Counter:
from collections import Counter
x_list = [1, 1, 2, 3, 3, 3]
x_counter, run_list = Counter(x_list), []
for x in x_list:
run_list.append(x_counter[x])
x_counter[x] -= 1
which will return you run_list as:
[2, 1, 1, 3, 2, 1]
As an alternative, here's one-liner to achieve this using list comprehension with enumerate but it is not performance efficient due to iterative usage of list.index(..):
>>> [x_list[i:].count(x) for i, x in enumerate(x_list)]
[2, 1, 1, 3, 2, 1]
You can count the consecutive equal items and then add a countdown from count-of-items to 1 to the result:
def runs(p):
old = p[0]
n = 0
q = []
for x in p:
if x == old:
n += 1
else:
q.extend(range(n, 0, -1))
n = 1
old = x
q.extend(range(n, 0, -1))
return q
(A couple of minutes later) Oh, that's the same as MSeifert's code but without the iterable aspect. This version seems to be almost as fast as the method shown by Aran-Fey.
I am trying to write a for loop such that it only contains numbers between 1 and 7 (both inclusive) and the numbers get stored in a list.
The length of the list should be 8629.
This is the code which i try to run. I get this error: SyntaxError: invalid syntax
random_list = []
for i in range(0,8628):
x = i % 7
random_list[i].append(x+1)
while i == i + 1
print(random_list)
if your requirement is :
The length of the list should be 8629 and write a for loop such that it only contains numbers between 1 and 7. shouldn't it be like this?
from random import *
random_list = []
while len(random_list) < 5:
random_list.append(randint(1, 7))
print(random_list)
random_list = [i % 7 + 1 for i in range(8628)]
print(random_list)
The list will be :[1,2,3,4,5,6,7,...........]
This will give random integers in the range you specified:
Code
import random
random_list = [random.randint(1, 7) for _ in range(8629)]
Test
random_list[:10]
# [3, 4, 4, 5, 3, 1, 3, 7, 7, 6]
min(random_list), max(random_list)
# (1, 7)
len(random_list)
# 8629
For a cycle of consecutive numbers, try itertools:
Code
import itertools as it
cycled_list = list(it.islice(it.cycle(range(1, 8)), 8629))
Test
cycled_list[:10]
# [1, 2, 3, 4, 5, 6, 7, 1, 2, 3]
min(cycled_list), max(cycled_list)
# (1, 7)
len(cycled_list)
# 8629
See #Ming Chu's post for list compression variant.
Q: A run is a sequence of adjacent repeated values. Given a list, write a function to
determine the length of the longest run. For example, for the sequence [1, 2, 5, 5, 3, 1, 2, 4, 3, 2, 2, 2, 2, 3, 6, 5, 5, 6, 3, 1], the longest run is 4.
I am having trouble with this, I've written a code that finds the longest run consist of the number '2' but have yet to get the length of the run which is 4.
Here is my code so far (i've commented out a part that i was working on but don't pay attention to it):
# longestrun.py
# A function to determine the length of the longest run
# A run is a sequence of adjacent repeated values.
def longestrun(myList):
result = None
prev = None
size = 0
max_size = 0
for i in myList:
if i == prev:
size += 1
if size > max_size:
result = i
max_size = size
else:
size = 0
prev = i
return result
def main():
print("This program finds the length of the longest run within a given list.")
print("A run is a sequence of adjacent repeated values.")
myString = input("Please enter a list of objects (numbers, words, etc.) separated by
commas: ")
myList = myString.split(',')
longest_run = longestrun(myList)
print(">>>", longest_run, "<<<")
main()
Help please!!! :(((
You can do this in one line using itertools.groupby:
import itertools
max(sum(1 for _ in l) for n, l in itertools.groupby(lst))
This should work if you do not want to use itertools and imports.
a=[1, 2, 5, 5, 3, 1, 2, 4, 3, 2, 2, 2, 2, 3, 6, 5, 5, 6, 3, 1]
def longestrun(myList):
result = None
prev = None
size = 0
max_size = 0
for i in myList:
if i == prev:
print (i)
size += 1
if size > max_size:
print ('******* '+ str(max_size))
max_size = size
else:
size = 0
prev = i
print (max_size+1)
return max_size+1
longestrun(a)
Just another way of doing it:
def longestrun(myList):
sett = set()
size = 1
for ind, elm in enumerate(myList):
if ind > 0:
if elm == myList[ind - 1]:
size += 1
else:
sett.update([size])
size = 1
sett.update([size])
return max(sett)
myList = [1, 2, 5, 5, 3, 1, 2, 4, 3, 2, 2, 2, 2, 3, 6, 5, 5, 6, 3, 1]
print longestrun(myList)
def getSublists(L,n):
outL=[]
for i in range(0,len(L)-n+1):
outL.append(L[i:i+n])
return outL
def longestRun(L):
for n in range(len(L), 0, -1):
temp=getSublists(L,n)
for subL in temp:
if subL==sorted(subL):
return len(subL)
def longestrun(myList):
size = 1
max_size = 0
for i in range(len(myList)-1):
if myList[i+1] = myList[i]:
size += 1
else:
size = 1
if max_size<size:
max_size = size
return size
Remove the .split() from myList in main() and you're good to go with this.
As an update to David Robinson's answer, it is now (Python 3.4) possible to return 0 on an empty sequence (instead of raising ValueError):
import itertools
max((sum(1 for _ in l) for n, l in itertools.groupby(lst)), default=0)
I have a sparse matrix. I need to sort this matrix row-by-row and create another [sparse] matrix.
Code may explain it better:
# for `rand` function, you need newer version of scipy.
from scipy.sparse import *
m = rand(6,6, density=0.6)
d = m.getrow(0)
print d
Output1
(0, 5) 0.874881629788
(0, 4) 0.352559852239
(0, 2) 0.504791645463
(0, 1) 0.885898140175
I have this m matrix. I want to create a new matrix with sorted version of m. The new matrix
contains 0'th row like this.
new_d = new_m.getrow(0)
print new_d
Output2
(0, 1) 0.885898140175
(0, 5) 0.874881629788
(0, 2) 0.504791645463
(0, 4) 0.352559852239
So I can obtain which column is bigger etc:
print new_d.indices
Output3
array([1, 5, 2, 4])
Of course every row should be sorted like above independently.
I have one solution for this problem but it is not elegant.
If you're willing to ignore the zero-value elements of the matrix, the code below should work. It is also much faster than implementations that use the getrow method, which is rather slow.
from itertools import izip
def sort_coo(m):
tuples = izip(m.row, m.col, m.data)
return sorted(tuples, key=lambda x: (x[0], x[2]))
For example:
>>> from numpy.random import rand
>>> from scipy.sparse import coo_matrix
>>>
>>> d = rand(10, 20)
>>> d[d > .05] = 0
>>> s = coo_matrix(d)
>>> sort_coo(s)
[(0, 2, 0.004775589084940246),
(3, 12, 0.029941507166614145),
(5, 19, 0.015030386789436245),
(7, 0, 0.0075044957259399192),
(8, 3, 0.047994403933129481),
(8, 5, 0.049401058471327031),
(9, 15, 0.040011608000125043),
(9, 8, 0.048541825332137023)]
Depending on your needs you may want to tweak the sort keys in the lambda or further process the output. If you want everything in a row indexed dictionary you could do:
from collections import defaultdict
sorted_rows = defaultdict(list)
for i in sort_coo(m):
sorted_rows[i[0]].append((i[1], i[2]))
My bad solution is like this:
from scipy.sparse import coo_matrix
import numpy as np
a = []
for i in xrange(m.shape[0]): # assume m is square matrix.
d = m.getrow(i)
n = len(d.indices)
s = zip([i]*n, d.indices, d.data)
sorted_s = sorted(s, key=lambda v: v[2], reverse=True)
a.extend(sorted_s)
a = np.array(a)
new_m = coo_matrix((a[:,2], (a[:,0], a[:,1])), m.shape)
There can be some simple mistakes above because I have not checked it yet. But the idea is intuitive, I guess. Is there any good solution?
Edit
This new matrix creation may be useless because if you call getrow method then the order is broken again.
Only coo_matrix.col keeps the order.
Another Solution
This one is not exact solution but it may be helpful:
def sortSparseMatrix(m, rev=True, only_indices=True):
""" Sort a sparse matrix and return column index dictionary
"""
col_dict = dict()
for i in xrange(m.shape[0]): # assume m is square matrix.
d = m.getrow(i)
s = zip(d.indices, d.data)
sorted_s = sorted(s, key=lambda v: v[1], reverse=True)
if only_indices:
col_dict[i] = [element[0] for element in sorted_s]
else:
col_dict[i] = sorted_s
return col_dict
>>> print sortSparseMatrix(m)
{0: [5, 1, 0],
1: [1, 3, 5],
2: [1, 2, 3, 4],
3: [1, 5, 2, 4],
4: [0, 3, 5, 1],
5: [3, 4, 2]}