I want to implement the C++ logic below in Python.
struct hash_string
{
    hash_string() {}

    uint32_t operator ()(const std::string &text) const
    {
        static const uint32_t primes[16] =
        {
            0x01EE5DB9, 0x491408C3, 0x0465FB69, 0x421F0141,
            0x2E7D036B, 0x2D41C7B9, 0x58C0EF0D, 0x7B15A53B,
            0x7C9D3761, 0x5ABB9B0B, 0x24109367, 0x5A5B741F,
            0x6B9F12E9, 0x71BA7809, 0x081F69CD, 0x4D9B740B,
        };
        uint32_t sum = 0;
        for (size_t i = 0; i != text.size(); i++) {
            sum += primes[i & 15] * (unsigned char)text[i];
        }
        return sum;
    }
};
The Python version is below. It is not complete yet, since I haven't found a way to convert the text to unsigned chars, so please help!
# -*- coding: utf-8 -*-
text = u'连衣裙女韩范'
primes = [0x01EE5DB9, 0x491408C3, 0x0465FB69, 0x421F0141,
          0x2E7D036B, 0x2D41C7B9, 0x58C0EF0D, 0x7B15A53B,
          0x7C9D3761, 0x5ABB9B0B, 0x24109367, 0x5A5B741F,
          0x6B9F12E9, 0x71BA7809, 0x081F69CD, 0x4D9B740B]
# *text[i] does not work (of course), but how to mimic the logic above?
rand = [primes[i & 15] * text[i] for i in range(len(text))]
print rand
sum_agg = sum(rand)
print sum_agg
Take text = u'连衣裙女韩范' for example: the C++ version returns 18 for text.size() and the sum is 2422173716, while in Python I don't know how to make the length come out as 18.
Matching the text size is essential, as a starting point at least.
Because you are using unicode, for an exact reproduction you will need to turn the text into a series of bytes (chars in C++).
bytes_ = text.encode("utf8")
# when iterated over this will yield ints (in python 3)
# or single character strings in python 2
You should use more Pythonic idioms for iterating over a pair of sequences:
pairs = zip(bytes_, primes)
What if bytes_ is longer than primes? Use itertools.cycle
from itertools import cycle
pairs = zip(bytes_, cycle(primes))
All together:
from itertools import cycle
text = u'连衣裙女韩范'
primes = [0x01EE5DB9, 0x491408C3, 0x0465FB69, 0x421F0141,
0x2E7D036B, 0x2D41C7B9, 0x58C0EF0D, 0x7B15A53B,
0x7C9D3761, 0x5ABB9B0B, 0x24109367, 0x5A5B741F,
0x6B9F12E9, 0x71BA7809, 0x081F69CD, 0x4D9B740B]
# if python 3
rand = [byte * prime for byte, prime in zip(text.encode("utf8"), cycle(primes))]
# else if python 2 (use ord to convert single character string to int)
rand = [ord(byte) * prime for byte, prime in zip(text.encode("utf8"), cycle(primes))]
hash_ = sum(rand)
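One wrinkle if you need bit-exact parity: the C++ operator returns uint32_t, so the sum wraps modulo 2**32, while Python's sum grows without bound (18 bytes times 31-bit primes easily exceeds 2**32). A minimal Python 3 sketch with the wraparound applied (the function name is mine):
from itertools import cycle

PRIMES = [0x01EE5DB9, 0x491408C3, 0x0465FB69, 0x421F0141,
          0x2E7D036B, 0x2D41C7B9, 0x58C0EF0D, 0x7B15A53B,
          0x7C9D3761, 0x5ABB9B0B, 0x24109367, 0x5A5B741F,
          0x6B9F12E9, 0x71BA7809, 0x081F69CD, 0x4D9B740B]

def hash_string(text):
    # UTF-8 bytes match the (unsigned char) cast in the C++ version;
    # iterating a bytes object in Python 3 yields ints directly.
    data = text.encode("utf8")
    total = sum(b * p for b, p in zip(data, cycle(PRIMES)))
    # uint32_t arithmetic wraps modulo 2**32; mask to reproduce it.
    return total & 0xFFFFFFFF

print(hash_string(u'连衣裙女韩范'))  # should print 2422173716, matching the C++ sum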
Can anyone tell me why these two programs have a huge difference in run time? I am simply multiplying two large complex arrays and comparing the times in Python (numpy) and C++. I am using the -O3 flag with g++ to compile the C++ code. I find that the huge difference appears only when I use complex floats in C++; numpy is more than 20 times faster.
Python code:
import numpy as np
import time

if __name__ == "__main__":
    # check the data type is the same
    a = np.zeros((1), dtype=np.complex128)
    a[0] = np.complex(3.4e38, 3.5e38)
    print(a)
    b = np.zeros((1), dtype=np.complex64)
    b[0] = np.complex(3.4e38, 3.5e38)
    print(b)  # imaginary part is infinity

    length = 5000
    A = np.ones((length), dtype=np.complex64) * np.complex(1, 1)
    B = np.ones((length), dtype=np.complex64) * np.complex(1, 0)

    num_iterations = 1000000
    time1 = time.time()
    for _ in range(num_iterations):
        A *= B
    time2 = time.time()
    duration = ((time2 - time1) * 1e6) / num_iterations
    print(duration)
C++ code:
#include <iostream>
#include <complex>
#include <chrono>

using namespace std::chrono;
using namespace std;

int main()
{
    // check the data type is the same
    complex<double> a = complex<double>(3.4e38, 3.5e38);
    cout << a << endl;
    complex<float> b = complex<float>(3.4e38, 3.5e38);
    cout << b << endl; // imaginary part is infinity

    const int length = 5000;
    static complex<float> A[length];
    static complex<float> B[length];
    for (int i = 0; i < length; i++) {
        A[i] = complex<float>(1, 1);
        B[i] = complex<float>(1, 0);
    }

    int num_iterations = 1000000;
    auto time1 = high_resolution_clock::now();
    for (int k = 0; k < num_iterations; k++)
        for (int i = 0; i < length; i++)
            A[i] *= B[i];
    auto time2 = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(time2 - time1);
    cout << "average time:" << duration.count() / num_iterations << endl;
}
The C++ compiler is doing some extra checking gymnastics for you in order to properly handle NaNs and other such "standard" behavior.
If you add the -ffast-math optimization flag, you'll get more sane speed, but less "standard" behavior. e.g. complex<float>(inf,0)*complex<float>(inf,0) won't be evaluated as complex<float>(inf,0). Do you really care?
numpy is doing what makes sense, not hindered by a narrow reading of the C++ standard.
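numpy appears to take the naive (ac - bd) + (ad + bc)j route directly; a small check consistent with that (my own example, not from the question):
import numpy as np

a = np.complex64(complex(np.inf, 0.0))
# With the naive formula the imaginary part is inf*0 + 0*inf = nan,
# where a C99 Annex G style multiply would recover a clean infinity.
print(a * a)  # expect (inf+nanj) from numpy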
e.g. until very recent g++ versions, the latter of the following functions is much faster unless -ffast-math is used.
complex<float> mul1(complex<float> a, complex<float> b)
{
    return a * b;
}

complex<float> mul2(complex<float> a, complex<float> b)
{
    float * fa = reinterpret_cast<float*>(&a);
    const float * fb = reinterpret_cast<float*>(&b);
    float cr = fa[0] * fb[0] - fa[1] * fb[1];
    float ci = fa[0] * fb[1] + fa[1] * fb[0];
    return complex<float>(cr, ci);
}
You can experiment with this on https://godbolt.org/z/kXPgCh to see the assembly output and how the former function defaults to calling __mulsc3.
P.S. Ready for another wave of anger at what the C++ standard says about std::complex<T>? Can you guess how std::norm must be implemented by default? Play along. Follow the link and spend ten seconds thinking about it.
Spoiler: it probably is using a sqrt then squaring it.
I'm experiencing a slightly bizarre performance discrepancy between two equivalent programs, and I cannot find a real reason for the difference.
I'm solving Project Euler Problem 46. Both solutions (one in Python and one in C++) get the right answer. However, the Python solution seems to be more performant, which contradicts what I was expecting.
Do not worry about whether the actual algorithm is optimal; all I care about is that these are two equivalent programs. I'm sure there is a more optimal algorithm.
Python Solution
import math
import time

UPPER_LIMIT = 1000000
HIT_COUNT = 0

def sieveOfErato(number):
    sieve = [True] * number
    for i in xrange(2, int(math.ceil(math.sqrt(number)))):
        if sieve[i]:
            for j in xrange(i**2, number, i):
                sieve[j] = False
    primes = [i for i, val in enumerate(sieve) if i > 1 and val == True]
    return set(primes)

def isSquare(number):
    ans = math.sqrt(number).is_integer()
    return ans

def isAppropriateGolbachNumber(number, possiblePrimes):
    global HIT_COUNT
    for possiblePrime in possiblePrimes:
        if possiblePrime < number:
            HIT_COUNT += 1
            difference = number - possiblePrime
            if isSquare(difference / 2):
                return True
    return False

if __name__ == '__main__':
    start = time.time()
    primes = sieveOfErato(UPPER_LIMIT)
    answer = -1
    for odd in xrange(3, UPPER_LIMIT, 2):
        if odd not in primes:
            if not isAppropriateGolbachNumber(odd, primes):
                answer = odd
                break
    print('Hit Count: {}'.format(HIT_COUNT))
    print('Loop Elapsed Time: {}'.format(time.time() - start))
    print('Answer: {}'.format(answer))
C++ Solution
#include <iostream>
#include <unordered_set>
#include <vector>
#include <math.h>
#include <cstring> // for memset
#include <cstdio>
#include <ctime>

int UPPER_LIMIT = 1000000;

std::unordered_set<int> sieveOfErato(int number)
{
    std::unordered_set<int> primes;
    bool sieve[number+1];
    memset(sieve, true, sizeof(sieve));
    for (int i = 2; i * i <= number; i++)
    {
        if (sieve[i] == true)
        {
            for (int j = i*i; j < number; j += i)
            {
                sieve[j] = false;
            }
        }
    }
    for (int i = 2; i < number; i++)
    {
        if (sieve[i] == true)
        {
            primes.insert(i);
        }
    }
    return primes;
}

bool isPerfectSquare(const int& number)
{
    int root(round(sqrt(number)));
    return number == root * root;
}

int hitCount = 0;

bool isAppropriateGoldbachNumber(const int& number, const std::unordered_set<int>& primes)
{
    int difference;
    for (const auto& prime : primes)
    {
        if (prime < number)
        {
            hitCount++;
            difference = (number - prime) / 2;
            if (isPerfectSquare(difference))
            {
                return true;
            }
        }
    }
    return false;
}

int main(int argc, char** argv)
{
    std::clock_t start;
    double duration;
    start = std::clock();

    std::unordered_set<int> primes = sieveOfErato(UPPER_LIMIT);
    int answer = -1;
    for (int odd = 3; odd < UPPER_LIMIT; odd += 2)
    {
        if (primes.find(odd) == primes.end())
        {
            if (!isAppropriateGoldbachNumber(odd, primes))
            {
                answer = odd;
                break;
            }
        }
    }

    duration = (std::clock() - start) / (double) CLOCKS_PER_SEC;
    std::cout << "Hit Count: " << hitCount << std::endl;
    std::cout << std::fixed << "Loop Elapsed Time: " << duration << std::endl;
    std::cout << "Answer: " << answer << std::endl;
}
I'm compiling my cpp code by g++ -std=c++14 file.cpp and then executing with just ./a.out.
On a couple of test runs just using the time command from the command line, I get:
Python
Hit Count: 128854
Loop Elapsed Time: 0.393740177155
Answer: 5777
real 0m0.525s
user 0m0.416s
sys 0m0.049s
C++
Hit Count: 90622
Loop Elapsed Time: 0.993970
Answer: 5777
real 0m1.027s
user 0m0.999s
sys 0m0.013s
Why would there be more hits in the Python version while it still returns more quickly? I would think that more hits means more iterations, which means slower (and it's in Python). I'm guessing there's just a performance blunder in my C++ code, but I haven't found it yet. Any ideas?
I concur with Kunal Puri's answer that a better algorithm and data structure can improve performance, but it does not answer the core question: why does the same algorithm, with the same data structure, run faster in Python?
It all boils down to the difference between std::unordered_set and Python's set. Note that the same C++ code with std::set runs faster than the Python alternative, and if optimization is enabled (with -O2) then the C++ code with std::set runs more than 10 times faster than Python.
There are several works showing why std::unordered_set is broken performance-wise. For example, you can watch C++Now 2018: You Can Do Better than std::unordered_map: New Improvements to Hash Table Performance. It seems that Python's set does not suffer from these design flaws.
One of the things that makes std::unordered_set so poor is the large number of indirections needed simply to reach an element. For example, during iteration, the iterator points to a bucket before the current bucket. Another thing to consider is the poorer cache locality. Python's set seems to prefer to retain the original order of elements, while GCC's std::unordered_set tends to create a random order. This is the cause of the difference in HIT_COUNT between C++ and Python. Once the code uses std::set, the HIT_COUNT becomes the same for C++ and Python. Retaining the original order during iteration tends to improve the cache locality of nodes in a new process, since they are iterated in the same order as they were allocated (and two adjacent allocations in a fresh process have a higher chance of landing at consecutive memory addresses).
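As a quick illustration of the ordering point (this leans on CPython implementation details, so treat it as a demonstration rather than a guarantee): small ints hash to themselves, so a collision-free set of them iterates in value order.
# CPython hashes small ints to themselves, so with no bucket collisions
# a set of small ints is iterated in value order (an implementation detail!).
print(list({5, 3, 1, 2, 4}))  # [1, 2, 3, 4, 5] on CPython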
Apart from enabling compiler optimization, as DYZ suggested, I have some more observations regarding optimization.
1) Use std::vector instead of std::unordered_set.
In your code, you are doing this:
std::unordered_set<int> sieveOfErato(int number)
{
    std::unordered_set<int> primes;
    bool sieve[number+1];
    memset(sieve, true, sizeof(sieve));
    for (int i = 2; i * i <= number; i++)
    {
        if (sieve[i] == true)
        {
            for (int j = i*i; j < number; j += i)
            {
                sieve[j] = false;
            }
        }
    }
    for (int i = 2; i < number; i++)
    {
        if (sieve[i] == true)
        {
            primes.insert(i);
        }
    }
    return primes;
}
I don't see any reason to use std::unordered_set here. Instead, you could do this:
std::vector<int> sieveOfErato(int number)
{
    bool sieve[number+1];
    memset(sieve, true, sizeof(sieve));
    for (int i = 2; i * i <= number; i++)
    {
        if (sieve[i] == true)
        {
            for (int j = i*i; j < number; j += i)
            {
                sieve[j] = false;
            }
        }
    }
    // Count the primes in a full pass so the vector can be sized exactly
    // (counting inside the loop above would only see i up to sqrt(number)).
    int numPrimes = 0;
    for (int i = 2; i < number; i++)
    {
        if (sieve[i] == true)
        {
            numPrimes++;
        }
    }
    std::vector<int> primes(numPrimes);
    int j = 0;
    for (int i = 2; i < number; i++)
    {
        if (sieve[i] == true)
        {
            primes[j++] = i;
        }
    }
    return primes;
}
As far as find() is concerned, you may do this:
int j = 0;
for (int odd = 3; odd < UPPER_LIMIT; odd += 2)
{
    while (j < primes.size() && primes[j] < odd) {
        j++;
    }
    // Guard against running off the end of primes before dereferencing.
    if (j == primes.size() || primes[j] != odd)
    {
        if (!isAppropriateGoldbachNumber(odd, primes))
        {
            answer = odd;
            break;
        }
    }
}
2) Pre-compute the perfect squares in a std::vector beforehand instead of calling sqrt every time.
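To sketch that second idea in Python against the isSquare from the Python solution (using a set rather than a vector, since membership testing is the operation needed here):
UPPER_LIMIT = 1000000

# Precompute every perfect square below the limit once.
squares = set()
k = 0
while k * k < UPPER_LIMIT:
    squares.add(k * k)
    k += 1

def isSquare(number):
    # A hash lookup replaces the repeated math.sqrt call.
    return number in squares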
Is there an efficient way to count the number of 1s in the binary representation of a number in O(1), given enough memory to play with? This is an interview question I found on an online forum, but it had no answer. Can somebody suggest something? I can't think of a way to do it in O(1) time.
That's the Hamming weight problem, a.k.a. population count. The link mentions efficient implementations. Quoting:
With unlimited memory, we could simply create a large lookup table of the Hamming weight of every 64 bit integer
I've got a solution that counts the bits in O(Number of 1's) time:
bitcount(n):
    count = 0
    while n > 0:
        count = count + 1
        n = n & (n - 1)
    return count

In the worst case (when the number is 2^n - 1, i.e. all 1's in binary) it will check every bit.
Edit:
Just found a very nice constant-time, constant memory algorithm for bitcount. Here it is, written in C:
int BitCount(unsigned int u)
{
    unsigned int uCount;
    uCount = u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111);
    return ((uCount + (uCount >> 3)) & 030707070707) % 63;
}
You can find proof of its correctness here.
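If you'd rather convince yourself empirically than read the proof, here is a quick brute-force check (my sketch in Python, where the 0o... literals are the same octal constants as above, and unbounded ints remove any overflow worry):
def bitcount_octal(u):
    # Same two-step reduction as the C version above.
    c = u - ((u >> 1) & 0o33333333333) - ((u >> 2) & 0o11111111111)
    return ((c + (c >> 3)) & 0o30707070707) % 63

# Exhaustively compare against a reference count for all 16-bit values.
assert all(bitcount_octal(i) == bin(i).count("1") for i in range(1 << 16))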
Please note the fact that n & (n-1) always eliminates the least significant 1.
Hence we can write the code for calculating the number of 1's as follows:
int count = 0;
while (n != 0) {
    n = n & (n - 1);
    count++;
}
cout << "Number of 1's in n is: " << count;
The complexity of the program would be the number of 1's in n (which is always less than 32).
I saw the following solution from another website:
int count_one(int x) {
    x = (x & (0x55555555)) + ((x >> 1) & (0x55555555));
    x = (x & (0x33333333)) + ((x >> 2) & (0x33333333));
    x = (x & (0x0f0f0f0f)) + ((x >> 4) & (0x0f0f0f0f));
    x = (x & (0x00ff00ff)) + ((x >> 8) & (0x00ff00ff));
    x = (x & (0x0000ffff)) + ((x >> 16) & (0x0000ffff));
    return x;
}
public static void main(String[] args) {
    int a = 3;
    int orig = a;
    int count = 0;
    while (a > 0) {
        a = a >> 1 << 1;
        if (orig - a == 1)
            count++;
        orig = a >> 1;
        a = orig;
    }
    System.out.println("Number of 1s are: " + count);
}
countBits(x) {
    y = 0;
    while (x) {
        y += x & 1;
        x = x >> 1;
    }
    return y;
}

That's it.
Below are two simple examples (in C++) among the many ways you can do this.
We can simply count set bits (1's) using __builtin_popcount():
int numOfOnes(int x) {
    return __builtin_popcount(x);
}
Or loop through all bits in an integer, check whether a bit is set, and if it is, increment the count variable:
int hammingDistance(int x) {
    int count = 0;
    for (int i = 0; i < 32; i++)
        if (x & (1 << i))
            count++;
    return count;
}
That will be the shortest answer in my SO life: lookup table.
Apparently, I need to explain a bit: "if you have enough memory to play with" means we've got all the memory we need (never mind technical possibility). Now, you don't need to store a lookup table for more than a byte or two. While it'll technically be Ω(log(n)) rather than O(1), just reading the number is Ω(log(n)), so if that's a problem, then the answer is "impossible", which is even shorter.
Which of the two answers they expect from you in an interview, no one knows.
There's yet another trick: while engineers can take a number and talk about Ω(log(n)), where n is the number, computer scientists will say that we actually measure running time as a function of the length of the input, so what engineers call Ω(log(n)) is really Ω(k), where k is the number of bytes. Still, as I said before, just reading the input is Ω(k), so there's no way we can do better than that.
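For concreteness, here is the byte-wide table idea as a small sketch (my own illustration): build a 256-entry table once, and a 32-bit count becomes four lookups.
# Each entry reuses the entry for the value shifted right by one,
# plus the value's lowest bit.
TABLE = [0] * 256
for i in range(1, 256):
    TABLE[i] = TABLE[i >> 1] + (i & 1)

def popcount32(x):
    # One table lookup per byte of a 32-bit value.
    return (TABLE[x & 0xFF] +
            TABLE[(x >> 8) & 0xFF] +
            TABLE[(x >> 16) & 0xFF] +
            TABLE[(x >> 24) & 0xFF])

print(popcount32(0b1111011))  # 6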
Below will work as well.
int nofone(int x) {
    int a = 0;
    while (x != 0) {
        // Test the low bit before shifting, so the least significant
        // bit is not skipped.
        if (x & 1)
            a++;
        x >>= 1;
    }
    return a;
}
The following is a C solution using bit operators:
int numberOfOneBitsInInteger(int input) {
    int numOneBits = 0;
    int currNum = input;
    while (currNum != 0) {
        if ((currNum & 1) == 1) {
            numOneBits++;
        }
        currNum = currNum >> 1;
    }
    return numOneBits;
}
The following is a Java solution using powers of 2:
public static int numOnesInBinary(int n) {
    if (n < 0) return -1;
    int j = 0;
    while (n > Math.pow(2, j)) j++;
    int result = 0;
    for (int i = j; i >= 0; i--) {
        if (n >= Math.pow(2, i)) {
            n = (int) (n - Math.pow(2, i));
            result++;
        }
    }
    return result;
}
The following function takes an int and returns the number of ones in its binary representation, using recursion:
public static int findOnes(int number)
{
    if (number == 0)
    {
        return 0;
    }
    // The lowest bit contributes number % 2; recurse on the remaining bits.
    return (number % 2) + findOnes(number / 2);
}
I came here believing I knew a beautiful solution for this problem. Code in C:
short numberOfOnes(unsigned int d) {
    short count = 0;
    for (; d != 0; d &= (d - 1))
        ++count;
    return count;
}
But after a little research on this topic (reading the other answers :)) I found 5 more efficient algorithms. Love SO!
There is even a CPU instruction designed specifically for this task: popcnt.
(mentioned in this answer)
Description and benchmarking of many algorithms you can find here.
The best way in JavaScript to do this is:
function getBinaryValue(num) {
    return num.toString(2);
}

function checkOnes(binaryValue) {
    return binaryValue.replace(/0/g, "").length;
}
where binaryValue is the binary string, e.g. "1100".
There's only one way I can think of to accomplish this task in O(1)... and that is to 'cheat' and use a physical device (with linear or even parallel programming, I think the limit is O(log(k)), where k represents the number of bytes of the number).
However, you could very easily imagine a physical device that connects each bit to an output line with a 0/1 voltage. Then you could just electronically read off the total voltage on a 'summation' line in O(1). It would be quite easy to make this basic idea more elegant with some basic circuit elements to produce the output in whatever form you want (e.g. a binary-encoded output), but the essential idea is the same and the electronic circuit would produce the correct output state in fixed time.
I imagine there are also quantum computing possibilities, but if we're allowed to do that, I would think a simple electronic circuit is the easier solution.
I have actually done this using a bit of sleight of hand: a single lookup table with 16 entries will suffice, and all you have to do is break the binary rep into nibbles (4-bit tuples). The complexity is in fact O(1), and I wrote a C++ template which was specialized on the size of the integer you wanted (in # bits)… making the count a constant expression instead of undetermined.
FWIW, you can use the fact that (i & -i) returns the least significant one-bit and simply loop, stripping off the LSB each time, until the integer is zero; but that's an old parity trick.
The below method can count the number of 1s in negative numbers as well.
private static int countBits(int number) {
    int result = 0;
    while (number != 0) {
        result += number & 1;
        number = number >>> 1;
    }
    return result;
}
However, a number like -1 is represented in binary as 11111111111111111111111111111111 and so will require a lot of shifting. If you don't want to do so many shifts for small negative numbers, another way could be as follows:
private static int countBits(int number) {
    boolean negFlag = false;
    if (number < 0) {
        negFlag = true;
        number = ~number;
    }
    int result = 0;
    while (number != 0) {
        result += number & 1;
        number = number >> 1;
    }
    return negFlag ? (32 - result) : result;
}
In Python (or any other language with string operations): convert the number to a binary string, split on '0' to drop the zeros, rejoin, and take the length.
len(''.join(bin(122011).split('0'))) - 1  # the -1 discards the 'b' of the '0b' prefix
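One caveat with string tricks like this: Python ints are unbounded, so a negative number has no fixed-width two's-complement form; you have to pick a width and mask first (32 bits below is my assumption):
n = -1
# Treat n as a 32-bit two's-complement value before counting.
print(bin(n & 0xFFFFFFFF).count("1"))  # 32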
Using JavaScript string operations, one can do it as follows:
0b1111011.toString(2).split(/0|(?=.)/).length // returns 6
or
0b1111011.toString(2).replace(/0/g, "").length // returns 6
I had to golf this in ruby and ended up with
l=->x{x.to_s(2).count ?1}
Usage :
l[2**32-1] # returns 32
Obviously not efficient but does the trick :)
Ruby implementation (note: this one finds the longest run of consecutive 1s, rather than the total count):
def find_consecutive_1(n)
  num = n.to_s(2)
  arr = num.split("")
  counter = 0
  max = 0
  arr.each do |x|
    if x.to_i == 1
      counter += 1
    else
      max = counter if counter > max
      counter = 0
    end
    max = counter if counter > max
  end
  max
end

puts find_consecutive_1(439)
Two ways:
/* Method-1 */
int count1s(long num)
{
    int tempCount = 0;
    while (num)
    {
        tempCount += (num & 1); // increment based on the rightmost bit
        num = num >> 1;         // right shift by 1
    }
    return tempCount;
}

/* Method-2 */
int count1s_(int num)
{
    int tempCount = 0;
    std::string strNum = std::bitset<16>(num).to_string(); // string conversion
    cout << "strNum=" << strNum << endl;
    for (int i = 0; i < strNum.size(); i++)
    {
        if ('1' == strNum[i])
        {
            tempCount++;
        }
    }
    return tempCount;
}

/* Method-3 (algorithmically - boost string split could be used):
   1) split the binary string over '1'
   2) count = size of the vector containing the splits, minus 1 */

Usage:
int count = 0;
count = count1s(0b00110011);
cout << "count(0b00110011) = " << count << endl; // 4
count = count1s(0b01110110);
cout << "count(0b01110110) = " << count << endl; // 5
count = count1s(0b00000000);
cout << "count(0b00000000) = " << count << endl; // 0
count = count1s(0b11111111);
cout << "count(0b11111111) = " << count << endl; // 8

count = count1s_(0b1100);
cout << "count(0b1100) = " << count << endl; // 2
count = count1s_(0b11111111);
cout << "count(0b11111111) = " << count << endl; // 8
count = count1s_(0b0);
cout << "count(0b0) = " << count << endl; // 0
count = count1s_(0b1);
cout << "count(0b1) = " << count << endl; // 1
A Python one-liner:
def countOnes(num):
    return bin(num).count('1')
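For completeness, Python 3.10 and newer also ship a built-in method that does exactly this:
print((122011).bit_count())  # same as bin(122011).count('1')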
I am using scipy's weave.inline to perform computationally expensive tasks. I have problems returning a one-dimensional array back into the Python scope. Weave.inline uses a special argument called "return_val" for the purpose of returning values back into the Python scope.
The following example returning an integer value works well:
>>> from scipy.weave import inline
>>> print inline(r'''int N = 10; return_val = N;''')
10
However, the following example, which does compile without any error, does not return the array I would expect:
>>> from scipy.weave import inline
>>> code =\
r'''
int* pairs;
int lenght = 0;
for (int i = 0; i < N; i++) {
    lenght += 1;
    pairs = (int *)malloc(sizeof(int) * lenght);
    pairs[i] = i;
    std::cout << pairs[i] << std::endl;
}
return_val = pairs;
'''
>>> N = 5
>>> R = inline(code,['N'])
>>> print "RETURN_VAL:",R
0
1
2
3
4
RETURN_VAL: 1
I need to reallocate the size of the array "pairs" dynamically which is why I can't pass a numpy.array or python list per se.
All you need to do is use the raw Python C-API calls or, if you're looking for something a bit more convenient, the built-in scipy weave wrappers.
No guarantees about leaks or efficiency, but it should look something a bit like this:
from scipy.weave import inline

code = r'''
py::list ret;
for (int i = 0; i < N; i++) {
    py::list item;
    for (int j = 0; j < i; j++) {
        item.append(j);
    }
    ret.append(item);
}
return_val = ret;
'''
N = 5
R = inline(code, ['N'])
print R
If you absolutely don't know the size of the output array in advance, you must create it in your inline code. I'm pretty sure the array you allocate with malloc will leak, since you have no way of controlling when that memory is freed.
The solution is to create a numpy array, fill it with your function's results and return it.
import scipy.weave

code = r"""
npy_intp dims[1] = {n};
PyObject* out_array = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
double* data = (double*) ((PyArrayObject*) out_array)->data;
for (int i = 0; i < n; ++i) data[i] = i;
return_val = out_array;
Py_XDECREF(out_array);
"""
n = 5
out_array = scipy.weave.inline(code, ["n"])
print "Array:", out_array
I have to write a program that gives all permutations of n numbers {1, 2, 3, ..., n} using backtracking. I managed to do it in C, and it works very well; here is the code:
#include <stdio.h>

int st[25], n = 4;

int valid(int k)
{
    int i;
    for (i = 1; i <= k - 1; i++)
        if (st[k] == st[i])
            return 0;
    return 1;
}

void bktr(int k)
{
    int i;
    if (k == n + 1)
    {
        for (i = 1; i <= n; i++)
            printf("%d ", st[i]);
        printf("\n");
    }
    else
        for (i = 1; i <= n; i++)
        {
            st[k] = i;
            if (valid(k))
                bktr(k + 1);
        }
}

int main()
{
    bktr(1);
    return 0;
}
Now I have to write it in Python. Here is what I did:
st = []
n = 4

def bktr(k):
    if k == n + 1:
        for i in range(1, n):
            print(st[i])
    else:
        for i in range(1, n):
            st[k] = i
            if valid(k):
                bktr(k + 1)

def valid(k):
    for i in range(1, k - 1):
        if st[k] == st[i]:
            return 0
    return 1

bktr(1)
I get this error:
list assignment index out of range
at st[k]==st[i].
Python has a "permutations" function in the itertools module:
import itertools
itertools.permutations([1,2,3])
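For the n = 4 case from the question, that would look like this (works on both Python 2 and 3):
import itertools

for p in itertools.permutations(range(1, 5)):
    print(' '.join(map(str, p)))  # e.g. "1 2 3 4", like the C version's output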
If you need to write the code yourself (for example if this is homework), here is the issue:
Python lists do not have a predetermined size, so you can't just set e.g. the 10th element to 3. You can only change existing elements or add to the end.
Python lists (and C arrays) also start at 0. This means you have to access the first element with st[0], not st[1].
When you start your program, st has a length of 0; this means you cannot assign to st[1], as that position does not exist yet.
If this is confusing, I recommend you use the st.append(element) method instead, which always adds to the end, as in the sketch below.
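For illustration, here is one minimal way to restructure the original code along those lines (my sketch, using append/pop rather than fixed indices):
n = 4
st = []

def valid():
    # The value just appended must not repeat any earlier choice.
    return st[-1] not in st[:-1]

def bktr():
    if len(st) == n:
        print(' '.join(map(str, st)))
        return
    for i in range(1, n + 1):
        st.append(i)
        if valid():
            bktr()
        st.pop()  # undo the choice before trying the next candidate

bktr()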
Once the code is done and works, I recommend you head over to Code Review Stack Exchange, because there are a lot more things that could be improved.