counting sort - negative integers

counting sort - negative integers - python

I'm trying to do a counting_sort on negative numbers, the following code only seems to work on positive integers. Any pointers on how I can fix it to work on negative numbers?
def count_sort(l):
output = [0] * len(l)
low = min(l)
if low >= 0:
low = 0
high = max(l)
working = [0 for i in range(low, high+1)]
for i in l:
workng[i] += 1
for j in range(1, len(working)):
working[j] += working[j-1]
for k in reversed(ul):
output[working[k] - 1] = k
working[k] -= 1
return output

Counting sort can work for any bounded range of integers, they don't all need to be positive. To make your code work for negative values (and work without wasting lots of memory for positive values that are all far from zero, e.g. [1e10, 1e10+1, 1e10+2]), just offset your indexes into count_array by the minimum value in the input:
def counting_sort(unsorted):
result = [0] * len(unsorted)
low = min(unsorted) # we don't care if this is positive or negative any more!
high = max(unsorted)
count_array = [0 for i in range(low, high+1)]
for i in unsorted:
count_array[i-low] += 1 # use an offset index
for j in range(1, len(count_array)):
count_array[j] += count_array[j-1]
for k in reversed(unsorted):
result[count_array[k-low] - 1] = k # here too
count_array[k-low] -= 1 # and here
return result

When you index count_array with negative elements, your algorithm doesn't work. Python will interpret count_array[-1] as the last element of count_array
So you can either change the code to take that into account or shift all elements before sorting
unsorted = [e - low for e in unsorted]
and at the end change it back
result = [e + low for e in result]

Related

Making the complexity smaller (better)

I have an algorithm that looks for the good pairs in a list of numbers. A good pair is being considered as index i being less than j and arr[i] < arr[j]. It currently has a complexity of O(n^2) but I want to make it O(nlogn) based on divide and conquering. How can I go about doing that?
Here's the algorithm:
def goodPairs(nums):
count = 0
for i in range(0,len(nums)):
for j in range(i+1,len(nums)):
if i < j and nums[i] < nums[j]:
count += 1
j += 1
j += 1
return count
Here's my attempt at making it but it just returns 0:
def goodPairs(arr):
count = 0
if len(arr) > 1:
# Finding the mid of the array
mid = len(arr)//2
# Dividing the array elements
left_side = arr[:mid]
# into 2 halves
right_side = arr[mid:]
# Sorting the first half
goodPairs(left_side)
# Sorting the second half
goodPairs(right_side)
for i in left_side:
for j in right_side:
if i < j:
count += 1
return count

The current previously accepted answer by Fire Assassin doesn't really answer the question, which asks for better complexity. It's still quadratic, and about as fast as a much simpler quadratic solution. Benchmark with 2000 shuffled ints:
387.5 ms original
108.3 ms pythonic
104.6 ms divide_and_conquer_quadratic
4.1 ms divide_and_conquer_nlogn
4.6 ms divide_and_conquer_nlogn_2
Code (Try it online!):
def original(nums):
count = 0
for i in range(0,len(nums)):
for j in range(i+1,len(nums)):
if i < j and nums[i] < nums[j]:
count += 1
j += 1
j += 1
return count
def pythonic(nums):
count = 0
for i, a in enumerate(nums, 1):
for b in nums[i:]:
if a < b:
count += 1
return count
def divide_and_conquer_quadratic(arr):
count = 0
left_count = 0
right_count = 0
if len(arr) > 1:
mid = len(arr) // 2
left_side = arr[:mid]
right_side = arr[mid:]
left_count = divide_and_conquer_quadratic(left_side)
right_count = divide_and_conquer_quadratic(right_side)
for i in left_side:
for j in right_side:
if i < j:
count += 1
return count + left_count + right_count
def divide_and_conquer_nlogn(arr):
mid = len(arr) // 2
if not mid:
return 0
left = arr[:mid]
right = arr[mid:]
count = divide_and_conquer_nlogn(left)
count += divide_and_conquer_nlogn(right)
i = 0
for r in right:
while i < mid and left[i] < r:
i += 1
count += i
arr[:] = left + right
arr.sort() # linear, as Timsort takes advantage of the two sorted runs
return count
def divide_and_conquer_nlogn_2(arr):
mid = len(arr) // 2
if not mid:
return 0
left = arr[:mid]
right = arr[mid:]
count = divide_and_conquer_nlogn_2(left)
count += divide_and_conquer_nlogn_2(right)
i = 0
arr.clear()
append = arr.append
for r in right:
while i < mid and left[i] < r:
append(left[i])
i += 1
append(r)
count += i
arr += left[i:]
return count
from timeit import timeit
from random import shuffle
arr = list(range(2000))
shuffle(arr)
funcs = [
original,
pythonic,
divide_and_conquer_quadratic,
divide_and_conquer_nlogn,
divide_and_conquer_nlogn_2,
]
for func in funcs:
print(func(arr[:]))
for _ in range(3):
print()
for func in funcs:
arr2 = arr[:]
t = timeit(lambda: func(arr2), number=1)
print('%5.1f ms ' % (t * 1e3), func.__name__)

One of the most well-known divide-and-conquer algorithms is merge sort. And merge sort is actually a really good foundation for this algorithm.
The idea is that when comparing two numbers from two different 'partitions', you already have a lot of information about the remaining part of these partitions, as they're sorted in every iteration.
Let's take an example!
Consider the following partitions, which has already been sorted individually and "good pairs" have been counted.
Partition x: [1, 3, 6, 9].
Partition y: [4, 5, 7, 8].
It is important to note that the numbers from partition x is located further to the left in the original list than partition y. In particular, for every element in x, it's corresponding index i must be smaller than some index j for every element in y.
We will start of by comparing 1 and 4. Obviously 1 is smaller than 4. But since 4 is the smallest element in partition y, 1 must also be smaller than the rest of the elements in y. Consequently, we can conclude that there is 4 additional good pairs, since the index of 1 is also smaller than the index of the remaining elements of y.
The exact same thing happens with 3, and we can add 4 new good pairs to the sum.
For 6 we will conclude that there is two new good pairs. The comparison between 6 and 4 did not yield a good pair and likewise for 6 and 5.
You might now notice how these additional good pairs would be counted? Basically if the element from x is less than the element from y, add the number of elements remaining in y to the sum. Rince and repeat.
Since merge sort is an O(n log n) algorithm, and the additional work in this algorithm is constant, we can conclude that this algorithm is also an O(n log n) algorithm.
I will leave the actual programming as an exercise for you.

#niklasaa has added an explanation for the merge sort analogy, but your implementation still has an issue.
You are partitioning the array and calculating the result for either half, but
You haven't actually sorted either half. So when you're comparing their elements, your two pointer approach isn't correct.
You haven't used their results in the final computation. That's why you're getting an incorrect answer.
For point #1, you should look at merge sort, especially the merge() function. That logic is what will give you the correct pair count without having O(N^2) iteration.
For point #2, store the result for either half first:
# Sorting the first half
leftCount = goodPairs(left_side)
# Sorting the second half
rightCount = goodPairs(right_side)
While returning the final count, add these two results as well.
return count + leftCount + rightCount

Like #Abhinav Mathur stated, you have most of the code down, your problem is with these lines:
# Sorting the first half
goodPairs(left_side)
# Sorting the second half
goodPairs(right_side)
You want to store these in variables that should be declared before the if statement. Here's an updated version of your code:
def goodPairs(arr):
count = 0
left_count = 0
right_count = 0
if len(arr) > 1:
mid = len(arr) // 2
left_side = arr[:mid]
right_side = arr[mid:]
left_count = goodPairs(left_side)
right_count = goodPairs(right_side)
for i in left_side:
for j in right_side:
if i < j:
count += 1
return count + left_count + right_count
Recursion can be difficult at times, look into the idea of merge sort and quick sort to get better ideas on how the divide and conquer algorithms work.

Python lowest missing integer: Codility Demo Task. What am I missing?

I'm playing with the Codality Demo Task. It's asking to design a function which determines the lowest missing integer greater than zero in an array.
I wrote a function that works, but Codility tests it as 88% (80% correctness). I can't think of instances where it would fail.
def solution(A):
#If there are negative values, set any negative values to zero
if any(n < 0 for n in A):
A = [(i > 0) * i for i in A]
count = 0
else: count = 1
#Get rid of repeating values
A = set(A)
#At this point, we may have only had negative #'s or the same # repeated.
#If negagive #'s, or repeated zero, then answer is 1
#If repeated 1's answer is 2
#If any other repeated #'s answer is 1
if (len(A) == 1):
if (list(A)[0] == 1):
return 2
else:
return 1
#Sort the array
A = sorted(A)
for j in range(len(A)):
#Test to see if it's greater than zero or >= to count. If so, it exists and is not the lowest integer.
#This fails if the first # is zero and the second number is NOT 1
if (A[j] <= count or A[j] == 0): #If the number is lt or equal to the count or zero then continue the count
count = count + 1
elif (j == 1 and A[j] > 1): return 1
else:
return count
return count
UPDATE:
I got this to 88% with the fixes above. It still fails with some input. I wish Codility would give the inputs that fail. Maybe it does with a full subscription. I'm just playing with the test.
UPDATE 2: Got this to 100% with Tranbi's suggestion.
def solution(A):
#Get rid of all zero and negative #'s
A = [i for i in A if i > 0]
#At this point, if there were only zero, negative, or combination of both, the answer is 1
if (len(A) == 0): return 1
count = 1
#Get rid of repeating values
A = set(A)
#At this point, we may have only had the same # repeated.
#If repeated 1's answer is 2
#If any other repeated #'s only, then answer is 1
if (len(A) == 1):
if (list(A)[0] == 1):
return 2
else:
return 1
#Sort the array
A = sorted(A)
for j in range(len(A)):
#Test to see if it's >= to count. If so, it exists and is not the lowest integer.
if (A[j] <= count): #If the number is lt or equal to the count then continue the count
count = count + 1
else:
return count
return count

Besides that bug for len=1, you also fail for example solution([0, 5]), which returns 2.
Anyway... Since you're willing to create a set, why not just make this really simple?
def solution(A):
A = set(A)
count = 1
while count in A:
count += 1
return count

I don't think this is true:
#At this point, we may have only had negative #'s or the same # repeated. If so, then the answer is 1+ the integer.
if (len(A) == 1):
return list(A)[0]+1
If A = [2] you should return 1 not 3.
Your code is quite confusing though. I think you could replace
if any(n < 0 for n in A):
A = [(i > 0) * i for i in A]
with
A = [i for i in A if i > 0]
What's the point of keeping 0 values?

I don't think this is needed:
if (len(A) == 1):
if (list(A)[0] == 1):
return 2
else:
return 1
It's already taken into account afterwards :)

Finally got a 100% score.
def solution(A):
# 1 isn't there
if 1 not in A:
return 1
# it's easier to sort
A.sort()
# positive "hole" in the array
prev=A[0]
for e in A[1:]:
if e>prev+1>0:
return prev+1
prev=e
# no positive "hole"
# 1 is in the middle
return A[-1]+1

Finding the "centered average" of a list

"Return the "centered" average of a list of integers, which we'll say is the mean average of the values, except ignoring the largest and smallest values in the list. If there are multiple copies of the smallest value, ignore just one copy, and likewise for the largest value. Use integer division to produce the final average. You may assume that the list is length 3 or more."
This is a problem I have from my homework assignment and I am stumped at how to find the the largest/smallest numbers and cut them out of the list. Here is what I have so far. and It works for 10/14 the scenarios that I have to pass.. I think it is just because it grabs the median
def centered_average(nums):
x = 0
for i in range(len(nums)):
x = i + 0
y = x + 1
if y%2 == 0:
return (nums[y/2] + nums[(y/2)+1]) / 2
else:
return nums[y/2]

Sorting the array is certainly terser code, here's an alternative with a manual loop
max_value = nums[0]
min_value = nums[0]
sum = 0
for x in nums:
max_value = max(max_value, x)
min_value = min(min_value, x)
sum += x
return (sum - max_value - min_value) / (len(nums) - 2)
This just adds everything in and removes the max and min at the end.

If the list isn't too long, it shouldn't be too computationally expensive to sort the list:
sorted(nums)
Then you can create a new list without the first and last entries, which will be the smallest and largest values:
new_nums = sorted(nums)[1:-1] # from index 1 to the next-to-last entry

Before i start i know there are easier ways mentioned in the other answers using the function sort, yes that is true but i believe your teacher must have iven you this to able to master loops and use them logically.
First pick your first number and assign it to high and low, don't worry it will make sense afterwards.
def centered average(nums):
high = nums[0]
small = nums[0]
Here is were the magic happens, you loop through your list and if the number your on in the loop is larger then the previous ones then you can replace the variable high with it, let me demonstrate.
for count in nums:
if count > high:
high = count
if count < low:
low = count
Now you have the low and the high all you do is add the values of the loop together minus the high and the low (as you said you do not need them).Then divide that answer by len of nums.
for count in nums:
sum = count + sum
sum = sum - (high + low)
return sum

New here. I like to check my solutions with solutions found on the internet and did not see my code here yet (hence the post). I found this challenge on https://codingbat.com/prob/p126968. And here is my solution:
** This is done in Python 3.9.1.
First the min and max are popped from the list with the index method. After it's just a simple avg calculation.
def centered_average(nums):
nums.pop(nums.index(max(nums)))
nums.pop(nums.index(min(nums)))
return sum(nums)/len(nums)

If I understand the question, this should work:
def centered_average(nums):
trim = sorted(nums)[1:-1]
return sum(trim) / len(trim)

def centered_average(nums):
nums = sorted(nums)
for i in range(len(nums)):
if len(nums)%2 != 0:
return nums[len(nums)/2]
else:
return ((nums[len(nums)/2] + nums[len(nums)/2 - 1]) / 2)

This is a very sub standard solution to the problem. This code is a bad code that does not take into account any consideration for complexity and space. But I think the thought process to be followed is similar to the steps in the code. This then can be refined.
def centered_average(nums):
#Find max and min value from the original list
max_value = max(nums)
min_value = min(nums)
#counters for counting the number of duplicates of max and min values.
mx = 0
mn = 0
sum = 0
#New list to hold items on which we can calculate the avg
new_nums = []
#Find duplicates of max and min values
for num in nums:
if num == max_value:
mx += 1
if num == min_value:
mn += 1
#Append max and min values only once in the new list
if mx > 1:
new_nums.append(max_value)
if mn > 1:
new_nums.append(min_value)
#Append all other numbers in the original to new list
for num in nums:
if num != max_value and num != min_value:
new_nums.append(num)
#Calculate the sum of all items in the list
for new in new_nums:
sum += new
#Calculate the average value.
avg = sum/len(new_nums)
return avg

def centered_average(nums):
min1=nums[0]
max1=nums[0]
for item in nums:
if item > max1:
max1 = item
if item < min1:
min1 = item
sum1=(sum(nums)-(min1+max1))/(len(nums)-2)
return sum1

simple solution
def centered_average(nums):
b=nums
ma=max(b)
mi=min(b)
l=(len(b)-2)
s=sum(b)-(ma+mi)
av=int(s/l)
return av

use sum function to sum the array
max and min functions to get the biggest and smallest number
def centered_average(nums):
return (sum(nums) - max(nums) - min(nums)) / (len(nums) - 2)

def centered_average(nums):
sorted_list = sorted(nums)
return sum(sorted_list[1:-1])/(len(nums)-2)
This will get the job done.

Python 3 Solution using list.index, list.pop, min and max functions.
def solution(input):
average = 0
minimum = min(input)
maximum = max(input)
input.pop(input.index(minimum))
input.pop(input.index(maximum))
average = round(sum(input) / len(input))
return average

def centered_average(nums):
nums.remove((min(nums)))
nums.remove((max(nums)))
new_nums=nums
count = 0
for i in range(len(new_nums)):
count+=1
ans=sum(new_nums)//count
return ans

def centered_average(nums):
maximums = []
minimums = []
sum_of_numbers = 0
length =len(nums) + (len(minimums)-1) + (len(maximums)-1)
for i in nums:
if i == max(nums):
maximums.append(i)
elif i == min(nums):
minimums.append(i)
else:
sum_of_numbers += i
if len(maximums)>=2 or len(minimums)>=2:
sum_of_numbers = sum_of_numbers + (max(nums)*(len(maximums)-1))(min(nums)*(len(minimums)-1))
return sum_of_numbers / length

loop finding "bouncy" numbers

Consider the following definitions of positive numbers:
A number is nondecreasing if its digits never get smaller as you go from left to right. For example, 12345
and 3388 are nondecreasing.
A number is nonincreasing if its digits never larger as you go from left to right. For example, 987542 and
881 are nonincreasing.
A number is bouncy if it is neither nondecreasing nor nonincreasing. For example, 12134 and 98462 are
bouncy.
Write a Python function bouncy that consumes a positive natural number (called n) and produces the
percentage of numbers between 1 and n, inclusive, which are bouncy. The result should be produced as a
natural number between 0 and 100, inclusive. Use round to convert the floating point percentage to an
integer.
def bouncy(input):
list1 = [0 for i in range(input)]
list1[0] = 0
for x in range(1, input-1):
if x < 100:
list1[x] = list1[x - 1]
else:
n=x
a = [0 for i in range(x)]
i = 0
while n > 0:
a[i]=n % 10
n/= 10
i+=1
flag = 1
for k in range(1, len(a) - 2):
if not ((a[k - 1] < a[k] < a[k + 1]) or (a[k - 1] > a[k] > a[k + 1])):
flag = 0
break
if flag == 0:
list1[x]==list[x-1]+ 1
return list1[input-1]
when i ran my code, it displays builtins.IndexError: list assignment index out of range.
Anyone got an idea?

You don't have to do any of that. Just turn the number into a string. If it's sorted it's nondecreasing, if it's reverse sorted it's nonincreasing, otherwise it's bouncy.
def bouncy(n):
return round(sum(list(i) not in (sorted(i), sorted(i, reverse=True)) for i in map(str, range(1, n+1)))/n*100)
This map()s each number in the range to a string, then checks whether a list() of that string is not found in a sorted() version of that string (either increasing or decreasing). Then it adds together how many numbers match that, divides by n, multiplies by 100, round()s that, and returns it.

Using Python for quasi randomization

Here's the problem: I try to randomize n times a choice between two elements (let's say [0,1] -> 0 or 1), and my final list will have n/2 [0] + n/2 [1]. I tend to have this kind of result: [0 1 0 0 0 1 0 1 1 1 1 1 1 0 0, until n]: the problem is that I don't want to have serially 4 or 5 times the same number so often. I know that I could use a quasi randomisation procedure, but I don't know how to do so (I'm using Python).

To guarantee that there will be the same number of zeros and ones you can generate a list containing n/2 zeros and n/2 ones and shuffle it with random.shuffle.
For small n, if you aren't happy that the result passes your acceptance criteria (e.g. not too many consecutive equal numbers), shuffle again. Be aware that doing this reduces the randomness of the result, not increases it.
For larger n it will take too long to find a result that passes your criteria using this method (because most results will fail). Instead you could generate elements one at a time with these rules:
If you already generated 4 ones in a row the next number must be zero and vice versa.
Otherwise, if you need to generate x more ones and y more zeros, the chance of the next number being one is x/(x+y).

You can use random.shuffle to randomize a list.
import random
n = 100
seq = [0]*(n/2) + [1]*(n-n/2)
random.shuffle(seq)
Now you can run through the list and whenever you see a run that's too long, swap an element to break up the sequence. I don't have any code for that part yet.

Having 6 1's in a row isn't particularly improbable -- are you sure you're not getting what you want?
There's a simple Python interface for a uniformly distributed random number, is that what you're looking for?

Here's my take on it. The first two functions are the actual implementation and the last function is for testing it.
The key is the first function which looks at the last N elements of the list where N+1 is the limit of how many times you want a number to appear in a row. It counts the number of ones that occur and then returns 1 with (1 - N/n) probability where n is the amount of ones already present. Note that this probability is 0 in the case of N consecutive ones and 1 in the case of N consecutive zeros.
Like a true random selection, there is no guarantee that the ratio of ones and zeros will be the 1 but averaged out over thousands of runs, it does produce as many ones as zeros.
For longer lists, this will be better than repeatedly calling shuffle and checking that it satisfies your requirements.
import random
def next_value(selected):
# Mathematically, this isn't necessary but it accounts for
# potential problems with floating point numbers.
if selected.count(0) == 0:
return 0
elif selected.count(1) == 0:
return 1
N = len(selected)
selector = float(selected.count(1)) / N
if random.uniform(0, 1) > selector:
return 1
else:
return 0
def get_sequence(N, max_run):
lim = min(N, max_run - 1)
seq = [random.choice((1, 0)) for _ in xrange(lim)]
for _ in xrange(N - lim):
seq.append(next_value(seq[-max_run+1:]))
return seq
def test(N, max_run, test_count):
ones = 0.0
zeros = 0.0
for _ in xrange(test_count):
seq = get_sequence(N, max_run)
# Keep track of how many ones and zeros we're generating
zeros += seq.count(0)
ones += seq.count(1)
# Make sure that the max_run isn't violated.
counts = [0, 0]
for i in seq:
counts[i] += 1
counts[not i] = 0
if max_run in counts:
print seq
return
# Print the ratio of zeros to ones. This should be around 1.
print zeros/ones
test(200, 5, 10000)

Probably not the smartest way, but it works for "no sequential runs", while not generating the same number of 0s and 1s. See below for version that fits all requirements.
from random import choice
CHOICES = (1, 0)
def quasirandom(n, longest=3):
serial = 0
latest = 0
result = []
rappend = result.append
for i in xrange(n):
val = choice(CHOICES)
if latest == val:
serial += 1
else:
serial = 0
if serial >= longest:
val = CHOICES[val]
rappend(val)
latest = val
return result
print quasirandom(10)
print quasirandom(100)
This one below corrects the filtering shuffle idea and works correctly AFAICT, with the caveat that the very last numbers might form a run. Pass debug=True to check that the requirements are met.
from random import random
from itertools import groupby # For testing the result
try: xrange
except: xrange = range
def generate_quasirandom(values, n, longest=3, debug=False):
# Sanity check
if len(values) < 2 or longest < 1:
raise ValueError
# Create a list with n * [val]
source = []
sourcelen = len(values) * n
for val in values:
source += [val] * n
# For breaking runs
serial = 0
latest = None
for i in xrange(sourcelen):
# Pick something from source[:i]
j = int(random() * (sourcelen - i)) + i
if source[j] == latest:
serial += 1
if serial >= longest:
serial = 0
guard = 0
# We got a serial run, break it
while source[j] == latest:
j = int(random() * (sourcelen - i)) + i
guard += 1
# We just hit an infinit loop: there is no way to avoid a serial run
if guard > 10:
print("Unable to avoid serial run, disabling asserts.")
debug = False
break
else:
serial = 0
latest = source[j]
# Move the picked value to source[i:]
source[i], source[j] = source[j], source[i]
# More sanity checks
check_quasirandom(source, values, n, longest, debug)
return source
def check_quasirandom(shuffled, values, n, longest, debug):
counts = []
# We skip the last entries because breaking runs in them get too hairy
for val, count in groupby(shuffled):
counts.append(len(list(count)))
highest = max(counts)
print('Longest run: %d\nMax run lenght:%d' % (highest, longest))
# Invariants
assert len(shuffled) == len(values) * n
for val in values:
assert shuffled.count(val) == n
if debug:
# Only checked if we were able to avoid a sequential run >= longest
assert highest <= longest
for x in xrange(10, 1000):
generate_quasirandom((0, 1, 2, 3), 1000, x//10, debug=True)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.