combining a range for pandas - python

I have a data in excel, one of the variable is temp (ranges from -3 to 3), the other is wind_speed
for my catplot how to I set the range(temp) for the x axis
g = sns.catplot(x="temp", y="wind_speed", data=ieq_data, kind="bar")
when I input this code the x axis is showing -3, -2, -1, 0, 1, 2, 3
I want to combine it to become (-3,-2), (-1), (0), (1), (2,3)
edited:
def temp(x):
if -3 <= x < -2:
return "(-3,-2)"
elif -2 <= x < 0:
return "(-1)"
elif 0 <= x < 2:
return "(1)"
elif 2 <= x <= 3:
return "(2,3)"
ieq_data["new_Thermal"] = ieq_data.temp.apply(temp)
g = sns.catplot (x="new_Thermal", y="wind_speed", data=ieq_data, kind="bar")

May be create an extra column like below and use this column as x.
You'll have to revisit the categorization in the function. This is as per my understanding of your question.
def grouping_func(val):
if -3 <= val < -2:
return "(-3, -2)"
elif -2 <= val < 0:
return "(-1)"
elif 0 <= val < 2:
return "(1)"
elif 2 <= val <=3:
return "(2, 3)"
ieq_data["new_col"] = ieq_data.temp.apply(grouping_func)
g = sns.catplot(x="new_col", y="wind_speed", data=ieq_data, kind="bar")

You can explicitly replace the ticks using your arbitrary list.
g.set(xticks=my_ticks_list)

Related

Counting Shifts in Merge Sort Error with one array

I was trying to count the number of shifts that happen with merge sort when I ran into a problem. When I run the code with multiple arrays, for some reason one of the arrays states that 3 shifts happened when in reality its 4. I will greatly appreciate it if anyone can help me figure out what the problem is. Thanks
def mergeSort(arr):
count = x = y = 0
result =[]
arrayLength = len(arr)
if arrayLength <= 1:
return count
middle = arrayLength // 2
left = arr[:middle]
right = arr[middle:]
leftLength = len(left)
rightLength = len(right)
count += mergeSort(left)
count += mergeSort(right)
while x < leftLength and y < rightLength:
if left[x] <= right[y]:
result.append(left[x])
x += 1
else:
result.append(right[y])
y += 1
count += len(left[x:])-x
return count
arr = [1,20,6,4,5]
print(mergeSort(arr))
arr2 = [4,3,2,1]
print(mergeSort(arr2))
arr3=[1, 1, 1, 2, 2]
print(mergeSort(arr3))
arr4=[2, 1, 3, 1, 2]
print(mergeSort(arr4))
arr5 = [12,15,1,5,6,14,11]
print(mergeSort(arr5))
arr6=[3, 5, 7, 11, 9]
print(mergeSort(arr6))
result = mergeSort(arr)
print(result)
You have two bugs:
Your len(left[x:])-x subtracts x twice.
You're not actually sorting the given array but just building a result that you never use. The sorting is important for the upper call levels to count correctly.
Fixed and with better testing (Try it online!):
from itertools import combinations
def mergeSort(arr):
count = x = y = 0
arrayLength = len(arr)
if arrayLength <= 1:
return count
middle = arrayLength // 2
left = arr[:middle]
right = arr[middle:]
leftLength = len(left)
rightLength = len(right)
count += mergeSort(left)
count += mergeSort(right)
for write in range(arrayLength):
if y == rightLength or x < leftLength and left[x] <= right[y]:
arr[write] = left[x]
x += 1
else:
arr[write] = right[y]
y += 1
count += len(left) - x
return count
def naive(arr):
return sum(a > b for a, b in combinations(arr, 2))
def test(arr):
expect = naive(arr)
result = mergeSort(arr)
print(result == expect, expect, result, arr)
test([1, 20, 6, 4, 5])
test([4, 3, 2, 1])
test([1, 1, 1, 2, 2])
test([2, 1, 3, 1, 2])
test([12, 15, 1, 5, 6, 14, 11])
test([3, 5, 7, 11, 9])
I presume your question really isn't about counting the recursion and more about figuring out why your algorithm is not correct. When you're checking for while x < leftLength and y < rightLength: you are dropping items at the end of one of the lists. This should be an or not an and to make sure you are doing ALL items in both left and right lists. Something like this:
while x < leftLength or y < rightLength:
if x == leftLength:
result.append(right[y])
y += 1
continue
if y == rightLength:
result.append(left[x])
x += 1
continue
if left[x] <= right[y]:
result.append(left[x])
x += 1
else:
result.append(right[y])
y += 1
return result
and you can't return the counts like Frank said because you stop doing the merge sort, and writing back to arr doesn't work in python as it's not an in/out variable. You would have to have a global variable outside the class to do the counting.

Python - problem with changing values to groups

I have a dataset that has different attributes. One of these attributes is temperature. My temperature range is from about -30 to about 30 degrees. I want to do a machine learning study and I wanted to group the temperature into different groups. On a principle: below -30: 0, -30 to -10: 1 and so on. I wrote the code below, but it doesn't work the way I want it to. The data type is: int32, I converted it with float64.
dane = [treningowy_df]
for zbior in dane:
zbior['temperatura'] = zbior['temperatura'].astype(int)
zbior.loc[ zbior['temperatura'] <= -30, 'temperatura'] = 0
zbior.loc[(zbior['temperatura'] > -30) & (zbior['temperatura'] <= -10), 'temperatura'] = 1
zbior.loc[(zbior['temperatura'] > -10) & (zbior['temperatura'] <= 0), 'temperatura'] = 2
zbior.loc[(zbior['temperatura'] > 0) & (zbior['temperatura'] <= 10), 'temperatura'] = 3
zbior.loc[(zbior['temperatura'] > 10) & (zbior['temperatura'] <= 20), 'temperatura'] = 4
zbior.loc[(zbior['temperatura'] > 20) & (zbior['temperatura'] <= 30), 'temperatura'] = 5
zbior.loc[ zbior['temperatura'] > 30, 'temperatura'] = 6
For example: before the code is executed, record 1 has a temperature: -3, and after the code is applied, record 1 has a temperature: 3. why? A record with a temperature before a change: 22 after the change: 5, i.e. the assignment was executed correctly.
it looks like you're manipulating a dataframe. have you tried using the apply function?
Personally I would go about this as such (in fact, with a new column).
1. Write a function to process the value
def _check_temperature_range(x):
if x <= -30:
return 0
elif x <= -10:
return 1
# so on and so forth...
2. Apply the function onto the column of the dataframe
df[new_column] = df[column].apply(lambda x: _check_temperature_range(x))
The results should then be reflected in the new_column or old column should you use back the same column
I think your code is applying multiple times on the same row.
With you're exemple with the first line :
temp = -3 gives 2
but then temp = 2 gives 3
So I recommend to create a new column in your dataframe
I believe it has to do with the sequence of your code.
A record with temperature -3, gets assigned as 2 -
zbior.loc[(zbior['temperatura'] > -10) & (zbior['temperatura'] <= 0), 'temperatura'] = 2
Then in the next line, it is found again as being between 0 and 10, and so assigned again as 3 -
zbior.loc[(zbior['temperatura'] > 0) & (zbior['temperatura'] <= 10), 'temperatura'] = 3
One solution is to assign a number that doesn't make you "jump" a category.
So, for -3, I'd assign 0 so it sticks around.
After that you can do another pass, and change to the actual numbers you wanted, eg 0->3 etc.
If zbior is a pandas.DataFrame, you can use the map function
def my_func(x):
if x <= -30:
return 0
elif x <= -10:
return 1
elif x <= 0:
return 2
elif x <= 10:
return 3
elif x <= 20:
return 4
elif x <= 30:
return 5
else:
return 6
zbior.temperatura=zbior.temperatura.map(my_func)

Find the subset of a set of integers that has the maximum product

Let A be a non-empty set of integers. Write a function find that outputs a non-empty subset of A that has the maximum product. For example, find([-1, -2, -3, 0, 2]) = 12 = (-2)*(-3)*2
Here's what I think: divide the list into a list of positive integers and a list of negative integers:
If we have an even number of negative integers, multiply everything in both list and we have the answer.
If we have an odd number of negative integers, find the largest and remove it from the list. Then multiply everything in both lists.
If the list has only one element, return this element.
Here's my code in Python:
def find(xs):
neg_int = []
pos_int = []
if len(xs) == 1:
return str(xs[0])
for i in xs:
if i < 0:
neg_int.append(i)
elif i > 0:
pos_int.append(i)
if len(neg_int) == 1 and len(pos_int) == 0 and 0 in xs:
return str(0)
if len(neg_int) == len(pos_int) == 0:
return str(0)
max = 1
if len(pos_int) > 0:
for x in pos_int:
max=x*max
if len(neg_int) % 2 == 1:
max_neg = neg_int[0]
for j in neg_int:
if j > max_neg:
max_neg = j
neg_int.remove(max_neg)
for k in neg_int:
max = k*max
return str(max)
Am I missing anything? P.S. This is a problem from Google's foobar challenge, I am apparently missing one case but I don't know which.
Now here's actual problem:
from functools import reduce
from operator import mul
def find(array):
negative = []
positive = []
zero = None
removed = None
def string_product(iterable):
return str(reduce(mul, iterable, 1))
for number in array:
if number < 0:
negative.append(number)
elif number > 0:
positive.append(number)
else:
zero = str(number)
if negative:
if len(negative) % 2 == 0:
return string_product(negative + positive)
removed = max(negative)
negative.remove(removed)
if negative:
return string_product(negative + positive)
if positive:
return string_product(positive)
return zero or str(removed)
You can simplify this problem with reduce (in functools in Py3)
import functools as ft
from operator import mul
def find(ns):
if len(ns) == 1 or len(ns) == 2 and 0 in ns:
return str(max(ns))
pos = filter(lambda x: x > 0, ns)
negs = sorted(filter(lambda x: x < 0, ns))
return str(ft.reduce(mul, negs[:-1 if len(negs)%2 else None], 1) * ft.reduce(mul, pos, 1))
>>> find([-1, -2, -3, 0, 2])
'12'
>>> find([-3, 0])
'0'
>>> find([-1])
'-1'
>>> find([])
'1'
Here's a solution in one loop:
def max_product(A):
"""Calculate maximal product of elements of A"""
product = 1
greatest_negative = float("-inf") # greatest negative multiplicand so far
for x in A:
product = max(product, product*x, key=abs)
if x <= -1:
greatest_negative = max(x, greatest_negative)
return max(product, product // greatest_negative)
assert max_product([2,3]) == 6
assert max_product([-2,-3]) == 6
assert max_product([-1, -2, -3, 0, 2]) == 12
assert max_product([]) == 1
assert max_product([-5]) == 1
Extra credit: what if the integer constraint were relaxed? What extra information do you need to collect during the loop?
Here is another solution that doesn't require libraries :
def find(l):
if len(l) <= 2 and 0 in l: # This is the missing case, try [-3,0], it should return 0
return max(l)
l = [e for e in l if e != 0] # remove 0s
r = 1
for e in l: # multiply all
r *= e
if r < 0: # if the result is negative, remove biggest negative number and retry
l.remove(max([e for e in l if e < 0]))
r = find(l)
return r
print(find([-1, -2, -3, 0, 2])) # 12
print(find([-3, 0])) # 0
EDIT :
I think I've found the missing case which is when there are only two elements in the list, and the highest is 0.

python checking surrounding points list of lists index out of range

I'm working on a school project and i'm trying to go through list of lists containing numbers. I'm trying to check all 8 surrounding "block" but i'm getting index out of range exception
filling the list:
for i in range(0, self.sizeX):
temp = []
for j in range(0, self.sizeY):
temp.append(random.randint(0, 100))
self.map.append(temp)
checking the surrounding
def check(self, i, j):
count = 0
if j-1 >= 0 & self.map[i][j-1] > 50:
count += 1
if (j+1) < len(self.map[i]) & self.map[i][j+1] > 50:
count += 1
if i-1 >= 0 & self.map[i-1][j] > 50:
count += 1
if i+1 < self.sizeX & self.map[i+1][j] > 50:
count += 1
if i-1 >= 0 & j-1 >= 0 & self.map[i-1][j-1] > 50:
count += 1
if i+1 < self.sizeX & j-1 >= 0 & self.map[i+1][j-1] > 50:
count += 1
if i-1 >= 0 & j+1 < self.sizeY & self.map[i-1][j+1] > 50:
count += 1
if i+1 < self.sizeX & j+1 < self.sizeY & self.map[i+1][j+1] > 50:
count += 1
return count
it looks like the conditions which check >=0 work but the once which check the size limit don't
btw i had this exact thing working in php with no problem
Your conditions seem to be correct, apart from substituting & (bitwise and) for and (logical and).
I also suggest pulling the tests out as variables; it helps make your code easier to read.
Also note that you are using columnar indexing, ie map[x][y]; it is more common to use row-aligned indexing ie map[y][x]. It is not "wrong" per se, but may be a bit clumsier to work with.
from random import randint
class Map:
def __init__(self, width, height):
self.width = width
self.height = height
self.map = [[randint(0, 100) for y in range(height)] for x in range(width)]
def neighbor_fn(self, i, j, fn=lambda x: x > 50):
left = i - 1
right = i + 1
above = j - 1
below = j + 1
do_left = left >= 0
do_right = right < self.width
do_above = above >= 0
do_below = below < self.height
return (
(do_left and do_above and fn(self.map[left][above]))
+ (do_above and fn(self.map[i][above]))
+ (do_right and do_above and fn(self.map[right][above]))
+ (do_left and fn(self.map[left][j]))
+ (do_right and fn(self.map[right][j]))
+ (do_left and do_below and fn(self.map[left][below]))
+ (do_below and fn(self.map[i][below]))
+ (do_right and do_below and fn(self.map[right][below]))
)

Find enclosed spaces in 2D array

So, I'm generating an array of spaces, which have the property that they can be either red or black. However, I want to prevent red from being enclosed by black. I have some examples to show exactly what I mean:
0 0 0 0 0 0 0 1
0 1 0 0 0 0 1 0
1 0 1 0 0 0 0 1
0 1 0 0 1 1 1 0
0 0 0 0 1 0 1 0
1 1 1 0 1 1 1 0
0 0 1 0 0 0 0 0
0 0 1 0 0 0 0 0
If red is 0 and black is 1, then this example contains four enclosures, all of which I want to avoid when I generate the array. The inputs I have are the size of the array and the number of 1s I can generate.
How would I go about doing this?
Does this code fits well for you?
Basically I fill a matrix from left to right, from top to bottom.
When I have to assign 0 or 1 to a cell, I check (north and west) if adding a 1 could enclose a 0; in this case I put a 0, else a random 0 or 1.
import sys, random
n = int(sys.argv[1])
m = int(sys.argv[2])
# fill matrix with zeroes
matrix = [[0 for _ in xrange(m)] for _ in xrange(n)]
# functions to get north, south, west and east
# cell wrt this cell.
# If we are going out of bounds, we suppose the matrix
# is sorrounded by 1s.
def get_n(r, c):
if r <= 0: return 1
return matrix[r - 1][c]
def get_s(r, c):
if r >= n - 1: return 1
return matrix[r + 1][c]
def get_w(r, c):
if c <= 0: return 1
return matrix[r][c - 1]
def get_e(r, c):
if c >= m - 1: return 1
return matrix[r][c + 1]
# Checks if the cell is already enclosed by 3 1s.
def enclosed(r, c):
enclosing = get_n(r, c) + get_s(r, c) + get_w(r, c) + get_e(r, c)
if (enclosing > 3): raise Exception('Got a 0 enclosed by 1s')
return enclosing == 3
for r in xrange(n):
for c in xrange(m):
# check west and north
if enclosed(r, c - 1) or enclosed(r - 1, c):
matrix[r][c] = 0
else:
matrix[r][c] = random.randint(0, 1)
print str(matrix[r][c]) + ' ',
print ''
Sample run: python spaces.py 10 10
So you can do the following:
Fill array with zeroes
Randomly select a point
If the condition holds, flip color
Repeat from step 2 or exit
The condition holds for all-zeros array. It is hold on any iteration. So, by induction, it is also true for the final array.
In the step 4 you can decide whether to stop or continue by doing, say N=a*b*1000 iterations or whether the ratio red/black is close to 1. In both cases, the result would be slightly biased since you start from all zeros.
Now, what is the condition. You have to ensure that all black points connected and all red points connected as well. In other words, there's maximum 2 connected clusters. Flipping a color could create more connected clusters, so you flip only when the its number is one or two. You can do the check quite efficiently using Union-Find algorithm, described here.
Edit: if however you want to permit black points to be surrounded by red ones but not vice-versa, you may change the condition to have any number of black clusters but only 0 or 1 of red clusters.
This would be a possible way to check the condition:
def: findStart(myArr):
for i in range(len(myArr)):
for j in range(len(myArr[0])):
if(myArr[i][j] == 0):
return (i,j)
def: checkCon(myArr, number_Ones):
width = len(myArr[0])
height = len(myArr)
pen = [] #A list of all points that are waiting to get a visit
vis = [] #A list of all points that are already visited
x = findStart(myArr)
while(len(pen) != 0): #Visit points as long as there are points left
p = pen.pop() #Pick a point to visit
if p in vis:
#do nothing since this point already was visited
else:
vis.append(p)
x,y = p
#A vertical check
if(x == 0 and myArr[x+1][y] == 0):
pen.append((x+1,y))
elif(x == (height-1) and myArr[x-1][y] == 0):
pen.append((x-1,y))
else:
if(myArr[x-1][y] == 0 and x-1 >= 0):
pen.append((x-1,y))
if(myArr[x+1][y] == 0):
pen.append((x+1,y))
#A horizontal check
if(y == 0 and myArr[x][y+1] == 0):
pen.append((x,y+1))
elif(y == (width-1) and myArr[x][y-1] == 0):
pen.append((x,y-1))
else:
if(myArr[x][y+1] == 0):
pen.append((x,y+1))
if(myArr[x][y-1] == 0 and y-1 >= 0):
pen.append((x,y-1))
print((height*width-number_Ones) == len(vis)) #if true, alle Zeros are connected and not enclosed
To clarify this is just a concept to check the condition. The idea is to visit all connected zeros and see if there are any left (that are not connected). If that is the case, there are some enclosed.
This method also doesn't work when the 1's form a frame around the matrix like this:
1 1 1 1
1 0 0 1
1 0 0 1
1 1 1 1
Again, just a concept :)
The problem has two parts actually. Generating the board state, and then checking if it is correct. I realised that checking the correctness was actually worse than just being sure correct states were always generated. This is what I did:
Note that I have defined self.WallSpaces to be an array equal in length to the height of my array, comprised of integers with the number of bits equal to the width of my array. self.Width and self.Height provide the end indices for the array. Basically, Intersects works by checking all the spaces surrounding a point for 1s, except the direction the space was "built from" (see below) and returning True if any of these are the edge of the array or a 1.
def Intersects(self, point, direction):
if (point[0] > 0):
if (direction != [1, 0] and self.WallSpaces[point[0] - 1] & (1 << point[1]) != 0):
return True
if (point[1] == 0 or self.WallSpaces[point[0] - 1] & (1 << (point[1] - 1)) != 0):
return True
if (point[1] == self.Width or self.WallSpaces[point[0] - 1] & (1 << (point[1] + 1)) != 0):
return True
else:
return True
if (point[0] < self.Height):
if (direction != [-1, 0] and self.WallSpaces[point[0] + 1] & (1 << point[1]) != 0):
return True
if (point[1] == 0 or self.WallSpaces[point[0] + 1] & (1 << (point[1] - 1)) != 0):
return True
if (point[1] == self.Width or self.WallSpaces[point[0] + 1] & (1 << (point[1] + 1)) != 0):
return True
else:
return True
if (point[1] == 0 or (direction != [0, 1] and self.WallSpaces[ point[0] ] & (1 << (point[1] - 1)) != 0)):
return True
if (point[1] == self.Width or (direction != [0, -1] and self.WallSpaces[ point[0] ] & (1 << (point[1] + 1)) != 0)):
return True
return False
The directions GPacW.Left, GPacW.Right, GPackW.Up, and GPacW.Down represent the cardinal directions for movement. This function works by constructing "walls" in the array from random points, which can turn in random directions, ending when they have intersected twice.
def BuildWalls(self):
numWalls = 0
directions = [ [GPacW.Left, GPacW.Right], [GPacW.Up, GPacW.Down] ]
start = [ random.randint(0, self.Height), random.randint(0, self.Width) ]
length = 0
horizontalOrVertical = random.randint(0, 1)
direction = random.randint(0, 1)
d = directions[horizontalOrVertical][direction]
intersected = False
while (numWalls < self.Walls):
while (start == [0, 0] or start == [self.Height, self.Width] or self.Intersects(start, d)):
start = [ random.randint(0, self.Height), random.randint(0, self.Width) ]
if (length == 0):
horizontalOrVertical = not horizontalOrVertical
direction = random.randint(0, 1)
length = random.randint(3, min(self.Height, self.Width))
d = directions[horizontalOrVertical][direction]
if (self.WallSpaces[ start[0] ] & (1 << start[1] ) == 0):
self.WallSpaces[ start[0] ] |= 1 << start[1]
numWalls += 1
length -= 1
if (0 <= (start[0] + d[0]) <= self.Height and 0 <= (start[1] + d[1]) <= self.Width):
start[0] += d[0]
start[1] += d[1]
else:
start = [0,0]
if (self.Intersects(start, d)):
if (intersected):
intersected = False
start = [0,0]
length = 0
else:
intersected = True
return

Categories

Resources