Finding 1st order neighbors using shapefile polygons - python

I am looking for an efficient way to find the 1st order neighbors of a given polygon. My data are in shapefile format.
My first idea was to calculate the x and y coordinates of the polygons' centroids in order to find the neighbor's centroids.
import pysal
from pysal.common import *
import pysal.weights
import numpy as np
from scipy import sparse,float32
import scipy.spatial
import os, gc, operator
def get_points_array_from_shapefile(inFile):
    """
    Gets a data array of x and y coordinates from a given shape file

    Parameters
    ----------
    inFile: string name of a shape file including suffix

    Returns
    -------
    points: array (n,2) a data array of x and y coordinates

    Raises
    ------
    ValueError: if the shape file holds neither polygons nor points

    Notes
    -----
    If the given shape file includes polygons,
    this function returns x and y coordinates of the polygons' centroids

    Examples
    --------
    Point shapefile

    >>> from pysal.weights.util import get_points_array_from_shapefile
    >>> xy = get_points_array_from_shapefile('../examples/juvenile.shp')
    >>> xy[:3]
    array([[ 94., 93.],
    [ 80., 95.],
    [ 79., 90.]])

    Polygon shapefile

    >>> xy = get_points_array_from_shapefile('../examples/columbus.shp')
    >>> xy[:3]
    array([[ 8.82721847, 14.36907602],
    [ 8.33265837, 14.03162401],
    [ 9.01226541, 13.81971908]])

    (source: https://code.google.com/p/pysal/source/browse/trunk/pysal/weights/util.py?r=1013)
    """
    f = pysal.open(inFile)
    try:
        shapes = f.read()
        shape_type = f.type.__name__
    finally:
        # Release the file handle even when reading fails.
        f.close()

    if shape_type == 'Polygon':
        return np.array([shape.centroid for shape in shapes])
    if shape_type == 'Point':
        return np.array([shape for shape in shapes])
    # Previously this fell through to `return data` with `data` unbound.
    raise ValueError("unsupported shape type: %s" % shape_type)
inFile = "../examples/myshapefile.shp"
my_centr = get_points_array_from_shapefile(inFile)
This approach could be valid for a regular grid, but in my case I need to find a more general solution. The figure shows the problem. Consider the yellow polygon as the reference. Its neighbors are the gray polygons. Using the centroid-neighbors approach, the light blue polygon is considered a neighbor, but it doesn't have a side in common with the yellow polygon.
A recent solution modified from Efficiently finding the 1st order neighbors of 200k polygons can be the following:
from collections import defaultdict
inFile = 'C:\\MultiShapefile.shp'
shp = osgeo.ogr.Open(inFile)
layer = shp.GetLayer()

# Maps a vertex (its x and y coordinates rendered as text and concatenated)
# to the list of feature IDs that have a vertex at that coordinate.
BlockGroupVertexDictionary = {}
for feature_index in xrange(layer.GetFeatureCount()):
    feat = layer.GetFeature(feature_index)
    feature_id = str(feat.GetFID())
    # First sub-geometry of the feature (presumably the polygon's outer ring).
    ring = feat.GetGeometryRef().GetGeometryRef(0)
    # Skip the last point because it repeats the first one
    # (see shapefile polygon topology).
    for vertex in xrange(ring.GetPointCount() - 1):
        vertex_key = str(ring.GetX(vertex)) + str(ring.GetY(vertex))
        # Start a new list the first time a coordinate is seen, then record
        # this feature's ID under it.
        BlockGroupVertexDictionary.setdefault(vertex_key, []).append(feature_id)
With this solution, I have a dictionary with vertex coordinates as the keys and a list of block group IDs that have a vertex at that coordinate as the value.
>>> BlockGroupVertexDictionary
{'558324.3057036361423.57178': ['18'],
'558327.4401686361422.40755': ['18', '19'],
'558347.5890836361887.12271': ['1'],
'558362.8645026361662.38757': ['17', '18'],
'558378.7836876361760.98381': ['14', '17'],
'558389.9225016361829.97259': ['14'],
'558390.1235856361830.41498': ['1', '14'],
'558390.1870856361652.96599': ['17', '18', '19'],
'558391.32786361398.67786': ['19', '20'],
'558400.5058556361853.25597': ['1'],
'558417.6037156361748.57558': ['14', '15', '17', '19'],
'558425.0594576362017.45522': ['1', '3'],
'558438.2518686361813.61726': ['14', '15'],
'558453.8892486362065.9571': ['3', '5'],
'558453.9626046361375.4135': ['20', '21'],
'558464.7845966361733.49493': ['15', '16'],
'558474.6171066362100.82867': ['4', '5'],
'558476.3606496361467.63697': ['21'],
'558476.3607186361467.63708': ['26'],
'558483.1668826361727.61931': ['19', '20'],
'558485.4911846361797.12981': ['15', '16'],
'558520.6376956361649.94611': ['25', '26'],
'558525.9186066361981.57914': ['1', '3'],
'558527.5061096362189.80664': ['4'],
'558529.0036896361347.5411': ['21'],
'558529.0037236361347.54108': ['26'],
'558529.8873646362083.17935': ['4', '5'],
'558533.062376362006.9792': ['1', '3'],
'558535.4436256361710.90985': ['9', '16', '20'],
'558535.4437266361710.90991': ['25'],
'558548.7071816361705.956': ['9', '10'],
'558550.2603156361432.56769': ['26'],
'558550.2603226361432.56763': ['21'],
'558559.5872216361771.26884': ['9', '16'],
'558560.3288756362178.39003': ['4', '5'],
'558568.7811926361768.05997': ['1', '9', '10'],
'558572.749956362041.11051': ['3', '5'],
'558573.5437016362012.53546': ['1', '3'],
'558575.3048386362048.77518': ['2', '3'],
'558576.189546362172.87328': ['5'],
'558577.1149386361695.34587': ['7', '10'],
'558579.0999636362020.47297': ['1', '3'],
'558581.6312396362025.36096': ['0', '1'],
'558586.7728956362035.28967': ['0', '3'],
'558589.8015336362043.7987': ['2', '3'],
'558601.3250076361686.30355': ['7'],
'558601.3250736361686.30353': ['25'],
'558613.7793476362164.19871': ['2', '5'],
'558616.4062876361634.7097': ['7'],
'558616.4063116361634.70972': ['25'],
'558618.129066361634.29952': ['7', '11', '22'],
'558618.1290896361634.2995': ['25'],
'558626.9644156361875.47515': ['10', '11'],
'558631.2229836362160.17325': ['2'],
'558632.0261236361600.77448': ['25', '26'],
'558639.495586361898.60961': ['11', '13'],
'558650.4935686361918.91358': ['12', '13'],
'558659.2473416361624.50945': ['8', '11', '22', '24'],
'558664.5218136361857.94836': ['7', '10'],
'558666.4126376361622.80343': ['8', '24'],
'558675.1439056361912.52276': ['12', '13'],
'558686.3385396361985.08892': ['0', '1'],
..................
.................
'558739.4377836361931.57279': ['11', '13'],
'558746.8758486361973.84475': ['11', '13'],
'558751.3440576361902.20399': ['6', '11'],
'558768.8067026361258.4715': ['26'],
'558779.9170276361961.16408': ['6', '11'],
'558785.7399596361571.47416': ['22', '24'],
'558791.5596546361882.09619': ['8', '11'],
'558800.2351726361877.75843': ['6', '8'],
'558802.7700816361332.39227': ['26'],
'558802.770176361332.39218': ['22'],
'558804.7899976361336.78827': ['22'],
'558812.9707376361565.14513': ['23', '24'],
'558833.2667696361940.68932': ['6', '24'],
'558921.2068976361539.98868': ['22', '23'],
'558978.3570116361885.00604': ['23', '24'],
'559022.80716361982.3729': ['23'],
'559096.8905816361239.42141': ['22'],
'559130.7573166361935.80614': ['23'],
'559160.3907086361434.15513': ['22']}

Just in case this is still an open question for the OP or someone else stumbles here.
import pysal as ps
w = ps.queen_from_shapefile('shapefile.shp')
http://pysal.readthedocs.io/en/latest/users/tutorials/weights.html#pysal-spatial-weight-types

I am not familiar with the specific data formats being used, but regardless, think the following idea would work.
In Python you can make sets out of tuples of numbers, i.e. (x,y) and (x1,y1,x2,y2), so it should be possible to make a set representing all the points or edges in a given polygon. Afterwards you would be able to use very fast set intersection operations to find all 1st order neighbors.
You might be able to speed the process up using some sort of trivial rejection test to avoid further processing of polygons which could not possibly be a neighbor -- perhaps using your polygons' centroids idea.
Does this explanation make sense?

Related

Adding a single 'counter' to each element of an array of lists

I've got a .dat file that I've pulled the data from, and I was using the tabulate plug-in to tidy it up and put it into tables. However, part of the question is to add a position column or counter. It should be simple enough to add one element at the start of each list in my array, but I am having an absolute nightmare...
the code for pulling in the data from the .dat file is:
def readToDictionary():
    """Read 'CarRegistry.dat' into a list of comma-split rows.

    Every line of the file becomes one list of strings, split on ','.
    The line's trailing newline is left on the last field of its row.
    The rows are also stored in the module-level name ``dicts`` and
    printed before being returned.

    Returns
    -------
    list of list of str: one entry per line of the file
    """
    global dicts
    # The with-block closes the file even if reading fails part-way.
    with open('CarRegistry.dat', 'r') as fi:
        buffer = [team.split(',') for team in fi]
    dicts = buffer
    print(dicts)
    return dicts
Im duplicating the array deliberately as i need to do some other functions on it and want to keep the original data intact.
The raw out put is:
[['1', 'BD61 SLU', 'HONDA', 'CR-V', 'SFDR', '5', '1780', '4510', '130', '39', 'True\n'], ['2', 'CA51 MBE', 'CHEVROLET', 'CORVETTE', 'JTAV', '2', '1877', '1234', '194', '24', 'True\n'], ['3', 'PC14 RSN', 'FORD', 'F-150', 'PQBD', '5', '2121', '5890', '155', '20', 'True\n'], ['4', 'MB19 ORE', 'HONDA', 'ACCORD', 'FDAR', '5', '1849', '4933', '125', '47.3', 'False\n'], ['5', 'BD68 NAP', 'HONDA', 'ACCORD', 'FDAV', '5', '1849', '4933', '171', '37.7', 'False\n']...
what i want to get to is:
[['1', '1', 'BD61 SLU', 'HONDA', 'CR-V', 'SFDR', '5', '1780', '4510', '130', '39', 'True\n'], ['2', '2', 'CA51 MBE', 'CHEVROLET', 'CORVETTE', 'JTAV', '2', '1877', '1234', '194', '24', 'True\n'], ['3', '3', 'PC14 RSN', 'FORD', 'F-150', 'PQBD', '5', '2121', '5890', '155', '20', 'True\n'], ['4', '4', 'MB19 ORE', 'HONDA', 'ACCORD', 'FDAR', '5', '1849', '4933', '125', '47.3', 'False\n'], ['5', '5', 'BD68 NAP', 'HONDA', 'ACCORD', 'FDAV', '5', '1849', '4933', '171', '37.7', 'False\n']...
It's basically a counter at the start of each list.
Ive tried all sorts and just keep getting errors, probably because i cant understand the basics of why i cant just do this:
for i in buffer:
buffer.insert(i, i+1)
to go through each entry in the list and add a value equal to the index +1... I know its probably simple but i've been banging my head off the monitor for a good few hours now...
The key is, you don't want to manipulate buffer. You want to manipulate the individual lists within buffer:
for i,row in enumerate(buffer):
row.insert( 0, str(i+1) )

Python - List of unique sequences

I have a dictionary with elements as lists of certain sequence:
a = {'seq1':['5', '4', '3', '2', '1', '6', '7', '8', '9'],
'seq2':['9', '8', '7', '6', '5', '4', '3', '2', '1'],
'seq3':['5', '4', '3', '2', '1', '11', '12', '13', '14'],
'seq4':['15', '16', '17'],
'seq5':['18', '19', '20', '21', '22', '23'],
'seq6':['18', '19', '20', '24', '25', '26']}
So there are 6 sequences
What I need to do is:
To find only unique lists (if two lists contain the same elements, regardless of their order, they are not unique) - say I need to get rid of the second list (the first unique list found will stay)
In unique lists I need to find unique subsequences of elements and print
it
Bounds of unique sequences are found by resemblance of elements order - in the 1st and the 3rd lists the bound ends exactly after element '1', so we get the subsequence ['5','4','3','2','1']
As the result I would like to see elements exactly in the same order as it was in the beginning (if it`s possible at all somehow). So I expect this:
[['5', '4', '3', '2', '1']['6', '7', '8', '9']['11', '12', '13', '14']['15', '16', '17']['18', '19', '20']['21', '22', '23']['24', '25', '26']]
Tried to do it this way:
import itertools
unique_sets = []
a = {'seq1':["5","4","3","2","1","6","7","8","9"], 'seq2':["9","8","7","6","5","4","3","2","1"], 'seq3':["5","4","3","2","1","11","12","13","14"], 'seq4':["15","16","17"], 'seq5':["18","19","20","21","22","23"], 'seq6':["18","19","20","24","25","26"]}
b = []
for seq in a.values():
b.append(seq)
for seq1, seq2 in itertools.combinations(b,2): #searching for intersections
if set(seq1).intersection(set(seq2)) not in unique_sets:
#if set(seq1).intersection(set(seq2)) == set(seq1):
#continue
unique_sets.append(set(seq1).intersection(set(seq2)))
if set(seq1).difference(set(seq2)) not in unique_sets:
unique_sets.append(set(seq1).difference(set(seq2)))
for it in unique_sets:
print(it)
I got this which is a little bit different from my expectations:
{'9', '5', '2', '3', '7', '1', '4', '8', '6'}
set()
{'5', '2', '3', '1', '4'}
{'9', '8', '6', '7'}
{'5', '2', '14', '3', '1', '11', '12', '4', '13'}
{'17', '16', '15'}
{'19', '20', '18'}
{'23', '21', '22'}
Without comment in the code above the result is even worse.
Plus I have the problem with unordered elements in the sets, which I get as the result. Tried to do this with two separate lists:
seq1 = set([1,2,3,4,5,6,7,8,9])
seq2 = set([1,2,3,4,5,10,11,12])
and it worked fine - elements didn`t ever change their position in sets. Where is my mistake?
Thanks.
Updated: OK, now I have a slightly more complicated task, where the offered algorithm won't work
I have this dictionary:
precond = {
'seq1': ["1","2"],
'seq2': ["3","4","2"],
'seq3': ["5","4","2"],
'seq4': ["6","7","4","2"],
'seq5': ["6","4","7","2"],
'seq6': ["6","1","8","9","10"],
'seq7': ["6","1","8","11","9","12","13","14"],
'seq8': ["6","1","8","11","4","15","13"],
'seq9': ["6","1","8","16","9","11","4","17","18","2"],
'seq10': ["6","1","8","19","9","4","16","2"],
}
I expect these sequences, containing at least 2 elements:
[1, 2],
[4, 2],
[6, 7],
[6, 4, 7, 2],
[6, 1, 8]
[9,10],
[6,1,8,11]
[9,12,13,14]
[4,15,13]
[16,9,11,4,17,18,2]
[19,9,4,16,2]
Right now I wrote this code:
precond = {
'seq1': ["1","2"],
'seq2': ["3","4","2"],
'seq3': ["5","4","2"],
'seq4': ["6","7","4","2"],
'seq5': ["6","4","7","2"],
'seq6': ["6","1","8","9","10"],
'seq7': ["6","1","8","11","9","12","13","14"],
'seq8': ["6","1","8","11","4","15","13"],
'seq9': ["6","1","8","16","9","11","4","17","18","2"],
'seq10': ["6","1","8","19","9","4","16","2"],
}
seq_list = []
result_seq = []
#d = []
for seq in precond.values():
seq_list.append(seq)
#print(b)
contseq_ind = 0
control_seq = seq_list[contseq_ind]
mainseq_ind = 1
el_ind = 0
#index2 = 0
def compar():
if control_seq[contseq_ind] != seq_list[mainseq_ind][el_ind]:
mainseq_ind += 1
compar()
else:
result_seq.append(control_seq[contseq_ind])
contseq_ind += 1
el_ind += 1
if contseq_ind > len(control_seq):
control_seq = seq_list[contseq_ind + 1]
compar()
else:
compar()
compar()
This code is not complete anyway - I created looking for the same elements from the beginning, so I still need to write a code for searching of sequence in the end of two compared elements.
Right now I have a problem with recursion. Immediately after the first recursive call I get this error:
if control_seq[contseq_ind] != b[mainseq_ind][el_ind]:
UnboundLocalError: local variable 'control_seq' referenced before assignment
How can I fix this? Or maybe you have a better idea, than using recursion? Thank you in advance.
Not sure if this is what you wanted, but it gets the same result:
from collections import OrderedDict
a = {'seq1': ["5", "4", "3", "2", "1", "6", "7", "8", "9"],
     'seq2': ["9", "8", "7", "6", "5", "4", "3", "2", "1"],
     'seq3': ["5", "4", "3", "2", "1", "11", "12", "13", "14"],
     'seq4': ["15", "16", "17"],
     'seq5': ["18", "19", "20", "21", "22", "23"],
     'seq6': ["18", "19", "20", "24", "25", "26"]}

# For every distinct element, in first-seen order, record a pair:
# (index of the sequence it first appeared in, total number of occurrences).
# The pair is unpacked into its own names so the input dict `a` is not
# overwritten while it is being iterated.
counts = OrderedDict()
for level, elements in enumerate(a.values()):
    for element in elements:
        if element in counts:
            first_level, seen = counts[element]
            counts[element] = (first_level, seen + 1)
        else:
            counts[element] = (level, 1)

# Now break up the unique values into lists: consecutive elements that share
# the same (level, count) pair belong to the same sub-sequence.
result = []
last = None  # pair of the previous element; None before the first one
for k, v in counts.items():
    if v == last:
        result[-1].append(k)
    else:
        result.append([k])
    last = v
print(result)
Result:
[['5', '4', '3', '2', '1'],
['6', '7', '8', '9'],
['11', '12', '13', '14'],
['15', '16', '17'],
['18', '19', '20'],
['21', '22', '23'],
['24', '25', '26']]

Python function to 'rotate' grid 90 degrees with for loops [duplicate]

This question already has answers here:
How do you rotate a two dimensional array?
(64 answers)
Closed 5 years ago.
I'm trying to 'rotate' a grid 90 degrees clockwise and came up with the following Python code.
def rotate90(grid):
rotatedGrid = grid[:]
for i in range (0, len(grid)):
for j in range (0, len(grid)):
rotatedGrid[i][j] = grid[-(j+1)][i][:]
return rotatedGrid
printing rotate90(grid) on the grid [['1', '2', '3'], ['4', '5', '6'], ['7', '8', '9']] outputs [['7', '4', '7'], ['8', '5', '4'], ['9', '4', '7']], whereas I expected [['7', '4', '1'], ['8', '5', '2'], ['9', '6', '3']]. What is the reason for this difference?
(The reason I haven't converted these to ints is that eventually I will be using '#' and '-' characters rather than numbers.)
Your function doesn't work because you didn't make a new structure when you initialized rotatedGrid. grid[:] copies only the outer list; the rows inside it are still the very same row objects as in grid. When you assigned within the loop, you were overwriting those shared rows, so later reads from grid picked up values that had already been changed.
Fix it with this:
from copy import deepcopy
def rotate90(grid):
rotatedGrid = deepcopy(grid)
Given that change, your code produces the desired output.
We can easily transpose a list l with zip(*l), then reverse the sublists
def rot_90(l):
    """Return a new grid equal to *l* rotated 90 degrees clockwise.

    *l* is a sequence of rows. The result is a list of lists; *l* itself
    is not modified.
    """
    rotated = []
    for column in zip(*l):
        # A column read bottom-to-top becomes a row of the rotated grid.
        rotated.append(list(column[::-1]))
    return rotated
rot_90([['1', '2', '3'], ['4', '5', '6'], ['7', '8', '9']])
returns
[['7', '4', '1'], ['8', '5', '2'], ['9', '6', '3']]

Deleting partial duplicates in a list of strings/floats of size >1

I'm currently trying to delete some "partial duplicates" (some positions in a tuple may match) in a list of primarily string inputs. This would be some samples from the list
[['Dave', '1'],
['Dave', '2'],
['Dave', '3'],
['Dave', '4'],
['Jack Jones', '5'],
['Joaquin', '6'],
['Joaquin', '7'],
['Joaquin', '8']]
Now, I'd like to delete every row in this list where the element in the first position matches the proceeding row's first positional element like so:
[['Dave', '1'],
['Jack Jones', '5'],
['Joaquin', '6']]
I have tried going about this as I would when considering an array object with numerical data-but I think I'm misusing objects where I shouldn't be.
def duplicate_remover(x):
dupesremoved = []
for i in range(len(x)):
if x[i] != x[i-1]:
dupesremoved.append(x[i])
You could use itertools.groupby, however the actual approach might differ depending on your actual input format:
it = [
['Dave', '1'],
['Dave', '2'],
['Dave', '3'],
['Dave', '4'],
['Jack Jones', '5'],
['Joaquin', '6'],
['Joaquin', '7'],
['Joaquin', '8'],
]
from itertools import groupby
from operator import itemgetter
for first, group in groupby(it, key=itemgetter(0)):
print(next(group))
which prints:
['Dave', '1']
['Jack Jones', '5']
['Joaquin', '6']

How could i refresh a list once an item has been removed from a list within a list in python

This is quite complicated, but I would like to be able to refresh a larger list once an item has been taken out of a mini list within the bigger list.
listA = ['1','2','3','4','5','6','6','8','9','5','3','7']
i used the code below to split it into lists of threes
split = [listA[i:(i+3)] for i in range(0, len(listA) - 1, 3)]
print(split)
# [['1','2','3'],['4','5','6'],['6','8','9'],['5','3','7']]
split = [['1','2','3'],['4','5','6'],['6','8','9'],['5','3','7']]
if i deleted #3 from the first list, split will now be
del split[0][-1]
split = [['1','2'],['4','5','6'],['6','8','9'],['5','3','7']]
after #3 has been deleted, i would like to be able to refresh the list so that it looks like;
split = [['1','2','4'],['5','6','6'],['8','9','5'],['3','7']]
thanks in advance
Not sure how big this list is getting, but you would need to flatten it and recalculate it:
>>> listA = ['1','2','3','4','5','6','6','8','9','5','3','7']
>>> split = [listA[i:(i+3)] for i in range(0, len(listA) - 1, 3)]
>>> split
[['1', '2', '3'], ['4', '5', '6'], ['6', '8', '9'], ['5', '3', '7']]
>>> del split[0][-1]
>>> split
[['1', '2'], ['4', '5', '6'], ['6', '8', '9'], ['5', '3', '7']]
>>> listA = sum(split, []) # <- flatten split list back to 1 level
>>> listA
['1', '2', '4', '5', '6', '6', '8', '9', '5', '3', '7']
>>> split = [listA[i:(i+3)] for i in range(0, len(listA) - 1, 3)]
>>> split
[['1', '2', '4'], ['5', '6', '6'], ['8', '9', '5'], ['3', '7']]
Just recreate the single list from your nested lists, then re-split.
You can join the lists, assuming they are only one level deep, with something like:
rejoined = [element for sublist in split for element in sublist]
There are no doubt fancier ways, or single-liners that use itertools or some other library, but don't overthink it. If you're only talking about a few hundred or even a few thousand items this solution is quite good enough.
I need this for turning of cards in the deck in a solitaire game.
You can deal your cards using itertools.groupby() with a good key function:
def group_key(x, n=3, flag=[0], counter=itertools.count(0)):
    """Key function for itertools.groupby() that splits items into runs of *n*.

    The item *x* itself is ignored. The returned key flips between 1 and 0
    on every *n*-th call, so groupby() starts a new group every *n* items.

    NOTE: ``flag`` and ``counter`` are default arguments, created once when
    the function is defined and shared by every call. The call count is
    therefore never reset; it carries on from one groupby() pass to the next.
    """
    position = next(counter)
    if not position % n:
        # Toggle the key between 0 and 1 at the start of each run of n calls.
        flag[0] ^= 1
    return flag[0]
^ is the bitwise XOR operator; here it flips the value of the flag from 0 to 1 and vice versa. The flag value is an element of a list because we're doing some kind of memoization: the default list is created once and shared by every call, so the value persists between calls.
Example:
>>> deck = ['1', '2', '3', '4', '5', '6', '6', '8', '9', '5', '3', '7']
>>> for k,g in itertools.groupby(deck, key=group_key):
... print(list(g))
['1', '2', '3']
['4', '5', '6']
['6', '8', '9']
['5', '3', '7']
Now let's say you've used card '9' and '8', so your new deck looks like:
>>> deck = ['1', '2', '3', '4', '5', '6', '6', '5', '3', '7']
>>> for k,g in itertools.groupby(deck, key=group_key):
... print(list(g))
['1', '2', '3']
['4', '5', '6']
['6', '5', '3']
['7']
Build an object that contains a list and tracks when the list is altered (probably by controlling writes to it), then have the object do its own split every time the data is altered and save the split list to a member of the object.

Categories

Resources