Fixing array indices in Python - python

I'd like to have arrays that start from say an index of 4 and go to 9. I'm not interested in creating memory space for < 4, so how is best to proceed? My 2D code is as follows:
arr = [[ 0 for row in range(2)] for col in range(1, 129)]
>>> arr[0][0] = 1
>>> arr[128][0] = 1
Traceback (most recent call last):
File "<stdin>", line 1, in ?
IndexError: list index out of range
>>> arr[127][0] = 1
How can selectively just use the specific range i.e. where the last index runs from 1 to 128 inclusive not 0 to 127. This maybe obvious, but is there a way to do this?
Thanks for the suggestion for dicts, I have been avoiding these - I know - much of the code I'm converting is from C, but I think dictionaries might the saviour. Is there a way to do what I am asking with arrays?

For sparse arrays, use a dict:
sparseArray = {}
sparseArray[(0,0)] = 1
sparseArray[(128,128)] = 1
print sparseArray # Show the content of the sparse array
print sparseArray.keys() # Get all used indices.

You can simply emulate a list:
class OffsetList(object):
def __init__(self, offset=4):
self._offset = offset
self._lst = []
def __len__(self):
return len(self._lst)
def __getitem__(self, key):
return self._lst[key - self._offset]
def __setitem__(self, key, val):
self._lst[key - self._offset] = val
def __delitem__(self, key):
del self._lst[key - self._offset]
def __iter__(self):
return iter(self._lst)
def __contains__(self, item):
return item in self._lst
# All other methods go to the backing list.
def __getattr__(self, a):
return getattr(self._lst, a)
# Test it like this:
ol = OffsetList(4)
ol.append(2)
assert ol[4] == 2
assert len(ol) == 1

You have two options here. You can use sparse lists, or you can create a container type that basically has a normal list and a start index, such that when you request
specialist.get(4)
you actually get
specialist.innerlist[4 - startidx]

If you really wanted list semantics and all, I suppose you could do
class OffsetyList(list):
def __init__(self, *args, **kwargs):
list.__init__(self, *args)
self._offset = int(kwargs.get("offset", 0))
def __getitem__(self, idx):
return list.__getitem__(self, idx + self._offset)
def __setitem__(self, idx, value):
list.__setitem__(self, idx + self._offset, value)
# Implementing the rest of the class
# is left as an exercise for the reader.
ol = OffsetyList(offset = -5)
ol.extend(("foo", "bar", "baz"))
print ol[5], ol[7], ol[6]
but this seems very fragile to say the least.

Related

Create an insert and pop method on list class

I need to implement an insert method (insert(self, index, val)), that inserts val before index, and a pop method (pop(self)), that removes the last element from mylist, onto the MyList class. The behavior should be identical to the methods already available in python.
Note: For the insert method, similarly with the append method already done, the capacity of the array should be doubled if there is no room for an additional element. The pop method should return the element removed from the list, and put None
in its place in the array. If pop was called on an empty list, an IndexError
exception should be raised.
My code thus far:
import ctypes # provides low-level arrays
def make_array(n):
return (n * ctypes.py_object)()
class MyList:
def __init__(self):
self.data = make_array(1)
self.capacity = 1
self.n = 0
def __len__(self):
return self.n
def append(self, val):
if(self.n == self.capacity):
self.resize(2 * self.capacity)
self.data[self.n] = val
self.n += 1
def resize(self, new_size):
new_array = make_array(new_size)
for ind in range(self.n):
new_array[ind] = self.data[ind]
self.data = new_array
self.capacity = new_size
def extend(self, other):
for elem in other:
self.append(elem)
def __getitem__(self, ind):
if not(0 <= ind <= self.n - 1):
raise IndexError('MyList index is out of range')
return self.data[ind]
def __setitem__(self, ind, val):
if not(0 <= ind <= self.n - 1):
raise IndexError('MyList index is out of range')
self.data[ind] = val
mylst1 = MyList()
for i in range(5):
mylst1.append(i)

Where is the value of objects like lists,ints, and strings stored?

Alright so all lists have __setitem__ and __getitem__ and ints have __add__ __sub__ and such to operate on their value. But where is that value actually stored / how can I reference it? Say I want to make a class imitating an list. It might look something like this
class Memory(object):
def __init__(self):
self.data = []
def __getitem__(self, i):
return self.data[i]
def __setitem__(self, key, item):
self.data[key] = item
This isn't very efficient, and I'd have to most likely write every single method of the class individually, which can span hundreds of lines with multiple classes.
The next best solution to create the class being a child of a list like:
class Memory(list):
...
But you can't edit any of its methods because you can't reference its value. If you changes its __setitem__()
What I was wanting to do with this is to create a list class so I can set the list's and do other operations values all on one lambda. I can't just simply directly call __setitem__(key,item) because you can't input a key outside of the range of the items already present in the list. How would I be able to edit the list's value without calling its __setitem__() method.
I realize this doesn't directly answer your question about "where is the value ... stored", and I'm not sure what you mean by "I can't just simply directly call __setitem__(key,item) because you can't input a key outside of the range of the items already present in the list," but if I understand you correctly, you just have to "fill in" values in between the value you want to set and the current length.
For example, I have a utility class that I sometimes use to do this. (See the test cases at the bottom for an idea of how I use it.)
class DynamicArray(list):
''' Just like a normal list except you can skip indices when you fill it. '''
def __init__(self, defaultVal = None, startingCapacity = 0):
super(DynamicArray, self).__init__()
self.defaultVal = defaultVal
if (startingCapacity > 0):
self += [ defaultVal ] * startingCapacity
def insert(self, ind, val):
if (ind > len(self)):
for i in xrange(len(self), ind):
self.append(self.defaultVal)
super(DynamicArray, self).insert(ind, val)
def set(self, ind, val):
self[ind] = val
def __setitem__(self, ind, val):
if (ind >= len(self)):
for i in xrange(len(self), ind + 1):
self.append(self.defaultVal)
super(DynamicArray, self).__setitem__(ind, val)
if __name__ == "__main__":
a = DynamicArray()
assert(len(a) == 0)
a[3] = 2
assert(a[3] == 2)
assert(a[0] is None and a[1] is None and a[2] is None)
assert(len(a) == 4)
a[1] = 1
assert(a[1] == 1)
assert(a[3] == 2)
assert(a[0] is None and a[2] is None)
assert(len(a) == 4)
a[5] = 7
assert(a[5] == 7)
assert(a[3] == 2)
assert(a[1] == 1)
assert(a[0] is None and a[2] is None)
assert(len(a) == 6)

Generating 3 billion-th element of a sequence in python

I am trying to generate the following sequence:
011212201220200112 ... constructed as follows: first is 0,
then repeated the following action:
already written part is attributed to the right with replacement
0 to 1, 1 to 2, 2 to 0.
E.g.
0 -> 01 -> 0112 -> 01121220 -> ...
I am trying to find the 3 billion-th element of this sequence.
I realized that the sequence grows exponentially and hence derived that:
log(base2) (3 billion) ~ 32
So I just need to generate this sequence 32 times.
Here is what I tried in python:
import os
import sys
s=['0']
num_dict = {'0':'1' , '1':'2' , '2':'0'}
def mapper(b):
return num_dict[b]
def gen(s):
while True:
yield s
s.extend( map(mapper,s) )
a = gen(s)
for i in xrange(32):
a.next()
print a.next()[3000000000 - 1]
The problem is my RAM gets filled up before hitting the 3 billion mark.
Is there a better way to do this problem ?
EDIT: This program could crash your machine.Please try for xrange(25) for testing purposes
There are enough hints in the comments that you should be able to find the one-line solution. I think that it's more interesting to try to derive it with a more general tool, namely, implicit data structures. Here's a class for singleton lists.
class Singleton:
def __init__(self, x):
self.x = x
def __getitem__(self, i):
if not isinstance(i, int): raise TypeError(i)
elif not (0 <= i < len(self)): raise IndexError(i)
else: return self.x
def __len__(self): return 1
We can use this class like so.
>>> lst = Singleton(42)
>>> lst[0]
42
>>> len(lst)
1
Now we define a concatenation class and a mapper class, where the latter takes a function and implicitly applies it to each list element.
class Concatenation:
def __init__(self, lst1, lst2):
self.lst1 = lst1
self.lst2 = lst2
self.cachedlen = len(lst1) + len(lst2)
def __getitem__(self, i):
if not isinstance(i, int): raise TypeError(i)
elif not (0 <= i < len(self)): raise IndexError(i)
elif i < len(self.lst1): return self.lst1[i]
else: return self.lst2[i - len(self.lst1)]
def __len__(self): return self.cachedlen
class Mapper:
def __init__(self, f, lst):
self.f = f
self.lst = lst
def __getitem__(self, i): return self.f(self.lst[i])
def __len__(self): return len(self.lst)
Now let's rewrite your code to use these classes.
a = Singleton(0)
for i in range(32):
a = Concatenation(a, Mapper({0: 1, 1: 2, 2: 0}.get, a))
print(a[3000000000 - 1])
As an exercise: why do we need cachedlen?

Python/sage: can lists start at index 1?

I've downloaded from a supposedly serious source a sage script. It doesn't work on my computer, and a quick debugging showed that a problem came from the fact that at some point, the authors were doing as if a n-element list was numbered from 1 to n (whereas the “normal” numbering in Python and (thus) sage is 0..n-1).
What am I missing? Is there a global variable hidden somewhere that changes this convention, like in APL?
Thanks for your help (I hope my question is clear despite my feeble grasp of both English and CSish...)
Python (and therefore sage) lists are always numbered from 0, and there isn't a way to change that.
Looking at CPython's source, in http://hg.python.org/cpython/file/70274d53c1dd/Objects/listobject.c on line 449:
static PyObject *
list_item(PyListObject *a, Py_ssize_t i)
{
if (i < 0 || i >= Py_SIZE(a)) {
if (indexerr == NULL) {
indexerr = PyString_FromString(
"list index out of range");
if (indexerr == NULL)
return NULL;
}
PyErr_SetObject(PyExc_IndexError, indexerr);
return NULL;
}
Py_INCREF(a->ob_item[i]);
return a->ob_item[i];
}
The item lookup delegates straight to the underlying C array, and C arrays are always zero-based. So Python lists are always zero-based as well.
A simple class that shifts the index for you provides a clean interface to something reusable.
class Array(object):
def __init__(self, items: list) -> None:
self.items = items
def __repr__(self) -> str:
return '{}({})'.format(self.__class__.__name__, self.items)
def __len__(self) -> int:
return len(self.items)
def __contains__(self, item: any) -> bool:
return item in self.items
def __getitem__(self, key: int) -> any:
return self.items[key - 1]
def __setitem__(self, key: int, value: any) -> None:
self.items[key - 1] = value
def __delitem__(self, key: int) -> None:
del self.items[key - 1]
Well I too was facing the same idea on how to implement the method of indexing to be start from 1.
I wanted to implement the Insertion Sort Algorithm which is as follows:
As we already know python list start from 0, what I did was following:
A = ['dummy',5,2,6,4,1,3]
for j in range(2,len(A)):
key = A[j]
i=j-1
while i>0 and A[i]>key:
A[i+1] = A[i]
i = i-1
A[i+1] = key
A.pop(0)
print A
I Just added a 'Dummy' in index 0, did all the work like in Algorithm and removed the 'dummy' again. This was just a cheating method.
I would suggest subclassing e.g. collections.abc.MutableSequence for something like this, because once the protocol (in this case: __getitem__, __setitem__, __delitem__, __len__, insert) is implemented all list methods should work on the custom sequence type.
The solution I came up with uses collections.abc.MutableSequence with a list wrapper (_lst) and a helper class component that doesn't know much about anything except that it is subscriptable, i.e. it implements __getitem__ which handles the index modification.
import collections.abc
class _IndexComponent:
def __getitem__(self, index):
if index == 0: raise IndexError("Index 0 is a lie.")
if index > 0: return index -1
else: return index
class OneList(collections.abc.MutableSequence):
def __init__(self, init: list = None) -> None:
self._index_comp = _IndexComponent()
self._lst = []
if not init is None: # and isinstance(init, list)?
self._lst.extend(init)
def __getitem__(self, index: int) -> any:
return self._lst[self._index_comp[index]]
def __setitem__(self, index: int, val: any) -> None:
self._lst[self._index_comp] = val
def __delitem__(self, index: int) -> None:
del self._lst[self._index_comp[index]]
def __len__(self) -> int:
return len(self._lst)
def insert(self, index: int, val: any) -> None:
self._lst.insert(self._index_comp[index], val)
def __repr__(self) -> str:
return f"{self._lst}"
Now for example pop works although it isn't explicitly implemented:
ol = OneList([1,2,3,4])
print(ol.pop(1))
ol.pop(0) # IndexError
Somehow this feels kind of messy though, I would appriciate if someone shared a better solution.
l = []
l.extend([])
print(l)
In [1]: index_0 = ['foo', 'bar', 'quux']
In [2]: index_1 = [None] + index_0
In [3]: index_1[1]
Out[3]: 'foo'
In [4]: index_1[1:]
Out[4]: ['foo', 'bar', 'quux']

Multiple levels of 'collection.defaultdict' in Python

Thanks to some great folks on SO, I discovered the possibilities offered by collections.defaultdict, notably in readability and speed. I have put them to use with success.
Now I would like to implement three levels of dictionaries, the two top ones being defaultdict and the lowest one being int. I don't find the appropriate way to do this. Here is my attempt:
from collections import defaultdict
d = defaultdict(defaultdict)
a = [("key1", {"a1":22, "a2":33}),
("key2", {"a1":32, "a2":55}),
("key3", {"a1":43, "a2":44})]
for i in a:
d[i[0]] = i[1]
Now this works, but the following, which is the desired behavior, doesn't:
d["key4"]["a1"] + 1
I suspect that I should have declared somewhere that the second level defaultdict is of type int, but I didn't find where or how to do so.
The reason I am using defaultdict in the first place is to avoid having to initialize the dictionary for each new key.
Any more elegant suggestion?
Thanks pythoneers!
Use:
from collections import defaultdict
d = defaultdict(lambda: defaultdict(int))
This will create a new defaultdict(int) whenever a new key is accessed in d.
Another way to make a pickleable, nested defaultdict is to use a partial object instead of a lambda:
from functools import partial
...
d = defaultdict(partial(defaultdict, int))
This will work because the defaultdict class is globally accessible at the module level:
"You can't pickle a partial object unless the function [or in this
case, class] it wraps is globally accessible ... under its __name__
(within its __module__)"
-- Pickling wrapped partial functions
Look at nosklo's answer here for a more general solution.
class AutoVivification(dict):
"""Implementation of perl's autovivification feature."""
def __getitem__(self, item):
try:
return dict.__getitem__(self, item)
except KeyError:
value = self[item] = type(self)()
return value
Testing:
a = AutoVivification()
a[1][2][3] = 4
a[1][3][3] = 5
a[1][2]['test'] = 6
print a
Output:
{1: {2: {'test': 6, 3: 4}, 3: {3: 5}}}
As per #rschwieb's request for D['key'] += 1, we can expand on previous by overriding addition by defining __add__ method, to make this behave more like a collections.Counter()
First __missing__ will be called to create a new empty value, which will be passed into __add__. We test the value, counting on empty values to be False.
See emulating numeric types for more information on overriding.
from numbers import Number
class autovivify(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
def __add__(self, x):
""" override addition for numeric types when self is empty """
if not self and isinstance(x, Number):
return x
raise ValueError
def __sub__(self, x):
if not self and isinstance(x, Number):
return -1 * x
raise ValueError
Examples:
>>> import autovivify
>>> a = autovivify.autovivify()
>>> a
{}
>>> a[2]
{}
>>> a
{2: {}}
>>> a[4] += 1
>>> a[5][3][2] -= 1
>>> a
{2: {}, 4: 1, 5: {3: {2: -1}}}
Rather than checking argument is a Number (very non-python, amirite!) we could just provide a default 0 value and then attempt the operation:
class av2(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
def __add__(self, x):
""" override addition when self is empty """
if not self:
return 0 + x
raise ValueError
def __sub__(self, x):
""" override subtraction when self is empty """
if not self:
return 0 - x
raise ValueError
Late to the party, but for arbitrary depth I just found myself doing something like this:
from collections import defaultdict
class DeepDict(defaultdict):
def __call__(self):
return DeepDict(self.default_factory)
The trick here is basically to make the DeepDict instance itself a valid factory for constructing missing values. Now we can do things like
dd = DeepDict(DeepDict(list))
dd[1][2].extend([3,4])
sum(dd[1][2]) # 7
ddd = DeepDict(DeepDict(DeepDict(list)))
ddd[1][2][3].extend([4,5])
sum(ddd[1][2][3]) # 9
def _sub_getitem(self, k):
try:
# sub.__class__.__bases__[0]
real_val = self.__class__.mro()[-2].__getitem__(self, k)
val = '' if real_val is None else real_val
except Exception:
val = ''
real_val = None
# isinstance(Avoid,dict)也是true,会一直递归死
if type(val) in (dict, list, str, tuple):
val = type('Avoid', (type(val),), {'__getitem__': _sub_getitem, 'pop': _sub_pop})(val)
# 重新赋值当前字典键为返回值,当对其赋值时可回溯
if all([real_val is not None, isinstance(self, (dict, list)), type(k) is not slice]):
self[k] = val
return val
def _sub_pop(self, k=-1):
try:
val = self.__class__.mro()[-2].pop(self, k)
val = '' if val is None else val
except Exception:
val = ''
if type(val) in (dict, list, str, tuple):
val = type('Avoid', (type(val),), {'__getitem__': _sub_getitem, 'pop': _sub_pop})(val)
return val
class DefaultDict(dict):
def __getitem__(self, k):
return _sub_getitem(self, k)
def pop(self, k):
return _sub_pop(self, k)
In[8]: d=DefaultDict()
In[9]: d['a']['b']['c']['d']
Out[9]: ''
In[10]: d['a']="ggggggg"
In[11]: d['a']
Out[11]: 'ggggggg'
In[12]: d['a']['pp']
Out[12]: ''
No errors again.
No matter how many levels nested.
pop no error also
dd=DefaultDict({"1":333333})

Categories

Resources