I am currently working on a post processing program for which I use a .txt file. This text file contains 4 informations that are repeated 8 times on each line. I created a function to get these informations and store them in lists in the simplest way :
def add_to_lists(line, frequence, phase, in_phase, in_quad) :
# Serie 1 - Even
frequence[0].append(line[3])
in_phase[0].append(line[4])
in_quad[0].append(line[5])
phase[0].append(line[6])
frequence[1].append(line[7])
in_phase[1].append(line[8])
in_quad[1].append(line[9])
phase[1].append(line[10])
frequence[2].append(line[11])
in_phase[2].append(line[12])
in_quad[2].append(line[13])
phase[2].append(line[14])
frequence[3].append(line[15])
in_phase[3].append(line[16])
in_quad[3].append(line[17])
phase[3].append(line[18])
# Serie 2 - Odd
frequence[4].append(line[19])
in_phase[4].append(line[20])
in_quad[4].append(line[21])
phase[4].append(line[22])
frequence[5].append(line[23])
in_phase[5].append(line[24])
in_quad[5].append(line[25])
phase[5].append(line[26])
frequence[6].append(line[27])
in_phase[6].append(line[28])
in_quad[6].append(line[29])
phase[6].append(line[30])
frequence[7].append(line[31])
in_phase[7].append(line[32])
in_quad[7].append(line[33])
phase[7].append(line[34])
This method works fine but I was wondering if there was a more efficient way of filling in those lists.
Instead of popping out which may be dangerous, you can simply use a step in your for loop, and divide the loop index by the step.
def add_to_lists(line, frequence, phase, in_phase, in_quad, step=4) :
for i in range(3, len(line), step):
idx = (i-3) // step
frequence[idx].append(line[i])
in_phase[idx].append(line[i+1])
in_quad[idx].append(line[i+2])
phase[idx].append(line[i+3])
You could chunk your line variable into sublists of length 4. You can simply pip install more-itertools and import chunked from this package.
from more_itertools import chunked
line_chunks = chunked(line[3:], 4)
for i, line_chunk in enumerate(line_chunks):
frequence[i].append(line_chunk[0])
in_phase[i].append(line_chunk[1])
in_quad[i].append(line_chunk[2])
phase[i].append(line_chunk[3])
from itertools import islice
def add_to_list(line, frequence, phase, in_phase, in_quad):
frequence.extend(islice(line, 0, None, 4))
in_phase.extend(islice(line, 1, None, 4))
in_quad.extend(islice(line, 2, None, 4))
phase.extend(islice(line, 3, None, 4))
You could just return a tuple of every thing, like this
def add_to_list(line):
return (list(islice(line, 0, None, 4)),
list(islice(line, 1, None, 4)),
list(islice(line, 2, None, 4)),
list(islice(line, 3, None, 4)))
You could use a generator to create chunks...
def chunked(elements, size, start_index=0, limit=None):
""" Generator creating chunks of given size. """
if limit == 0:
return
for chunk_index, i in enumerate(range(start_index, len(elements), size)):
if limit is None or chunk_index < limit:
yield (chunk_index, elements[i:i+size])
else:
break
def add_to_lists(line, frequence, phase, in_phase, in_quad):
for i, data in chunked(line, 4, 3, 8):
frequence[i].append(data[0])
in_phase[i].append(data[1])
in_quad[i].append(data[2])
phase[i].append(data[3])
I think using a generator, as in this and chatax' answers used, is more readable, reusable and testable. It separates distinct behaviours.
Creating 8 Chunks
Fill Arrays
That said, the generator chunked can easily be tested, e.g. with an unit test.
You could pop the values from the line list (i.e. get the fourth item in this case and remove it)
for i in range(8):
frequence[i].append(line.pop(3))
in_phase[i].append(line.pop(3))
in_quad[i].append(line.pop(3))
phase[i].append(line.pop(3))
Edit: While this works, popping the list undeniably affects it. If this is unintended (or you don't know if you should), stepping with range() is a better option
i am trying to extract a specific line as variable in file.
this is content of my test.txt
#first set
Task Identification Number: 210CT1
Task title: Assignment 1
Weight: 25
fullMark: 100
Description: Program and design and complexity running time.
#second set
Task Identification Number: 210CT2
Task title: Assignment 2
Weight: 25
fullMark: 100
Description: Shortest Path Algorithm
#third set
Task Identification Number: 210CT3
Task title: Final Examination
Weight: 50
fullMark: 100
Description: Close Book Examination
this is my code
with open(home + '\\Desktop\\PADS Assignment\\test.txt', 'r') as mod:
for line in mod:
taskNumber , taskTile , weight, fullMark , desc = line.strip(' ').split(": ")
print(taskNumber)
print(taskTile)
print(weight)
print(fullMark)
print(description)
here is what i'm trying to do:
taskNumber is 210CT1
taskTitle is Assignment 1
weight is 25
fullMark is 100
desc is Program and design and complexity running time
and loop until the third set
but there's an error occurred in the output
ValueError: not enough values to unpack (expected 5, got 2)
Reponse for SwiftsNamesake
i tried out your code . i am still getting an error.
ValueError: too many values to unpack (expected 5)
this is my attempt by using your code
from itertools import zip_longest
def chunks(iterable, n, fillvalue=None):
args = [iter(iterable)] * n
return zip_longest(*args, fillvalue=fillvalue)
with open(home + '\\Desktop\\PADS Assignment\\210CT.txt', 'r') as mod:
for group in chunks(mod.readlines(), 5+2, fillvalue=''):
# Choose the item after the colon, excluding the extraneous rows
# that don't have one.
# You could probably find a more elegant way of achieving the same thing
l = [item.split(': ')[1].strip() for item in group if ':' in item]
taskNumber , taskTile , weight, fullMark , desc = l
print(taskNumber , taskTile , weight, fullMark , desc, sep='|')
As previously mentioned, you need some sort of chunking. To chunk it usefully we'd also need to ignore the irrelevant lines of the file. I've implemented such a function with some nice Python witchcraft below.
It might also suit you to use a namedtuple to store the values. A namedtuple is a pretty simple type of object, that just stores a number of different values - for example, a point in 2D space might be a namedtuple with an x and a y field. This is the example given in the Python documentation. You should refer to that link for more info on namedtuples and their uses, if you wish. I've taken the liberty of making a Task class with the fields ["number", "title", "weight", "fullMark", "desc"].
As your variables are all properties of a task, using a named tuple might make sense in the interest of brevity and clarity.
Aside from that, I've tried to generally stick to your approach, splitting by the colon. My code produces the output
================================================================================
number is 210CT1
title is Assignment 1
weight is 25
fullMark is 100
desc is Program and design and complexity running time.
================================================================================
number is 210CT2
title is Assignment 2
weight is 25
fullMark is 100
desc is Shortest Path Algorithm
================================================================================
number is 210CT3
title is Final Examination
weight is 50
fullMark is 100
desc is Close Book Examination
which seems to be roughly what you're after - I'm not sure how strict your output requirements are. It should be relatively easy to modify to that end, though.
Here is my code, with some explanatory comments:
from collections import namedtuple
#defines a simple class 'Task' which stores the given properties of a task
Task = namedtuple("Task", ["number", "title", "weight", "fullMark", "desc"])
#chunk a file (or any iterable) into groups of n (as an iterable of n-tuples)
def n_lines(n, read_file):
return zip(*[iter(read_file)] * n)
#used to strip out empty lines and lines beginning with #, as those don't appear to contain any information
def line_is_relevant(line):
return line.strip() and line[0] != '#'
with open("input.txt") as in_file:
#filters the file for relevant lines, and then chunks into 5 lines
for task_lines in n_lines(5, filter(line_is_relevant, in_file)):
#for each line of the task, strip it, split it by the colon and take the second element
#(ie the remainder of the string after the colon), and build a Task from this
task = Task(*(line.strip().split(": ")[1] for line in task_lines))
#just to separate each parsed task
print("=" * 80)
#iterate over the field names and values in the task, and print them
for name, value in task._asdict().items():
print("{} is {}".format(name, value))
You can also reference each field of the Task, like this:
print("The number is {}".format(task.number))
If the namedtuple approach is not desired, feel free to replace the content of the main for loop with
taskNumber, taskTitle, weight, fullMark, desc = (line.strip().split(": ")[1] for line in task_lines)
and then your code will be back to normal.
Some notes on other changes I've made:
filter does what it says on the tin, only iterating over lines that meet the predicate (line_is_relevant(line) is True).
The * in the Task instantiation unpacks the iterator, so each parsed line is an argument to the Task constructor.
The expression (line.strip().split(": ")[1] for line in task_lines) is a generator. This is needed because we're doing multiple lines at once with task_lines, so for each line in our 'chunk' we strip it, split it by the colon and take the second element, which is the value.
The n_lines function works by passing a list of n references to the same iterator to the zip function (documentation). zip then tries to yield the next element from each element of this list, but as each of the n elements is an iterator over the file, zip yields n lines of the file. This continues until the iterator is exhausted.
The line_is_relevant function uses the idea of "truthiness". A more verbose way to implement it might be
def line_is_relevant(line):
return len(line.strip()) > 0 and line[0] != '#'
However, in Python, every object can implicitly be used in boolean logic expressions. An empty string ("") in such an expression acts as False, and a non-empty string acts as True, so conveniently, if line.strip() is empty it will act as False and line_is_relevant will therefore be False. The and operator will also short-circuit if the first operand is falsy, which means the second operand won't be evaluated and therefore, conveniently, the reference to line[0] will not cause an IndexError.
Ok, here's my attempt at a more extended explanation of the n_lines function:
Firstly, the zip function lets you iterate over more than one 'iterable' at once. An iterable is something like a list or a file, that you can go over in a for loop, so the zip function can let you do something like this:
>>> for i in zip(["foo", "bar", "baz"], [1, 4, 9]):
... print(i)
...
('foo', 1)
('bar', 4)
('baz', 9)
The zip function returns a 'tuple' of one element from each list at a time. A tuple is basically a list, except it's immutable, so you can't change it, as zip isn't expecting you to change any of the values it gives you, but to do something with them. A tuple can be used pretty much like a normal list apart from that. Now a useful trick here is using 'unpacking' to separate each of the bits of the tuple, like this:
>>> for a, b in zip(["foo", "bar", "baz"], [1, 4, 9]):
... print("a is {} and b is {}".format(a, b))
...
a is foo and b is 1
a is bar and b is 4
a is baz and b is 9
A simpler unpacking example, which you may have seen before (Python also lets you omit the parentheses () here):
>>> a, b = (1, 2)
>>> a
1
>>> b
2
Although the n-lines function doesn't use this. Now zip can also work with more than one argument - you can zip three, four or as many lists (pretty much) as you like.
>>> for i in zip([1, 2, 3], [0.5, -2, 9], ["cat", "dog", "apple"], "ABC"):
... print(i)
...
(1, 0.5, 'cat', 'A')
(2, -2, 'dog', 'B')
(3, 9, 'apple', 'C')
Now the n_lines function passes *[iter(read_file)] * n to zip. There are a couple of things to cover here - I'll start with the second part. Note that the first * has lower precedence than everything after it, so it is equivalent to *([iter(read_file)] * n). Now, what iter(read_file) does, is constructs an iterator object from read_file by calling iter on it. An iterator is kind of like a list, except you can't index it, like it[0]. All you can do is 'iterate over it', like going over it in a for loop. It then builds a list of length 1 with this iterator as its only element. It then 'multiplies' this list by n.
In Python, using the * operator with a list concatenates it to itself n times. If you think about it, this kind of makes sense as + is the concatenation operator. So, for example,
>>> [1, 2, 3] * 3 == [1, 2, 3] + [1, 2, 3] + [1, 2, 3] == [1, 2, 3, 1, 2, 3, 1, 2, 3]
True
By the way, this uses Python's chained comparison operators - a == b == c is equivalent to a == b and b == c, except b only has to be evaluated once,which shouldn't matter 99% of the time.
Anyway, we now know that the * operator copies a list n times. It also has one more property - it doesn't build any new objects. This can be a bit of a gotcha -
>>> l = [object()] * 3
>>> id(l[0])
139954667810976
>>> id(l[1])
139954667810976
>>> id(l[2])
139954667810976
Here l is three objects - but they're all in reality the same object (you might think of this as three 'pointers' to the same object). If you were to build a list of more complex objects, such as lists, and perform an in place operation like sorting them, it would affect all elements of the list.
>>> l = [ [3, 2, 1] ] * 4
>>> l
[[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1]]
>>> l[0].sort()
>>> l
[[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]
So [iter(read_file)] * n is equivalent to
it = iter(read_file)
l = [it, it, it, it... n times]
Now the very first *, the one with the low precedence, 'unpacks' this, again, but this time doesn't assign it to a variable, but to the arguments of zip. This means zip receives each element of the list as a separate argument, instead of just one argument that is the list. Here is an example of how unpacking works in a simpler case:
>>> def f(a, b):
... print(a + b)
...
>>> f([1, 2]) #doesn't work
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: f() missing 1 required positional argument: 'b'
>>> f(*[1, 2]) #works just like f(1, 2)
3
So in effect, now we have something like
it = iter(read_file)
return zip(it, it, it... n times)
Remember that when you 'iterate' over a file object in a for loop, you iterate over each lines of the file, so when zip tries to 'go over' each of the n objects at once, it draws one line from each object - but because each object is the same iterator, this line is 'consumed' and the next line it draws is the next line from the file. One 'round' of iteration from each of its n arguments yields n lines, which is what we want.
Your line variable gets only Task Identification Number: 210CT1 as its first input. You're trying to extract 5 values from it by splitting it by :, but there are only 2 values there.
What you want is to divide your for loop into 5, read each set as 5 lines, and split each line by :.
The problem here is that you are spliting the lines by : and for each line there is only 1 : so there are 2 values.
In this line:
taskNumber , taskTile , weight, fullMark , desc = line.strip(' ').split(": ")
you are telling it that there are 5 values but it only finds 2 so it gives you an error.
One way to fix this is to run multiple for loops one for each value since you are not allowed to change the format of the file. I would use the first word and sort the data into different
import re
Identification=[]
title=[]
weight=[]
fullmark=[]
Description=[]
with open(home + '\\Desktop\\PADS Assignment\\test.txt', 'r') as mod::
for line in mod:
list_of_line=re.findall(r'\w+', line)
if len(list_of_line)==0:
pass
else:
if list_of_line[0]=='Task':
if list_of_line[1]=='Identification':
Identification.append(line[28:-1])
if list_of_line[1]=='title':
title.append(line[12:-1])
if list_of_line[0]=='Weight':
weight.append(line[8:-1])
if list_of_line[0]=='fullMark':
fullmark.append(line[10:-1])
if list_of_line[0]=='Description':
Description.append(line[13:-1])
print('taskNumber is %s' % Identification[0])
print('taskTitle is %s' % title[0])
print('Weight is %s' % weight[0])
print('fullMark is %s' %fullmark[0])
print('desc is %s' %Description[0])
print('\n')
print('taskNumber is %s' % Identification[1])
print('taskTitle is %s' % title[1])
print('Weight is %s' % weight[1])
print('fullMark is %s' %fullmark[1])
print('desc is %s' %Description[1])
print('\n')
print('taskNumber is %s' % Identification[2])
print('taskTitle is %s' % title[2])
print('Weight is %s' % weight[2])
print('fullMark is %s' %fullmark[2])
print('desc is %s' %Description[2])
print('\n')
of course you can use a loop for the prints but i was too lazy so i copy and pasted :).
IF YOU NEED ANY HELP OR HAVE ANY QUESTIONS PLEASE PLEASE ASK!!!
THIS CODE ASSUMES THAT YOU ARE NOT THAT ADVANCED IN CODING
Good Luck!!!
As another poster (#Cuber) has already stated, you're looping over the lines one-by-one, whereas the data sets are split across five lines. The error message is essentially stating that you're trying to unpack five values when all you have is two. Furthermore, it looks like you're only interested in the value on the right hand side of the colon, so you really only have one value.
There are multiple ways of resolving this issue, but the simplest is probably to group the data into fives (plus the padding, making it seven) and process it in one go.
First we'll define chunks, with which we'll turn this somewhat fiddly process into one elegant loop (from the itertools docs).
from itertools import zip_longest
def chunks(iterable, n, fillvalue=None):
args = [iter(iterable)] * n
return zip_longest(*args, fillvalue=fillvalue)
Now, we'll use it with your data. I've omitted the file boilerplate.
for group in chunks(mod.readlines(), 5+2, fillvalue=''):
# Choose the item after the colon, excluding the extraneous rows
# that don't have one.
# You could probably find a more elegant way of achieving the same thing
l = [item.split(': ')[1].strip() for item in group if ':' in item]
taskNumber , taskTile , weight, fullMark , desc = l
print(taskNumber , taskTile , weight, fullMark , desc, sep='|')
The 2 in 5+2 is for the padding (the comment above and the empty line below).
The implementation of chunks may not make sense to you at the moment. If so, I'd suggest looking into Python generators (and the itertools documentation in particular, which is a marvellous resource). It's also a good idea to get your hands dirty and tinker with snippets inside the Python REPL.
You can still read in lines one by one, but you will have to help the code understand what it's parsing. We can use an OrderedDict to lookup the appropriate variable name.
import os
import collections as ct
def printer(dict_, lookup):
for k, v in lookup.items():
print("{} is {}".format(v, dict_[k]))
print()
names = ct.OrderedDict([
("Task Identification Number", "taskNumber"),
("Task title", "taskTitle"),
("Weight", "weight"),
("fullMark","fullMark"),
("Description", "desc"),
])
filepath = home + '\\Desktop\\PADS Assignment\\test.txt'
with open(filepath, "r") as f:
for line in f.readlines():
line = line.strip()
if line.startswith("#"):
header = line
d = {}
continue
elif line:
k, v = line.split(":")
d[k] = v.strip(" ")
else:
printer(d, names)
printer(d, names)
Output
taskNumber is 210CT3
taskTitle is Final Examination
weight is 50
fullMark is 100
desc is Close Book Examination
taskNumber is 210CT1
taskTitle is Assignment 1
weight is 25
fullMark is 100
desc is Program and design and complexity running time.
taskNumber is 210CT2
taskTitle is Assignment 2
weight is 25
fullMark is 100
desc is Shortest Path Algorithm
You're trying to get more data than is present on one line; the five pieces of data are on separate lines.
As SwiftsNamesake suggested, you can use itertools to group the lines:
import itertools
def keyfunc(line):
# Ignores comments in the data file.
if len(line) > 0 and line[0] == "#":
return True
# The separator is an empty line between the data sets, so it returns
# true when it finds this line.
return line == "\n"
with open(home + '\\Desktop\\PADS Assignment\\test.txt', 'r') as mod:
for k, g in itertools.groupby(mod, keyfunc):
if not k: # Does not process lines that are separators.
for line in g:
data = line.strip().partition(": ")
print(f"{data[0] is {data[2]}")
# print(data[0] + " is " + data[2]) # If python < 3.6
print("") # Prints a newline to separate groups at the end of each group.
If you want to use the data in other functions, output it as a dictionary from a generator:
from collections import OrderedDict
import itertools
def isSeparator(line):
# Ignores comments in the data file.
if len(line) > 0 and line[0] == "#":
return True
# The separator is an empty line between the data sets, so it returns
# true when it finds this line.
return line == "\n"
def parseData(data):
for line in data:
k, s, v = line.strip().partition(": ")
yield k, v
def readData(filePath):
with open(filePath, "r") as mod:
for key, g in itertools.groupby(mod, isSeparator):
if not key: # Does not process lines that are separators.
yield OrderedDict((k, v) for k, v in parseData(g))
def printData(data):
for d in data:
for k, v in d.items():
print(f"{k} is {v}")
# print(k + " is " + v) # If python < 3.6
print("") # Prints a newline to separate groups at the end of each group.
data = readData(home + '\\Desktop\\PADS Assignment\\test.txt')
printData(data)
Inspired by itertools-related solutions, here is another using the more_itertools.grouper tool from the more-itertools library. It behaves similarly to #SwiftsNamesake's chunks function.
import collections as ct
import more_itertools as mit
names = dict([
("Task Identification Number", "taskNumber"),
("Task title", "taskTitle"),
("Weight", "weight"),
("fullMark","fullMark"),
("Description", "desc"),
])
filepath = home + '\\Desktop\\PADS Assignment\\test.txt'
with open(filepath, "r") as f:
lines = (line.strip() for line in f.readlines())
for group in mit.grouper(7, lines):
for line in group[1:]:
if not line: continue
k, v = line.split(":")
print("{} is {}".format(names[k], v.strip()))
print()
Output
taskNumber is 210CT1
taskTitle is Assignment 1
weight is 25
fullMark is 100
desc is Program and design and complexity running time.
taskNumber is 210CT2
taskTitle is Assignment 2
weight is 25
fullMark is 100
desc is Shortest Path Algorithm
taskNumber is 210CT3
taskTitle is Final Examination
weight is 50
fullMark is 100
desc is Close Book Examination
Care was taken to print the variable name with the corresponding value.
My goal for the program is the following:
Given any shape (represented as enumerated points and their connections to other points), return a list containg all possible paths (as strings/lists/...). A path is a 'drawing' of the given shape, in which:
no connection has been used more than once and
the 'pen' hasn't been lifted (example included below).
The following code is essentially what I've come up with so far. It's not the code of the actual program, but the basic semantics are the same (i.e. if this code will work, my program will work too).
"""
Example used:
2
/ \
/ \
/ \
1-------3
"""
from copy import deepcopy
points = {1: [2,3],
2: [1,3],
3: [1,2]}
def find_paths(prev_point, points):
for current_point in points[prev_point]:
points[current_point].remove(prev_point)
points[prev_point].remove(current_point)
return [prev_point] + find_paths(current_point, points)
return [prev_point]
def collect(points):
results = []
for first_point in points:
result = find_paths(first_point, deepcopy(points))
results.append(result)
return results
print(collect(points))
My struggle has been to make it return all paths. As of now, it lists only 3 (out of 6). I do understand that the issue arises from the for-loop in f being executed exactly once each time it is called (and it's being called 3 times), since the execution is terminated by return each time. However, I have up until now failed to find a way to avoid this - I played around with making f a generator but this has given me a list of generators as the end result, no matter how I tried to change it.
Any help is appreciated!
EDIT: The generator-version I had simply replaced the returns in find_paths with yield s.
So the last two lines look like:
...
yield [prev_point] + find_paths(current_point, points)
yield [prev_point]
Additionally, I played around with a 'flattener' for generators, but it didn't work at all:
def flatten(x):
if callable(x):
for i in x:
yield flatten(i)
yield x
def func():
yield 1
lis = [1,2,func]
for f in flatten(lis):
print(f)
I think the following works. I based it off of your original code, but did a few things (some necessary, some not):
Rename parameters in find_paths to make more sense for me. We are working with the current_point not the previous_point, etc.
Add an end condition to stop recursion.
Make a copy of points for every possible path being generated and return (yield) each one of those possible paths. Your original code didn't have logic for this since it only expected one result per call to find_paths, but that doesn't really make sense when using recursion like this. I also extend my final result for the same reason.
Here is the code:
from copy import deepcopy
points = {1: [2,3],
2: [1,3],
3: [1,2]}
def find_paths(current_point, points):
if len(points[current_point]) == 0:
# End condition: have we found a complete path? Then yield
if all(not v for v in points.values()):
yield [current_point]
else:
for next_point in points[current_point]:
new_points = deepcopy(points)
new_points[current_point].remove(next_point)
new_points[next_point].remove(current_point)
paths = find_paths(next_point, new_points)
for path in paths:
yield [current_point] + path
def collect(points):
results = []
for first_point in points:
result = find_paths(first_point, points)
results.extend(list(result))
return results
print(collect(points))
Results in:
[1, 2, 3, 1]
[1, 3, 2, 1]
[2, 1, 3, 2]
[2, 3, 1, 2]
[3, 1, 2, 3]
[3, 2, 1, 3]
Your original example image should work with the following:
points = {
1: [2,3],
2: [1,3,4,5],
3: [1,2,4,5],
4: [2,3,5],
5: [2,3,4],
}
Edit: Removed the extra deepcopy I had in collect.
It is necessary to copy the points every time because you are "saving" the current state of the current path you are "drawing". If you didn't copy it then going down the path to node 2 would change the state of the points when going down the path to node 3.
The code is passed an array. My understanding is this passing is done by reference. I want the function to recursively divide the last remaining half of the list in two and set each value that it was split at to zero.
The change to zero is happens in the array but when I call print a at the end I get the original array.
What am i doing wrong?
a = range(10)
def listreduction(array):
if len(array) == 1:
array[0] = 0
return
split = len(array)/2
array[split] = 0
return listreduction(array[split:])
listreduction(a)
print a
The current output is
[0, 1, 2, 3, 4, 0, 6, 7, 8, 9]
The should be more zeros to the right of the second one
A slice creates a new list. If you want to do this recursively, you'll have to pass the index where the function is supposed to work on the list, not the actual slice.
This is probably what you want.
a = range(1, 10)
def list_reduction(l, prev_split_pos=None):
split_pos = (len(l) + prev_split_pos) / 2 if prev_split_pos else len(l) / 2
if split_pos == prev_split_pos:
return
l[split_pos] = 0
return list_reduction(l, split_pos)
list_reduction(a)
print a
So, to your code. Everytime you do a list slice, you actually generate a new list, which is not at all connected to the old one. This is why you don't see any mutations to it except the first one.
Since you use recursion, the slice operation in the argument will create new list instance which is different than you instance. That's the reason.
You can change your code as following:
a = range(10)
def list_reduction(array, position=0):
if len(array) -1 <= position:
return
split = position + (len(array) - position) / 2
array[split] = 0
return list_reduction(array, split)
list_reduction(a)
print a
The output is:
[0, 1, 2, 3, 4, 0, 6, 0, 0, 0]
In Python argumnet passing is different from other conventional programming language. arguments are passed by object reference. And the whether the referred object will be modifed or not it depends on two things
Whether the variable is mutable or immutable. In your case range will create a list so its a mutable object. That implies that if you update array it should also update a.
Operation. = operation will always create new object reference. So even in your case array is mutable but since you are doing assignment operation It will create a new object reference.
Following example should clear up the things for you.
Example 1
>>> a = [1,2]
def fun1(array):
array= array + [3]
print "var array = %s" %array
fun1(a)
print "var a = %s" %a
Output
var array = [1, 2, 3]
var a = [1, 2]
Example 2
a = [1,2]
def fun1(array):
array.append(3)
print "var array = %s" %array
fun1(a)
print "var a = %s" %a
Output
var array = [1, 2, 3]
var a = [1, 2,3]
I usually don't write my Python code in the best way since I'm relatively new to it, someone requested that I make changes to a Django app since the code doesn't look so nice.
Here's what it looks like:
#login_required
def submission_set_rank(request):
r1_obj_id = request.GET.get('rank1','')
r2_obj_id = request.GET.get('rank2','')
r3_obj_id = request.GET.get('rank3','')
r4_obj_id = request.GET.get('rank4','')
r5_obj_id = request.GET.get('rank5','')
#rate the first BallotStats object
ballot_1 = BallotStats.objects.get(object_id=r1_obj_id)
ballot_2 = BallotStats.objects.get(object_id=r2_obj_id)
ballot_3 = BallotStats.objects.get(object_id=r3_obj_id)
ballot_4 = BallotStats.objects.get(object_id=r4_obj_id)
ballot_5 = BallotStats.objects.get(object_id=r5_obj_id)
ballot_1.score += 5
ballot_2.score += 4
ballot_3.score += 3
ballot_4.score += 2
ballot_5.score += 1
ballot_1.save()
ballot_2.save()
ballot_3.save()
ballot_4.save()
ballot_5.save()
return HttpResponseRedirect('/submissions/results/film/')
As it turns out I realized that I've always been writing my Python code this way, is there a way to make it look better instead of taking up 21+ lines of code?
The biggest problem is not the style of the code - it is that you are making 10 queries: 5 for getting the objects and 5 for updating them.
Filter out objects using __in at once:
#login_required
def submission_set_rank(request):
keys = {'rank1': 5, 'rank2': 4, 'rank3': 3, 'rank4': 2, 'rank5': 1}
ranks = [request.GET.get(key,'') for key in keys]
for ballot in BallotStats.objects.filter(object_id__in=ranks):
ballot.score += keys[ballot.object_id]
ballot.save()
return HttpResponseRedirect('/submissions/results/film/')
This will make 6 queries at most: 1 for getting the objects and 5 for updating them.
Also, you can "mark" the view with the commit_manually decorator (commit_on_success would also work for you). It should speed up things significantly:
#login_required
#transaction.commit_manually
def submission_set_rank(request):
keys = {'rank1': 5, 'rank2': 4, 'rank3': 3, 'rank4': 2, 'rank5': 1}
ranks = [request.GET.get(key,'') for key in keys]
for ballot in BallotStats.objects.filter(object_id__in=ranks):
ballot.score += keys[ballot.object_id]
ballot.save()
transaction.commit()
return HttpResponseRedirect('/submissions/results/film/')
And I have the strong feeling that you can do this in even a single update query. For example, by using connection.cursor() directly with the help of executemany():
#login_required
def submission_set_rank(request):
keys = {'rank1': 5, 'rank2': 4, 'rank3': 3, 'rank4': 2, 'rank5': 1}
ranks = [{'score': request.GET.get(key,''), 'id': key} for key in keys]
cursor = connection.cursor()
cursor.executemany("""
UPDATE
ballot_stats
SET
score = score + %(score)s
WHERE
object_id = %(id)s
""", ranks)
return HttpResponseRedirect('/submissions/results/film/')
Make sure the field and table names are correct.
In your case, a little bit of looping wouldn't hurt at all. In fact, as a general rule, whenever you have to repeat something more than twice, try to make it a loop.
n = 5
for i in range(1, n+1):
obj_id = request.GET('rank' + str(i), '')
ballot = BallotStats.objects.get(object_id=obj_id)
ballot.score += n - i + 1
ballot.save()
If we're talking about saving lines of code, you can combine the 4 lines into one line, by replacing your .save() with a .update() and using an F() expression to take care of the +=. Also, as discussed by #alecxe, this will cut your queries in half. It'd look like this:
#login_required
def submission_set_rank(request):
BallotStats.objects.filter(object_id=request.GET.get('rank1','')).update(score=F('score') + 5)
BallotStats.objects.filter(object_id=request.GET.get('rank2','')).update(score=F('score') + 4)
BallotStats.objects.filter(object_id=request.GET.get('rank3','')).update(score=F('score') + 3)
BallotStats.objects.filter(object_id=request.GET.get('rank4','')).update(score=F('score') + 2)
BallotStats.objects.filter(object_id=request.GET.get('rank5','')).update(score=F('score') + 1)
return HttpResponseRedirect('/submissions/results/film/')