How to deal with large csv file in python? - python

I have CSV file that contains a data of 40k rows.
My each function open csv file and works with it and then close it.
Is there a way that I can open the file once and then close it and I can work with it whenever I want? I tried to put each field in a separate list and work whit it whenever I call it or in dictionary but both methods works good up to 1k row if more then it takes long time to processes it, I found a way to speed up by filtering them, but not sure how to apply it.
sample of my codes.
files=open("myfile.csv","r")
def spec_total():
total = 0.0
files.readline() # skip first row
for line in files:
field=line.strip().split(",") #make Into fields
tall=float(field[0])
if tall >= 9.956:
total +=tall
print("The sum is: %0.5f" % (total))
spec_total()
files.close()
other function
files=open("3124749c.csv","r")
def code():
match= 0
files.readline() # skip first row
for row in files:
field=row.strip().split(",") #make Into fields
code=(field[4])
import re
if re.search(r'\[[A-Za-z][0-9]+\][0-9]+[A-Za-z]{2}[0-9]+#[0-9]+', code) is None:
match += 1
print("The answer that do not match code is :",match)
code()
files.close()
and there is plenty more functions that opens each time the csv file and split them into field in order to recognise which field I am referring to.

If I understand correctly try:
import csv
total = 0.0
for row in csv.reader(open("myfile.csv")):
tall = float(row[0])
if tall >= 9.956:
total += tall
print("The sum is: %0.5f" % total)
More complex version - create calculation classes for processing each row.
class Calc(object):
def process(self,row):
pass
def value(self):
pass
class SumColumn(Calc):
def __init__(self, column=0,tall=9.956):
self.column = column
self.total = 0
def process(self, row):
tall = float(row[0])
if tall >= self.tall:
self.total += tall
def value(self):
return self.total
class ColumnAdder(Calc):
def __init__(self, col1, col2):
self.total = 0
self.col1 = col1
self.col2 = col2
def process(self, row):
self.total += (row[self.col1] + row[self.col2])
def value(self):
return self.total
class ColumnMatcher(Calc):
def __init__(self, col=4):
self.matches = 0
def process(self, row):
code = row[4]
import re
if re.search(r'\[[A-Za-z][0-9]+\][0-9]+[A-Za-z]{2}[0-9]+#[0-9]+', code) is None:
self.match += 1
def value(self):
return self.matches
import csv
col0_sum = SumColumn()
col3_sum = SumColumn(3, 2.45)
col5_6_add = ColumnAdder(5,6)
col4_matches = ColumnMatcher()
for row in csv.reader(open("myfile.csv")):
col0_sum.process(row)
col3_sum.process(row)
col5_6_add.process(row)
col4_matches.process(row)
print col0_sum.value()
print col3_sum.value()
print col5_6_add.value()
print col4_matches.value()
This code was typed into SO, which was a tedious affair - so bare with on syntax etc.
For illustration purposes only - not to be taken too literally.

All is object in Python: that means functions too.
So there is no need to define special classes to craft functions as instances of these classes as sotapme does, since every function we define is already an object in the sense of 'instance of a class'.
Now, if someone needs to create several functions of the same type, for example each of them adds all the values of a precise CSV file's column, that's right that it's interesting to create these many functions by a repeating process.
At this point, raises the question: using function factory or class ?
Personnaly, I prefer the function factory way because it is less verbose.
I also discovered in the Theran's answer HERE that it's also faster.
In the following code, I use a trick with globals() to give a particular name to each function created by means of a function factory. Some will say it's bad, but I don't know why. If there's another way to do the same , I will be happy to learn it.
In the code, 3 functions are build by a function factory, and I let one defined by plain normal definition (op3).
Python is fantastic!
import csv
import re
# To create a CSV file
with open('Data.csv','wb') as csvhandle:
hw = csv.writer(csvhandle)
hw.writerows( ((2,10,'%%',3000,'-statusOK-'),
(5,3,'##',500,'-modo OOOOKKK-'),
(1,60,'**',700,'-- anarada-')) )
del hw
# To visualize the content of the CSV file
with open(r'Data.csv','rb') as f:
print "The CSV file at start :\n "+\
'\n '.join(map(repr,csv.reader(f)))
def run_funcs_on_CSVfile(FUNCS,CSV):
with open(CSV,'rb') as csvhandle:
for f in FUNCS:
# this is necessary for functions not created via
# via a function factory but via plain definition
# that defines only the attribute col of the function
if 'field' not in f.__dict__:
f.field = f.col - 1
# columns are numbered 1,2,3,4,...
# fields are numbered 0,1,2,3,...
for row in csv.reader(csvhandle):
for f in FUNCS:
f(row[f.field])
def SumColumn(name,col,start=0):
def g(s):
g.kept += int(s)
g.kept = start
g.field = col -1
g.func_name = name
globals()[name] = g
def MultColumn(name,col,start=1):
def g(s):
g.kept *= int(s)
g.kept = start
g.field = col - 1
g.func_name = name
globals()[name] = g
def ColumnMatcher(name,col,pat,start = 0):
RE = re.compile(pat)
def g(s,regx = RE):
if regx.search(s):
g.kept += 1
g.kept = start
g.field = col - 1
g.func_name = name
globals()[name] = g
SumColumn('op1',1)
MultColumn('op2',2)
ColumnMatcher('op4',5,'O+K')
def op3(s):
s = int(s)
if s%2:
op3.kept += (2*s)
else:
op3.kept += s
op3.kept = 0
op3.col = 4
print '\nbefore:\n ' +\
'\n '.join('%s.kept == %d'
% (f.func_name, f.kept)
for f in (op1,op2,op3,op4) )
# The treatment is done here
run_funcs_on_CSVfile((op2,op3,op4,op1),r'Data.csv')
# note that the order of the functions in the tuple
# passed as argument can be any either one or another
print '\nafter:\n ' +\
'\n '.join('%s(column %d) in %s.kept == %d'
% (f.func_name, f.field+1, f.func_name, f.kept)
for f in (op1,op2,op3,op4) )
.
result
.
The CSV file at start :
['2', '10', '%%', '3000', '-statusOK-']
['5', '3', '##', '500', '-modo OOOOKKK-']
['1', '60', '**', '700', '-- anarada-']
before:
op1.kept == 0
op2.kept == 1
op3.kept == 0
op4.kept == 0
after:
op1(column 1) in op1.kept == 8
op2(column 2) in op2.kept == 1800
op3(column 4) in op3.kept == 4200
op4(column 5) in op4.kept == 2

Related

How to know the name of a classs loade like parameter on other class - Pyhton [duplicate]

This question already has answers here:
Getting the name of a variable as a string
(32 answers)
Closed 4 months ago.
Is it possible to get the original variable name of a variable passed to a function? E.g.
foobar = "foo"
def func(var):
print var.origname
So that:
func(foobar)
Returns:
>>foobar
EDIT:
All I was trying to do was make a function like:
def log(soup):
f = open(varname+'.html', 'w')
print >>f, soup.prettify()
f.close()
.. and have the function generate the filename from the name of the variable passed to it.
I suppose if it's not possible I'll just have to pass the variable and the variable's name as a string each time.
EDIT: To make it clear, I don't recommend using this AT ALL, it will break, it's a mess, it won't help you in any way, but it's doable for entertainment/education purposes.
You can hack around with the inspect module, I don't recommend that, but you can do it...
import inspect
def foo(a, f, b):
frame = inspect.currentframe()
frame = inspect.getouterframes(frame)[1]
string = inspect.getframeinfo(frame[0]).code_context[0].strip()
args = string[string.find('(') + 1:-1].split(',')
names = []
for i in args:
if i.find('=') != -1:
names.append(i.split('=')[1].strip())
else:
names.append(i)
print names
def main():
e = 1
c = 2
foo(e, 1000, b = c)
main()
Output:
['e', '1000', 'c']
To add to Michael Mrozek's answer, you can extract the exact parameters versus the full code by:
import re
import traceback
def func(var):
stack = traceback.extract_stack()
filename, lineno, function_name, code = stack[-2]
vars_name = re.compile(r'\((.*?)\).*$').search(code).groups()[0]
print vars_name
return
foobar = "foo"
func(foobar)
# PRINTS: foobar
Looks like Ivo beat me to inspect, but here's another implementation:
import inspect
def varName(var):
lcls = inspect.stack()[2][0].f_locals
for name in lcls:
if id(var) == id(lcls[name]):
return name
return None
def foo(x=None):
lcl='not me'
return varName(x)
def bar():
lcl = 'hi'
return foo(lcl)
bar()
# 'lcl'
Of course, it can be fooled:
def baz():
lcl = 'hi'
x='hi'
return foo(lcl)
baz()
# 'x'
Moral: don't do it.
Another way you can try if you know what the calling code will look like is to use traceback:
def func(var):
stack = traceback.extract_stack()
filename, lineno, function_name, code = stack[-2]
code will contain the line of code that was used to call func (in your example, it would be the string func(foobar)). You can parse that to pull out the argument
You can't. It's evaluated before being passed to the function. All you can do is pass it as a string.
#Ivo Wetzel's answer works in the case of function call are made in one line, like
e = 1 + 7
c = 3
foo(e, 100, b=c)
In case that function call is not in one line, like:
e = 1 + 7
c = 3
foo(e,
1000,
b = c)
below code works:
import inspect, ast
def foo(a, f, b):
frame = inspect.currentframe()
frame = inspect.getouterframes(frame)[1]
string = inspect.findsource(frame[0])[0]
nodes = ast.parse(''.join(string))
i_expr = -1
for (i, node) in enumerate(nodes.body):
if hasattr(node, 'value') and isinstance(node.value, ast.Call)
and hasattr(node.value.func, 'id') and node.value.func.id == 'foo' # Here goes name of the function:
i_expr = i
break
i_expr_next = min(i_expr + 1, len(nodes.body)-1)
lineno_start = nodes.body[i_expr].lineno
lineno_end = nodes.body[i_expr_next].lineno if i_expr_next != i_expr else len(string)
str_func_call = ''.join([i.strip() for i in string[lineno_start - 1: lineno_end]])
params = str_func_call[str_func_call.find('(') + 1:-1].split(',')
print(params)
You will get:
[u'e', u'1000', u'b = c']
But still, this might break.
You can use python-varname package
from varname import nameof
s = 'Hey!'
print (nameof(s))
Output:
s
Package below:
https://github.com/pwwang/python-varname
For posterity, here's some code I wrote for this task, in general I think there is a missing module in Python to give everyone nice and robust inspection of the caller environment. Similar to what rlang eval framework provides for R.
import re, inspect, ast
#Convoluted frame stack walk and source scrape to get what the calling statement to a function looked like.
#Specifically return the name of the variable passed as parameter found at position pos in the parameter list.
def _caller_param_name(pos):
#The parameter name to return
param = None
#Get the frame object for this function call
thisframe = inspect.currentframe()
try:
#Get the parent calling frames details
frames = inspect.getouterframes(thisframe)
#Function this function was just called from that we wish to find the calling parameter name for
function = frames[1][3]
#Get all the details of where the calling statement was
frame,filename,line_number,function_name,source,source_index = frames[2]
#Read in the source file in the parent calling frame upto where the call was made
with open(filename) as source_file:
head=[source_file.next() for x in xrange(line_number)]
source_file.close()
#Build all lines of the calling statement, this deals with when a function is called with parameters listed on each line
lines = []
#Compile a regex for matching the start of the function being called
regex = re.compile(r'\.?\s*%s\s*\(' % (function))
#Work backwards from the parent calling frame line number until we see the start of the calling statement (usually the same line!!!)
for line in reversed(head):
lines.append(line.strip())
if re.search(regex, line):
break
#Put the lines we have groked back into sourcefile order rather than reverse order
lines.reverse()
#Join all the lines that were part of the calling statement
call = "".join(lines)
#Grab the parameter list from the calling statement for the function we were called from
match = re.search('\.?\s*%s\s*\((.*)\)' % (function), call)
paramlist = match.group(1)
#If the function was called with no parameters raise an exception
if paramlist == "":
raise LookupError("Function called with no parameters.")
#Use the Python abstract syntax tree parser to create a parsed form of the function parameter list 'Name' nodes are variable names
parameter = ast.parse(paramlist).body[0].value
#If there were multiple parameters get the positional requested
if type(parameter).__name__ == 'Tuple':
#If we asked for a parameter outside of what was passed complain
if pos >= len(parameter.elts):
raise LookupError("The function call did not have a parameter at postion %s" % pos)
parameter = parameter.elts[pos]
#If there was only a single parameter and another was requested raise an exception
elif pos != 0:
raise LookupError("There was only a single calling parameter found. Parameter indices start at 0.")
#If the parameter was the name of a variable we can use it otherwise pass back None
if type(parameter).__name__ == 'Name':
param = parameter.id
finally:
#Remove the frame reference to prevent cyclic references screwing the garbage collector
del thisframe
#Return the parameter name we found
return param
If you want a Key Value Pair relationship, maybe using a Dictionary would be better?
...or if you're trying to create some auto-documentation from your code, perhaps something like Doxygen (http://www.doxygen.nl/) could do the job for you?
I wondered how IceCream solves this problem. So I looked into the source code and came up with the following (slightly simplified) solution. It might not be 100% bullet-proof (e.g. I dropped get_text_with_indentation and I assume exactly one function argument), but it works well for different test cases. It does not need to parse source code itself, so it should be more robust and simpler than previous solutions.
#!/usr/bin/env python3
import inspect
from executing import Source
def func(var):
callFrame = inspect.currentframe().f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
expression = source.asttokens().get_text(callNode.args[0])
print(expression, '=', var)
i = 1
f = 2.0
dct = {'key': 'value'}
obj = type('', (), {'value': 42})
func(i)
func(f)
func(s)
func(dct['key'])
func(obj.value)
Output:
i = 1
f = 2.0
s = string
dct['key'] = value
obj.value = 42
Update: If you want to move the "magic" into a separate function, you simply have to go one frame further back with an additional f_back.
def get_name_of_argument():
callFrame = inspect.currentframe().f_back.f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
return source.asttokens().get_text(callNode.args[0])
def func(var):
print(get_name_of_argument(), '=', var)
If you want to get the caller params as in #Matt Oates answer answer without using the source file (ie from Jupyter Notebook), this code (combined from #Aeon answer) will do the trick (at least in some simple cases):
def get_caller_params():
# get the frame object for this function call
thisframe = inspect.currentframe()
# get the parent calling frames details
frames = inspect.getouterframes(thisframe)
# frame 0 is the frame of this function
# frame 1 is the frame of the caller function (the one we want to inspect)
# frame 2 is the frame of the code that calls the caller
caller_function_name = frames[1][3]
code_that_calls_caller = inspect.findsource(frames[2][0])[0]
# parse code to get nodes of abstract syntact tree of the call
nodes = ast.parse(''.join(code_that_calls_caller))
# find the node that calls the function
i_expr = -1
for (i, node) in enumerate(nodes.body):
if _node_is_our_function_call(node, caller_function_name):
i_expr = i
break
# line with the call start
idx_start = nodes.body[i_expr].lineno - 1
# line with the end of the call
if i_expr < len(nodes.body) - 1:
# next expression marks the end of the call
idx_end = nodes.body[i_expr + 1].lineno - 1
else:
# end of the source marks the end of the call
idx_end = len(code_that_calls_caller)
call_lines = code_that_calls_caller[idx_start:idx_end]
str_func_call = ''.join([line.strip() for line in call_lines])
str_call_params = str_func_call[str_func_call.find('(') + 1:-1]
params = [p.strip() for p in str_call_params.split(',')]
return params
def _node_is_our_function_call(node, our_function_name):
node_is_call = hasattr(node, 'value') and isinstance(node.value, ast.Call)
if not node_is_call:
return False
function_name_correct = hasattr(node.value.func, 'id') and node.value.func.id == our_function_name
return function_name_correct
You can then run it as this:
def test(*par_values):
par_names = get_caller_params()
for name, val in zip(par_names, par_values):
print(name, val)
a = 1
b = 2
string = 'text'
test(a, b,
string
)
to get the desired output:
a 1
b 2
string text
Since you can have multiple variables with the same content, instead of passing the variable (content), it might be safer (and will be simpler) to pass it's name in a string and get the variable content from the locals dictionary in the callers stack frame. :
def displayvar(name):
import sys
return name+" = "+repr(sys._getframe(1).f_locals[name])
If it just so happens that the variable is a callable (function), it will have a __name__ property.
E.g. a wrapper to log the execution time of a function:
def time_it(func, *args, **kwargs):
start = perf_counter()
result = func(*args, **kwargs)
duration = perf_counter() - start
print(f'{func.__name__} ran in {duration * 1000}ms')
return result

How do I print all of the instances from a set of variables that are undefined from the beginning?

I have a program that I want to be able to print all of the instances of each variable using my method that I created. Problem is I can't figure out a way to print them since each are listed under a different variable that aren't configured from hardcoding them in and I need a way to automatically recall them in my code.
class fit:
def __init__(self,day,did,workout='Not Recorded',time='An unknown amount of',calories='An unknown amount of'):
self.day = day
self.did = did
if did.lower()=='no':
self.workout = 'Not Recorded'
self.time = "An unknown amount of Minutes"
self.calories = "An unknown amount of Calories"
else:
self.workout = workout
self.time = "{} Minutes".format(time)
self.calories = "{} Calories".format(calories)
def formate(self):
self.formate = "{}:\n\nDid you work out: {}\nWorkout focus: {}\nYou worked out for: {}\nYou burned: {}\n\n----------------------------------------------------------".format(self.day,self.did,self.workout,self.time,self.calories)
return self.formate
def reader(day,index):
file = open('readme.txt')
file = file.read()
stripped = file.rsplit("\n")
for i in range(len(stripped)):
stripped[i] = stripped[i].rsplit(" ")
del stripped[-1]
if int(index) >= len(stripped[day-1]):
return "none"
else:
return stripped[day-1][index]
x = 0
def create_new_instance(class_name,instance_name):
globals()[instance_name] = class_name(reader(x,0),reader(x,1),reader(x,2),reader(x,3),reader(x,4))
print('Class instance {} created'.format(instance_name))
while True:
try:
x+=1
ins = 'day_' + str(x)
create_new_instance(fit,ins)
except:
break
break
def printer(instance):
print(.formate())
while True:
x+=1
inst = 'day_' + str(x)
printer(inst)
An example of this might be that I have 8 lines of data from a text document and I have a system that creates instances of day_1, day_2, day_3 ect until day_8 and then I want to print each of those instances out, but again I don't have those instances directly hardcoded into my code so I don't know how I'd do it. I've tried looking into maybe a while loop and increasing a variable by 1 and concatenating it with day and trying to make a variable out of that but the my limited experience with python isn't helping.
A very unpythonic and ugly way would be to use exec, for example:
day_3=5
x = 'day_'+'3'
exec("print("+x+")")
I would recommend another way to store your variables though.

Create objects in Python based on file

I'm coding a game in Python 3 and I need to create an unknown number of objects with each objects properties based on the contents of a file.
To explain, I'll dump some code here:
class attack(object):
def __init__(self, name, power):
self.name = name
self.element = int(power)
import getline from linecache
Attacks = []
count = 1
while 1==1:
line=getline("Attacks.txt", count)
line = line.rstrip()
if line == "":
break
else:
linelist = line.split()
#something involving "attack(linelist[1], linelist[2])"
Attacks.append(item)
count += 1
"Attacks.txt" contains this:
0 Punch 2
1 Kick 3
2 Throw 4
3 Dropkick 6
4 Uppercut 8
When the code is done, the list "Attacks" should contain 5 attack objects, one for each line of "Attacks.txt" with the listed name and power. The name is for the user only; in the code, each object will only be called for by its place in its list.
The idea is that the end user can change "Attacks.txt" (and other similar files) to add, remove or change entries; that way, they can modify my game without digging around in the actual code.
The issue is I have no idea how to create objects on the fly like this or if I even can. I already have working code that builds a list from a file; the only problem is the object creation.
My question, simply put, is how do I do this?
I had the same problem someday:
How to call class constructor having its name in text variable? [Python]
You obviously have to define classes which names are in file. I assume that is done. And you need to have them in current module namespace globals()
from somelib import Punch, Kick, Throw, Dropkick, Uppercut
globals()[class_name](x, y)
line = getline("Attacks.txt", count)
line = line.rstrip()
linelist = line.split()
class_name = linelist[1]
value = linelist[2]
class_object = globals()[class_name]
item = class_object(value)
# or shortly in one line:
# item = globals()[linelist[1]](linelist[2])
You could create a class like so providing overloading operators to support the operations:
class Operation:
def __init__(self, *header):
self.__dict__ = dict(zip(['attack', 'power'], header))
class Attack:
def __init__(self, *headers):
self.__dict__ = {"attack{}".format(i):Operation(*a) for i, a in enumerate(headers, start=1)}
def __setitem__(self, attack_type, new_power):
self.__dict__ = {a:Operation(attack_type, new_power) if b.attack == attack_type else b for a, b in self.__dict__.items()}
def __getitem__(self, attack):
return [b.power for _, b in self.__dict__.items() if b.attack == attack]
#property
def power_listings(self):
return '\n'.join(['{} {}'.format(*[b.attack, b.power]) for _, b in self.__dict__.items()])
with open('filename.txt') as f:
f = [i.strip('\n').split() for i in f]
a = Attack(*f)
print(a.power_listings)
a['Throw'] = 6 #updating the power of any occurrence of Throw
Output:
Throw 6
Kick 3
Punch 2
Uppercut 8
Dropkick 6

Functions not using variables from functions above them

I have a program where I have defined a function called historyA()
def historyA(self, name, price, category, sub, comment):
sh = login("Budget")
worksheet = sh.get_worksheet(0)
emptyrowresize()
date = getDate()
toAppend = [date, name, category, sub, price, comment]
worksheet.append_row(toAppend)
In this function it is giving the variable worksheet values.
Immediately after that it runs the function emptyrowresize()
def emptyrowresize():
print("\n")
count = 0
cycle = 0
empty = False
for i in range(emptyrowcount()):
for i in range(worksheet.col_count):
if(lastvalue(i+1) == ""):
count += 1
if(count >= worksheet.col_count):
empty = True
if(empty == True):
worksheet.resize(worksheet.row_count-1,worksheet.col_count)
print("Fixing empty row at {0}..." .format(worksheet.row_count+1))
break
emptyrowresize() calls the functions lastvalue() in it
def lastvalue(x, y=0):
cycles = 0
#returns last value in col x with vertical offset y
while(True):
count = worksheet.row_count
val = worksheet.cell(int(count-y), x).value
cycles += 1
if(val == ''):
y = cycles
else:
break
return val
and last value needs the worksheet that was named in historyA() but it keeps giving the error that worksheet is not a defined global variable.
Why is this not working? I have had a problem with this once before but I don't remember how I fixed it without making a separate function for each of the three worksheets I need to work with.
EDIT: I feel like it is worth mentioning that everything passed historyA() is in a separate file that I am importing. I'm not sure if that means anything.

Optimizing modifiable named list based on namedtuple

My goal is to optimize a framework based on a stack of modifiers for CSV-sourced lists. Each modifier uses a header list to work on a named basis.
CSV example (including header):
date;place
13/02/2013;New York
15/04/2012;Buenos Aires
29/10/2010;Singapour
I have written some code based on namedtuple in order to be able to use lists generated by csv module without reorganizing data every time. Generated code below :
class MyNamedList(object):
__slots__ = ("__values")
_fields = ['date', 'ignore', 'place']
def __init__(self, values):
self.__values = values
if len(self.__values) <= 151:
for i in range(len(self.__values), 151):
self.__values += [None,]
#property
def date(self):
return self.__values[0]
#date.setter
def date(self, val):
self.__values[0] = val
#property
def ignore(self):
return self.__values[150]
#ignore.setter
def ignore(self, val):
self.__values[150] = val
#property
def place(self):
return self.__values[1]
#b.setter
def place(self, val):
self.__values[1] = val
I must say i am very disappointed with performance using this class. Calling a simple modifier function (which changes "ignore" to True 100 times. Yes i know it is useless) for each line of a 70000-line csv file takes 9 seconds (with pypy. 5.5 using original python) whereas equivalent code using a list named foo takes 1.1 second (same with pypy and original python).
Is there anything i could do to get comparable performance between both approaches ? To me, record.ignore = True could be directly inlined (or so) and therefore translated into record[150] = True. Is there any blocking point i don't see to get this to happen ?
Note that the record i am modifying is actually (for now) not created for each line in the CSV file, meaning adding more items into the list happens only once, before the iteration.
Update : sample codes
--> Using namedlist
import namedlist
MyNamedList=namedlist.namedlist("MyNamedList", {"a":1, "b":2, "ignore":150})
test = MyNamedList([0,1])
def foo(a):
test.ignore = True # x100 times
import csv
stream = csv.reader(open("66666.csv", "rb"))
for i in stream:
foo(i)
--> Not using namedlist
import namedlist
import csv
MyNamedList=namedlist.namedlist("MyNamedList", {"a":1, "b":2, "ignore":150})
test = MyNamedList([0,1])
sample_data = []
for i in range(len(sample_data), 151):
sample_data += [None,]
def foo(a):
sample_data[150] = True # x100 times
stream = csv.reader(open("66666.csv", "rb"))
for i in stream:
foo(i)
Update #2 : code for namedlist.py (heavily based on namedtuple.py
# Retrieved from http://code.activestate.com/recipes/500261/
# Licensed under the PSF license
from keyword import iskeyword as _iskeyword
import sys as _sys
def namedlist(typename, field_indices, verbose=False, rename=False):
# Parse and validate the field names. Validation serves two purposes,
# generating informative error messages and preventing template injection attacks.
field_names = field_indices.keys()
for name in [typename,] + field_names:
if not min(c.isalnum() or c=='_' for c in name):
raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
if _iskeyword(name):
raise ValueError('Type names and field names cannot be a keyword: %r' % name)
if name[0].isdigit():
raise ValueError('Type names and field names cannot start with a number: %r' % name)
seen_names = set()
for name in field_names:
if name.startswith('_') and not rename:
raise ValueError('Field names cannot start with an underscore: %r' % name)
if name in seen_names:
raise ValueError('Encountered duplicate field name: %r' % name)
seen_names.add(name)
# Create and fill-in the class template
numfields = len(field_names)
argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
reprtxt = ', '.join('%s=%%r' % name for name in field_names)
max_index=-1
for name in field_names:
index = field_indices[name]
if max_index < index:
max_index = index
max_index += 1
template = '''class %(typename)s(object):
__slots__ = ("__values") \n
_fields = %(field_names)r \n
def __init__(self, values):
self.__values = values
if len(self.__values) <= %(max_index)s:
for i in range(len(self.__values), %(max_index)s):
self.__values += [None,]'''% locals()
for name in field_names:
index = field_indices[name]
template += ''' \n
#property
def %s(self):
return self.__values[%d]
#%s.setter
def %s(self, val):
self.__values[%d] = val''' % (name, index, name, name, index)
if verbose:
print template
# Execute the template string in a temporary namespace
namespace = {'__name__':'namedtuple_%s' % typename,
'_property':property, '_tuple':tuple}
try:
exec template in namespace
except SyntaxError, e:
raise SyntaxError(e.message + ':\n' + template)
result = namespace[typename]
# For pickling to work, the __module__ variable needs to be set to the frame
# where the named tuple is created. Bypass this step in enviroments where
# sys._getframe is not defined (Jython for example) or sys._getframe is not
# defined for arguments greater than 0 (IronPython).
try:
result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
except (AttributeError, ValueError):
pass
return result

Categories

Resources