Hey all. Linux has a lot of great features in procfs and sysfs, and tools like vmstat extend that quite a bit. However, I need to collect data from a variety of these systems, and I was hoping to leverage a unified Python utility instead of hacking together a bunch of disparate scripts.
In order to do that I first need to identify whether or not Python has the bits and pieces I need to adequately parse/process the different data collection points. So, the essence of my question:
Is there a Python module that already handles/parses sysfs objects?
I've looked for such a beast via Google, Usenet, and various forums, but I haven't yet found anything intelligent or functional. So, before I carve one out, I figured I'd check here first.
Try this one:
from os import listdir
from os.path import isdir, isfile, islink, join, realpath, normpath
from keyword import iskeyword

# append an underscore to names that would shadow Python keywords
_norm = lambda name: name + ('_' if iskeyword(name) else '')

def _denorm(name):
    if name.endswith('_') and iskeyword(name[:-1]):
        return name[:-1]
    else:
        return name

def _norm_path(path):
    return normpath(realpath(path))

class SysFsObject(object):
    __slots__ = ['_path', '__dict__']

    @staticmethod
    def __id_args__(path='/sys'):
        return _norm_path(path)

    def __init__(self, path='/sys'):
        self._path = _norm_path(path)
        if not self._path.startswith('/sys'):
            raise RuntimeError("Using this on non-sysfs files is dangerous!")
        self.__dict__.update(dict.fromkeys(_norm(i) for i in listdir(self._path)))

    def __repr__(self):
        return "<SysFsObject %s>" % self._path

    def __setattr__(self, name, val):
        if name.startswith('_'):
            return object.__setattr__(self, name, val)
        name = _denorm(name)
        p = realpath(join(self._path, name))
        if isfile(p):
            open(p, 'w').write(str(val))
        else:
            raise RuntimeError

    def __getattribute__(self, name):
        if name.startswith('_'):
            return object.__getattribute__(self, name)
        name = _denorm(name)
        p = realpath(join(self._path, name))
        if isfile(p):
            data = open(p, 'r').read()[:-1]
            try:
                return int(data)
            except ValueError:
                return data
        elif isdir(p):
            return SysFsObject(p)
It's not polished in any way, but IIRC it works :)
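A brief usage sketch (the attribute names here are hypothetical; what actually exists under /sys depends on your kernel):

root = SysFsObject()          # wraps /sys
kern = root.kernel            # directories come back as nested SysFsObjects
print(kern)                   # <SysFsObject /sys/kernel>
print(kern.uevent_seqnum)     # files are read, and int-cast when possible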
From filmor's answer, but with the int() casting removed:
from os import listdir
from os.path import isdir, isfile, islink, join, realpath, normpath
from keyword import iskeyword

# append an underscore to names that would shadow Python keywords
_norm = lambda name: name + ('_' if iskeyword(name) else '')

def _denorm(name):
    if name.endswith('_') and iskeyword(name[:-1]):
        return name[:-1]
    else:
        return name

def _norm_path(path):
    return normpath(realpath(path))

class SysFsObject(object):
    __slots__ = ['_path', '__dict__']

    @staticmethod
    def __id_args__(path='/sys'):
        return _norm_path(path)

    def __init__(self, path='/sys'):
        self._path = _norm_path(path)
        if not self._path.startswith('/sys'):
            raise RuntimeError("Using this on non-sysfs files is dangerous!")
        self.__dict__.update(dict.fromkeys(_norm(i) for i in listdir(self._path)))

    def __repr__(self):
        return "<SysFsObject %s>" % self._path

    def __setattr__(self, name, val):
        if name.startswith('_'):
            return object.__setattr__(self, name, val)
        name = _denorm(name)
        p = realpath(join(self._path, name))
        if isfile(p):
            open(p, 'w').write(val)
        else:
            raise RuntimeError

    def __getattribute__(self, name):
        if name.startswith('_'):
            return object.__getattribute__(self, name)
        name = _denorm(name)
        p = realpath(join(self._path, name))
        if isfile(p):
            return open(p, 'r').read()[:-1]
        elif isdir(p):
            return SysFsObject(p)
Arbitrarily casting to int is unexpected and even dangerous. For example, if you were to use that code on any of the cpulist files prevalent in sysfs, a string such as "0-7" would always be returned on multi-processor systems. Then someday, someone uses your code on a single-core system and reading the exact same sysfs file that now contains "0" returns an int.
In other words, any function that calls that code and expects to receive the native data type of sysfs (strings) must explicitly cast to str().
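To make that concrete, here is a hypothetical session against the cpu directory (exact paths and values depend on your system), using the SysFsObject class above:

cpu = SysFsObject('/sys/devices/system/cpu')
online = cpu.online    # "0-7" (str) on an 8-core box, but 0 (int) on a single-core box
online.split('-')      # works on the former, raises AttributeError on the latter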
Not really sure why you need something specific; they are mostly just text files, so you can read and write them directly.
There aren't any Python modules that do that, as far as I know.
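For what it's worth, a minimal sketch of the direct approach; the attribute path here is just an example:

# read a sysfs attribute
with open('/sys/class/net/eth0/mtu') as f:
    mtu = int(f.read().strip())

# write one back (typically requires root)
with open('/sys/class/net/eth0/mtu', 'w') as f:
    f.write('1400')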
I'm going a little bug-eyed here trying to troubleshoot this. In the process, I've tried to create a self-contained function to reproduce the issue, but for some reason it works as expected in the micro-example, but not in my prod code.
I have a subclass of pathlib.Path:
from pathlib import Path
from typing import Iterable, Union

class WalkPath(Path):
    _flavour = type(Path())._flavour

    def __init__(self, *args, origin: 'WalkPath'=None, dirs: []=None, files: []=None):
        super().__init__()
        if type(args[0]) is str:
            self.origin = origin or self
        else:
            self.origin = origin or args[0].origin
        self._dirs: [WalkPath] = list(map(WalkPath, dirs)) if dirs else None
        self._files: [WalkPath] = list(map(WalkPath, files)) if files else None
        self._lazy_attr = None

    @staticmethod
    def sync(wp: Union[str, Path, 'WalkPath']):
        """Synchronize lazy-loaded attributes"""
        x = wp.lazy_attr
        return wp

    @property
    def lazy_attr(self):
        if self._lazy_attr:
            return self._lazy_attr
        # long running op
        self._lazy_attr = long_running_op(self)
        return self._lazy_attr

class Find:
    @staticmethod
    def shallow(path: Union[str, Path, 'WalkPath'],
                sort_key=lambda p: str(p).lower(),
                hide_sys_files=True) -> Iterable['WalkPath']:
        origin = WalkPath(path)
        if origin.is_file():
            return [origin]
        for p in sorted(origin.iterdir(), key=sort_key):
            # is_sys_file is a helper defined elsewhere in my codebase
            if hide_sys_files and is_sys_file(p):
                continue
            yield WalkPath(p, origin=origin)
Using multiprocessing.Pool, I want to execute that long-running process in a pool.
That looks like this:
_paths = ['/path1', '/path2']
found = list(itertools.chain.from_iterable(Find.shallow(p) for p in _paths))
Find.shallow (see above) basically just does a Path.iterdir on origin and then maps the results to WalkPath objects, setting origin to the path it was called with. I know this works, because this outputs correctly:
for x in found:
    print(x.origin, x.name)
Then we dispatch to a pool:
with mp.Pool() as pool:
    done = [x for x in pool.map(WalkPath.sync, found) if x.origin]
But this fails, stating 'WalkPath' has no attribute 'origin'.
Here’s my attempt at reproducing it locally, but for some reason it works! I cannot spot the difference.
#!/usr/bin/env python

import multiprocessing as mp
import time
from itertools import tee, chain

r = None

class P:
    def __init__(self, i, static=None):
        # self.static = static if not static is None else i
        self.static = static or i
        # print(static, self.static)
        self.i = i
        self._a_thing = None

    @property
    def a_thing(self):
        if self._a_thing:
            print('Already have thing', self.i, 'static:', self.static)
            return self._a_thing
        time.sleep(0.05)
        print('Did thing', self.i, 'static:', self.static)
        self._a_thing = True
        return self._a_thing

    @staticmethod
    def sync(x):
        x.a_thing
        x.another = 'done'
        return x if x.a_thing else None

class Load:
    @classmethod
    def go(cls):
        global r
        if r:
            return r
        paths = [iter(P(i, static='0') for i in range(10)),
                 iter(P(i, static='0') for i in range(11, 20)),
                 iter(P(i, static='0') for i in range(21, 30))]
        iternums, testnums = tee(chain.from_iterable(paths))
        for t in testnums:
            print('Want thing', t.i, 'to have static:', t.static)
        with mp.Pool() as pool:
            rex = [x for x in pool.map(P.sync, list(iternums)) if x.another]
        r = rex
        for done in rex:
            print(done.i, done.static, done.a_thing, done.another)

Load.go()
The crux of the problem is that your Path objects cannot be shared between interpreter processes.
Instead, when using multiprocessing, Python serializes (pickles) all arguments and return values to/from subprocesses.
It seems that pathlib.Path defines custom pickling/unpickling logic that is incompatible with your origin attribute:
import pathlib
import pickle

class WalkPath(pathlib.Path):
    _flavour = type(pathlib.Path())._flavour

    def __init__(self, *args, origin: 'WalkPath'=None, dirs: []=None, files: []=None):
        super().__init__()
        if type(args[0]) is str:
            self.origin = origin or self
        else:
            self.origin = origin or args[0].origin
        self._dirs: [WalkPath] = list(map(WalkPath, dirs)) if dirs else None
        self._files: [WalkPath] = list(map(WalkPath, files)) if files else None
        self._lazy_attr = None

path = WalkPath('/tmp', origin='far away')
print(vars(path))
reloaded = pickle.loads(pickle.dumps(path))
print(vars(reloaded))
$ python3.9 test.py
{'origin': 'far away', '_dirs': None, '_files': None, '_lazy_attr': None}
{'origin': WalkPath('/tmp'), '_dirs': None, '_files': None, '_lazy_attr': None}
For fun, here's how I ended up solving this.
What happens here is that Path implements the __reduce__ function, which is invoked before __getstate__ or __setstate__ would be (those are the higher-level pickling hooks).
Here's the __reduce__ function from PurePath, Path's base class:
def __reduce__(self):
    # Using the parts tuple helps share interned path parts
    # when pickling related paths.
    return (self.__class__, tuple(self._parts))
Oh no! Well, we can see what happens - this is intentionally designed just to pass a tuple of its parts, dropping state altogether and forming a new version of itself.
I didn't want to mess with that, but I also wanted to make sure my state was preserved here. So I created a serializer that takes these properties as a tuple argument (since... __reduce__ for some ridiculous reason only takes a single tuple as an argument).
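For reference, a minimal sketch of that contract (the Point class is purely illustrative): __reduce__ returns a callable plus a single tuple of arguments, and pickle calls the callable with those arguments when loading:

import pickle

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __reduce__(self):
        # pickle will call Point(self.x, self.y) when loading
        return (self.__class__, (self.x, self.y))

p = pickle.loads(pickle.dumps(Point(1, 2)))
print(p.x, p.y)  # 1 2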
I also had to make sure that origin was now a Path object, not a WalkPath object, otherwise I would have ended up with an endless recursion. I added some type coercion and safety to the __init__:
if origin:
    self.origin = Path(origin)
elif len(args) > 0:
    try:
        self.origin = Path(args[0].origin) or Path(args[0])
    except:
        self.origin = Path(self)

if not self.origin:
    raise AttributeError(f"Could not infer 'origin' property when initializing 'WalkPath', for path '{args[0]}'")
Then I added these two methods to WalkPath:
# @overrides(__reduce__)
def __reduce__(self):
    # From super():
    #   Using the parts tuple helps share interned path parts
    #   when pickling related paths.
    #   return (self.__class__, tuple(self._parts))
    # This override passes its parts to a Path object (which
    # natively pickles), then serializes and applies
    # its remaining attributes.
    args = {**{'_parts': self._parts}, **self.__dict__}
    return (self.__class__._from_kwargs, tuple(args.items()))

@classmethod
def _from_kwargs(cls, *args):
    kwargs = dict(args)
    new = cls(super().__new__(cls,
                              *kwargs['_parts']),
              origin=kwargs['origin'])
    new.__dict__ = {**new.__dict__, **kwargs}
    return new
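With those two methods in place, a quick round trip (assuming the full WalkPath class above) shows the extra state surviving:

p = WalkPath('/tmp', origin=Path('/'))
q = pickle.loads(pickle.dumps(p))
print(q.origin)  # PosixPath('/') -- preserved, instead of being reset to WalkPath('/tmp')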
Does Robot Framework have a legal way to register my own variable finder? For example, I want to register a finder that resolves variables whose names start with #. With robotframework==3.1 I was able to achieve this with code like this:
import robot
from robot.variables.finders import VariableFinder

class MyVariableResolver:
    identifiers = '$'

    def find(self, name):
        if name[2] == '#':
            return f'My resolution for {name}'
        else:
            raise KeyError(name)

VariableFinder._finders = property(
    lambda self: (MyVariableResolver(),) + self.__dict__['_finders'],
    lambda self, value: self.__dict__.setdefault('_finders', value)
)

class MyLibrary:
    pass
With robotframework==4.0 my solution broke, and I had to change MyVariableResolver to:
from robot.variables.finders import NOT_FOUND
# other imports

class MyVariableResolver:
    identifiers = '$'

    def find(self, name):
        if name[2] == '#':
            return f'My resolution for {name}'
        else:
            return NOT_FOUND
But this code still looks very ugly (especially the VariableFinder._finders = property(...) part). Is there a "right" way to do what I want?
If I am importing a module from a third party, but the naming conventions they use do not line up with mine, is there a good way to PEP8 it?
I need to use a third-party module that I cannot edit, and their naming convention isn't so great. For example:

thisIsABase_function(self, a, b)
I have some code that pepifies the name to pep8, but I was wondering how I can make the functions accessible by that new pep8 name?
def _pep8ify(name):
    """PEP8ify name"""
    import re
    if '.' in name:
        name = name[name.rfind('.') + 1:]
    if name[0].isdigit():
        name = "level_" + name
    name = name.replace(".", "_")
    if '_' in name:
        return name.lower()
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
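For what it's worth, here is how that helper behaves on a couple of inputs; note that the '_' shortcut branch means a mixed name that already contains an underscore is only lowercased:

print(_pep8ify('thisIsABase'))           # this_is_a_base
print(_pep8ify('thisIsABase_function'))  # thisisabase_function (the '_' branch fires)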
Is there a way I can PEP8 these names on import?
You can use a context manager to automatically pep8ify the symbols from an imported module like:
Example:
with Pep8Importer():
    import funky
Code:
import sys

class Pep8Importer(object):
    @staticmethod
    def _pep8ify(name):
        """PEP8ify name"""
        import re
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    def __enter__(self):
        # get list of current modules in namespace
        self.orig_names = set(dir(sys.modules[__name__]))

    def __exit__(self, exc_type, exc_val, exc_tb):
        """ Pep8ify names in any new modules

        Diff list of current module names in namespace.
        Pep8ify names at the first level in those modules.
        Ignore any other new names under the assumption that they
        were imported/created with the name as desired.
        """
        if exc_type is not None:
            return
        new_names = set(dir(sys.modules[__name__])) - self.orig_names
        for module_name in (n for n in new_names if not n.startswith('_')):
            module = sys.modules[module_name]
            for name in dir(module):
                pep8ified = self._pep8ify(name)
                if pep8ified != name and not name.startswith('_'):
                    setattr(module, pep8ified, getattr(module, name))
                    print("In module: {}, added '{}' from '{}'".format(
                        module_name, pep8ified, name))
Test Code:
with Pep8Importer():
    import funky

print(funky.thisIsABase_function)
print(funky.this_is_a_base_function)
funky.py
thisIsABase_function = 1
Results:
In module: funky, added 'this_is_a_base_function' from 'thisIsABase_function'
1
1
I think something like this does what you want:
# somemodule.py
def func_a():
    print('hello a')

def func_b():
    print('hello b')

# yourcode.py
import inspect
import importlib

def pepimports(the_module_name):
    mymodule = importlib.import_module(the_module_name)
    # inspect the freshly imported module, then attach the pep8ified
    # aliases to it (_pep8ify is the helper from the question)
    myfuncs = inspect.getmembers(mymodule, inspect.isfunction)
    for f in myfuncs:
        setattr(mymodule, _pep8ify(f[1].__name__), f[1])
    return mymodule

mymodule = pepimports('some_module_name')
# you can now call the functions from mymodule
# (the original names still exist, so watch out for clashes)
mymodule.pepified_function()
It's a bit hackish, but I've tried it (python 3.5) and it seems to work (at least on a trivial example).
Note: I see that I need to more clearly work out what it is that I want each property/descriptor/class/method to do before I ask how to do it! I don't think my question can be answered at this time. Thanks all for helping me out.
Thanks to icktoofay and BrenBarn, I'm starting to understand descriptors and properties, but now I have a slightly harder question to ask:
I see now how these work:
class Blub(object):
    def __get__(self, instance, owner):
        print('Blub gets ' + instance._blub)
        return instance._blub

    def __set__(self, instance, value):
        print('Blub becomes ' + value)
        instance._blub = value

class Quish(object):
    blub = Blub()

    def __init__(self, value):
        self.blub = value
I also see how a = Quish('one') works (it produces "Blub becomes one"). But take a gander at this code:
import os
import glob

class Index(object):
    def __init__(self, dir=os.getcwd()):
        self.name = dir  # index name is directory of indexes
        # index is the list of indexes
        self.index = glob.glob(os.path.join(self.name, 'BatchStarted*'))
        # which is the pointer to the index (index[which] == BatchStarted_12312013_115959.txt)
        self.which = 0
        # self.file = self.File(self.index[self.which])

    def get(self):
        return self.index[self.which]

    def next(self):
        self.which += 1
        if self.which < len(self.index):
            return self.get()
        else:
            # loop back to the first
            self.which = 0
            return None

    def back(self):
        if self.which > 0:
            self.which -= 1
        return self.get()

class File(object):
    def __init__(self, file):
        # if the file exists, we'll use it.
        if os.path.isfile(file):
            self.name = file
        # otherwise, our name is none and we return.
        else:
            self.name = None
            return None
        # 'file' attribute is the actual file object
        self.file = open(self.name, 'r')
        self.line = Lines(self.file)

class Lines(object):
    # pass through the actual file object (not filename)
    def __init__(self, file):
        self.file = file
        # line is the list of this file's lines
        self.line = self.file.readlines()
        self.which = 0
        self.extension = Extension(self.line[self.which])

    def __get__(self):
        return self.line[self.which]

    def __set__(self, value):
        self.which = value

    def next(self):
        self.which += 1
        return self.__get__()

    def back(self):
        self.which -= 1
        return self.__get__()

class Extension(object):
    def __init__(self, lineStr):
        # check to make sure a string is passed
        if lineStr:
            self.lineStr = lineStr
            self.line = self.lineStr.split('|')
            self.pathStr = self.line[0]
            self.path = self.pathStr.split('\\')
            self.fileStr = self.path[-1]
            self.file = self.fileStr.split('.')
        else:
            self.lineStr = None

    def __get__(self):
        self.line = self.lineStr.split('|')
        self.pathStr = self.line[0]
        self.path = self.pathStr.split('\\')
        self.fileStr = self.path[-1]
        self.file = self.fileStr.split('.')
        return self.file[-1]

    def __set__(self, ext):
        self.file[-1] = ext
        self.fileStr = '.'.join(self.file)
        self.path[-1] = self.fileStr
        self.pathStr = '\\'.join(self.path)
        self.line[0] = self.pathStr
        self.lineStr = '|'.join(self.line)
Firstly, there may be some typos in here because I've been working on it and leaving it half-arsed. That's not my point. My point is that in icktoofay's example, nothing gets passed to Blub(). Is there any way to do what I'm doing here, that is, set some "self" attributes, do some processing, and then pass the result on to the next class? Would this be better suited to a property?
I would like to have it so that:
>>> i = Index() # i contains list of index files
>>> f = File(i.get()) # f is now one of those files
>>> f.line
'\\\\server\\share\\folder\\file0.txt|Name|Sean|Date|10-20-2000|Type|1'
>>> f.line.extension
'txt'
>>> f.line.extension = 'rtf'
>>> f.line
'\\\\server\\share\\folder\\file0.rtf|Name|Sean|Date|10-20-2000|Type|1'
You can do that, but the issue there is less about properties/descriptors and more about creating classes that give the behavior you want.
So, when you do f.line, that is some object. When you do f.line.extension, that is doing (f.line).extension --- that is, it first evaluates f.line and then gets the extension attribute of whatever f.line is.
The important thing here is that f.line cannot know whether you are later going to try to access its extension. So you can't have f.line do one thing for "plain" f.line and another thing for f.line.extension. The f.line part has to be the same in both, and the extension part can't change that.
The solution for what you seem to want to do is to make f.line return some kind of object that in some way looks or works like a string, but also allows setting attributes and updating itself accordingly. Exactly how you do this depends on how much you need f.line to behave like a string and how much you need it to do other stuff. Basically you need f.line to be a "gatekeeper" object that handles some operations by acting like a string (e.g., you apparently want it to display as a string), and handles other operations in custom ways (e.g., you apparently want to be able to set an extension attribute on it and have that update its contents).
Here's a simplistic example:
class Line(object):
    def __init__(self, txt):
        self.base, self.extension = txt.split('.')

    def __str__(self):
        return self.base + "." + self.extension
Now you can do:
>>> line = Line('file.txt')
>>> print(line)
file.txt
>>> line.extension
'txt'
>>> line.extension = 'foo'
>>> print(line)
file.foo
However, notice that I did print(line), not just line. By writing a __str__ method, I defined the behavior that happens when you do print(line). But if you evaluate it "raw" without printing it, you'll see it's not really a string:
>>> line
<__main__.Line object at 0x000000000233D278>
You could override this behavior as well (by defining __repr__), but do you want to? That depends on how you want to use line. The point is that you need to decide what you want your line to do in what situations, and then craft a class that does that.
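If you do decide you want the raw display to match, a minimal sketch is to add a __repr__ to the Line class above that reuses the string form:

def __repr__(self):
    # make bare evaluation at the REPL show the same thing as print
    return str(self)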
I have written a Parser that takes a JSON configuration and creates objects from it. I first create a well-known object, then try to dynamically import a module (which may come from a user), loading its class via that module's defined creator method.
Here is some testing code:
import json
import imp
import os.path as path

from lib.config.members import Member
from lib.tasks.task import Task

class Parser(object):
    def __init__(self):
        self._loadedMods = {"tasks": {}}

    def _load_module(self, clazz, modPart):
        """
        imports and caches a module.
        :param clazz: the filename of the module (i.e. email, ping...)
        :param modPart: the folder of the module (i.e. services, parsers...)
        :return: the imported/cached module, or throws an error if it couldn't find it
        """
        mods = self._loadedMods[modPart]
        if clazz in mods:
            return mods["class"]
        else:
            # mod = __import__(clazz)
            p = path.join("lib", modPart, clazz + ".py")
            mod = imp.load_source(clazz, p)
            mods[clazz] = mod
            return mod

    def replace_with_import(self, objList, modPart, items_func, class_check):
        """
        replaces configuration dicts with their objects by importing and creating them in the first step.
        In the second step the original list of json config dicts gets replaced by the loaded objects.
        :param objList: the list of objects which is iterated on
        :param modPart: the folder of the module (i.e. tasks, parsers)
        :param items_func: function to get a pointer to the list of json-config-objects to replace.
                           Takes one argument and should return a list.
        :param class_check: currently unsupported
        """
        for obj in objList:
            repl = []
            items = items_func(obj)
            for clazzItem in items:
                try:
                    clazz = clazzItem["class"]
                    mod = self._load_module(clazz, modPart)
                    item = mod.create(clazzItem)
                    if class_check(item):
                        repl.append(item)
                    else:
                        print " ignoring class " + clazzItem["class"] + "! It does not pass the class check!"
                except ImportError, err:
                    print "could not import " + clazz + ": " + str(clazzItem) + "! reason:"
                    print str(err)
                except KeyError, k:
                    print "Key " + str(k) + " not in classItem " + str(clazzItem)
                except Exception, e:
                    print "Error while replacing class ( " + clazz + " :" + str(e) + ")"
            del items[:]
            items.extend(repl)

    def _create_raw_Object(self, jsonDict, msgName, creator):
        """
        creates a Main object from the configuration, but just parses raw data and hands it to the object.
        :param jsonDict: the configuration file part as dict
        :param msgName: name of object for error message
        :param creator: function pointer which takes two arguments (identifier of the object and arguments)
                        and should return an object
        :return: a list of objects returned by creator
        """
        items = []
        for key, val in jsonDict.items():
            try:
                item = creator(key, val)
                items.append(item)
            except Exception, e:
                print "ignoring " + msgName + ": " + key + "! reason:"
                print str(e)
        return items

jsonFile = '''
{
    "members": {
        "homer": {
            "name": "Homer Simpson",
            "comment": "Security Inspector",
            "tasks": [{"class": "email", "type": "donut", "args": {"rcpt": "homer_j_simpson#burnscorp.sp"}},
                      {"class": "email", "type": "do", "args": {"rcpt": "my_other_mail#burnscorp.sp"}}]
        }
    }
}
'''

jsonDict = json.loads(jsonFile)
parser = Parser()
creator = lambda name, values: Member(name, **values)
members = parser._create_raw_Object(jsonDict["members"], "Members", creator)
items_func = lambda member: member.get_tasks()
class_check = lambda task: isinstance(task, Task)
parser.replace_with_import(members, "tasks", items_func, class_check)
for d in members:
    print d.__dict__
As you can see, a Member can have a list of arbitrary tasks, and which module it should import is defined in each task's class attribute. But as soon as two of them have the same value for class (which shouldn't break JSON the way we define it), I get a strange KeyError:
Key 'class' not in classItem {u'args': {u'rcpt': u'my_other_mail#burnscorp.sp'}, u'type': u'do', u'class': u'email'}
Why do I get this strange error? Any hint that might give me a clue what's going on is very welcome; I've been debugging this for hours and feel hopeless.
I think the Member and Email/Task classes are unrelated, but I'll post them for completeness:
lib/config/members.py
class Member:
    def __init__(self, id, name="", comment="", tasks=None):
        self.id = id
        self.name = name
        self.tasks = []
        self.add_task(tasks)
        self.comment = comment

    def get_id(self):
        return self.id

    def add_task(self, task):
        if task is None:
            return
        if isinstance(task, list):
            self.tasks.extend(task)
        else:
            self.tasks.append(task)

    def get_tasks(self):
        return self.tasks
lib/tasks/[task|email].py
class Task:
    """
    Base class for all built-in Tasks.
    """
    def set_task_type(self, taskType):
        """
        sets the type of this task.
        Be aware! This method can only get called once!
        :param taskType: the type of this task
        """
        if hasattr(self, "_taskType"):
            raise Exception("taskType is only allowed to be set once!")
        self._taskType = taskType  # store under the name the guard and getter use

    def get_task_type(self):
        """
        :return: the type set by set_task_type
        """
        return self._taskType

"""
The email task.
"""
from lib.tasks.task import Task

class EmailTask(Task):
    def __init__(self, **kwargs):
        self.set_task_type(kwargs["type"])
        self.recipient = kwargs["args"]["rcpt"]

    def execute_task(self, msg):
        pass

def create(taskDict):
    return EmailTask(**taskDict)
As I noted in the comment section, it seems you are eating the actual exception by replacing it with your own custom print in replace_with_import.
You generally want to keep your try blocks small and very predictable, knowing exactly what can be raised and what you should handle at that point in the code. The less complexity in your try block, the better.
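As a sketch of that advice applied to the inner loop of replace_with_import (keeping the question's Python 2 syntax): narrow the try to the one call that is expected to fail, so an unexpected KeyError from elsewhere, such as the cache lookup in _load_module, surfaces instead of being swallowed and mislabeled:

for clazzItem in items:
    clazz = clazzItem["class"]  # a genuinely missing key should raise loudly here
    mod = self._load_module(clazz, modPart)  # a KeyError in here is a bug, not bad config
    try:
        item = mod.create(clazzItem)  # only the user-supplied creator is guarded
    except Exception, e:
        print "Error while creating class ( " + clazz + " :" + str(e) + ")"
        continue
    if class_check(item):
        repl.append(item)
    else:
        print " ignoring class " + clazz + "! It does not pass the class check!"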