Attribute error in python while parsing an XML - python

I am kinda new to Python. I am working on a project that parses an XML in Python and my Python code to do so is :
from xml.dom import minidom
from copy import copy
# NOTE(review): indentation appears to have been lost in this paste; the
# question's code is kept verbatim.
class Xmlparse:
def __init__(self, xmlfile):
# Assigning to ``self`` only rebinds the local name inside __init__;
# the parsed document is not stored on the Xmlparse instance.
self = minidom.parse(xmlfile)
def findadress(self):
# Looks up the 'addresses' elements and reads the 'firstname' attribute
# of the first one.
itemlist =self.getElementsByTagName('addresses')
return itemlist[0].attributes['firstname'].value
if __name__ == '__main__':
with open("sample.xml") as f:
parse = Xmlparse(f)
# Python 2 print statement.
print parse.findadress()
But when I run this code I get an output error:
AttributeError: Xmlparse instance has no attribute 'findadress'
The findadress function is spelled correctly in the main block, but for some reason I am still getting this error.
Any help is really appreciated.
And I also wanted to know, how can I validate the xml with an XSD schema in python?

"self = minidom.parse(xmlfile)" only rebinds the local name self inside __init__; it does not store the parsed document on the Xmlparse object you just created. You want to assign the xml doc to an instance attribute instead:
from xml.dom import minidom
from copy import copy
class Xmlparse:
    """Thin wrapper around a minidom document parsed from an XML source."""

    def __init__(self, xmlfile):
        """Parse *xmlfile* and keep the resulting document on the instance.

        *xmlfile* is handed straight to ``minidom.parse``, which accepts a
        file name or an open file object.
        """
        # Store the document as an attribute; rebinding ``self`` would lose
        # it as soon as __init__ returns.
        self.doc = minidom.parse(xmlfile)

    def findadress(self):
        """Return the ``firstname`` attribute of the first ``addresses`` element.

        Raises IndexError when the document has no ``addresses`` element and
        KeyError when that element has no ``firstname`` attribute.
        """
        itemlist = self.doc.getElementsByTagName('addresses')
        return itemlist[0].attributes['firstname'].value

the evil is in self = minidom.parse(xmlfile)

Related

Convert string as module

Is there a way to turn a string into a module object? I want to make a function call after doing an API request, but I can't convert the string I got from the request into a module object.
I have seen some code that could actually do it however it is done by importing the module.
import controller.mipay.controller as mipay
# NOTE(review): the next line looks like a decorator whose leading "@" was
# lost in this paste -- confirm against the original post.
#api.route('/redirect')
class Request(Resource):
parser = RequestChecker()
def post(self):
req = self.parser.parse_args() # <---- JSON/Dictionary
#method_to_call = getattr(mipay, req['PaymentEndpoint'])
# Per the question, req['PaymentMethod'] is a string taken from the request,
# so this getattr() looks the endpoint name up on a str, not on a module.
method_to_call = getattr(req['PaymentMethod'], req['PaymentEndpoint'])
return method_to_call()
Is there any way to convert the string to a module object, or anything else I can do with the string so that I can call my function?
Using __import__ may be the solution:
# __import__("module.path") on its own returns the top-level package
# "module"; a non-empty fromlist makes it return the "module.path"
# submodule itself.
module = __import__("module.path", fromlist=["path"])
See importlib — The implementation of import
I assume req['PaymentMethod'] is the module and req['PaymentEndpoint'] is the method. What you can do is use importlib.
#import controller.mipay.controller as mipay
import importlib
my_mapping = {'mipay':'controller.mipay.controller'}
class Request(Resource):
    """REST resource that dispatches a request to a payment controller module."""

    parser = RequestChecker()

    def post(self):
        """Import the controller named by the request and call its endpoint.

        ``PaymentMethod`` is translated to a module path through
        ``my_mapping`` and ``PaymentEndpoint`` names the callable to invoke
        on that module.  Raises KeyError for a payment method that is not in
        ``my_mapping`` and AttributeError for an endpoint the module lacks.
        """
        req = self.parser.parse_args()  # <---- JSON/Dictionary
        # Only module paths listed in my_mapping can be imported, so the
        # request cannot trigger an import of an arbitrary module.
        module = importlib.import_module(my_mapping[req['PaymentMethod']])
        # NOTE(review): PaymentEndpoint comes from the request and may name
        # any attribute of the module; consider an allow-list of endpoints.
        method_to_call = getattr(module, req['PaymentEndpoint'])
        return method_to_call()

embedding resources in python scripts

I'd like to figure out how to embed binary content in a python script. For instance, I don't want to have any external files around (images, sound, ... ), I want all this content living inside of my python scripts.
Little example to clarify, let's say I got this small snippet:
from StringIO import StringIO
from PIL import Image, ImageFilter
# Read the image bytes up front and let the context manager close the file
# instead of leaving the handle open.
with open("Lenna.png", "rb") as image_file:
    embedded_resource = StringIO(image_file.read())
im = Image.open(embedded_resource)
im.show()
im_sharp = im.filter(ImageFilter.SHARPEN)
im_sharp.show()
As you can see, the example is reading the external file 'Lenna.png'
Question
How to proceed to embed "Lenna.png" as a resource (variable) into my python script. What's the fastest way to achieve this simple task using python?
You might find the following class rather useful for embedding resources in your program. To use it, call the package method with paths to the files that you want to embed. The class will print out a DATA attribute that should be used to replace the one already found in the class. If you want to add files to your pre-built data, use the add method instead. To use the class in your program, make calls to the load method using context manager syntax. The returned value is a Path object that can be used as a filename argument to other functions or for the purpose of directly loading the reconstituted file. See this SMTP Client for example usage.
import base64
import contextlib
import pathlib
import pickle
import pickletools
import sys
import zlib
class Resource:
    """Manager for resources that would normally be held externally."""

    WIDTH = 76  # number of encoded characters emitted per output line
    __CACHE = None  # decoded {file name: bytes} mapping, filled on first use
    DATA = b''  # replace with the block printed by package() or add()

    @classmethod
    def package(cls, *paths):
        """Creates a resource string to be copied into the class."""
        cls.__generate_data(paths, {})

    @classmethod
    def add(cls, *paths):
        """Include paths in the pre-generated DATA block up above."""
        cls.__preload()
        # Work on a copy so the live cache is not modified by the new files.
        cls.__generate_data(paths, cls.__CACHE.copy())

    @classmethod
    def __generate_data(cls, paths, buffer):
        """Load paths into buffer and output DATA code for the class.

        Each file is stored under its bare file name.  Raises ValueError for
        a path that is not a file and KeyError when a file name is already
        present in buffer.
        """
        for path in map(pathlib.Path, paths):
            if not path.is_file():
                raise ValueError('{!r} is not a file'.format(path))
            key = path.name
            if key in buffer:
                raise KeyError('{!r} has already been included'.format(key))
            with path.open('rb') as file:
                buffer[key] = file.read()
        # pickle -> optimize -> compress -> base85 yields ASCII-only text
        # that can be pasted into a bytes literal.
        pickled = pickle.dumps(buffer, pickle.HIGHEST_PROTOCOL)
        optimized = pickletools.optimize(pickled)
        compressed = zlib.compress(optimized, zlib.Z_BEST_COMPRESSION)
        encoded = base64.b85encode(compressed)
        cls.__print(" DATA = b'''")
        for offset in range(0, len(encoded), cls.WIDTH):
            # The leading backslash-newline is a line continuation inside the
            # printed literal, so no newline ends up in the resulting bytes.
            cls.__print("\\\n" + encoded[
                slice(offset, offset + cls.WIDTH)].decode('ascii'))
        cls.__print("'''")

    @staticmethod
    def __print(line):
        """Provides alternative printing interface for simplicity."""
        sys.stdout.write(line)
        sys.stdout.flush()

    @classmethod
    @contextlib.contextmanager
    def load(cls, name, delete=True):
        """Dynamically loads resources and makes them usable while needed.

        Writes the stored bytes for *name* to a file called *name* (relative
        to the current working directory) and yields its Path.  When
        *delete* is true the file is removed on exit, even if the body of
        the ``with`` statement raised.  Raises KeyError for an unknown name.
        """
        cls.__preload()
        if name not in cls.__CACHE:
            raise KeyError('{!r} cannot be found'.format(name))
        path = pathlib.Path(name)
        with path.open('wb') as file:
            file.write(cls.__CACHE[name])
        try:
            yield path
        finally:
            if delete:
                path.unlink()

    @classmethod
    def __preload(cls):
        """Warm up the cache if it does not exist in a ready state yet."""
        if cls.__CACHE is None:
            if not cls.DATA:
                # Nothing has been embedded yet; an empty DATA block is not
                # a valid zlib stream, so start from an empty cache.
                cls.__CACHE = {}
                return
            decoded = base64.b85decode(cls.DATA)
            decompressed = zlib.decompress(decoded)
            # NOTE: pickle.loads executes whatever the pickle describes; this
            # is only acceptable because DATA is authored together with the
            # source code.  Never point DATA at untrusted input.
            cls.__CACHE = pickle.loads(decompressed)

    def __init__(self):
        """Creates an error explaining class was used improperly."""
        raise NotImplementedError('class was not designed for instantiation')
The best way to go about this is converting your picture into a python string, and have it in a separate file called something like resources.py, then you simply parse it.
If you are looking to embed the whole thing inside a single binary, then you're looking at something like py2exe. Here is an example embedding external files
In the first scenario, you could even use base64 to (de)code the picture, something like this:
import base64
# Open in binary mode so the image bytes are read unmodified, and let the
# context manager close the file.
with open('yourImage.png', 'rb') as image_file:
    encoded = base64.b64encode(image_file.read())
data = base64.b64decode(encoded)

Cannot call xml.dom.minidom.parse inside class

I am unable to call xml.dom.minidom.parse() within my class
As a sheer example,
class XmlReader:
# The parameter name ``xml`` hides the ``xml`` module inside __init__.
def __init__(self, xml):
self.xml = xml
# Here ``xml`` is the string argument, so ``xml.dom`` is an attribute lookup
# on a str (see the traceback below).
DOMTree = xml.dom.minidom.parse("test.xml")
xmlReader = XmlReader("test.xml")
Throws
File "handler2.py", line 10, in ?
xmlReader = XmlReader("test.xml")
File "handler2.py", line 8, in __init__
DOMTree = xml.dom.minidom.parse("test.xml")
AttributeError: 'str' object has no attribute 'dom'
However outside I am able to call xml.dom.minidom.parse just fine.
What do I need to change in order to be able to call the function within my XmlReader class?
Inside your constructor, xml refers to the parameter xml instead of the module xml. This is called shadowing. Choose a different name for one of them.
import xml as xml_module
or
from xml.dom import minidom
or
def __init__(self, xml_data):

Importance of __getitem__ : HTML Parsing using Python

class mainprogram():
    """Extracts parsed data from a delimited section of an HTML file."""

    def getData(self, file, begin, end):
        """Feed the text between *begin* and *end* to the parser.

        file: path of the file to read.
        begin, end: marker strings delimiting the section to parse.

        Returns a new list copied from the shared ``parsed_data`` list
        (defined outside this block; presumably filled by
        ``MyHTMLParser.feed`` -- confirm against its definition).
        Raises IndexError when the '<div id="header"' marker or *begin* does
        not occur, because split() then yields a single piece and [1] does
        not exist.
        """
        parser = MyHTMLParser()
        # The context manager closes the file once its content is read.
        with open(file) as f:
            rawcontent = f.read()

        # Get main content.  split() returns the pieces around the marker:
        # [1] is the piece right after the first marker, [0] the piece
        # before the first marker.
        content = rawcontent.split('<div id="header"')[1]
        content = content.split('</html>')[0]

        # Empty the shared list in place so earlier results do not leak in.
        del parsed_data[:]

        html = content.split(begin)[1]
        html = html.split(end)[0]

        parser.feed(html)
        return list(parsed_data)
I'm at the stage of practising Python, and I got stuck while doing an assignment. The above code snippet uses htmlparser to parse a .msg file and convert it into CSV format.
Could anyone explain to me what [1] and [0] signify in the lines below?
content = rawcontent.split('<div id="header"')[1];
content = content.split('</html>')[0];
Presently I'm using Python community version for development, when I highlight that particular [1] or [0] its showing up as
class list
def __getitem__(self, y)
As described in special method names, the method name __getitem__() allows a class to override the foo[bar] syntax. Lists do this to provide both subscripting (e.g. foo[5]) and slicing (foo[1:5]). Dictionaries do this to provide key lookup.

Python - print pretty XML create opening and closing tags for empty tags text

I'm writing a Python application that creates an ElementTree (XML) and then writes it to a file, using minidom's toprettyxml():
# Round-trip the ElementTree through minidom to get pretty-printed output.
final_tree = minidom.parseString(ET.tostring(root))
fdout.write(final_tree.toprettyxml(indent=' '))
The problem is that tags to which I don't append any text come out as a single self-closing tag, for example:
<sometag/>
I want it to be:
<sometag>
</sometag>
I want to do it without parsing the whole string (without regex).
Is anybody familiar with a way to do this?
Thanks.
The behavior is hard-wired in minidom.py (have a look at writexml() method in class Element). It is not meant to be changed, but for the current implementation you can monkey-patch it like this:
from xml.dom import minidom
t = minidom.parseString('<a><b></b></a>')
def patcher(method):
    """Wrap a minidom ``writexml`` so empty elements get explicit end tags.

    The wrapper temporarily replaces an empty ``childNodes`` with an empty
    list that still evaluates as true, so the wrapped *method* takes its
    "has children" branch and writes ``<tag></tag>`` instead of ``<tag/>``.
    The original ``childNodes`` is always put back afterwards.
    """
    import functools

    class Dummy(list):
        """Empty list that nevertheless tests as true."""

        def __nonzero__(self):  # Python2
            return True

        def __bool__(self):  # Python3
            return True

    @functools.wraps(method)
    def patching(self, *args, **kwargs):
        old = self.childNodes
        if not old:
            self.childNodes = Dummy([])
        try:
            return method(self, *args, **kwargs)
        finally:
            # Restore the real node list even if writing failed.
            self.childNodes = old

    return patching
# Patch the element class so every element, including empty ones, is written
# with an explicit opening and closing tag.
t.firstChild.__class__.writexml = patcher(t.firstChild.__class__.writexml)
# The call form of print works on both Python 2 and Python 3.
print(t.toprettyxml())
But of course I cannot recommend such a hack.

Categories

Resources