I need to create some files from a template. I'm using psycopg2 to fetch from a database. Then I loop through. Now I need to write to file.
Thanks!
import sys
from HTMLParser import HTMLParser
from xml.etree import cElementTree as etree
import psycopg2
import psycopg2.extras
class LinksParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.tb = etree.TreeBuilder()
def handle_starttag(self, tag, attributes):
self.tb.start(tag, dict(attributes))
def handle_endtag(self, tag):
self.tb.end(tag)
def handle_data(self, data):
self.tb.data(data)
def close(self):
HTMLParser.close(self)
return self.tb.close()
conn = psycopg2.connect(dbname="**", user="**", password="**", host="/tmp/", port="**")
cur.execute("SELECT * FROM landingpagedata;")
rows = cur.fetchall()
template = 'template.html'
parser = LinksParser()
# parser.feed(open('landingIndex.html').read()) #for testing
# root = parser.close()
for row in rows:
parser.feed(open(template).read())
root = parser.close()
#title
title = root.find(".//title")
title.text = str(row['title'])
f = open(row['page_name'], 'w')
root.write(f)
parser = LinksParser()
The error is:
Traceback (most recent call last):
File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 87, in <module>
main()
File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 75, in main
root.write('page_name')
AttributeError: write
Oh and I'm using open('page', 'w') because these pages exist already?
I think you want f.write(root), not root.write(f). (Assuming that str(root) gives you the HTML you want to write out.)
Related
I'm trying to read the multistream Wikipedia dump into a database. This is my attempt at loading smaller chunks in parallel. Here's the script:
#!/usr/bin/python3
import xml.sax
from bz2 import BZ2File
import mwparserfromhell
import psycopg2
import pathos
import os
import dill
class XmlHandler(xml.sax.handler.ContentHandler):
def __init__(self):
xml.sax.handler.ContentHandler.__init__(self)
self._buffer = None
self._values = {}
self._current_tag = None
self._pages = []
def get_pages(self):
return self._pages
def get_page_count(self):
return len(self._pages)
def get_values(self):
return self._values
def characters(self, content):
if self._current_tag:
self._buffer.append(content)
def startElement(self, name, attrs):
if name in ('title', 'text', 'infobox'):
self._current_tag = name
self._buffer = []
def endElement(self, name):
if name == self._current_tag:
self._values[name] = ' '.join(self._buffer)
if name == 'page':
self.process_article()
def process_article(self):
wikicode = mwparserfromhell.parse(self._values['text'])
infobox_array = wikicode.filter_templates(matches="infobox .*")
infobox = str(infobox_array[0]) if len(infobox_array) > 0 else ""
self._pages.append((self._values['title'], self._values['text'], infobox))
def load_xml(filename):
wiki_handler = XmlHandler()
wiki_parser = xml.sax.make_parser()
wiki_parser.setContentHandler(wiki_handler)
file = os.path.join("chunks", filename)
print("I'm a worker process")
cursor = conn.cursor()
with BZ2File(file, 'r') as f:
for line in f:
wiki_parser.feed(line)
pages = wiki_handler.get_pages()
for page in pages:
cursor.execute("INSERT INTO pages (title, text, infobox) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING", page)
cursor.close()
print("all done")
if __name__ == "__main__":
conn = psycopg2.connect(dbname="wikipedia",
user="postgres",
password="postgres",
host="localhost",
port=5432)
file_list = [f for f in os.listdir("chunks") if os.path.isfile(os.path.join("chunks", f))]
pool = pathos.multiprocessing.ProcessingPool(processes=pathos.multiprocessing.cpu_count())
pool.map(load_xml, file_list)
And the traceback:
Traceback (most recent call last):
File "./loader_parallel.py", line 114, in <module>
pool.map(load_xml, file_list)
File "/home/smh/.local/lib/python3.7/site-packages/multiprocess/pool.py", line 268, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/home/smh/.local/lib/python3.7/site-packages/multiprocess/pool.py", line 657, in get
raise self._value
multiprocess.pool.MaybeEncodingError: Error sending result:
'<multiprocess.pool.ExceptionWithTraceback object at 0x7f87ac0f4470>'.
Reason: 'TypeError("can't pickle pyexpat.xmlparser objects")'
Why can't the pyexpat.xmlparser object be pickled? How can I fix it? I tried testing it by running dill.copy(XmlHandler()) and it did so without error.
I installed pathos via pip3, running Python 3.7, on Debian 10. Pretty new to this, any help is appreciated, thanks in advance!
I'm using HTMLParser (python 2.7)to parse pages I pull down with urllib2,and am coming across AttributeError exceptions when I want to store my data into a list in feed method. But if comment out the __init__ method, the exception was gone
main.py
# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
class MyHTMLParser(HTMLParser):
def __init__(self):
self.terms = []
self.definitions = []
def handle_starttag(self, tag, attrs):
# retrive the terms
if tag == 'div':
for attribute, value in attrs:
if value == 'word':
self.terms.append(attrs[1][1])
# retrive the definitions
if value == 'desc':
if attrs[1][1]:
self.definitions.append(attrs[1][1])
else:
self.definitions.append(None)
parser = MyHTMLParser()
# open page and retrive source page
response = urllib2.urlopen('http://localhost/')
html = response.read().decode('utf-8')
response.close()
# extract the terms and definitions
parser.feed(html)
Output
Traceback (most recent call last):
File "/Users/megachweng/Project/Anki-Youdao/combined.py", line 35, in <module>
parser.feed(html)
File "/usr/local/Cellar/python/2.7.13/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 116, in feed
self.rawdata = self.rawdata + data
AttributeError: MyHTMLParser instance has no attribute 'rawdata'
I think that you don't initialize HTMLParser properly. Maybe you don't need to initialize it at all. This works for me:
# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
class MyHTMLParser(HTMLParser):
def handle_starttag(self, tag, attrs):
print "Encountered a start tag:", tag
# retrive the terms
if tag == 'div':
for attribute, value in attrs:
if value == 'word':
self.terms.append(attrs[1][1])
# retrive the definitions
if value == 'desc':
if attrs[1][1]:
self.definitions.append(attrs[1][1])
else:
self.definitions.append(None)
parser = MyHTMLParser()
# open page and retrive source page
response = urllib2.urlopen('http://localhost/')
html = response.read().decode('utf-8')
response.close()
# extract the terms and definitions
parser.feed(html)
UPDATE
# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
class MyHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.terms = []
self.definitions = []
def handle_starttag(self, tag, attrs):
# retrive the terms
for attribute in attrs:
if attribute[0] == 'align':
self.terms.append(attribute[1])
self.definitions.append(attribute[1])
parser = MyHTMLParser()
html = "<table align='center'><tr><td align='left'><p>ciao</p></td></tr></table>"
# extract the terms and definitions
parser.feed(html)
print parser.terms
print parser.definitions
Output:
['center', 'left']
['center', 'left']
OK I got the solution,super().__init__ cannot work, must hard code the name
def __init__(self):
HTMLParser.__init__(self)
I've based my solution on:
How do I mock an open used in a with statement (using the Mock framework in Python)?,
AttributeError: <module '__main__' from [..] does not have the attribute 'open',
http://www.voidspace.org.uk/python/mock/helpers.html#mock.mock_open
I have a class, which I can instantiate, which writes to a file. I'm trying to test it, but I'm having problems mocking open(). I'm using the following as the smallest piece of code, which can
import os
import unittest
from unittest.mock import mock_open, patch
__author__ = 'drews'
class MockPathExists(object):
def __init__(self, return_value):
self.received_args = None
self.return_value = return_value
def __call__(self, *args, **kwargs):
self.received_args = args
return self.return_value
class WriteData:
def __init__(self, dir, name='World'):
self.name = name
self.dir = dir
def dump(self):
if os.path.exists(self.dir):
with open('{0}/output.text'.format(self.dir), 'w+') as fp:
fp.write('Hello, {0}!'.format(self.name))
class TestListWindowsPasswords(unittest.TestCase):
def setUp(self):
self._orig_pathexists = os.path.exists
os.path.exists = MockPathExists(True)
def test_dump(self):
m = mock_open()
with patch.object(WriteData, 'open', m, create=True):
data_writer = WriteData(
dir='/my/path/not/exists',
name='Foo'
)
data_writer.dump()
self.assertEqual(os.path.exists.received_args[0], '/my/path/not/exists/output.text')
m.assert_called_once_with('/my/path/not/exists/output.text', 'w+')
handle = m()
handle.write.assert_called_once_with('Hello, Foo!')
def tearDown(self):
os.path.exists = self._orig_pathexists
When I run this, I get the following error:
Error
Traceback (most recent call last):
File "/Users/drews/Development/tool/tests/test_mockopen.py", line 41, in test_dump
data_writer.dump()
File "/Users/drews/Development/tool/tests/test_mockopen.py", line 25, in dump
with open('{0}/output.text'.format(self.dir), 'w+') as fp:
FileNotFoundError: [Errno 2] No such file or directory: '/my/path/not/exists/output.text'
How can I mock open(), so that it just returns a file_pointer, and doesn't try to interact with the file system at all?
Mock builtins.open (or module.open, module = the module name that contains WriteData) with the mock_open:
import builtins
class TestListWindowsPasswords(unittest.TestCase):
def setUp(self):
self._orig_pathexists = os.path.exists
os.path.exists = MockPathExists(True)
def test_dump(self):
with patch('builtins.open', unittest.mock.mock_open()) as m:
data_writer = WriteData(
dir='/my/path/not/exists',
name='Foo'
)
data_writer.dump()
self.assertEqual(os.path.exists.received_args[0], '/my/path/not/exists') # fixed
m.assert_called_once_with('/my/path/not/exists/output.text', 'w+')
handle = m()
handle.write.assert_called_once_with('Hello, Foo!')
You can use the __enter__ magic method to simulate which:
from unittest.mock import patch, MagicMock, call, mock_open
#patch('os')
#patch('builtins.open', new_callable=mock_open())
def test_dump(self, mock_open_file, mock_os):
data_writer = WriteData(dir='/my/path/not/exists', name='Foo')
mock_os.path.exists.assert_called_once_with('/my/path/not/exists')
mock_open_file.assert_called_once_with('/my/path/not/exists/output.text', 'w+')
mock_open_file.return_value.__enter__().write.assert_called_once_with('Hello, Foo!')
Hope this helps!
I'm trying to create a Collection Class in Python to access the various collections in my db. Here's what I've got:
import sys
import os
import pymongo
from pymongo import MongoClient
class Collection():
client = MongoClient()
def __init__(self, db, collection_name):
self.db = db
self.collection_name = collection_name
# self.data_base = getattr(self.client, db)
# self.collObject = getattr(self.data_base, self.collection_name)
def getCollection(self):
data_base = getattr(self.client, self.db)
collObject = getattr(data_base, self.collection_name)
return collObject
def getCollectionKeys(self, collection):
"""Get a set of keys from a collection"""
keys_list = []
collection_list = collection.find()
for document in collection_list:
for field in document.keys():
keys_list.append(field)
keys_set = set(keys_list)
return keys_set
if __name__ == '__main__':
print"Begin Main"
agents = Collection('hkpr_restore','agents')
print "agents is" , agents
agents_collection = agents.getCollection
print agents_collection
print agents.getCollectionKeys(agents_collection)
I get the following output:
Begin Main
agents is <__main__.Collection instance at 0x10ff33e60>
<bound method Collection.getCollection of <__main__.Collection instance at 0x10ff33e60>>
Traceback (most recent call last):
File "collection.py", line 52, in <module>
print agents.getCollectionKeys(agents_collection)
File "collection.py", line 35, in getCollectionKeys
collection_list = collection.find()
AttributeError: 'function' object has no attribute 'find'
The function getCollectionKeys works fine outside of a class. What am I doing wrong?
This line:
agents_collection = agents.getCollection
Should be:
agents_collection = agents.getCollection()
Also, you don't need to use getattr the way you are. Your getCollection method can be:
def getCollection(self):
return self.client[self.db][self.collection_name]
A number of people in my organization have different email names from perforce names, so I need to create an IEmailLookup derivation that overrides getAddress to do my evil bidding:
(From my master.cfg)
class MyIEmailLookup:
from buildbot import interfaces
__implements__ = interfaces.IEmailLookup
def getAddresses(user):
address_dict = {"user1", "user_one#our_domain.com"}
try:
address = address_dict[user]
except KeyError:
address = user + "#our_domain.com"
return address
maillookup = MyIEmailLookup()
from buildbot.status import mail
c['status'].append(mail.MailNotifier(....
....
lookup=maillookup
))
I've tried any number of permutations, but I either get:
Traceback (most recent call last):
File "/Library/Python/2.6/site-packages/buildbot-0.8.3p1-py2.6.egg/buildbot/scripts/runner.py", line 1071, in doCheckConfig
ConfigLoader(configFileName=configFileName)
File "/Library/Python/2.6/site-packages/buildbot-0.8.3p1-py2.6.egg/buildbot/scripts/checkconfig.py", line 46, in __init__
self.loadConfig(configFile, check_synchronously_only=True)
File "/Library/Python/2.6/site-packages/buildbot-0.8.3p1-py2.6.egg/buildbot/master.py", line 727, in loadConfig
exec f in localDict
File "/Users/playbuilder/buildbot/master.cfg", line 207, in <module>
lookup=maillookup
File "/Library/Python/2.6/site-packages/buildbot-0.8.3p1-py2.6.egg/buildbot/status/mail.py", line 293, in __init__
assert interfaces.IEmailLookup.providedBy(lookup)
AssertionError
...or any other number of issues, dependant upon how I try to implement the IEmailLookup interface.
I'm using buildbot 0.8.3p1 and python 2.6.1.
I see precious few examples of how to do this, and every one of them fails in my context. What am I missing here?
I just solved this problem myself.
First you need to add (somewhere at the top of the file)
from zope.interface import implements
and then change
__implements__ = interfaces.IEmailLookup
to
if implements:
implements( interfaces.IEmailLookup )
else:
__implements__ = interfaces.IEmailLookup
If you want to fetch email from perforce user, you can use this class:
# .-----------------------.
# | Perforce Email Lookup |
# `-----------------------'
from twisted.internet import defer, utils
from buildbot.util import ComparableMixin
from buildbot.interfaces import IEmailLookup
from zope.interface import implements
import os
import re
class PerforceEmailLookup(ComparableMixin):
implements(IEmailLookup)
compare_attrs = ["p4port", "p4user", "p4passwd", "p4bin"]
env_vars = ["P4CLIENT", "P4PORT", "P4PASSWD", "P4USER",
"P4CHARSET"]
def __init__(self,
p4port = None,
p4user = None,
p4passwd = None,
p4bin = 'p4'):
self.p4port = p4port
self.p4user = p4user
self.p4passwd = p4passwd
self.p4bin = p4bin
self.email_re = re.compile(r"Email:\s+(?P<email>\S+#\S+)\s*$")
def _get_process_output(self, args):
env = dict([(e, os.environ.get(e)) for e in self.env_vars if os.environ.get(e)])
d = utils.getProcessOutput(self.p4bin, args, env)
return d
#defer.deferredGenerator
def getAddress(self, name):
if '#' in name:
yield name
return
args = []
if self.p4port:
args.extend(['-p', self.p4port])
if self.p4user:
args.extend(['-u', self.p4user])
if self.p4passwd:
args.extend(['-P', self.p4passwd])
args.extend(['user', '-o', name])
wfd = defer.waitForDeferred(self._get_process_output(args))
yield wfd
result = wfd.getResult()
for line in result.split('\n'):
line = line.strip()
if not line: continue
m = self.email_re.match(line)
if m:
yield m.group('email')
return
yield name
usage would look like:
c['status'].append(
MailNotifier(
sendToInterestedUsers = True,
mode = 'failing',
lookup = PerforceEmailLookup(
p4port = "perforce:1666",
p4user = "buildbot",
p4passwd = "buildbot")))
Try this:
from buildbot.interfaces import IEmailLookup
from buildbot.util import ComparableMixin
from zope.interface import implements
class lookup_example_email(ComparableMixin):
implements(IEmailLookup)
def getAddress(self,user):
return "%s#example.com"%(user)
...
mn = MailNotifier(..., lookup=lookup_example_email(), extraRecipients=m)
Here's the piece of code I use which works with buildbot 2.3.1 in python3.6.
from buildbot.interfaces import IEmailLookup
from buildbot.util import ComparableMixin
from zope.interface import implementer
#implementer(IEmailLookup)
class EmailMap(ComparableMixin):
def getAddress(self, name):
return f'{name}#xxxxx'