Python HTMLParser: AttributeError

Python HTMLParser: AttributeError - python

I'm using HTMLParser (Python 2.7) to parse pages I pull down with urllib2, and I am running into AttributeError exceptions when I try to store my data in a list while feeding the page to the parser. If I comment out the __init__ method, the exception goes away.
main.py
# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
class MyHTMLParser(HTMLParser):
    """Collect terms and definitions from ``<div>`` start tags.

    NOTE: this is the snippet from the question and it deliberately keeps
    the reported bug: ``__init__`` never calls ``HTMLParser.__init__``, so
    the parser's own state (e.g. ``rawdata``) is never created and
    ``feed()`` raises ``AttributeError``.
    """

    def __init__(self):
        # HTMLParser.__init__(self) is missing here -- cause of the error.
        self.terms = []
        self.definitions = []

    def handle_starttag(self, tag, attrs):
        """Store the second attribute's value of 'word' / 'desc' divs.

        ``attrs`` is the list of ``(name, value)`` pairs of the tag.
        """
        # retrieve the terms
        if tag == 'div':
            for attribute, value in attrs:
                if value == 'word':
                    self.terms.append(attrs[1][1])
                # retrieve the definitions
                if value == 'desc':
                    if attrs[1][1]:
                        self.definitions.append(attrs[1][1])
                    else:
                        # an empty definition is recorded as None
                        self.definitions.append(None)
parser = MyHTMLParser()
# open the page and retrieve its source
response = urllib2.urlopen('http://localhost/')
html = response.read().decode('utf-8')
response.close()
# extract the terms and definitions
# (this feed() call is the one shown failing in the traceback below)
parser.feed(html)
Output
Traceback (most recent call last):
File "/Users/megachweng/Project/Anki-Youdao/combined.py", line 35, in <module>
parser.feed(html)
File "/usr/local/Cellar/python/2.7.13/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 116, in feed
self.rawdata = self.rawdata + data
AttributeError: MyHTMLParser instance has no attribute 'rawdata'

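I think that you don't initialize HTMLParser properly: your __init__ replaces the base-class initializer without ever calling it. Either drop your own __init__ entirely or call the base initializer from it. This works for me: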
# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
class MyHTMLParser(HTMLParser):
    """Collect terms and definitions from ``<div>`` start tags."""

    def __init__(self):
        # Run the base initializer first so the parser's internal state
        # (e.g. rawdata) exists, then create the result lists that
        # handle_starttag appends to.
        HTMLParser.__init__(self)
        self.terms = []
        self.definitions = []

    def handle_starttag(self, tag, attrs):
        """Store the second attribute's value of 'word' / 'desc' divs.

        ``attrs`` is the list of ``(name, value)`` pairs of the tag.
        """
        print("Encountered a start tag: %s" % tag)
        # retrieve the terms
        if tag == 'div':
            for attribute, value in attrs:
                if value == 'word':
                    self.terms.append(attrs[1][1])
                # retrieve the definitions
                if value == 'desc':
                    if attrs[1][1]:
                        self.definitions.append(attrs[1][1])
                    else:
                        # an empty definition is recorded as None
                        self.definitions.append(None)
parser = MyHTMLParser()
# open the page and retrieve its source
response = urllib2.urlopen('http://localhost/')
html = response.read().decode('utf-8')
response.close()
# extract the terms and definitions
parser.feed(html)
UPDATE
# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
class MyHTMLParser(HTMLParser):
    """Record the value of every ``align`` attribute seen in start tags."""

    def __init__(self):
        # The base initializer must run so the parser's own state exists
        # before feed() is called.
        HTMLParser.__init__(self)
        self.terms = []
        self.definitions = []

    def handle_starttag(self, tag, attrs):
        """Append each ``align`` value to both ``terms`` and ``definitions``.

        ``attrs`` is the list of ``(name, value)`` pairs of the tag.
        """
        # retrieve the terms
        for attribute in attrs:
            if attribute[0] == 'align':
                self.terms.append(attribute[1])
                self.definitions.append(attribute[1])
parser = MyHTMLParser()
# sample document: two of its tags carry an align attribute
html = "<table align='center'><tr><td align='left'><p>ciao</p></td></tr></table>"
# extract the terms and definitions
parser.feed(html)
# show what was collected (see the output below)
print parser.terms
print parser.definitions
Output:
['center', 'left']
['center', 'left']

OK, I got the solution: super().__init__ does not work here (in Python 2, HTMLParser is an old-style class), so the base class has to be named explicitly:
def __init__(self):
    # Name the base class explicitly: super() cannot be used because
    # HTMLParser is an old-style class in Python 2.
    HTMLParser.__init__(self)

Related

Adding a ProgressBar to a multithreaded Python script

I am trying to add a progress bar to my script, but I haven't succeeded, I think because the script is multi-threaded, or maybe because the progress bar has to be added in a separate thread. I found plenty of solutions on Stack Overflow, for example the tqdm library, but I couldn't implement them; I am also confused about where exactly the progress bar code has to go to make it work.
this is my code :
# -*- coding: utf-8 -*
from __future__ import unicode_literals
# !/usr/bin/python
import codecs
from multiprocessing.dummy import Pool
start_raw = "myfile"
threads = 10

# Load the work items, one per line; undecodable (non-ASCII) bytes are
# dropped because of errors='ignore'.
with codecs.open(start_raw, mode='r', encoding='ascii', errors='ignore') as f:
    lists = f.read().splitlines()
lists = list((lists))


def myfunction(x):
    """Worker applied to each line: print it."""
    # NOTE(review): the bare except is kept from the original post; it
    # silently hides any error raised while printing.
    try:
        print(x)
    except:
        pass


def Main():
    """Run ``myfunction`` over every line with a pool of ``threads`` workers."""
    try:
        pp = Pool(int(threads))
        pr = pp.map(myfunction, lists)
    except:
        pass


if __name__ == '__main__':
    Main()
i have tried this solution
https://stackoverflow.com/a/45276885/9746396 :
# -*- coding: utf-8 -*
from __future__ import unicode_literals
# !/usr/bin/python
import codecs
from multiprocessing.dummy import Pool
import tqdm
start_raw = "myfile"
threads = 1

# Load the work items, one per line; undecodable (non-ASCII) bytes are
# dropped because of errors='ignore'.
with codecs.open(start_raw, mode='r', encoding='ascii', errors='ignore') as f:
    lists = f.read().splitlines()
lists = list((lists))


def myfunction(x):
    """Worker applied to each line: print it."""
    try:
        print(x)
    except:
        pass


def Main():
    """Run ``myfunction`` over every line with a pool of ``threads`` workers."""
    try:
        pp = Pool(int(threads))
        pr = pp.map(myfunction, lists)
    except:
        pass


if __name__ == '__main__':
    with Pool(2) as p:
        # NOTE: kept as posted -- this is the line the question is about.
        # Main() is *called* here, so imap receives its return value (None)
        # instead of a function, hence "'NoneType' object is not callable".
        r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30))
But the code does not run and I get an exception (TypeError: 'NoneType' object is not callable):
0%| | 0/30 [00:00<?, ?it/s]Traceback (most recent call last):
File "file.py", line 35, in <module>
r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30))
File "C:\mypath\Python\Python38-32\lib\site-packages\tqdm\std.py", line 1118, in __iter__
for obj in iterable:
File "C:\mypath\Python\Python38-32\lib\multiprocessing\pool.py", line 865, in next
raise value
File "C:\mypath\Python\Python38-32\lib\multiprocessing\pool.py", line 125, in worker
result = (True, func(*args, **kwds))
TypeError: 'NoneType' object is not callable
0%| | 0/30 [00:00<?, ?it/s]

I presume you wanted to pass myfunction instead of Main to imap, consistent with the first example.
When you write p.imap(Main(), range(30)) in r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30)), Python calls Main immediately and passes its return value (None) as the first argument to imap.
You should pass the function itself, without the parentheses, for example p.imap(Main, range(30)). Note that imap calls the function with one item at a time, so the function you pass must accept one argument, as myfunction does.

How do I read this stringified javascript variable into Python?

I'm trying to read _pageData from https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1 into Python 2.7.11 so that I can process it, using this code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Testing _pageData processing. """
import urllib2
import re
import ast
import json
import yaml
BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1'


def main():
    """ Do the business. """
    response = urllib2.urlopen(BASE_URL, None)
    # Capture the text between the quotes of: var _pageData = "...";</script>
    results = re.findall('var _pageData = \\"(.*?)\\";</script>', response.read())
    first_result = results[0]
    # These all fail
    # NOTE: kept as posted -- literal_eval is the call that raises the
    # SyntaxError shown in the traceback below.
    data = ast.literal_eval(first_result)
    # data = yaml.load(first_result)
    # data = json.loads(first_result)


if __name__ == '__main__':
    main()
but get the following error:
Traceback (most recent call last):
File "./temp.py", line 24, in <module>
main()
File "./temp.py", line 19, in main
data = ast.literal_eval(first_result)
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 49, in literal_eval
node_or_string = parse(node_or_string, mode='eval')
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 37, in parse
return compile(source, filename, mode, PyCF_ONLY_AST)
File "<unknown>", line 1
[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, 
FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n
^
SyntaxError: invalid syntax
var _pageData is in this format:
"[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, 
FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n"
I've tried replacing the \" and \n and decoding the \uXXXX before using, without success. I've also tried replacing ,, with ,"", and ,'', without success.
Thank you.

It seems like there are three kinds of syntactic errors in your string:
, followed by ,
[ followed by ,
, followed by ]
Assuming that those are supposed to be null elements (or ''?), you can just replace those in the original string -- exactly like you did for the ,, case, but you missed the others. Also, you have to do the ,, replacement twice, otherwise you will miss cases such as ,,,,. Then, you can load the JSON string with json.loads.
>>> s = "your messed up json string"
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r"\[\s*,", "[ null,", s)
>>> s = re.sub(r",\s*\]", ", null]", s)
>>> json.loads(s)

I started off using ast.literal_eval(...) because I was under the (mistaken?) impression that JavaScript arrays and Python lists were mutually compatible, so all I had to do was destringify _pageData.
However, I hadn't noticed that Python doesn't like ,, true, false or [,. Fixing them does the trick (thank you @Two-Bit Alchemist and @tobias_k)
So, the following appears to work:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Testing _pageData processing. """
import urllib2
import re
import ast
import json
import yaml
BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1'


def _js_array_to_python(text):
    """Rewrite the escaped JavaScript array text into a Python literal."""
    # Fill the holes left by consecutive commas. A loop is needed because
    # one non-overlapping replace only fixes every other ",," in a longer
    # run; looping handles runs of any length.
    while ',,' in text:
        text = text.replace(',,', ',None,')
    text = text.replace('[,', '[None,')
    # The page stores the array as an escaped string: \" delimits the inner
    # strings and \n is a literal backslash-n between elements.
    text = text.replace('\\"', '\'')
    text = text.replace('\\n', '')
    # NOTE(review): these are plain substring replacements, so they also
    # rewrite "true"/"false" (and the comma runs above) inside string values.
    text = text.replace('true', 'True')
    text = text.replace('false', 'False')
    return text


def main():
    """ Do the business. """
    response = urllib2.urlopen(BASE_URL, None)
    try:
        page = response.read()
    finally:
        # always release the connection, even if reading fails
        response.close()
    results = re.findall('var _pageData = \\"(.*?)\\";</script>', page)
    if not results:
        raise ValueError('no _pageData variable found at %s' % BASE_URL)
    data = ast.literal_eval(_js_array_to_python(results[0]))
    for entry in data:
        print(entry)


if __name__ == '__main__':
    main()

local variable 'texts' referenced before assignment

This is code to extract Unicode values from text files, but it gives me the following error.
# -*- coding: utf-8 -*-
import codecs
import os
#from urllib import urlopen
from bs4 import BeautifulSoup
import re
##import nltk
#def remove_content_li(input_document) :
#soup = BeautifulSoup(input_document)
def extract_unicode(input):
    """Strip ASCII letters and symbols from the text of an HTML file.

    ``input`` is the path of the file to read. Kept as posted in the
    question, including the bug that triggers the UnboundLocalError.
    """
    _ascii_letters = re.compile(r'[a-zA-Z]', flags=re.UNICODE)
    symbols = re.compile(r'[{} &+( )" =!.?.:.. / | » © : >< # « ,] 1 2 3 4 5 6 7 8 9 _ - + ; [ ] %',flags=re.UNICODE)
    soup = BeautifulSoup(open(input,'r'),'lxml')
    for li in soup.find_all('li'):
        li.decompose()
        # NOTE: indentation reconstructed from the traceback and the answer:
        # texts is only assigned inside this loop, so it stays unbound when
        # the document contains no <li> element.
        texts = soup.findAll(text=True)

    def contains_unicode(text):
        # True when str() cannot convert the value
        try:
            str(text)
        except:
            return True
        return False

    # NOTE(review): the filter is called with the whole `texts` list rather
    # than the current `text` -- looks unintended; confirm.
    result = ' '.join((text for text in texts if contains_unicode(texts)))
    result =_ascii_letters.sub(" ", result)
    result = symbols.sub(" ",result)
    ##print(result)
    ## result = nltk.clean_html(result)
    # NOTE(review): the value returned by replace() is discarded and the
    # function returns nothing.
    result.replace('*', '')
This is the error I get
File "e3.py", line 50, in <module>
extract_unicode((os.path.join(dirname, filename)))
File "e3.py", line 30, in extract_unicode
result = ' '.join((text for text in texts if contains_unicode(texts)))
UnboundLocalError: local variable 'texts' referenced before assignment

The error is telling you exactly what the problem is. You're using a variable texts before you define it. Perhaps soup.find_all('li') is returning an empty list, since you only set texts if it finds something.

error when parsing yahoo weather xml in python

I have been searching for a solution. I am trying to get weather information for a location ID stored in my database: the code is meant to take location_id from the Location model and store the information in WeatherLog, another model in my database. This is the code, with the errors below:
# -*- coding: UTF-8 -*-
import urllib
from xml.dom import minidom
from xml.dom.minidom import parse
from argparse import ArgumentParser
from pprint import pprint
from datetime import datetime
from django.db import models
from django.core.management.base import NoArgsCommand
Location = models.get_model("weatherapp", "Location")
WeatherLog = models.get_model("weatherapp", "WeatherLog")
SILENT, NORMAL, VERBOSE = 0, 1, 2
WEATHER_URL = 'http://weather.yahooapis.com/forecastrss?p=%s&u=c'
METRIC_PARAMETER = ''
WEATHER_NS = "http://xml.weather.yahoo.com/ns/rss/1.0"


def weather_for_location(location_id, options):
    """Fetch and parse the weather feed for ``location_id``.

    Returns a dict of current conditions and forecasts, or ``None`` when
    the feed cannot be downloaded or parsed. Kept as posted in the
    question; the NOTE comments mark the lines behind the reported errors.
    """
    # taken from http://developer.yahoo.com/python/python-xml.html
    # and modified a little
    url = WEATHER_URL % location_id
    try:
        dom = minidom.parse(urllib.urlopen(url))
    except Exception:
        return None
    # Get the units of the current feed.
    # NOTE: [0] raises the reported IndexError when the feed has no such
    # element.
    yunits = dom.getElementsByTagNameNS(WEATHER_NS, 'units')[0]
    # Get the location of the specified location code.
    ylocation = dom.getElementsByTagNameNS(WEATHER_NS, 'location')[0]
    # Get the current conditions.
    ycondition = dom.getElementsByTagNameNS(WEATHER_NS, 'condition')[0]
    forecasts = []
    # NOTE: enumerate() yields (index, node) tuples, which is why the
    # reported "'tuple' object has no attribute 'getAttribute'" occurs.
    for node in enumerate( dom.getElementsByTagNameNS(WEATHER_NS, 'forecast')):
        forecasts.append({
            'date': node.getAttribute('date'),
            'low': node.getAttribute('low'),
            'high': node.getAttribute('high'),
            'condition': node.getAttribute('text')
        })
    # NOTE(review): yatmosphere and ywind are never assigned in the code
    # shown -- presumably lookups for them were omitted; confirm.
    return {
        'current_condition': ycondition.getAttribute('text'),
        'current_temp': ycondition.getAttribute('temp'),
        'current_humidity': yatmosphere.getAttribute('humidity'),
        'current_visibility': yatmosphere.getAttribute('visibility'),
        'current_wind_speed': ywind.getAttribute('speed'),
        'forecasts': forecasts,
        'title': dom.getElementsByTagName('title')[0].firstChild.data,
        'guid': dom.getElementsByTagName('guid')[0].firstChild.data,
    }
class Command(NoArgsCommand):
    """Management command that stores one weather log per location."""

    help = "Aggregates data from weather feed"

    def handle_noargs(self, **options):
        """Fetch the weather for every Location and record new WeatherLogs."""
        verbosity = int(options.get('verbosity', NORMAL))
        created_count = 0
        for l in Location.objects.all():
            weather = weather_for_location(l.location_id, options)
            if verbosity > NORMAL:
                pprint(weather)
            # NOTE: kept as posted -- weather is None when the feed could
            # not be fetched, which produces the reported TypeError here.
            timestamp_parts = map(int, weather['guid'].split("_")[1:-1])
            timestamp = datetime(*timestamp_parts)
            log, created = WeatherLog.objects.get_or_create(
                location=l,
                timestamp=timestamp,
                defaults={
                    'temperature': weather['current_temp'],
                    'humidity': weather['current_humidity'],
                    'wind_speed': weather['current_wind_speed'],
                    'visibility': weather['current_visibility'],
                }
            )
            if created:
                created_count += 1
        if verbosity > NORMAL:
            print("New weather logs: %d" % created_count)
error:
> File
> "/home/temi/rapidsmstuts/myapp/weatherapp/management/commands/check_weather.py",
> line 74, in handle_noargs
> timestamp_parts = map(int, weather['guid'].split("_")[1:-1]) TypeError: 'NoneType' object has no attribute '__getitem__'
File "/home/temi/rapidsmstuts/myapp/weatherapp/management/commands/check_weather.py", line 47, in weather_for_location
'date': node.getAttribute('date'),
AttributeError: 'tuple' object has no attribute 'getAttribute'
File "/home/temi/rapidsmstuts/myapp/weatherapp/management/commands/check_weather.py", line 35, in weather_for_location
yunits = dom.getElementsByTagNameNS(WEATHER_NS, 'units') [0]
IndexError: list index out of range

How to write/overwrite a file with HTMLParser

I need to create some files from a template. I'm using psycopg2 to fetch from a database. Then I loop through. Now I need to write to file.
Thanks!
import sys
from HTMLParser import HTMLParser
from xml.etree import cElementTree as etree
import psycopg2
import psycopg2.extras
class LinksParser(HTMLParser):
    """HTML parser that builds an ElementTree from the tags it is fed."""

    def __init__(self):
        HTMLParser.__init__(self)
        # TreeBuilder assembles the elements as parser events arrive
        self.tb = etree.TreeBuilder()

    def handle_starttag(self, tag, attributes):
        """Open an element; the (name, value) pairs become its attributes."""
        self.tb.start(tag, dict(attributes))

    def handle_endtag(self, tag):
        """Close the current element."""
        self.tb.end(tag)

    def handle_data(self, data):
        """Add text to the current element."""
        self.tb.data(data)

    def close(self):
        """Finish parsing and return the root element of the built tree."""
        HTMLParser.close(self)
        return self.tb.close()
conn = psycopg2.connect(dbname="**", user="**", password="**", host="/tmp/", port="**")
# NOTE(review): cur is never created in the code shown -- presumably a
# conn.cursor(...) call was left out of the post; confirm.
cur.execute("SELECT * FROM landingpagedata;")
rows = cur.fetchall()
template = 'template.html'
parser = LinksParser()
# parser.feed(open('landingIndex.html').read()) #for testing
# root = parser.close()
for row in rows:
    # parse the template afresh for each row
    parser.feed(open(template).read())
    root = parser.close()
    #title
    title = root.find(".//title")
    title.text = str(row['title'])
    f = open(row['page_name'], 'w')
    # NOTE: kept as posted -- root is an Element, which has no write()
    # method; this is the call behind the reported AttributeError.
    root.write(f)
    # start the next row with a new parser
    parser = LinksParser()
The error is:
Traceback (most recent call last):
File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 87, in <module>
main()
File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 75, in main
root.write('page_name')
AttributeError: write
Oh and I'm using open('page', 'w') because these pages exist already?

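I think you want f.write(...), not root.write(f): root is an Element, which has no write method. (f.write(root) only works if str(root) gives you the HTML you want to write out; otherwise serialize the element first, for example f.write(etree.tostring(root)).)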

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python HTMLParser: AttributeError - python

OK I got the solution,super().__init__ cannot work, must hard code the name def __init__(self): HTMLParser.__init__(self)

Related

Adding a ProgressBar to a multithreaded Python script

How do I read this stringified javascript variable into Python?

local variable 'texts' referenced before assignment

error when parsing yahoo weather xml in python

How to write/overwrite a file with HTMLParser

Categories

Resources

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python HTMLParser: AttributeError - python

OK I got the solution,super().__init__ cannot work, must hard code the name def __init__(self): HTMLParser.__init__(self)

Related

Adding a ProgressBar to a multithreaded Python script

How do I read this stringified javascript variable into Python?

local variable 'texts' referenced before assignment

error when parsing yahoo weather xml in python

How to write/overwrite a file with HTMLParser

Categories

Resources

OK I got the solution,super().__init__ cannot work, must hard code the name def __init__(self): HTMLParser.__init__(self)