Attribute Error: 'PdfFileReader' object has no attribute '_checkKids' - python

I am new to Python, and pretty new to programming too. Any advice would be very helpful.
I used a script to read a pdf file and extract a javascript file to use for form autofilling.
I have installed the PyPDF2 module, but I am getting this error:
An error occured... :( 'PdfFileReader' object has no attribute '_checkKids'
Here is the code I am using :
import os
import sys
from collections import OrderedDict
from PyPDF2 import PdfFileReader
def _getFields(obj, tree=None, retval=None, fileobj=None):
fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU':
'Alternate Field Name',
'/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV':
'Default Value'}
if retval is None:
retval = OrderedDict()
catalog = obj.trailer["/Root"]
if "/AcroForm" in catalog:
tree = catalog["/AcroForm"]
else:
return None
if tree is None:
return retval
obj._checkKids(tree, retval, fileobj)
for attr in fieldAttributes:
if attr in tree:
obj._buildField(tree, retval, fileobj, fieldAttributes)
break
if "/Fields" in tree:
fields = tree["/Fields"]
for f in fields:
field = f.getObject()
obj._buildField(field, retval, fileobj, fieldAttributes)
return retval
def get_form_fields(infile):
    """Return the form fields of a PDF as an ordered ``{name: value}`` mapping.

    Parameters:
        infile: path of the PDF file to read.

    Returns:
        An ``OrderedDict`` mapping each field name to its ``/V`` entry, or
        ``''`` when the field has no ``/V`` entry. The mapping is empty when
        ``_getFields`` reports no form data by returning ``None``.
    """
    # Build the whole result while the file is still open, so nothing is read
    # from a closed handle, and close the handle deterministically afterwards.
    with open(infile, 'rb') as pdf_handle:
        fields = _getFields(PdfFileReader(pdf_handle))
        if fields is None:
            return OrderedDict()
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())
def selectListOption(all_lines, k, v):
    """Append JavaScript that selects the option whose text is *v* in list *k*.

    Parameters:
        all_lines: list of script fragments; extended in place.
        k: id of the ``<select>`` element.
        v: visible text of the option to select.

    The fragments carry no trailing newline, exactly as before.
    """
    import json  # local import: only needed to quote k and v safely

    all_lines.extend([
        'function setSelectedIndex(s, v) {',
        'for (var i = 0; i < s.options.length; i++) {',
        'if (s.options[i].text == v) {',
        's.options[i].selected = true;',
        'return;',
        '}',
        '}',
        '}',
    ])
    # json.dumps yields a double-quoted literal with quotes, backslashes and
    # control characters escaped, so odd field values cannot break the script.
    all_lines.append(
        'setSelectedIndex(document.getElementById('
        + json.dumps(k, ensure_ascii=False) + '), '
        + json.dumps(v, ensure_ascii=False) + ');')
def readList(fname):
    """Return the lines of the text file *fname* without their line endings.

    Parameters:
        fname: path of the file to read (opened in text mode).

    Returns:
        A list with one string per line; only trailing ``\\r``/``\\n``
        characters are removed, other whitespace is kept.
    """
    with open(fname, 'r') as fh:
        # Strip an explicit character set so the result does not depend on
        # the platform's os.linesep.
        return [line.rstrip('\r\n') for line in fh]
def createBrowserScript(fl, fl_ext, items, pdf_file_name):
    """Write a JavaScript form-filling script next to *pdf_file_name*.

    Parameters:
        fl: names of fields that are handled through ``selectListOption``.
        fl_ext: names of fields for which the value ``'/0'`` means "checked".
        items: mapping of field name to field value.
        pdf_file_name: path of the source PDF; the script is written to the
            same path with the extension replaced by ``.txt``.

    Nothing is written when *pdf_file_name* is empty or *fl* is empty.
    Each field is also echoed to stdout as ``name -> value``.
    """
    if not pdf_file_name or not fl:
        return
    of = os.path.splitext(pdf_file_name)[0] + '.txt'
    all_lines = []
    for k, v in items.items():
        print(k + ' -> ' + v)
        element = "document.getElementById('" + k + "')"
        if v in ['/Yes', '/On'] or (v in ['/0'] and k in fl_ext):
            all_lines.append(element + ".checked = true;\n")
        elif v in ['/No', '/Off', '']:
            # The empty value is fully handled here; the former extra
            # "v in [''] and k in fl_ext" branch could never be reached.
            all_lines.append(element + ".checked = false;\n")
        elif k in fl:
            selectListOption(all_lines, k, v)
        else:
            all_lines.append(element + ".value = '" + v + "';\n")
    # The context manager closes the file even if writing fails.
    with open(of, 'w') as outF:
        outF.writelines(all_lines)
def execute(args):
try:
fl = readList('myview.ini')
fl_ext = readList('myview_ext.ini')
if len(args) == 2:
pdf_file_name = args[1]
items = get_form_fields(pdf_file_name)
createBrowserScript(fl, fl_ext, items, pdf_file_name)
else:
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.pdf')]
for f in files:
items = get_form_fields(f)
createBrowserScript(fl, fl_ext, items, f)
except BaseException as msg:
print('An error occured... :( ' + str(msg))
if __name__ == '__main__':
from pprint import pprint
execute(sys.argv)

The error you're seeing is being triggered because you call:
obj._checkKids(tree, retval, fileobj)
_checkKids has an underscore in front of it. This indicates that the function is meant to be used internally within the class and shouldn't be called externally.
In python, this means:
"1. Single Leading Underscore:
When it comes to variable and method names, the single underscore prefix has a meaning by convention only. It’s a hint to the programmer—and it means what the Python community agrees it should mean, but it does not affect the behavior of your programs.
The underscore prefix is meant as a hint to another programmer that a variable or method starting with a single underscore is intended for internal use. This convention is defined in PEP 8.
This isn’t enforced by Python. Python does not have strong distinctions between “private” and “public” variables like Java does. It’s like someone put up a tiny underscore warning sign that says:
“Hey, this isn’t really meant to be a part of the public interface of this class. Best to leave it alone.”"
https://dbader.org/blog/meaning-of-underscores-in-python
My additional recommendations:
I noticed some of your indents are not within the first function that you've written. I would double check all the indents to make sure the code is being included.

Related

TreeView to JSON in Python

[Edit: apparently this file looks similar to h5 format]
I am trying to extract metadata from a file with the extension .dm3 using hyperspy in Python. I am able to get all the data, but it is saved as a tree view and I need it as JSON. I tried to write my own parser to convert it, which worked for most cases but then failed:
TreeView data generated
Is there a library or package I can use to convert the tree view to JSON in Python?
My parser:
def writearray(file,string):
    """Write ``key=a,b,c`` to *file* as the JSON member ``"key":[a,b,c]``.

    Parameters:
        file: writable text stream.
        string: a ``key=value`` pair; only the first ``=`` separates the key
            from the value, so a value may itself contain ``=``.

    Raises:
        IndexError: if *string* contains no ``=`` (nothing is written).
    """
    k = string.split('=', 1)
    # Build the whole member first so a malformed pair leaves no partial output.
    file.write('"' + k[0] + '":' + '[' + k[1] + ']')
def writenum(file,string):
k = string.split('=')
file.write('"' + k[0] + '":' + k[1])
def writestr(file,string):
k = string.split('=')
file.write('"' + k[0] + '":' +'"'+ k[1]+'"')
def startnew(file,string):
file.write('"'+string+'":'+'{\n')
def closenum(file,string):
k = string.split('=')
file.write('"' + k[0] + '":' + k[1] + '\n')
file.write('},\n')
def closestr(file,string):
k = string.split('=')
file.write('"' + k[0] + '":' + '"' + k[1] + '"' + '\n')
file.write('},\n')
def closearr(file,string):
k = string.split('=')
file.write('"' + k[0] + '":' + '[')
for char in k[1]:
file.write(char)
file.write(']\n')
file.write('},\n')
def strfix(string):
    """Return *string* with every space character (``' '``) removed.

    Other whitespace such as tabs and newlines is left untouched.
    """
    return string.replace(' ', '')
def writethis(file,string):
stripped = strfix(string)
if "=" in stripped:
temp = stripped.split("=")
if ',' in temp[1]:
writearray(file,stripped)
elif temp[1].isdigit() or temp[1].isdecimal():
writenum(file,stripped)
else:
writestr(file,stripped)
def createMetaData(dm3file):
txtfile = os.path.splitext(dm3file)[0] + '.txt'
jsonfile = os.path.splitext(dm3file)[0] + '.json'
s = hs.load(dm3file)
s.original_metadata.export(txtfile)
file1 = open(txtfile, 'r', encoding="utf-8")
Lines = file1.readlines()
k = []
for line in Lines:
k.append(line)
L = []
for string in k:
temp = ''
for char in string:
if char.isalpha() or char.isdigit() or char == '=' or char == ' ' or char == '<' or char == '>' or char == ',' or char == '.' or char == '-' or char == ':':
temp += char
L.append(temp)
file2 = open(jsonfile, 'w', encoding="utf-8")
file2.write('{\n')
for i in range(0, len(L) - 1):
currentspaces = len(L[i]) - len(L[i].lstrip())
nextspaces = len(L[i + 1]) - len(L[i + 1].lstrip())
sub = nextspaces - currentspaces
if i != len(L) - 2:
if (sub == 0):
writethis(file2, L[i])
if '=' in L[i]:
file2.write(',\n')
else:
file2.write('\n')
elif sub > 0:
startnew(file2, L[i])
else:
if sub == -3:
writethis(file2, L[i])
file2.write('\n},\n')
elif sub == -7:
writethis(file2, L[i])
file2.write('\n}\n},\n')
else:
writethis(file2, L[i])
file2.write('\n}\n}\n}\n}')
file1.close()
os.remove(txtfile)
enter code here
I wrote a parser for the tree-view format:
from ast import literal_eval
from collections import abc
from more_itertools import peekable
def parse_literal(x: str):
    """Convert the text *x* to a Python value when it is a valid literal.

    Parameters:
        x: raw value text.

    Returns:
        The value produced by ``ast.literal_eval`` when the stripped text is
        a Python literal (number, string, list, ...); otherwise the stripped
        text itself.
    """
    text = x.strip()
    try:
        # Evaluate the stripped text: older interpreters reject literals with
        # leading whitespace, which would turn " 5" into the string "5".
        return literal_eval(text)
    except Exception:
        # Anything that is not a literal is kept as plain text.
        return text
def _treeview_parse_list(lines: peekable) -> list:
list_as_dict = {}
for line in (x.strip() for x in lines):
raw_k, raw_v = line.split(' = ')
list_as_dict[int(raw_k.split()[-1][1:-1])] = parse_literal(raw_v)
peek = lines.peek(None)
if '╚' in line or (peek is not None and '├' in peek):
break
list_as_list = [None] * (max(list_as_dict) + 1)
for idx, v in list_as_dict.items():
list_as_list[idx] = v
return list_as_list
def _treeview_parse_dict(lines: peekable) -> dict:
node = {}
for line in (x.strip() for x in lines):
if ' = ' in line:
raw_k, raw_v = line.split(' = ')
node[raw_k.split()[-1]] = parse_literal(raw_v)
elif '<list>' in line:
node[line.split()[-2]] = _treeview_parse_list(lines)
else:
try:
idx = line.index('├')
except ValueError:
idx = line.index('└')
peek = lines.peek(None)
if peek is not None and '├' in peek and idx == peek.index('├'):
node[line.split()[-1]] = {}
else:
node[line.split()[-1]] = _treeview_parse_dict(lines)
if '└' in line:
break
return node
def treeview_to_dict(lines: abc.Iterable) -> dict:
return _treeview_parse_dict(peekable(lines))
Usage:
with open('meta.txt') as f:
d = treeview_to_dict(f)
You can obtain the metadata as a JSON file using Python's built-in json library:
import json
with open('meta.txt') as txt_file:
with open('meta.json', 'w') as json_file:
json.dump(treeview_to_dict(txt_file), json_file, indent=4)
I've added indent=4 to make the JSON file more human-readable, so that you can verify it against the original format. As far as I can tell they match up in a sensible way.
As I've written this, it uses the third-party more_itertools.peekable class. If you can't use more_itertools, it shouldn't be too hard to implement that functionality yourself, or just refactor the code so that it is no longer necessary to look ahead.
License:
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to https://unlicense.org
A more straightforward approach is to use the as_dictionary method to convert the metadata to a python dictionary, then you can convert it to json.
import hyperspy.api as hs
s = hs.load('file.dm3')
metadata_dictionary = s.original_metadata.as_dictionary()
A different approach is to use the new RosettaSciIO library, which has been split from hyperspy to extract the metadata, for more information see the documentation https://hyperspy.org/rosettasciio/

How to read a string in a bitstream in Python

I'm writing the following string in my output file:
bitstream.add("stri", 32)
where
def add(self, data, length):
    """Append *data* to ``self.cache`` as exactly *length* bits, then flush.

    Parameters:
        data: a string (each character contributes at least 8 bits, taken
            from ``ord``) or a non-negative integer (its binary digits).
        length: number of bits to emit. Shorter bit strings are left-padded
            with ``'0'``; longer ones are cut to their first *length* bits.

    Raises:
        ValueError: if *data* is a negative integer, which has no plain
            binary-digit form.
    """
    if isinstance(data, str):
        s = ''.join('{:08b}'.format(ord(char)) for char in data)
    else:
        if data < 0:
            # bin(-5) is '-0b101'; slicing it would leak a 'b' into the bits.
            raise ValueError('cannot encode a negative integer: %r' % (data,))
        s = bin(data)[2:]
    # Left-pad to the requested width, then keep only the leading bits.
    s = s.rjust(length, '0')[:length]
    self.cache = self.cache + s
    self.flush()
Later on I need to read the string from the output file. I use Python struct unpack module as follows:
from struct import unpack
key_length_bytes = 32
key = ""
for _ in range(0, key_length_bytes):
carattere = chr(unpack('>B', in_file.read(1))[0])
key = "%s%s" (key, carattere)
I get
key = "%s%s" (key, carattere)
TypeError: 'str' object is not callable
Thank you for any help you could provide.
You're missing a % sign.
key = "%s%s" (key, carattere) needs to be changed into
key = "%s%s" % (key, carattere)

Too many values to unpack python gbf file

Hello, I get the Python error "too many values to unpack" when I run my script.
Traceback:
File "C:\Python27.1\perpetuum.py", line 193, in __init__
header_offset, header_length = struct.unpack('8sii', f.read(16))
ValueError: too many values to unpack.
Code:
class DataFile(object):
"""GBF file reader"""
def __init__(self, filename):
self.filename = filename
with open(filename, 'rb') as f:
header_offset, header_length = struct.unpack('8sii', f.read(16))
if magic != 'GXYDATA\x1a':
raise Exception('Invalid data file (wrong magic)', magic)
header = self._get_record(header_offset, header_length)
self._records = self._parse_header(header)
def _decode(self, data):
try:
import numpy as np
i = np.arange(len(data), dtype=np.byte)
buf = np.frombuffer(data, np.byte) ^ ((i + 1) * (i ^ -54) - 84)
return buf.tostring()
except ImportError:
buf = array.array('B', data)
for i in xrange(len(data)):
buf[i] = 0xff & (buf[i] ^ ((i + 1) * (i ^ 0xca) - 84))
return buf.tostring()
def _get_record(self, offset, length):
with open(self.filename, 'rb') as f:
f.seek(offset)
data = f.read(length)
return self._decode(data)
def _parse_header(self, header):
"""
header record format:
int numRecords
for each record:
char[nameLen] nameSZ, 0
int offset
int length
int unkLength
"""
records = {}
num_records = struct.unpack_from('i', header)[0]
pos = 4
for i in xrange(num_records):
name_end = header.find('\0', pos)
name = header[pos:name_end]
pos = name_end + 1
offset, length, unkLength = struct.unpack_from('iii', header, pos)
pos += 12
# f1Length = min(13, unkLength)
# f1 = header[pos:pos+f1Length]
pos += unkLength
records[name] = (offset, length)
return records
PREFIX_MAP = {'\x89PNG': '.png',
'DDS ': '.dds',
'A3MF': '.a3m',
'#': '.txt',
'=': '.txt',
'Extended Module': '.xm',
'RIFF': '.wav',
'OggS': '.ogg'}
def _guess_ext(self, name, data):
for prefix, ext in self.PREFIX_MAP.iteritems():
if data.startswith(prefix):
return ext
return '.bin'
CATEGORY_MAP = OrderedDict([
('def*.png', 'icons'),
('icon*.png', 'icons'),
('entityIcon*.png', 'icons'),
('noIcon*.png', 'icons'),
('gfx_*.png', 'gfx'),
('*.a3m', 'models'),
('snd_*', 'sound'),
('altitude*', 'terrain'),
('terrain*', 'terrain'),
('altitude0*', 'terrain'),
('blocks0*', 'terrain'),
('control0*', 'terrain'),
('plants0*', 'terrain'),
('surface0*', 'terrain'),
('tactical*.png', 'tactical_icons'),
('font*', 'font'),
('textures_*.dds', 'textures'),
('corp*.png', 'corp_icons'),
('credits.txt', 'misc'),
('eula*.txt', 'misc'),
('*.txt', 'text_data')])
def dump_record(self, name, dest_dir, sort=False):
offset, length = self._records[name]
print '%08x: %s (%.2f KB)' % (offset, name, length / 1024.)
data = self._get_record(offset, length)
name += self._guess_ext(name, data)
if sort:
for pattern, category in self.CATEGORY_MAP.iteritems():
if fnmatch.fnmatch(name, pattern):
dest_dir = os.path.join(dest_dir, category)
try:
os.makedirs(dest_dir)
except OSError:
pass
break
rec_filename = os.path.join(dest_dir, name)
with open(rec_filename, 'wb') as f:
f.write(data)
def dump_records(self, patterns, dest_dir, sort=False):
for name in self._records:
if any(fnmatch.fnmatch(name, pattern) for pattern in patterns):
self.dump_record(name, dest_dir, sort)
any ideas ?
The code
struct.unpack('8sii', f.read(16))
unpacks into three values - an 8-byte string and two integers, each 4 bytes long - while the left-hand side of your assignment has only two variables.
See the struct documentation for its format string.
This line is making an incorrect assumption:
header_offset, header_length = struct.unpack('8sii', f.read(16))
You are probably better off checking the size of the tuple returned from struct.unpack and then dealing with the results in a conditional manner.
Note that according to the docs, this method always returns a tuple, even if that tuple is of length one.
Silly me: I had removed "magic," before "header_offset".
drhagen stupidity of human take -1 and no answer at all get my skill at photoshop and I could laugh of you any time

Python Key Error=0 - Can't find Dict error in code

Basically, I have been racking my brains for a good while now as to why my code is not working. I have tested parts separately and have looked throughout the web to see if it can help, to no avail.
I am getting an error that the traceback is:
Traceback (most recent call last):
File "yes2.py", line 62, in <module>
g.add_edge(row_index,col_index, b)
File "yes2.py", line 27, in add_edge
self.adj[u].append(edge)
KeyError: 0
The two parts with errors are
def add_edge(self, u, v, w=0):
if u == v:
raise ValueError("u == v")
edge = Edge(u,v,w)
redge = Edge(v,u,0)
edge.redge = redge
redge.redge = edge
self.adj[u].append(edge) #### LINE 27 ####
self.adj[v].append(redge)
self.flow[edge] = 0
self.flow[redge] = 0
and
g = FlowNetwork()
map(g.add_vertex, ['0','1','2','3','4','5','6'])
with open('network.txt', "r") as file:
for row_index, row in enumerate(file):
for col_index, value in enumerate(row.split(",")):
b = int(value)
if b != 0:
g.add_edge(row_index,col_index, b) ### LINE 62 ####
And here is the completed code, as without this it may be difficult to see what is happening
class Edge(object):
def __init__(self, u, v, w):
self.source = u
self.sink = v
self.capacity = w
def __repr__(self):
return "%s->%s:%s" % (self.source, self.sink, self.capacity)
class FlowNetwork(object):
def __init__(self):
self.adj = {}
self.flow = {}
def add_vertex(self, vertex):
self.adj[vertex] = []
def get_edges(self, v):
return self.adj[v]
def add_edge(self, u, v, w=0):
if u == v:
raise ValueError("u == v")
edge = Edge(u,v,w)
redge = Edge(v,u,0)
edge.redge = redge
redge.redge = edge
self.adj[u].append(edge)
self.adj[v].append(redge)
self.flow[edge] = 0
self.flow[redge] = 0
def find_path(self, source, sink, path):
if source == sink:
return path
for edge in self.get_edges(source):
residual = edge.capacity - self.flow[edge]
if residual > 0 and not (edge,residual) in path:
result = self.find_path( edge.sink, sink, path + [(edge,residual)] )
if result != None:
return result
def max_flow(self, source, sink):
path = self.find_path(source, sink, [])
while path != None:
flow = min(res for edge,res in path)
for edge,res in path:
self.flow[edge] += flow
self.flow[edge.redge] -= flow
path = self.find_path(source, sink, [])
return sum(self.flow[edge] for edge in self.get_edges(source))
g = FlowNetwork()
map(g.add_vertex, ['0','1','2','3','4','5','6'])
with open('network.txt', "r") as file:
# enumerate allows you to iterate through the list with an index and an object
for row_index, row in enumerate(file):
# split allows you to break a string apart with a string key
for col_index, value in enumerate(row.split(",")):
#convert value from string to int
b = int(value)
if b != 0:
g.add_edge(row_index,col_index, b)
print g.max_flow('1','6')
Many thanks for your time, much appreciated.
The error you're getting is that self.adj doesn't already have a key 0. You're trying to append to a list that doesn't exist yet.
Consider using a defaultdict instead, replacing this line (in __init__):
self.adj = {}
with this:
self.adj = defaultdict(list)
You'll need to import at the top:
from collections import defaultdict
Now rather than raise a KeyError, self.adj[0].append(edge) will create a list automatically to append to.
The defaultdict solution is better.
But for completeness you could also check and create empty list before the append.
Add the + lines:
+ if not u in self.adj.keys():
+ self.adj[u] = []
self.adj[u].append(edge)
.
.
This error only occurs when your list or dictionary is not available in the local function.
Try this:
class Flonetwork(object):
    """Container for a flow network's adjacency lists and edge flows."""

    def __init__(self, adj=None, flow=None):
        """Store the given mappings, creating empty ones when omitted.

        Parameters:
            adj: mapping used as the adjacency table; a new dict if ``None``.
            flow: mapping used as the flow table; a new dict if ``None``.
        """
        # ``{}`` as a default would be created once and shared by every
        # instance, so use None and build a fresh dict per instance.
        self.adj = {} if adj is None else adj
        self.flow = {} if flow is None else flow

Working with nested dictionaries and formatting for display

I have a partial answer from here Construct a tree from list os file paths (Python) - Performance dependent
My specific problem requires me to go from
this
dir/file 10
dir/dir2/file2 20
dir/dir2/file3 10
dir/file3 10
dir3/file4 10
dir3/file5 10
To
dir/ **50**
dir2/ **30**
file2
file3
file
file3
dir3/ **20**
file4
file5
Basically the numbers at the end are the file sizes and
I have been trying to figure out how to display the size of all the files to the parent directory
Edit:
r = re.compile(r'(.+\t)(\d+)')
def prettify(d, indent=0):
for key, value in d.iteritems():
ss = 0
if key == FILE_MARKER:
if value:
for each in value:
mm = r.match(each)
ss += int(mm.group(2))
print ' ' * indent + each
***print ' ' * indent + format_size(ss)***
else:
print ' ' * indent + str(key)
if isinstance(value, dict):
addSizes(value, indent+1)
else:
print ' ' * (indent+1) + str(value)
This is mac's answer from the above link which i edited to use regExp
Solutions that occurred to me led me to create a new dict or adding an inner function.
I have lost my whole day and wished i had asked for help earlier in the day.
Please help.
Not the most elegant thing in the world, but this should get you where you need to be. You'll need to change the tree creation function to deal with whatever form of input you are getting. Once the tree is generated it's just using a recursive tree traversal to form the output.
import re
input_dirs = """dir/file 10
dir/dir2/file2 20
dir/dir2/file3 10
dir/file 10
dir3/file4 10
dir3/file5 10
dir/dir2/dir4/file2 10"""
def create_file_tree(input_string):
    """Build a nested dict of directories and file sizes from a text listing.

    Parameters:
        input_string: one ``<path> <size>`` entry per line, with ``/`` between
            path components and an integer size after the last whitespace.
            Blank lines are ignored.

    Returns:
        A dict in which each directory maps to a nested dict and each file
        name maps to its integer size.

    Raises:
        ValueError: if a non-blank line has no path, or its size is not an
            integer.
    """
    dir_dict = {}
    for file_path in input_string.splitlines():
        if not file_path.strip():
            continue
        # Split on the LAST run of whitespace so names may contain spaces.
        pieces = file_path.rsplit(None, 1)
        components = [part for part in pieces[0].split('/') if part]
        if len(pieces) != 2 or not components:
            raise ValueError('expected "<path> <size>", got %r' % (file_path,))
        size = int(pieces[1])
        node = dir_dict
        for directory in components[:-1]:
            node = node.setdefault(directory, {})
        node[components[-1]] = size
    return dir_dict
def pretty_file_tree(file_tree):
    """Print *file_tree* as an indented listing with per-directory totals.

    Parameters:
        file_tree: nested dict in which a dict value is a directory and any
            other value is a file size.

    Entries are listed in sorted key order, one space of indentation per
    nesting level; a directory line reads ``name **total**``, a file line
    shows only the name.
    """
    def traverse(sub_dict, indent=0, total=0):
        """Return ``(total_size, rendered_text)`` for *sub_dict*."""
        indent += 1
        pad = ' ' * indent
        rendered = []
        for key in sorted(sub_dict):
            entry = sub_dict[key]
            if isinstance(entry, dict):
                sub_total, sub_text = traverse(entry, indent, 0)
                total += sub_total
                rendered.append(
                    pad + key + ' ' + '**' + str(sub_total) + '**' + '\n' + sub_text)
            else:
                rendered.append(pad + key + '\n')
                total += entry
        # Join once at the end instead of growing a string inside the loop.
        return total, ''.join(rendered)

    print(traverse(file_tree)[1])
pretty_file_tree(create_file_tree(input_dirs))
Sorry it's not following the code you posted, but i'd begun to produce this before the edit...
As you process the input build a string with place holders (%d) for the numbers, then print out the string.

Categories

Resources