I have an Excel file with a ton of rows and a column containing HZ, NZ or SZ. Now this is erroring out, and I have no idea how to fix it.
# Translate the zone code held in column 3 of row `srow` into a region name.
region = ws.cell(row=srow, column=3)
zone_names = {"HZ": "Houston", "NZ": "North", "SZ": "South"}
if region.value in zone_names:
    # nregion is assigned only for the three known codes.
    nregion = zone_names[region.value]
It errors with the following messages:
Traceback (most recent call last):
File "PowerExp.py", line 18, in <module>
wb=load_workbook(filename = wbfile)
File "/usr/local/lib/python2.7/dist-packages/openpyxl/reader/excel.py", line 136, in load_workbook
_load_workbook(wb, archive, filename, use_iterators, keep_vba)
File "/usr/local/lib/python2.7/dist-packages/openpyxl/reader/excel.py", line 171, in _load_workbook
style_table = read_style_table(archive.read(ARC_STYLE))
File "/usr/local/lib/python2.7/dist-packages/openpyxl/reader/style.py", line 42, in read_style_table
font_list = parse_fonts(root, xmlns, color_index)
File "/usr/local/lib/python2.7/dist-packages/openpyxl/reader/style.py", line 160, in parse_fonts
font.size = font_node.find(QName(xmlns, 'sz').text).get('val')
AttributeError: 'NoneType' object has no attribute 'get'
I can't change the value and save the workbook as it is automatically retrieved from the email server and loaded into MySQL.
Related
This is my current code
import ebooklib
from ebooklib import epub
class EpubToTextClass:
    """Hold the path of an EPUB file and build a text string from its document items."""

    def __init__(self, dirToEpub=""):
        # Path of the EPUB file to read; defaults to an empty string.
        self.dirToEpub = dirToEpub

    def setDirToEpub(self, dirToEpub):
        """Replace the stored EPUB path."""
        self.dirToEpub = dirToEpub

    def convertEpubToText(self):
        """Read the book, list its ITEM_DOCUMENT items, and return the accumulated `text`."""
        # NOTE(review): the argument passed here is the bound method
        # `self.convertEpubToText` itself, not the stored path `self.dirToEpub`.
        book = epub.read_epub(self.convertEpubToText)
        items = list(book.get_items_of_type(ebooklib.ITEM_DOCUMENT))
        text = ""
        for i in items:
            # NOTE(review): assumes each item can be concatenated to a str -- confirm.
            text = text + i
        return text
# Create a converter, set the EPUB path on it, and print the conversion result.
ebpub = EpubToTextClass()
ebpub.setDirToEpub("-Some-EPUB.epub")
print(ebpub.convertEpubToText())
The goal of this part of the code is to convert an EPUB book to text via ebooklib, but this error keeps popping up, and I am not completely sure what the error is about.
If you would like to see my whole error log, here it is:
Traceback (most recent call last):
File "epubToText.py", line 22, in <module>
print(ebpub.convertEpubToText())
File "epubToText.py", line 13, in convertEpubToText
book = epub.read_epub(self.convertEpubToText)
File "/home/jeff/.local/lib/python3.8/site-packages/ebooklib/epub.py", line 1739, in read_epub
book = reader.load()
File "/home/jeff/.local/lib/python3.8/site-packages/ebooklib/epub.py", line 1397, in load
self._load()
File "/home/jeff/.local/lib/python3.8/site-packages/ebooklib/epub.py", line 1686, in _load
self.zf = zipfile.ZipFile(self.file_name, 'r', compression=zipfile.ZIP_DEFLATED, allowZip64=True)
File "/usr/lib/python3.8/zipfile.py", line 1269, in __init__
self._RealGetContents()
File "/usr/lib/python3.8/zipfile.py", line 1332, in _RealGetContents
endrec = _EndRecData(fp)
File "/usr/lib/python3.8/zipfile.py", line 264, in _EndRecData
fpin.seek(0, 2)
AttributeError: 'function' object has no attribute 'seek'
I figured it out later on.
The fix was to change this:
book = epub.read_epub(self.convertEpubToText)
to
book = epub.read_epub(self.dirToEpub)
I'm trying to download and then open an Excel file (a report) generated by a marketplace, using openpyxl.
import requests
import config
import openpyxl
# Download URL for the document, built from config.TOKEN.
link = 'https://api.telegram.org/file/bot' + config.TOKEN + '/documents/file_66.xlsx'


def save_open(link):
    """Download the file at `link`, save it locally, and print cell B2 of the active sheet."""
    # The local file name is the last '/'-separated component of the URL.
    filename = link.split('/')[-1]
    r = requests.get(link)
    with open(filename, 'wb') as new_file:
        new_file.write(r.content)
    # NOTE(review): the workbook is opened by a hard-coded name rather than
    # `filename`; the two coincide for the link defined above.
    wb = openpyxl.open ('file_66.xlsx')
    ws = wb.active
    cell = ws['B2'].value
    print (cell)


save_open(link)
After running this code I got the following:
Traceback (most recent call last):
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\base.py", line 55, in _convert
value = expected_type(value)
TypeError: Fill() takes no arguments
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Home\Documents\myPython\bot_WB\main.py", line 20, in <module>
save_open(link)
File "C:\Users\Home\Documents\myPython\bot_WB\main.py", line 14, in save_open
wb = openpyxl.open ('file_66.xlsx')
File "C:\Python 3.9\lib\site-packages\openpyxl\reader\excel.py", line 317, in load_workbook
reader.read()
File "C:\Python 3.9\lib\site-packages\openpyxl\reader\excel.py", line 281, in read
apply_stylesheet(self.archive, self.wb)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 198, in apply_stylesheet
stylesheet = Stylesheet.from_tree(node)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 103, in from_tree
return super(Stylesheet, cls).from_tree(node)
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\serialisable.py", line 103, in from_tree
return cls(**attrib)
File "C:\Python 3.9\lib\site-packages\openpyxl\styles\stylesheet.py", line 74, in __init__
self.fills = fills
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\sequence.py", line 26, in __set__
seq = [_convert(self.expected_type, value) for value in seq]
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\sequence.py", line 26, in <listcomp>
seq = [_convert(self.expected_type, value) for value in seq]
File "C:\Python 3.9\lib\site-packages\openpyxl\descriptors\base.py", line 57, in _convert
raise TypeError('expected ' + str(expected_type))
TypeError: expected <class 'openpyxl.styles.fills.Fill'>
[Finished in 1.6s]
If you open the file properties/details, you can see that this file was generated by "Go Excelize" (author: xuri). To process this file, you need to separate the code into two parts. First, download the file. Then you need to manually open it with MS Excel, save it, and close it (after this, "Go Excelize" switches to "Microsoft Excel" in the properties). Only after that can you run the second part of the code correctly, with no errors. Can anyone help me handle this problem?
I had the same problem, "TypeError('expected ' + str(expected_type))", using pandas.read_excel, which uses openpyxl. If I open the file, save it, and close it, it works with both pandas and openpyxl.
After further attempts I could open the file using "read_only=True" in openpyxl, but while iterating over the rows I would still get the error, though only after all the rows had been read, at the end of the file.
I believe it could be something at the EOF (end of file) that openpyxl has no way of handling.
Here is the code that I used to test and worked for me:
import openpyxl
# Load in read-only mode and collect the cell values of the first worksheet,
# one list per row.
wb = openpyxl.load_workbook(my_file_name, read_only=True)
ws = wb.worksheets[0]
lis = []
try:
    # Append row by row (rather than building the list in one expression) so
    # that rows read before a failure are kept.
    for row in ws.iter_rows():
        lis.append([cell.value for cell in row])
except TypeError:
    print('Skip error in EOF')
finally:
    # A read-only workbook keeps the underlying file open until it is
    # closed explicitly.
    wb.close()
Used openpyxl==3.0.10
Hey, I am using a Python script to create a ".json" file and I am getting the following error:
Traceback (most recent call last):
File "ngs_rawdata_config_creator.py", line 104, in <module>
per_lib = parse_per_lib(pd.read_csv(args.per_lib_input, dtype=str))
File "ngs_rawdata_config_creator.py", line 32, in parse_per_lib
per_lib_dict['lib_paths'] = assign_libpaths(lib_basepaths)
File "ngs_rawdata_config_creator.py", line 53, in assign_libpaths
libpaths_dict[lib] = basepath_to_filepathsdict(path, "*.fastq.gz", ".*_L(\d+)_R(\d+).*\.fastq\.gz")
File "ngs_rawdata_config_creator.py", line 73, in basepath_to_filepathsdict
if rmatch.group(0) == basename:
AttributeError: 'NoneType' object has no attribute 'group'
this is the part of the code
# For each path in all_fastqs, match its base name against capture_regex and
# store the path under the two captured groups (lane, then read).
for fq in all_fastqs:
    basename = os.path.basename(fq)
    rmatch = re.match(capture_regex, basename)
    # NOTE(review): re.match returns None when the pattern does not match;
    # rmatch is dereferenced here without a None check.
    # group(0) == basename holds only when the match spans the whole name.
    if rmatch.group(0) == basename:
        lane = rmatch.group(1)
        read = rmatch.group(2)
        readgroups[lane][read] = fq
If re.match doesn't get a match, it returns None. You need to check for that:
if rmatch and rmatch.group(0) == basename:
I am trying to acquire all files from a windows VHD.
When calling .info or .info.meta.type on a file entry it doesn't always work and throws an error. Is there any way to retrieve these files?
filesystemObject = pytsk3.FS_Info(imagehandle, offset=(partition.start * metric.block_size))
except:
print "Partition has no supported file system"
continue
print "File System Type Dectected ", filesystemObject.info.ftype
directoryObject = filesystemObject.open_dir(path=dirPath)
# Iterate over the directory entries, skipping entries that lack the
# info / info.name / info.name.name attributes and the "." and ".." entries.
for entryObject in directoryObject:
    if (not hasattr(entryObject, "info") or
        not hasattr(entryObject.info, "name") or
        not hasattr(entryObject.info.name, "name") or
        entryObject.info.name.name in [".", ".."]):
        continue
    try:
        f_type = entryObject.info.meta.type
    except:
        # Any failure while reading the type is reported, counted in both
        # total_files and failed_files, and the entry is skipped.
        print "Cannot retrieve type of", entryObject.info.name.name
        metric.total_files += 1
        metric.failed_files += 1
        continue
Traceback:
> Traceback (most recent call last): File "/Users/paulledwith/PycharmProjects/FYP/Server.py", line 352, in <module>
directoryRecurse(directoryObject, []) File "/Users/paulledwith/PycharmProjects/FYP/Server.py", line 185, in directoryRecurse
directoryRecurse(sub_directory, parentPath) File "/Users/paulledwith/PycharmProjects/FYP/Server.py", line 185, in directoryRecurse
directoryRecurse(sub_directory, parentPath) File "/Users/paulledwith/PycharmProjects/FYP/Server.py", line 185, in directoryRecurse
directoryRecurse(sub_directory, parentPath) File "/Users/paulledwith/PycharmProjects/FYP/Server.py", line 172, in directoryRecurse
print entryObject.info.meta.size AttributeError: 'NoneType' object has no attribute 'size'
This does retrieve about 90% of the files on the 5 GB VHD I am working on, but 90% is as good as 0.
I am using gdata in Python to read the rows of a specific worksheet from a public spreadsheet. I tried the following code:
client = gdata.spreadsheet.service.SpreadsheetsService()
key = 'xxxxxxxxxxxxxxxxxxxxxxxxxx'
# Request the worksheets feed with public visibility and the "values" projection.
worksheets_feed = client.GetWorksheetsFeed(key, visibility='public', projection='values')
# print worksheets_feed
for entry in worksheets_feed.entry:
    print entry.title.text
    # The worksheet id is the text after the last '/' in the entry's id.
    worksheet_id = entry.id.text.rsplit('/',1)[1]
    # NOTE(review): unlike the worksheets request above, this call passes no
    # visibility or projection arguments.
    rows = client.GetListFeed(key, worksheet_id).entry
I am getting the following error:
Traceback (most recent call last):
File "lib/scrapper.py", line 89, in <module>
start_it()
File "lib/scrapper.py", line 56, in start_it
rows = client.GetListFeed(key, worksheet_id).entry
File "/Library/Python/2.7/site-packages/gdata/spreadsheet/service.py", line 252, in GetListFeed
converter=gdata.spreadsheet.SpreadsheetsListFeedFromString)
File "/Library/Python/2.7/site-packages/gdata/service.py", line 1074, in Get
return converter(result_body)
File "/Library/Python/2.7/site-packages/gdata/spreadsheet/__init__.py", line 474, in SpreadsheetsListFeedFromString
xml_string)
File "/Library/Python/2.7/site-packages/atom/__init__.py", line 93, in optional_warn_function
return f(*args, **kwargs)
File "/Library/Python/2.7/site-packages/atom/__init__.py", line 127, in CreateClassFromXMLString
tree = ElementTree.fromstring(xml_string)
File "<string>", line 125, in XML
cElementTree.ParseError: no element found: line 1, column 0
Can somebody tell me where I am going wrong?
Try:
# Collect the id of every worksheet in the spreadsheet.
worksheet_feed = spreadsheet.GetWorksheetsFeed(spreadsheetId)
worksheetfeed = []
for worksheet in worksheet_feed.entry:
    # The worksheet id is the text after the last '/' of the entry id, i.e.
    # index 1 of the rsplit result; index 0 is the URL prefix.
    worksheetfeed.append(worksheet.id.text.rsplit('/', 1)[1])
list_feed = spreadsheet.GetListFeed(spreadsheetId, worksheetfeed[0])#get first worksheet
# Turn each row of the list feed into a plain dict of str keys to str values.
entryList = []
for entry in list_feed.entry:
    tempDict = {}
    for key in entry.custom:
        tempDict[str(key)] = str(entry.custom[key].text)
    # Keep the row; without this the dict is discarded on the next iteration.
    entryList.append(tempDict)
where spreadsheetId has been defined and you have been previously authenticated.