I want to transform a DOCX file using the docx library. Every time I run it, I get this error:
OSError: [Errno 9] Bad file descriptor
The code is :
from docx import Document
def bionify(path_to_text: str, output_path: str = 'sample_output.docx',
            bold_chars: int = 2) -> None:
    """Write a "bionic reading" copy of a DOCX file.

    Every paragraph of the input document is re-created in a new document
    in which the first ``bold_chars`` characters of each space-separated
    word are bold and the remaining characters are explicitly non-bold.
    Only the paragraph text is carried over.

    Args:
        path_to_text: Path of the DOCX file to read.
        output_path: Path the generated document is saved to.
        bold_chars: Number of leading characters of each word to embolden.
            Negative values are treated as 0.
    """
    bold_chars = max(bold_chars, 0)
    doc = Document(path_to_text)
    new_doc = Document()
    for paragraph in doc.paragraphs:
        new_paragraph = new_doc.add_paragraph()
        for index, word in enumerate(paragraph.text.split(' ')):
            if index:
                # The separator goes between words only, so the copied
                # paragraph does not gain a trailing space.
                new_paragraph.add_run(' ')
            head, tail = word[:bold_chars], word[bold_chars:]
            # Two runs per word (bold prefix, plain remainder) instead of
            # one run per character; empty pieces get no run at all.
            if head:
                new_paragraph.add_run(head).bold = True
            if tail:
                new_paragraph.add_run(tail).bold = False
    new_doc.save(output_path)
if __name__ == '__main__':
    # Path of the DOCX document to read from; edit it to point at your own file.
    source_document = r'C:\Users\###\Desktop\bionic python reader transformer\BionicTexterizer\sample_input.docx'
    bionify(source_document)
I have tried changing the destination, the Python package, and the Python version, but every time I get OSError: [Errno 9] Bad file descriptor
Complete traceback:
Traceback (most recent call last):
File "c:\Users\####\Desktop\bionic python reader transformer\BionicTexterizer\main.py", line 62, in <module>
bionify(r'C:\Users\####\Desktop\bionic python reader transformer\BionicTexterizer\sample_input.docx')
File "c:\Users\####\Desktop\bionic python reader transformer\BionicTexterizer\main.py", line 57, in bionify
new_doc.save('sample_output.docx')
File "C:\Python310\lib\site-packages\docx\document.py", line 135, in save
self._part.save(path_or_stream)
File "C:\Python310\lib\site-packages\docx\parts\document.py", line 111, in save
self.package.save(path_or_stream)
File "C:\Python310\lib\site-packages\docx\opc\package.py", line 172, in save
PackageWriter.write(pkg_file, self.rels, self.parts)
File "C:\Python310\lib\site-packages\docx\opc\pkgwriter.py", line 33, in write
PackageWriter._write_content_types_stream(phys_writer, parts)
File "C:\Python310\lib\site-packages\docx\opc\pkgwriter.py", line 45, in _write_content_types_stream
phys_writer.write(CONTENT_TYPES_URI, cti.blob)
File "C:\Python310\lib\site-packages\docx\opc\phys_pkg.py", line 155, in write
self._zipf.writestr(pack_uri.membername, blob)
File "C:\Python310\lib\zipfile.py", line 1810, in writestr
with self.open(zinfo, mode='w') as dest:
File "C:\Python310\lib\zipfile.py", line 1176, in close
self._fileobj.seek(self._zinfo.header_offset)
OSError: [Errno 9] Bad file descriptor
Exception ignored in: <function ZipFile.__del__ at 0x0000022D9BF4BEB0>
Traceback (most recent call last):
File "C:\Python310\lib\zipfile.py", line 1815, in __del__
self.close()
File "C:\Python310\lib\zipfile.py", line 1837, in close
self._fpclose(fp)
File "C:\Python310\lib\zipfile.py", line 1937, in _fpclose
fp.close()
Windows 11. It is a problem with Windows 11: I have run the code without any problems on Windows 10. There seem to be some package permission issues.
Related
I have a couple of excel files I want to merge into one.
I need the second column on all the files to be copied into separate columns in a new Microsoft Excel file.
For this, I am using the openpyxl library in a python script.
This is my code:
import os
from openpyxl import load_workbook
def mergeDataFiles():
    """Copy column 2 of every source workbook into one destination workbook.

    Each file found under the source folder is opened with ``load_workbook``
    and its ``Data`` sheet is read. The N-th file fills column N of
    ``Sheet1`` in the destination workbook: row 1 receives the file name and
    rows 2-500 receive the values of rows 1-499 of the source's second
    column. The destination workbook (``<source folder>.xlsx``) must already
    exist; it is loaded, updated and saved back to the same path.
    """
    source_dir = "C:\\Users\\ethan\\Desktop\\Benzoyl Chloride\\Benzoyl Chloride"

    # source excel files, kept as (directory, file name) pairs so that files
    # in sub-folders are later opened from where they actually live. The loop
    # variables are deliberately distinct from ``source_dir``: rebinding the
    # root path inside the walk would corrupt every path built afterwards.
    origin_files = [
        (dirpath, file_name)
        for dirpath, _subdirs, files in os.walk(source_dir)
        for file_name in files
    ]

    # destination excel file
    destination_file = source_dir + ".xlsx"
    destination_workbook = load_workbook(destination_file)
    destination_sheet = destination_workbook["Sheet1"]

    # copy data from source files to destination file; enumerate from 1 so
    # that every file, including the last one, gets its own column
    for column, (dirpath, file_name) in enumerate(origin_files, start=1):
        origin_workbook = load_workbook(os.path.join(dirpath, file_name))
        origin_sheet = origin_workbook['Data']
        destination_sheet.cell(row=1, column=column).value = file_name
        for row in range(1, 500):
            # source row N lands on destination row N + 1, below the header
            value = origin_sheet.cell(row=row, column=2).value
            destination_sheet.cell(row=row + 1, column=column).value = value

    # saving the destination excel file
    destination_workbook.save(destination_file)
# Run the merge only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    mergeDataFiles()
When I run the code, I get an error on the last line in the function: OSError: [Errno 9] Bad file descriptor.
Full traceback:
C:\Users\ethan\.venv\Scripts\python.exe "C:/Users/ethan/Coding/Python/Copy Excel Data/main.py"
Traceback (most recent call last):
File "C:\Users\ethan\Coding\Python\Copy Excel Data\main.py", line 32, in <module>
mergeDataFiles()
File "C:\Users\ethan\Coding\Python\Copy Excel Data\main.py", line 28, in mergeDataFiles
destination_workbook.save(destination_file)
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\workbook\workbook.py", line 407, in save
save_workbook(self, filename)
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\Users\ethan\.venv\Lib\site-packages\openpyxl\writer\excel.py", line 67, in write_data
archive.writestr(ARC_APP, tostring(props.to_tree()))
File "C:\Program Files\Python311\Lib\zipfile.py", line 1830, in writestr
with self.open(zinfo, mode='w') as dest:
File "C:\Program Files\Python311\Lib\zipfile.py", line 1204, in close
self._fileobj.seek(self._zinfo.header_offset)
OSError: [Errno 9] Bad file descriptor
Exception ignored in: <function ZipFile.__del__ at 0x000001D101443D80>
Traceback (most recent call last):
File "C:\Program Files\Python311\Lib\zipfile.py", line 1870, in __del__
self.close()
File "C:\Program Files\Python311\Lib\zipfile.py", line 1892, in close
self._fpclose(fp)
File "C:\Program Files\Python311\Lib\zipfile.py", line 1992, in _fpclose
fp.close()
OSError: [Errno 9] Bad file descriptor
Process finished with exit code 1
I have tried changing the file names and locations, having the destination file open and closed, scouring the internet for solutions and at this point I'm not sure what else I can try.
I am running the code on Windows 10 22H2, with an intel i5 cpu.
Please assist me with this issue, if you know how to solve it.
I'm making a console based game that saves input history, to help with debugging I created a function that will automatically input commands on start
def __readfile(self) -> None:
    """Replay the commands stored in ``insts.txt`` through ``parse_input``.

    The file is read from the current working directory and split on
    newlines; each resulting line is passed to ``self.parse_input`` in
    order. ``self.__initfile`` is True for the duration of the replay and
    False afterwards. If the file does not exist, a message is printed and
    nothing is replayed.
    """
    try:
        with open("insts.txt", "r") as f:
            lines = f.read().split("\n")
    except FileNotFoundError:
        # open() raises when the file is missing, so the missing-file case
        # has to be handled here rather than by inspecting ``lines``.
        print("attempted to read insts.txt for instructions, could not find file")
        return
    self.__initfile = True
    try:
        for line in lines:
            self.parse_input(line)
    finally:
        # Clear the flag even if a replayed command raises, so the object is
        # not left permanently in "replaying" mode.
        self.__initfile = False
in the instance of the error "self.parse_input" ultimately leads to "readline.read_history_file" and none of the code in between the two has any effect on the error
but it gives this error message:
Traceback (most recent call last):
File "main.py", line 9, in <module>
game.start()
File "/Users/tristans/Documents/GitHub/console-rpg/classes.py", line 1353, in start
self.__readfile()
File "/Users/tristans/Documents/GitHub/console-rpg/classes.py", line 1345, in __readfile
self.parse_input(line)
File "/Users/tristans/Documents/GitHub/console-rpg/classes.py", line 1223, in parse_input
self._parse_dialog(text)
File "/Users/tristans/Documents/GitHub/console-rpg/classes.py", line 1128, in _parse_dialog
self._parse_dialog("leave")
File "/Users/tristans/Documents/GitHub/console-rpg/classes.py", line 1101, in _parse_dialog
self._load_hist_scope()
File "/Users/tristans/Documents/GitHub/console-rpg/classes.py", line 1331, in _load_hist_scope
readline.read_history_file("history.txt")
PermissionError: [Errno 1] Operation not permitted
I have looked everywhere for an answer to where this error is coming from and can't find one
I've tried changing the file read operation from the "with open("insts.txt")" to a hardcoded list but that didn't work, os.access("history.txt", os.R_OK) also returns true when called just before "readline.read_history_file"
It appears to be a Mac-specific issue with readline. According to this answer, you need to use gnureadline on the Mac, rather than readline.
import gnureadline as readline
I'm having difficulty getting the character encoding of a file. The offending code is here:
rawdata = open(file, "r").read()
encoding = chardet.detect(rawdata.encode())['encoding']
#return encoding
(Code courtesy of Ashish Greycube: https://github.com/frappe/frappe/pull/8061)
I've copied a segment of the csv file I'm working on as a more manageable 'test' file. When I run the above code on it, it says it's 'ascii'. That might be part of the problem. Basically, I've found out that I need to know the encoding type for this program.
The error report is as follows:
Traceback (most recent call last):
File ".\program.py", line 26, in <module>
my_encoding = get_file_encoding(data)
File ".\program.py", line 20, in get_file_encoding
encoding = chardet.detect(rawdata.encode())['encoding']
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\__init__.py", line 38, in detect
detector.feed(byte_str)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\universaldetector.py", line 211, in feed
if prober.feed(byte_str) == ProbingState.FOUND_IT:
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\charsetgroupprober.py", line 71, in feed
state = prober.feed(byte_str)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\hebrewprober.py", line 227, in feed
byte_str = self.filter_high_byte_only(byte_str)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\charsetprober.py", line 63, in filter_high_byte_only
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\re.py", line 208, in sub
return _compile(pattern, flags).sub(repl, string, count)
MemoryError
PS C:\Users\stsho\dev\csv_sanitizer_1.2> python .\program.py
Please enter filename: ANQAR
Traceback (most recent call last):
File ".\program.py", line 26, in <module>
my_encoding = get_file_encoding(data)
File ".\program.py", line 19, in get_file_encoding
rawdata = open(file, "r").read()
FileNotFoundError: [Errno 2] No such file or directory: 'ANQAR.csv'
PS C:\Users\stsho\dev\csv_sanitizer_1.2> python .\program.py
Please enter filename: ANQAR
Traceback (most recent call last):
File ".\program.py", line 26, in <module>
my_encoding = get_file_encoding(data)
File ".\program.py", line 19, in get_file_encoding
rawdata = open(file, "r").read()
FileNotFoundError: [Errno 2] No such file or directory: 'ANQAR.csv'
PS C:\Users\stsho\dev\csv_sanitizer_1.2> python .\program.py
Please enter filename: ANQAR
Traceback (most recent call last):
File ".\program.py", line 26, in <module>
my_encoding = get_file_encoding(data)
File ".\program.py", line 20, in get_file_encoding
encoding = chardet.detect(rawdata.encode())['encoding']
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\__init__.py", line 38, in detect
detector.feed(byte_str)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\universaldetector.py", line 211, in feed
if prober.feed(byte_str) == ProbingState.FOUND_IT:
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\charsetgroupprober.py", line 71, in feed
state = prober.feed(byte_str)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\hebrewprober.py", line 227, in feed
byte_str = self.filter_high_byte_only(byte_str)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\site-packages\chardet\charsetprober.py", line 63, in filter_high_byte_only
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
File "C:\Users\stsho\AppData\Local\Programs\Python\Python38-32\lib\re.py", line 208, in sub
return _compile(pattern, flags).sub(repl, string, count)
MemoryError
A MemoryError usually implies you're trying to load data too large for your memory, either the address space or available storage (RAM + swap/page file space). You seem to be running a 32 bit build of Python, which would limit you to 2 GB of address space; I'd suggest switching to a 64 bit build, as most machines nowadays have more than 4 GB of RAM, and not using a 64 bit build means you can't use most of it.
Additional issue: When you read the file in text mode, you're already assuming you know the encoding. Don't do that. Open it in binary mode ("rb") to get the raw, unmodified bytes, so chardet gets them directly before you try decoding them in a possibly incorrect encoding.
This works:
import chardet
# Read the raw bytes so chardet sees the file exactly as stored; the with
# block closes the file handle as soon as the read is done.
with open(file, "rb") as f:
    rawdata = f.read()
encoding = chardet.detect(rawdata)['encoding']
As @ShadowRanger said, try a 64-bit build of Python, and don't read the file in text mode. Try this:
# Binary mode already yields bytes, so the data is handed to chardet as-is
# (bytes objects have no .encode() method). The with block closes the file.
with open(file, "rb") as f:
    rawdata = f.read()
encoding = chardet.detect(rawdata)['encoding']
and make sure your file is present and write its name correctly.
I am trying to read a password protected word document on Python using zipfile.
The following code works with a non-password protected document, but gives an error when used with a password protected file.
try:
from xml.etree.cElementTree import XML
except ImportError:
from xml.etree.ElementTree import XML
import zipfile
# Password handed to the archive before its members are read.
psw = "1234"
# Namespace prefix (in braces) that qualifies the WordprocessingML tag names.
WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
# Fully qualified names of the paragraph ("p") and text ("t") elements.
PARA, TEXT = (WORD_NAMESPACE + tag for tag in ('p', 't'))
def get_docx_text(path):
    """Return the text of a .docx file, with paragraphs separated by blank lines.

    The file is opened as a zip archive, ``word/document.xml`` is read and
    parsed, and the text nodes of each paragraph element are concatenated.
    Paragraphs without any text are skipped.

    Args:
        path: Path of the .docx file.

    Returns:
        The paragraph texts joined with ``'\\n\\n'``.

    Raises:
        zipfile.BadZipFile: If ``path`` is not a zip archive (spelled
            ``BadZipfile`` on older Python versions).
    """
    # ZipFile expects the password as bytes on Python 3.
    password = psw if isinstance(psw, bytes) else psw.encode("utf-8")
    # The context manager closes the archive even when reading fails.
    with zipfile.ZipFile(path, "r") as document:
        document.setpassword(password)
        # Only the main document part is needed, so that single member is
        # read directly instead of unpacking the whole archive to disk.
        xml_content = document.read('word/document.xml')
    tree = XML(xml_content)
    paragraphs = []
    # Element.iter() is the replacement for getiterator(), which no longer
    # exists in Python 3.9+.
    for paragraph in tree.iter(PARA):
        texts = [node.text for node in paragraph.iter(TEXT) if node.text]
        if texts:
            paragraphs.append(''.join(texts))
    return '\n\n'.join(paragraphs)
When running get_docx_text() with a password protected file, I received the following error:
Traceback (most recent call last):
File "<ipython-input-15-d2783899bfe5>", line 1, in <module>
runfile('/Users/username/Workspace/Python/docx2txt.py', wdir='/Users/username/Workspace/Python')
File "/Applications/Spyder-Py2.app/Contents/Resources/lib/python2.7/spyderlib/widgets/externalshell/sitecustomize.py", line 680, in runfile
execfile(filename, namespace)
File "/Applications/Spyder-Py2.app/Contents/Resources/lib/python2.7/spyderlib/widgets/externalshell/sitecustomize.py", line 78, in execfile
builtins.execfile(filename, *where)
File "/Users/username/Workspace/Python/docx2txt.py", line 41, in <module>
x = get_docx_text("/Users/username/Desktop/file.docx")
File "/Users/username/Workspace/Python/docx2txt.py", line 23, in get_docx_text
document = zipfile.ZipFile(path, "r")
File "zipfile.pyc", line 770, in __init__
File "zipfile.pyc", line 811, in _RealGetContents
BadZipfile: File is not a zip file
Does anyone have any advice to get this code to work?
I don't think this is an encryption problem, for two reasons:
Decryption is not attempted when the ZipFile object is created. Methods like ZipFile.extractall, extract, open, and read take an optional pwd parameter containing the password, but the object constructor / initializer does not.
Your stack trace indicates that the BadZipFile is being raised when you create the ZipFile object, before you call setpassword:
document = zipfile.ZipFile(path, "r")
I'd look carefully for other differences between the two files you're testing: ownership, permissions, security context (if you have that on your OS), ... even filename differences can cause a framework to "not see" the file you're working on.
Also --- the obvious one --- try opening the encrypted zip file with your zip-compatible command of choice. See if it really is a zip file.
I tested this by opening an encrypted zip file in Python 3.1, while "forgetting" to provide a password. I could create the ZipFile object (the variable zfile below) without any error, but got a RuntimeError --- not a BadZipFile exception --- when I tried to read a file without providing a password:
Traceback (most recent call last):
File "./zf.py", line 35, in <module>
main()
File "./zf.py", line 29, in main
print_checksums(zipfile_name)
File "./zf.py", line 22, in print_checksums
for checksum in checksum_contents(zipfile_name):
File "./zf.py", line 13, in checksum_contents
inner_file = zfile.open(inner_filename, "r")
File "/usr/lib64/python3.1/zipfile.py", line 903, in open
"password required for extraction" % name)
RuntimeError: File apache.log is encrypted, password required for extraction
I was also able to raise a BadZipfile exception, once by trying to open an empty file and once by trying to open some random logfile text that I'd renamed to a ".zip" extension. The two test files produced identical stack traces, down to the line numbers.
Traceback (most recent call last):
File "./zf.py", line 35, in <module>
main()
File "./zf.py", line 29, in main
print_checksums(zipfile_name)
File "./zf.py", line 22, in print_checksums
for checksum in checksum_contents(zipfile_name):
File "./zf.py", line 10, in checksum_contents
zfile = zipfile.ZipFile(zipfile_name, "r")
File "/usr/lib64/python3.1/zipfile.py", line 706, in __init__
self._GetContents()
File "/usr/lib64/python3.1/zipfile.py", line 726, in _GetContents
self._RealGetContents()
File "/usr/lib64/python3.1/zipfile.py", line 738, in _RealGetContents
raise BadZipfile("File is not a zip file")
zipfile.BadZipfile: File is not a zip file
This stack trace isn't exactly the same as yours --- mine has a call to _GetContents, and the pre-3.2 "small f" spelling of BadZipfile --- but they're close enough that I think this is the kind of problem you're dealing with.
I have successfully imported pydub,
but for the following code:
from pydub import AudioSegment
# Raw strings keep the Windows backslashes literal; "\m" is not a valid
# escape sequence and only worked by accident in a normal string.
song = AudioSegment.from_mp3(r"c:\mks.mp3")
# NOTE(review): this slice is never used - the export below writes the whole
# song. Export first_ten_seconds instead if only the clip is wanted.
first_ten_seconds = song[:10000]
song.export(r"d:\mks.mp3", format="mp3")
But it gives the following error:
python "C:\Users\mKs\Desktop\mks2.py"
Process started >>>
Traceback (most recent call last):
File "C:\Users\mKs\Desktop\mks2.py", line 2, in <module>
song=AudioSegment.from_mp3("c:\mks.mp3");
File "C:\Python27\lib\site-packages\pydub-0.5.2-py2.7.egg\pydub\audio_segment.py", line 194, in from_mp3
return cls.from_file(file, 'mp3')
File "C:\Python27\lib\site-packages\pydub-0.5.2-py2.7.egg\pydub\audio_segment.py", line 189, in from_file
return cls.from_wav(output)
File "C:\Python27\lib\site-packages\pydub-0.5.2-py2.7.egg\pydub\audio_segment.py", line 206, in from_wav
return cls(data=file)
File "C:\Python27\lib\site-packages\pydub-0.5.2-py2.7.egg\pydub\audio_segment.py", line 33, in __init__
raw = wave.open(StringIO(data), 'rb')
File "C:\Python27\lib\wave.py", line 498, in open
return Wave_read(f)
File "C:\Python27\lib\wave.py", line 163, in __init__
self.initfp(f)
File "C:\Python27\lib\wave.py", line 128, in initfp
self._file = Chunk(file, bigendian = 0)
File "C:\Python27\lib\chunk.py", line 63, in __init__
raise EOFError
EOFError
I would love to get help on this topic
The only issue that I see with your code is the trailing ";" at the end of the last 3 lines. Please remove those, and see if you still get the error.
In addition, make sure you have ffmpeg (http://www.ffmpeg.org/) installed. It is required to support all of the non-WAV file formats.
ADDED:
I think you have broken module dependencies in your python installation.
I have tried code that you provided above with python 2.7.2. It worked fine for me:
>>> from pydub import AudioSegment
>>> song = AudioSegment.from_wav('goodbye.wav')
>>> first_ten_seconds = song[:10000]
>>> song.export('goodbye1.wav',format='wav')
<open file 'goodbye1.wav', mode 'wb+' at 0x10cf2b270>