Decrypt PDF with for loop - python

I am trying to run a for loop that scans lines of a txt and attempts to decrypt a PDF on every line. When it hits the right word, it should break and stop the loop. Here is my code. Please help.
import PyPDF2
dictionary = open('C:\\Users\\Admin\\Desktop\\Final Supporting Files\\dictionary.txt')
readDictionary = dictionary.readlines()
pdfReader = PyPDF2.PdfFileReader(
open('C:\\Users\\Admin\\Desktop\\Final Supporting Files\\encryptedhackme.pdf', 'rb'))
for line in readDictionary:
print(f'Do not stop the program, testing {line}')
if pdfReader.decrypt(line) == 1:
print(pdfReader.getPage(0))
break
elif pdfReader.decrypt(line.lower()) == 1:
print(pdfReader.getPage(0))
break
elif pdfReader.decrypt(line.title()) == 1:
print(pdfReader.getPage(0))
break
When I run this code, it is giving me an error that the line variable is an ordinal not in range. It returns an error message at the first line and I truly do not know why it is not working. I double checked and there doesn't seem to be any problem with the dictionary.txt file, it is many lines of words in plain text. Is there any insight as to why it isn't working?
Here is the error message
Traceback (most recent call last):
File "C:\Users\Admin\Desktop\Pycharm Projects\Final\PDF_Hacker.py", line 44, in <module>
main()
File "C:\Users\Admin\Desktop\Pycharm Projects\Final\PDF_Hacker.py", line 5, in main
hack()
File "C:\Users\Admin\Desktop\Pycharm Projects\Final\PDF_Hacker.py", line 31, in hack
if pdfReader.decrypt(key) == 1:
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\PyPDF2\pdf.py", line 1987, in decrypt
return self._decrypt(password)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\PyPDF2\pdf.py", line 2017, in _decrypt
val = utils.RC4_encrypt(new_key, val)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\PyPDF2\utils.py", line 181, in RC4_encrypt
retval += b_(chr(ord_(plaintext[x]) ^ t))
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\site-packages\PyPDF2\utils.py", line 238, in b_
r = s.encode('latin-1')
UnicodeEncodeError: 'latin-1' codec can't encode character '\u014a' in position 0: ordinal not in range(256)

Related

Beginner trying to figure out error and sorting word count

I have to be able to read a text file and count the number of times the words in a line occur. Plus I have to be able to sort the words from most to least occurring. My code so far is below and I keep getting this error:
Traceback (most recent call last):
File "/Users/lritter/Documents/wordcount.py", line 9, in <module>
lines = file.readlines()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x89 in position 7927: invalid start byte
Code:
import os
count = {}
os.chdir('/Users/lritter/Desktop/Python')
item = int(input('Which line would you like to evaluate? '))
print('You entered: ', item)
with open('Obama_speech.txt') as file:
lines = file.readlines()
message = (lines[(item)])
message2 = message.split
for word in message2():
if len(word) >= 5:
count[word] = count.get(word,0)+1
print(count)

read clm chunk from wav file using python wavfile

i am using the enhanced wavfile.py library, and i want to use it to read serum-style wavetables. i know that these files use a 'clm' block to store cue points, but i am having trouble with reading these using the library
right now i'm just trying to read the file (i'll do something with it later); here is my code:
import wavfile as wf
wf.read('wavetable.wav')
when i run the script, i get this error:
[my dir]/wavfile.py:223: WavFileWarning: Chunk b'clm ' skipped
warnings.warn("Chunk " + str(chunk_id) + " skipped", WavFileWarning)
[my dir]/wavfile.py:223: WavFileWarning: Chunk b'' skipped
warnings.warn("Chunk " + str(chunk_id) + " skipped", WavFileWarning)
Traceback (most recent call last):
File "[my dir]/./test.py", line 5, in <module>
wf.read('wavetable.wav')
File "[my dir]/wavfile.py", line 228, in read
_skip_unknown_chunk(fid)
File "[my dir]/wavfile.py", line 112, in _skip_unknown_chunk
size = struct.unpack('<i', data)[0]
struct.error: unpack requires a buffer of 4 bytes
is it even possible to do this using the library? if not, how could i modify the library to make this work?
bear with me, i'm new to working with files and python in general
UPDATE:
here's the output after i add madison courto's code:
Traceback (most recent call last):
File "[my dir]/./test.py", line 5, in <module>
wf.debug('wavetable.wav')
File "[my dir]/wavfile.py", line 419, in debug
format_str = format.decode("utf-8")
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 1: invalid start byte
and here is the wavetable i'm testing; hopefully sndup left it intact
Adding these conditions to the read function returns a dict of markers, it seems that one of the markers is currupt so I added except pass, it's a bit janky but works.
elif chunk_id == b'':
break
elif chunk_id == b'clm ':
str1 = fid.read(8)
size, numcue = struct.unpack('<ii', str1)
for c in range(numcue):
try:
str1 = fid.read(24)
idx, position, datachunkid, chunkstart, blockstart, sampleoffset = struct.unpack(
'<iiiiii', str1)
# _cue.append(position)
_markersdict[idx][
'position'] = position # needed to match labels and markers
except:
pass

Python 2: ASCII issue when writing in Excel files

Problem sketch:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)
I'm trying to write a simple python program that can auto-complete the blank units with data appeared in the same column above.
Since there're Chinese characters in the file, I've thought of the issue of ASCII, so I tried to change it into UTF-8.
Codes shown below:
#!/usr/bin/python
# -*- coding:utf-8 -*-
from xlrd import open_workbook
from xlwt import Workbook
from xlutils.copy import copy
rb = open_workbook('data.xls', 'utf-8')
wb = copy(rb)
sh = wb.get_sheet(0)
s = rb.sheet_by_index(0)
cols = s.ncols
rows = s.nrows
temp = 0
for cx in range(cols):
for rx in range(rows):
if s.cell_value(rowx = rx, colx = cx).encode('utf-8') != "":
temp = s.cell_value(rowx = rx, colx = cx).encode('utf-8')
print(temp) #to verify
else:
sh.write(rx, cx, temp)
wb.save('data.xls')
However, the issue still happened. Result in terminal:
ZishengdeMacBook-Pro:Downloads zisheng$ python form.py
(printed result ignored, and it looked good)
Traceback (most recent call last):
File "form.py", line 41, in <module>
wb.save('data.xls')
File "/Users/zisheng/anaconda/lib/python2.7/site-packages/xlwt/Workbook.py", line 710, in save
doc.save(filename_or_stream, self.get_biff_data())
File "/Users/zisheng/anaconda/lib/python2.7/site-packages/xlwt/Workbook.py", line 674, in get_biff_data
shared_str_table = self.__sst_rec()
File "/Users/zisheng/anaconda/lib/python2.7/site-packages/xlwt/Workbook.py", line 636, in __sst_rec
return self.__sst.get_biff_record()
File "/Users/zisheng/anaconda/lib/python2.7/site-packages/xlwt/BIFFRecords.py", line 77, in get_biff_record
self._add_to_sst(s)
File "/Users/zisheng/anaconda/lib/python2.7/site-packages/xlwt/BIFFRecords.py", line 92, in _add_to_sst
u_str = upack2(s, self.encoding)
File "/Users/zisheng/anaconda/lib/python2.7/site-packages/xlwt/UnicodeUtils.py", line 50, in upack2
us = unicode(s, encoding)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)
Anyone can help? Thanks in advance!
I've figured it out!
To solve this, we can add UTF-8 notation in the writing process:
sh.write(rx, cx, unicode(temp, 'utf-8'))
And it's done.
Problem solved.
To solve this, we can add UTF-8 notation in the writing process:
sh.write(rx, cx, unicode(temp, 'utf-8'))

How to read a text file with special characters in python

I am trying to read txt file with special characters like:
الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ
I'm using:
import fileinput
fileToSearch = "test_encoding.txt"
with open(fileToSearch, 'r', encoding='utf-8') as file:
counter = 0;
for line in file:
print(line)
But Python crashes with this message:
Traceback (most recent call last):
File "test.py", line 9, in <module>
print(line)
File "C:\Users\atheelm\AppData\Local\Programs\Python\Python35-
32\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-1:
character maps to <undefined>
I have Python 3.5.1 and I'm using Windows.
I'm running this command:
py test.py > out.txt
use 2 diff files and use io:
lines=["Init"]
with io.open(fileToSearch,'r',encoding='utf-8') as file:
counter = 1;
for line in file:
lines.insert(counter,str(line))
counter = counter+1
with io.open(out_file,'w',encoding='utf-8') as file:
for item in lines:
file.write("%s\n" % item)

Replace given line in files in Python

I have several files, and I need to replace third line in them:
files = ['file1.txt', 'file2.txt']
new_3rd_line = 'new third line'
What is the best way to do this?
Files are big enough, several 100mb's files.
I used this solution: Search and replace a line in a file in Python
from tempfile import mkstemp
from shutil import move
from os import remove, close
def replace_3_line(file):
new_3rd_line = 'new_3_line\n'
#Create temp file
fh, abs_path = mkstemp()
new_file = open(abs_path,'w')
old_file = open(file)
counter = 0
for line in old_file:
counter = counter + 1
if counter == 3:
new_file.write(new_3rd_line)
else:
new_file.write(line)
#close temp file
new_file.close()
close(fh)
old_file.close()
#Remove original file
remove(file)
#Move new file
move(abs_path, file)
replace_3_line('tmp.ann')
But it does not work with files that contains non English charecters.
Traceback (most recent call last):
File "D:\xxx\replace.py", line 27, in <module>
replace_3_line('tmp.ann')
File "D:\xxx\replace.py", line 12, in replace_3_line
for line in old_file:
File "C:\Python31\lib\encodings\cp1251.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x98 in position 32: character maps to <undefined>
That is bad. Where's python unicode? (file is utf8, python3).
File is:
фвыафыв
sdadf
试试
阿斯达а
阿斯顿飞

Categories

Resources