For a class assignment, I'm supposed to grab the contents of a file, compute the MD5 hash and store it in a separate file. Then I'm supposed to be able to check the integrity by comparing the MD5 hash. I'm relatively new to Python and JSON, so I thought I'd try to tackle those things with this assignment as opposed to going with something I already know.
Anyway, my program reads from a file, creates a hash, and stores that hash into a JSON file just fine. The problem comes in with my integrity checking. When I return the results of the computed hash of the file, it's different from what is recorded in the JSON file even though no changes have been made to the file. Below is an example of what is happening and I pasted my code as well. Thanks in advance for the help.
For example: These are the contents of my JSON file
Content: b'I made a file to test the md5\n'
digest: 1e8f4e6598be2ea2516102de54e7e48e
This is what is returned when I try to check the integrity of the exact same file (no changes made to it):
Content: b'I made a file to test the md5\n'
digest: ef8b7bf2986f59f8a51aae6b496e8954
import hashlib
import json
import os
import fnmatch
from codecs import open
#opens the file, reads/encodes it, and returns the contents (c)
def read_the_file(f_location):
with open(f_location, 'r', encoding="utf-8") as f:
c = f.read()
f.close()
return c
def scan_hash_json(directory_content):
for f in directory_content:
location = argument + "/" + f
content = read_the_file(location)
comp_hash = create_hash(content)
json_obj = {"Directory": argument, "Contents": {"filename": str(f),
"original string": str(content), "md5": str(comp_hash)}}
location = location.replace(argument, "")
location = location.replace(".txt", "")
write_to_json(location, json_obj)
#scans the file, creates the hash, and writes it to a json file
def read_the_json(f):
f_location = "recorded" + "/" + f
read_json = open(f_location, "r")
json_obj = json.load(read_json)
read_json.close()
return json_obj
#check integrity of the file
def check_integrity(d_content):
#d_content = directory content
for f in d_content:
json_obj = read_the_json(f)
text = f.replace(".json", ".txt")
result = find(text, os.getcwd())
content = read_the_file(result)
comp_hash = create_hash(content)
print("content: " + str(content))
print(result)
print(json_obj)
print()
print("Json Obj: " + json_obj['Contents']['md5'])
print("Hash: " + comp_hash)
#find the file being searched for
def find(pattern, path):
result = ""
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result = os.path.join(root, name)
return result
#create a hash for the file contents being passed in
def create_hash(content):
h = hashlib.md5()
key_before = "reallyBad".encode('utf-8')
key_after = "hashKeyAlgorithm".encode('utf-8')
content = content.encode('utf-8')
h.update(key_before)
h.update(content)
h.update(key_after)
return h.hexdigest()
#write the MD5 hash to the json file
def write_to_json(arg, json_obj):
arg = arg.replace(".txt", ".json")
storage_location = "recorded/" + str(arg)
write_file = open(storage_location, "w")
json.dump(json_obj, write_file, indent=4, sort_keys=True)
write_file.close()
#variable to hold status of user (whether they are done or not)
working = 1
#while the user is not done, continue running the program
while working == 1:
print("Please input a command. For help type 'help'. To exit type 'exit'")
#grab input from user, divide it into words, and grab the command/option/argument
request = input()
request = request.split()
if len(request) == 1:
command = request[0]
elif len(request) == 2:
command = request[0]
option = request[1]
elif len(request) == 3:
command = request[0]
option = request[1]
argument = request[2]
else:
print("I'm sorry that is not a valid request.\n")
continue
#if user inputs command 'icheck'...
if command == 'icheck':
if option == '-l':
if argument == "":
print("For option -l, please input a directory name.")
continue
try:
dirContents = os.listdir(argument)
scan_hash_json(dirContents)
except OSError:
print("Directory not found. Make sure the directory name is correct or try a different directory.")
elif option == '-f':
if argument == "":
print("For option -f, please input a file name.")
continue
try:
contents = read_the_file(argument)
computedHash = create_hash(contents)
jsonObj = {"Directory": "Default", "Contents": {
"filename": str(argument), "original string": str(contents), "md5": str(computedHash)}}
write_to_json(argument, jsonObj)
except OSError:
print("File not found. Make sure the file name is correct or try a different file.")
elif option == '-t':
try:
dirContents = os.listdir("recorded")
check_integrity(dirContents)
except OSError:
print("File not found. Make sure the file name is correct or try a different file.")
elif option == '-u':
print("gonna update stuff")
elif option == '-r':
print("gonna remove stuff")
#if user inputs command 'help'...
elif command == 'help':
#display help screen
print("Integrity Checker has a few options you can use. Each option "
"must begin with the command 'icheck'. The options are as follows:")
print("\t-l <directory>: Reads the list of files in the directory and computes the md5 for each one")
print("\t-f <file>: Reads a specific file and computes its md5")
print("\t-t: Tests integrity of the files with recorded md5s")
print("\t-u <file>: Update a file that you have modified after its integrity has been checked")
print("\t-r <file>: Removes a file from the recorded md5s\n")
#if user inputs command 'exit'
elif command == 'exit':
#set working to zero and exit program loop
working = 0
#if anything other than 'icheck', 'help', and 'exit' are input...
else:
#display error message and start over
print("I'm sorry that is not a valid command.\n")
Where are you defining h, the md5 object being used in this method?
#create a hash for the file contents being passed in
def create_hash(content):
key_before = "reallyBad".encode('utf-8')
key_after = "hashKeyAlgorithm".encode('utf-8')
print("Content: " + str(content))
h.update(key_before)
h.update(content)
h.update(key_after)
print("digest: " + str(h.hexdigest()))
return h.hexdigest()
My suspicion is that you're calling create_hash twice, but using the same md5 object in both calls. That means the second time you call it, you're really hashing "reallyBad*file contents*hashkeyAlgorithmreallyBad*file contents*hashKeyAlgorithm". You should create a new md5 object inside of create_hash to avoid this.
Edit: Here is how your program runs for me after making this change:
Please input a command. For help type 'help'. To exit type 'exit'
icheck -f ok.txt Content: this is a test
digest: 1f0d0fd698dfce7ce140df0b41ec3729 Please input a command. For
help type 'help'. To exit type 'exit' icheck -t Content: this is a
test
digest: 1f0d0fd698dfce7ce140df0b41ec3729 Please input a command. For
help type 'help'. To exit type 'exit'
Edit #2:
Your scan_hash_json function also has a bug at the end of it. You're removing the .txt suffix from the file, and calling write_to_json:
def scan_hash_json(directory_content):
...
location = location.replace(".txt", "")
write_to_json(location, json_obj)
However, write_to_json is expecting the file to end in .txt:
def write_to_json(arg, json_obj):
arg = arg.replace(".txt", ".json")
If you fix that, I think it should do everything as expected...
I see 2 possible problems you are facing:
for hash computation is computing from a binary representation of a string
unless you work only with ASCII encoding, the same international character e.g. č has different representations in the UTF-8 or Unicode encoding.
To consider:
If you need UTF-8 or Unicode, normalize first your content before you save it or calculate a hash
For testing purposes compare content binary representation.
use UTF-8 only for IO operations, codecs.open does all conversion
for you
from codecs import open
with open('yourfile', 'r', encoding="utf-8") as f:
decoded_content = f.read()
Related
I can't figure out how to write the user input to an existing file. The file already contains a series of letters and is called corpus.txt . I want to take the user input and add it to the file , save and close the loop.
This is the code I have :
if user_input == "q":
def write_corpus_to_file(mycorpus,myfile):
fd = open(myfile,"w")
input = raw_input("user input")
fd.write(input)
print "Writing corpus to file: ", myfile
print "Goodbye"
break
Any suggestions?
The user info code is :
def segment_sequence(corpus, letter1, letter2, letter3):
one_to_two = corpus.count(letter1+letter2)/corpus.count(letter1)
two_to_three = corpus.count(letter2+letter3)/corpus.count(letter2)
print "Here is the proposed word boundary given the training corpus:"
if one_to_two < two_to_three:
print "The proposed end of one word: %r " % target[0]
print "The proposed beginning of the new word: %r" % (target[1] + target[2])
else:
print "The proposed end of one word: %r " % (target[0] + target[1])
print "The proposed beginning of the new word: %r" % target[2]
I also tried this :
f = open(myfile, 'w')
mycorpus = ''.join(corpus)
f.write(mycorpus)
f.close()
Because I want the user input to be added to the file and not deleting what is already there, but nothing works.
Please help!
Open the file in append mode by using "a" as the mode.
For example:
f = open("path", "a")
Then write to the file and the text should be appended to the end of the file.
That code example works for me:
#!/usr/bin/env python
def write_corpus_to_file(mycorpus, myfile):
with open(myfile, "a") as dstFile:
dstFile.write(mycorpus)
write_corpus_to_file("test", "./test.tmp")
The "with open as" is a convenient way in python to open a file, do something with it while within the block defined by the "with" and let Python handles the rest once you exit it (like, for example, closing the file).
If you want to write the input from the user, you can replace mycorpus with your input (I am not too sure what you want to do from your code snippets).
Note that no carriage return is added by the write method. You probably want to append a "\n" at the end :-)
problem
Doing a while loop to validate file extension. If a file extension is not .exe or .bat, ask user input again. I am looking for a solution without using import endswith break functions.
code
format = " "
while file[:-4] != ".bat" and file[:-4] != ".exe":
format = input("Enter file you like to open: ")
if format[:-4] == ".bat" or format[:-4] == ".exe":
callFunction(format)
else:
file = input("Enter file you like to open: ")
To follow Asking the user for input until they give a valid response and using os.path.splitext() to extract the file extension:
import os
ALLOWED_EXTENSTIONS = {".bat", ".exe"}
while True:
filename = input("Enter file you like to open: ")
extension = os.path.splitext(filename)[1]
if extension in ALLOWED_EXTENSTIONS:
break
with open(filename) as f:
# do smth with f
Without break:
import os
ALLOWED_EXTENSTIONS = {".bat", ".exe"}
extension = None
while extension not in ALLOWED_EXTENSTIONS:
filename = input("Enter file you like to open: ")
extension = os.path.splitext(filename)[1]
with open(filename) as f:
# do smth with f
Without break and without any imports:
ALLOWED_EXTENSTIONS = (".bat", ".exe")
filename = ""
while not filename.endswith(ALLOWED_EXTENSTIONS):
filename = input("Enter file you like to open: ")
with open(filename) as f:
# do smth with f
Without break and without any imports and without endswith():
ALLOWED_EXTENSTIONS = {"bat", "exe"}
filename = ""
while filename.rsplit(".",1)[-1] not in ALLOWED_EXTENSTIONS:
filename = input("Enter file you like to open: ")
with open(filename) as f:
# do smth with f
You don't need a loop
def ask_exe(prompt='Executable file name? '):
name = input(prompt)
if name[-4:] in {'.exe', '.bat'}: return name
return ask_exe(prompt='The name has to end in ".exe" or ".bat", please retry: ')
[no breaks, no imports, almost no code...]
As noted by ShadowRanger my code, that uses set notation for the membership test, is suboptimal for Python versions prior to 3.2. For these older versions using a tuple avoids computing the set at runtime, each and every time the function is executed.
...
# for python < 3.2
if name[-4:] in ('.exe', '.bat'): return name
...
I've never used Python and have copied some script (with permission) from someone online, so I'm not sure why the code is dropping. I'm hoping someone can understand it and put it right for me!
from os import walk
from os.path import join
#First some options here.
!RootDir = "C:\\Users\\***\\Documents\\GoGames"
!OutputFile = "C:\\Users\\***\\Documents\\GoGames\\protable.csv"
Properties = !!['pb', 'pw', 'br', 'wr', 'dt', 'ev', 're']
print """
SGF Database Maker
==================
Use this program to create a CSV file with sgf info.
"""
def getInfo(filename):
"""Read out file info here and return a dictionary with all the
properties needed."""
result = !![]
file = open(filename, 'r')
data = file.read(1024) read at most 1kb since we assume all relevant info is in the beginning
file.close()
for prop in Properties:
try:
i = data.lower().index(prop)
except !ValueError:
result.append((prop, ''))
continue
try:
value = data![data.index('![', i)+1 : data.index(']', i)]
except !ValueError:
value = ''
result.append((prop, value))
return dict(result)
!ProgressCounter = 0
file = open(!OutputFile, "w")
file.write('^Filename^;^PB^;^BR^;^PW^;^WR^;^RE^;^EV^;^DT^\n')
for root, dirs, files in walk(!RootDir):
for name in files:
if name![-3:].lower() != "sgf":
continue
info = getInfo(join(root, name))
file.write('^'+join(root, name)+'^;^'+info!['pb']+'^;^'+info!['br']+'^;^'+info!['pw']+'^;^'+info!['wr']+'^;^'+info!['re']+'^;^'+info!['ev']+'^;^'+info!['dt']+'^\n')
!ProgressCounter += 1
if (!ProgressCounter) % 100 == 0:
print str(!ProgressCounter) + " games processed."
file.close()
print "A total of " + str(!ProgressCounter) + " have been processed."
Using Netbeans IDE I get the following error:
!RootDir = "C:\\Users\\***\\Documents\\GoGames"
^
SyntaxError: mismatched input '' expecting EOF
I have previously been able to step through the code as far as file.close(), where I go an error "does not match outer indentation level".
Anyone able to put the syntax of this code right for me?
Remove the exclamation marks in front of variable names, list declarations (!![]) and in except clauses (except !ValueError), this is not valid Python syntax.
I'm trying to read in a list of account numbers, then have my program do a search in the appropriate directory for each account number. I want to capture the information from this search, to then split out the file name, date, and time as the output from my program. Currently I'm receiving this error: TypeError: bufsize must be an integer
Here is my code:
def app_files(level):
piv_list_file = raw_input(r"Please enter the full path of the file containing the Pivot ID's you would like to check: ")
piv_id_list = []
proc_out_list = []
input_dir = ''
try:
with open(piv_list_file, 'rbU') as infile:
for line in infile:
line = line.rstrip('\r\n')
piv_id_list.append(line)
except IOError as e:
print 'Unable to open the account number file: %s' % e.strerror
if level == 'p':
input_dir = '[redacted]'
else:
input_dir = '[redacted]'
subprocess.call('cd', input_dir)
for i, e in enumerate(piv_id_list):
proc_out = subprocess.check_output('ls', '-lh', '*CSV*APP*{0}.zip'.format(e))
proc_out_list.append(proc_out)
print(proc_out)
Your subprocess.check_output() function call is wrong. You should provide the complete command as a list (as the first argument). Example -
subprocess.check_output(['ls', '-lh', '*CSV*APP*{0}.zip'.format(e)])
Similar issue with subprocess.call in your code .
The error occurs at line 49 "fileSizeRemainingInBytes = os.path.getsize(inFile)"
inFile contains the file I want to gets size. From what I understood in the python documentation this should be correct. Can someone tell me what is the problem.
import sys, os
buffer = 1000
try:
#open file in binary mode for reading
inFile = open(sys.argv[1],"rb")
print "file name is: ", inFile.name
except IOError:
#check for IOExceptions
print "Eror opening file"
sys.exit()
else:
#create new directory for copying, create out file in new directory
if (os.path.isdir("recv")):
os.chdir("recv")
try:
outFile = open(inFile.name,"wb")
except IOError:
print "something went wrong creating the out file"
sys.exit()
else :
os.mkdir("recv")
os.chdir("recv")
try:
outFile = open(inFile.name,"wb")
except IOError:
print "something went wrong creating the out file"
sys.exit()
#loop to copy bytes to new directory
fileSizeRemainingInBytes = os.path.getsize(inFile)
print "Initial size: ", fileSizeRemainingInBytes
while fileSizeRemainingInBytes > 0 :
print fileSizeRemainingInBytes
bytesToCopy = inFile.read(buffer);
outFile.write(bytesToCopy);
inFile.close()
os.path.getsize takes a file path as an argument, not a file object. So you actually want to call os.path.getsize(inFile.name). Note that this won't give you the number of bytes remaining to copy; it'll just give you the size of the whole file every time it's evaluated. To get the number of bytes remaining, you'll have to keep track of the total number of bytes read and subtract this total from the file size.
Something like this should work:
import sys
import os
buffer = 1000
with open(sys.argv[1], "rb") as in_file:
# Make your `recv` directory as a sub-directory
# or your current directory if it doesn't already exist
if not os.path.isdir("recv"):
os.mkdir("recv")
# Create the path to the file to which you
# want to copy. When opened, you'll have a file
# with the same file name as your input file,
# but it will be in your `recv` subdirectory
out_file_path = os.path.join("recv", in_file.name)
# Read the bytes
with open(out_file_path, "wb") as out_file:
bytes_read = 0
bytes_to_read = os.path.getsize(in_file.name)
while bytes_read < bytes_to_read:
out_file.write(in_file.read(buffer))
bytes_read += min(buffer, bytes_to_read - bytes_read)
print "{} / {} bytes copied".format(bytes_read, bytes_to_read)