AES 256 Encryption with PyCrypto using CBC mode - any weaknesses? - python

I have the following python script to encrypt/decrypt data using AES 256, could you please tell me if there's anything in the code that may make the encryption weak or if there's anything that I've not taken account of for AES 256 encryption using CBC mode? I've tested the script and it works fine, it is encrypting and decrypting data but just wanted a second opinion. Thanks.
from Crypto.Cipher import AES
from Crypto import Random
# Padding granularity used by AddPadding below.
# NOTE(review): AES itself works on 16-byte blocks; 32 still yields valid
# input because it is a multiple of 16 -- confirm this is intentional.
BLOCK_SIZE = 32
# Marker placed directly after the plaintext, before any filler.
INTERRUPT = u'\u0001'
# Filler character repeated until the length is a multiple of BLOCK_SIZE.
PAD = u'\u0000'
def AddPadding(data, interrupt, pad, block_size):
    """Append ``interrupt`` and then ``pad`` filler up to a block boundary.

    Works for text (``str``) as well as binary (``bytes``) input, as long as
    ``data``, ``interrupt`` and ``pad`` are all of the same type.

    Args:
        data: The payload to pad.
        interrupt: Marker placed immediately after the payload.
        pad: Single-item filler repeated after the marker.
        block_size: The returned length is a multiple of this value.

    Returns:
        ``data + interrupt`` followed by as many ``pad`` items as needed
        (possibly none) to reach a multiple of ``block_size``.
    """
    # Plain concatenation (instead of ''.join) keeps this usable with bytes.
    new_data = data + interrupt
    # -len % block_size is the distance to the next multiple (0 when aligned).
    to_pad_len = -len(new_data) % block_size
    return new_data + pad * to_pad_len
def StripPadding(data, interrupt, pad):
    """Remove trailing ``pad`` filler and the single ``interrupt`` marker.

    Exactly one marker is removed. Stripping every trailing ``interrupt``
    character would also eat payload bytes that happen to equal the marker.

    Args:
        data: Padded value.
        interrupt: Marker that was placed after the payload.
        pad: Filler that was appended after the marker.

    Returns:
        ``data`` without the trailing filler and without one trailing marker.
    """
    stripped = data.rstrip(pad)
    # Guard against an empty marker: endswith('') is always true and a
    # [:-0] slice would wipe the whole value.
    if interrupt and stripped.endswith(interrupt):
        stripped = stripped[:-len(interrupt)]
    return stripped
# Fresh random 32-byte key (a 32-byte key selects AES-256).
SECRET_KEY = Random.new().read(32)
# Random 16-byte initialization vector, shared by both cipher objects below.
# NOTE(review): an IV should be used for one message only -- generate a new
# one per encryption if this setup is ever reused.
IV = Random.new().read(16)
# One cipher object per direction, both built from the same key and IV.
# NOTE(review): CBC cipher objects presumably keep chaining state between
# calls, so each should process a single message -- confirm in the PyCrypto docs.
cipher_for_encryption = AES.new(SECRET_KEY, AES.MODE_CBC, IV)
cipher_for_decryption = AES.new(SECRET_KEY, AES.MODE_CBC, IV)
def EncryptWithAES(encrypt_cipher, plaintext_data):
    """Pad ``plaintext_data`` and encrypt it with ``encrypt_cipher``.

    Padding uses the module-level INTERRUPT, PAD and BLOCK_SIZE settings.
    Returns whatever ``encrypt_cipher.encrypt`` produces for the padded input.
    """
    padded = AddPadding(plaintext_data, INTERRUPT, PAD, BLOCK_SIZE)
    return encrypt_cipher.encrypt(padded)
def DecryptWithAES(decrypt_cipher, encrypted_data):
    """Decrypt ``encrypted_data`` and remove the INTERRUPT/PAD padding.

    Returns the result of ``decrypt_cipher.decrypt`` after StripPadding has
    removed the filler and marker.
    """
    plaintext_padded = decrypt_cipher.decrypt(encrypted_data)
    return StripPadding(plaintext_padded, INTERRUPT, PAD)
# Round-trip demo: encrypt a sample string, print it, then decrypt it again.
our_data_to_encrypt = u'abc11100000'
encrypted_data = EncryptWithAES(cipher_for_encryption, our_data_to_encrypt)
print ('Encrypted string:', encrypted_data)
# Decryption uses the second cipher object, built from the same key and IV.
decrypted_data = DecryptWithAES(cipher_for_decryption, encrypted_data)
print ('Decrypted string:', decrypted_data)

I've seen the code posted on the internet. There are - in principle - not too many things wrong with it, but there is no need to invent your own padding. Furthermore, I don't see why the first padding character is called INTERRUPT. I presume that INTERRUPT and PAD is handled as a single byte (I'm not a Python expert).
The most common padding is PKCS#5 padding. It consists of N bytes with the value of the number of padding bytes. The padding used here looks more like 'ISO' padding, which consists of a single bit set to 1 to distinguish it from the data and other padding bits, and the rest are zeros. That would be code point \u0080 in code.
So the encryption (which can provide confidentiality of data) seems to be used correctly. It depends on the use case if you also need integrity protection and/or authentication, e.g. by using a MAC or HMAC. Of course, no legal guarantees or anything provided.

Related

Creating a encrypting function to encrypt a string in python [duplicate]

Surprisingly difficult to find a straight answer to this on Google.
I'm wanting to collect a piece of text and a message from a user such as 1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc hello world.
Then I want to be able to encrypt/decrypt the message with the text somehow so that I can save it in my database and not worry about the data being exposed if my website gets hacked,
encrypt('1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc', 'hello world')
decrypt('1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc', <encrypted_text>)
Is there a simple way to achieve this with python and please can someone provide/direct me to an example.
Perhaps an example of how to create public/private key pairs using a seed such as '1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc'?
Many thanks in advance :)
EDIT: Just to be clear, I'm looking for a way to encrypt my users' data in a deterministic way, not to obfuscate the message.
If that means I have to generate a PGP/GPG pub/pri key pair on the fly by using the text 1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc as a seed then that's fine but what's the method to do this?
Here's how to do it properly in CBC mode, including PKCS#7 padding:
import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random
def encrypt(key, source, encode=True):
    """Encrypt ``source`` with AES-CBC under a key derived from ``key``.

    Args:
        key: Secret as bytes; its SHA-256 digest is used as the AES key.
        source: Plaintext bytes.
        encode: When true, return base64 text; otherwise return raw bytes.

    Returns:
        The random IV followed by the ciphertext, as a base64 ``str`` when
        ``encode`` is true, else as ``bytes``.
    """
    aes_key = SHA256.new(key).digest()  # digest gives a proper-sized AES key
    iv = Random.new().read(AES.block_size)  # fresh IV on every call
    cipher = AES.new(aes_key, AES.MODE_CBC, iv)
    # PKCS#7: append N bytes of value N (a whole block when already aligned).
    pad_len = AES.block_size - len(source) % AES.block_size
    padded = source + bytes([pad_len]) * pad_len  # Python 2.x: chr(pad_len) * pad_len
    payload = iv + cipher.encrypt(padded)  # the IV travels in front of the ciphertext
    if encode:
        return base64.b64encode(payload).decode("latin-1")
    return payload
def decrypt(key, source, decode=True):
    """Decrypt data produced by ``encrypt`` and remove its PKCS#7 padding.

    Args:
        key: Secret as bytes; its SHA-256 digest is used as the AES key.
        source: IV followed by ciphertext, as base64 text when ``decode`` is
            true, otherwise as raw bytes.
        decode: Whether ``source`` must be base64-decoded first.

    Returns:
        The plaintext bytes without padding.

    Raises:
        ValueError: If no ciphertext follows the IV or the padding is
            malformed.

    NOTE: nothing here authenticates the ciphertext; the padding check only
    detects some corruption.
    """
    if decode:
        source = base64.b64decode(source.encode("latin-1"))
    key = SHA256.new(key).digest()  # same derivation as encrypt()
    IV = source[:AES.block_size]  # the IV was stored in front of the ciphertext
    decryptor = AES.new(key, AES.MODE_CBC, IV)
    data = decryptor.decrypt(source[AES.block_size:])
    if not data:
        # Without this guard data[-1] would raise IndexError instead.
        raise ValueError("Invalid padding...")
    padding = data[-1]  # Python 2.x: ord(data[-1])
    # A valid pad length is 1..block_size; 0 or anything larger is corrupt.
    if not 1 <= padding <= AES.block_size:
        raise ValueError("Invalid padding...")
    if data[-padding:] != bytes([padding]) * padding:  # Python 2.x: chr(padding) * padding
        raise ValueError("Invalid padding...")
    return data[:-padding]
It's set to work with bytes data, so if you want to encrypt strings or use string passwords make sure you encode() them with a proper codec before passing them to the methods. If you leave the encode parameter to True the encrypt() output will be base64 encoded string, and decrypt() source should be also base64 string.
Now if you test it as:
# Demo: encrypt and decrypt the same data twice with the same password.
my_password = b"secret_AES_key_string_to_encrypt/decrypt_with"
my_data = b"input_string_to_encrypt/decrypt"
print("key: {}".format(my_password))
print("data: {}".format(my_data))
# First round trip.
encrypted = encrypt(my_password, my_data)
print("\nenc: {}".format(encrypted))
decrypted = decrypt(my_password, encrypted)
print("dec: {}".format(decrypted))
print("\ndata match: {}".format(my_data == decrypted))
# Second round trip: encrypt() draws a new random IV, so the printed
# ciphertext is expected to differ from the first round.
print("\nSecond round....")
encrypted = encrypt(my_password, my_data)
print("\nenc: {}".format(encrypted))
decrypted = decrypt(my_password, encrypted)
print("dec: {}".format(decrypted))
print("\ndata match: {}".format(my_data == decrypted))
your output would be similar to:
key: b'secret_AES_key_string_to_encrypt/decrypt_with'
data: b'input_string_to_encrypt/decrypt'
enc: 7roSO+P/4eYdyhCbZmraVfc305g5P8VhDBOUDGrXmHw8h5ISsS3aPTGfsTSqn9f5
dec: b'input_string_to_encrypt/decrypt'
data match: True
Second round....
enc: BQm8FeoPx1H+bztlZJYZH9foI+IKAorCXRsMjbiYQkqLWbGU3NU50OsR+L9Nuqm6
dec: b'input_string_to_encrypt/decrypt'
data match: True
Proving that same key and same data still produce different ciphertext each time.
Now, this is much better than ECB but... if you're going to use this for communication - don't! This is more to explain how it should be constructed, not really to be used in a production environment and especially not for communication as its missing a crucial ingredient - message authentication. Feel free to play with it, but you should not roll your own crypto, there are well vetted protocols that will help you avoid the common pitfalls and you should use those.
Based on zwer's answer, but shows an example attempt to deal with the case where the source text is exactly a multiple of 16 (AES.block_size). However, @zwer explains in a comment how this code will BREAK THE ENCRYPTION of your text by not padding your source text appropriately, making your pipeline insecure.
Code:
from builtins import bytes
import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random
def encrypt(string, password):
    """
    Encrypt ``string`` so that it can be decrypted with ``password`` alone.

    The result is the freshly generated IV followed by the AES-CBC
    ciphertext of the padded input.
    """
    aes_key = password_to_key(password)
    iv = make_initialization_vector()
    cipher = AES.new(aes_key, AES.MODE_CBC, iv)
    ciphertext = cipher.encrypt(pad_string(string))
    # the IV is stored in front so decrypt() can recover it
    return iv + ciphertext
def decrypt(string, password):
    """
    Reverse ``encrypt``: split off the leading IV, decrypt the remainder
    with the key derived from ``password`` and strip the padding.
    """
    aes_key = password_to_key(password)
    # the first block is the IV, everything after it is ciphertext
    iv, ciphertext = string[:AES.block_size], string[AES.block_size:]
    cipher = AES.new(aes_key, AES.MODE_CBC, iv)
    return unpad_string(cipher.decrypt(ciphertext))
def password_to_key(password):
    """
    Derive the AES key from ``password``.

    Returns the SHA-256 digest of ``password`` (256 bits), which is a
    proper-sized AES key.
    """
    hasher = SHA256.new(password)
    return hasher.digest()
def make_initialization_vector():
    """
    Return ``AES.block_size`` fresh random bytes to use as the IV.

    A random IV makes repeated encryptions under the same key produce
    different ciphertexts, so an observer cannot relate messages to
    each other.
    """
    rng = Random.new()
    return rng.read(AES.block_size)
def pad_string(string, chunk_size=AES.block_size):
    """
    Frame ``string`` so its total length is a multiple of ``chunk_size``.

    Layout: one leading byte holding the number of padding bytes, then the
    original data, then that many zero bytes.
    """
    assert chunk_size <= 256, 'We are using one byte to represent padding'
    framed_len = len(string) + 1  # +1 for the leading length byte
    to_pad = -framed_len % chunk_size
    return bytes([to_pad]) + string + bytes([0] * to_pad)
def unpad_string(string):
    """
    Undo ``pad_string``: the first byte gives the number of trailing
    padding bytes; return the data between that byte and the padding.
    """
    to_pad = string[0]
    if to_pad == 0:
        # string[1:-0] is string[1:0], i.e. empty, so handle "no padding"
        # explicitly to avoid losing the whole message.
        return string[1:]
    return string[1:-to_pad]
def encode(string):
    """
    Return the base64 representation of the bytes ``string`` as text.

    Base64 lets binary data pass through storage or transports that only
    handle text.
    """
    b64_bytes = base64.b64encode(string)
    return b64_bytes.decode("latin-1")
def decode(string):
    """Return the bytes represented by the base64 text ``string``."""
    b64_bytes = string.encode("latin-1")
    return base64.b64decode(b64_bytes)
Tests:
def random_text(length):
    """Return ``length`` random lowercase ASCII letters as bytes."""
    # randint was used without ever being imported; import it locally so the
    # helper is self-contained.
    from random import randint

    def rand_lower():
        return chr(randint(ord('a'), ord('z')))

    letters = ''.join(rand_lower() for _ in range(length))
    return bytes(letters, encoding='utf-8')
def test_encoding():
    """Base64 encoding changes the data and decodes back to the original."""
    original = random_text(100)
    encoded = encode(original)
    assert encoded != original
    assert decode(encoded) == original
def test_padding():
    """Padded length is (input length + 1) rounded up to a multiple of 16."""
    cases = ((14, 16), (15, 16), (16, 32))
    for text_len, padded_len in cases:
        assert len(pad_string(random_text(text_len))) == padded_len
def test_encryption():
    """Encryption changes the data and decrypts back to the original."""
    plaintext = random_text(100)
    secret = random_text(20)
    assert encrypt(plaintext, secret) != plaintext
    roundtrip = decrypt(encrypt(plaintext, secret), secret)
    assert roundtrip == plaintext
If you are going to use mentioned database to authorise users, you should use hashes or message digests of user's passwords, instead of 2 way encryption algorithms, that would make your data hard to use even in case of db leakage.
You cannot use above method to protect data that needs to be decrypted at some point, but even then you can use more secure way than just encrypting user passwords using some fixed key (which is the worst method). Take a look at OWASP's Password Storage Cheat Sheet.
As you wrote "I want to be able to encrypt/decrypt the message", I'm attaching a simple python source (tested under 2.7) for encr/decr using Blowfish.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import os
from Crypto.Cipher import Blowfish # pip install pycrypto
BS = 8  # padding granularity; used with Blowfish below


def pad(s):
    """Append N copies of chr(N), N in 1..BS, so len(result) is a multiple of BS."""
    fill = BS - len(s) % BS
    return s + fill * chr(fill)


def unpad(s):
    """Drop the trailing padding; the last character encodes its length."""
    fill = ord(s[-1])
    return s[0:-fill]
def doEncrypt(phrase, key):
    """Pad ``phrase`` and encrypt it with Blowfish in ECB mode under ``key``."""
    # NOTE: ECB has no IV, so equal plaintext blocks give equal ciphertext blocks.
    return Blowfish.new(key, Blowfish.MODE_ECB).encrypt(pad(phrase))
def doDecrypt(phrase, key):
    """Decrypt ``phrase`` with Blowfish in ECB mode under ``key`` and unpad it."""
    padded = Blowfish.new(key, Blowfish.MODE_ECB).decrypt(phrase)
    return unpad(padded)
def testing123(phrase, key):
    """Encrypt then decrypt ``phrase`` and report that the round trip matched.

    Raises AssertionError when the decrypted text differs from ``phrase``.
    """
    roundtrip = doDecrypt(doEncrypt(phrase, key), key)
    assert phrase == roundtrip, "Blowfish ECB enc/dec verification failed"
    print ("Blowfish ECB enc/dec verified ok")
    # key.encode('hex') is the Python 2 way to hex-dump a byte string.
    print ('phrase/key(hex)/enc+dec: {}/{}/{}'.format(phrase, key.encode('hex'), roundtrip))
if __name__ == "__main__":
    # Demo: round-trip a sample phrase under a random 32-byte key.
    phrase, key = 'Ala ma kota, a kot ma AIDS.', os.urandom(32)
    testing123(phrase, key)
You can do it by using two of the built-in functions on the standard Python library. The first one is the function ord( ), which takes a unicode string character as single input parameter and converts it to its corresponding unicode code (an integer). Two simple examples of the usage of this function are provided:
>>> ord('a')
97
>>> ord('b')
98
Then, you also have the inverse function of ord(): chr( ). And as you can imagine it works all the way around: it has a unicode code as an input (integer) and gets the corresponding unicode character (string):
>>> chr(97)
'a'
>>> chr(98)
'b'
Then you can do a simple encription by adding or substracting by some arbitrary integer... in this case, the number 2:
NOTE: Be careful not to use very large values, or you'll get an error if, for example, you reach a negative number.
def encrypt(message):
    """Return ``message`` with every character's code point shifted up by 2."""
    return ''.join(chr(ord(ch) + 2) for ch in message)
print(encrypt('hello world'))
And getting as a result:
jgnnq"yqtnf
Now you can copy and past the same function and generate the decrypt function. In this case, it requires, obviously, to substract by 2:
def decrypt(message):
    """Return ``message`` with every character's code point shifted down by 2."""
    return ''.join(chr(ord(ch) - 2) for ch in message)
print(decrypt('jgnnq"yqtnf'))
And the result will be the original message again:
'hello world'
This would be a great way to encrypt messages to non programmers. However, anyone with a little of programming knowledge could write a program that varied the integer we used until they found we have just added (2) to the unicode characters to encrypt the code...
In order to avoid that, I would propose two more complex alternatives.
1. The first one is the simplest: it consists in applying a different sum value to the chr function depending on the position of the character (for example, adding 2 to each unicode code when it occupies an even position in the string and substracting 3 when sits on an odd position).
2. The second one will generate the maximum security. It will consist on adding or substracting every unicode code for a number that will be randomly generated for each character. It will require to store an array of values to decript back the message. Make sure, then, this array of values is not available to third parties.
There it goes a possible solution for 1.:
def encryptHard(message):
    """Shift characters at even positions by +2 and at odd positions by -3."""
    shifted = []
    for position, ch in enumerate(message):
        offset = 2 if position % 2 == 0 else -3
        shifted.append(chr(ord(ch) + offset))
    return ''.join(shifted)
print(encryptHard('hello world'))
And the result would be:
jbniqyltif
With the information provided here the decrypting script is obvious, so I won't bother you with copying, pasting and changing two values.
Finally, let's go into an in-depth analysis of the second, more complex alternative. With this one we can say that the encryption will be almost undefeatable. The idea is to vary the value we add or subtract to each unicode code by a random number between 0 and 255 (this is the range of numbers the chr( ) function admits, so do not try to play with other numbers or you will definitely get an error).
In this case, my proposal also randomizes the operation (sum or subtract), and avoids the final number being 0 (i.e. we would get an original character). Finally, it also returns a list with the numbers that were added or subtracted, something you will need in order to decrypt the message back.
The chances that you get the same encrypted message if you call this function twice with the same message of length n are somewhere near 1 in 255^n... So don't worry. (I say "somewhere near" because the algorithm would actually generate more repeated values at the low or high end of the range if, for example, the most frequent characters were not centered in this Unicode character set (from 0 to 255), which is the case.) However, the program, though not perfect, works flawlessly and protects the information.
import random as r
def encryptSuperHard(message):
    """Shift every character of ``message`` by its own random offset.

    Returns:
        A ``(text, offsets)`` tuple. ``offsets[i]`` is the signed amount that
        was added to character ``i``; subtracting it from the code point of
        ``text[i]`` restores the original character.

    NOTE: the offsets come from the ``random`` module, which is not a
    cryptographic generator, and the offsets list is needed to decrypt.
    """
    pieces = []
    l_trans = []
    for car in message:
        code = ord(car)
        add_subtract = r.choice([True, False])
        # randint needs a non-empty range: subtracting requires code >= 1 and
        # adding requires code < 255. Steer the edge cases into the branch
        # that works instead of failing at random with ValueError.
        if code == 0:
            add_subtract = False
        elif code >= 255:
            add_subtract = True
        if add_subtract:
            transpose = r.randint(0, code - 1)
            pieces.append(chr(code - transpose))
            l_trans.append(-transpose)
        else:
            transpose = r.randint(code + 1, 255)
            pieces.append(chr(code + transpose))
            l_trans.append(transpose)
    return ''.join(pieces), l_trans
print(encryptSuperHard('hello world'))
In this case, this random encrypting script I've made has returned this two value tuple, where the first value is the encrypted message and the second one is the value that has "transposed" every character in order of apearance.
('A0ŤłY\x10řG;,à', [-39, -53, 248, 214, -22, -16, 226, -40, -55, -64, 124])
Decrypting, in this case would need to take the encrypred message and the list and proceed as follows:
def decryptSuperHard(encriptedS,l):
    """Undo ``encryptSuperHard``: subtract ``l[i]`` from character ``i``.

    Only the first ``len(l)`` characters of ``encriptedS`` are processed.
    """
    return ''.join(
        chr(ord(encriptedS[i]) - shift) for i, shift in enumerate(l)
    )
print(decryptSuperHard('A0ŤłY\x10řG;,à', [-39,-53,248,214,-22,-16,226,-40,-55,-64,124]))
And the results goes back to:
hello world
# Same call as above, with the function name spelled correctly and the
# closing parenthesis restored.
print(decryptSuperHard('A0ŤłY\x10řG;,à', [-39, -53, 248, 214, -22, -16, 226, -40, -55, -64, 124]))
Have you considered using the cryptography package? Here’s a simple example using Fernet encryption from their README:
from cryptography.fernet import Fernet
# Generate a new random key; keep it, since the same key is needed to decrypt.
key = Fernet.generate_key()
f = Fernet(key)
# encrypt() takes bytes and returns an opaque token.
token = f.encrypt(b"A secret message")
# decrypt() takes that token; presumably it returns the original bytes and
# rejects tampered tokens -- see the cryptography documentation.
f.decrypt(token)
Based on this answer, the following AES256-GCM solution is even safer, although it requires a nonce:
import secrets
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
# Generate a random secret key (AES256 needs 32 bytes)
key = secrets.token_bytes(32)
# Encrypt a message
nonce = secrets.token_bytes(12) # GCM mode needs 12 fresh bytes every time
# The nonce is stored in front of the ciphertext so it is available again
# for decryption; the third argument is the (empty) associated data.
ciphertext = nonce + AESGCM(key).encrypt(nonce, b"Message", b"")
# Decrypt (raises InvalidTag if using wrong key or corrupted ciphertext)
# The first 12 bytes are the nonce, the remainder is the encrypted message.
msg = AESGCM(key).decrypt(ciphertext[:12], ciphertext[12:], b"")
To install the cryptography package:
pip install cryptography
Cheers,
Cocco
Here is my solution for anyone who may be interested:
from Crypto.Cipher import AES # pip install pycrypto
import base64
def cypher_aes(secret_key, msg_text, encrypt=True):
    """AES-ECB encrypt or decrypt ``msg_text`` with a space-padded key.

    Args:
        secret_key: Key material; space-padded up to the next multiple of 16
            and then cut to at most 32 bytes.
        msg_text: Plaintext to encrypt, or base64 ciphertext to decrypt.
        encrypt: True to encrypt, False to decrypt.

    Returns:
        Base64-encoded ciphertext when encrypting, otherwise the decrypted
        data with surrounding whitespace stripped.

    Side effects: prints the adjusted key and the padded text.

    NOTE: because padding is spaces and the result is ``strip()``-ed,
    leading/trailing whitespace of the original message is not preserved.
    """
    # an AES key must be either 16, 24, or 32 bytes long:
    # pad with spaces to the next multiple of 16, then keep at most 32 bytes
    key_fill = 16 - len(secret_key) % 16
    modified_key = secret_key.ljust(len(secret_key) + key_fill)[:32]
    print(modified_key)
    # input must be a multiple of 16 in length, so pad it with spaces too
    text_fill = 16 - len(msg_text) % 16
    modified_text = msg_text.ljust(len(msg_text) + text_fill)
    print(modified_text)
    cipher = AES.new(modified_key, AES.MODE_ECB)  # use of ECB mode in enterprise environments is very much frowned upon
    if not encrypt:
        return cipher.decrypt(base64.b64decode(modified_text)).strip()
    return base64.b64encode(cipher.encrypt(modified_text)).strip()
# Demo: encrypt a sample message, print the base64 result, then decrypt it
# again with the same key.
encrypted = cypher_aes(b'secret_AES_key_string_to_encrypt/decrypt_with', b'input_string_to_encrypt/decrypt', encrypt=True)
print(encrypted)
print()
print(cypher_aes(b'secret_AES_key_string_to_encrypt/decrypt_with', encrypted, encrypt=False))
Result:
b'secret_AES_key_string_to_encrypt'
b'input_string_to_encrypt/decrypt '
b'+IFU4e4rFWEkUlOU6sd+y8JKyyRdRbPoT/FvDBCFeuY='
b'secret_AES_key_string_to_encrypt'
b'+IFU4e4rFWEkUlOU6sd+y8JKyyRdRbPoT/FvDBCFeuY= '
b'input_string_to_encrypt/decrypt'

How to encrypt text with a password in python?

Surprisingly difficult to find a straight answer to this on Google.
I'm wanting to collect a piece of text and a message from a user such as 1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc hello world.
Then I want to be able to encrypt/decrypt the message with the text somehow so that I can save it in my database and not worry about the data being exposed if my website gets hacked,
encrypt('1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc', 'hello world')
decrypt('1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc', <encrypted_text>)
Is there a simple way to achieve this with python and please can someone provide/direct me to an example.
Perhaps an example of how to create public/private key pairs using a seed such as '1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc'?
Many thanks in advance :)
EDIT: Just to be clear, I'm looking for a way to encrypt my users' data in a deterministic way, not to obfuscate the message.
If that means I have to generate a PGP/GPG pub/pri key pair on the fly by using the text 1PWP7a6xgoYx81VZocrDr5okEEcnqKkyDc as a seed then that's fine but what's the method to do this?
Here's how to do it properly in CBC mode, including PKCS#7 padding:
import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random
def encrypt(key, source, encode=True):
    """Encrypt ``source`` with AES-CBC under a key derived from ``key``.

    Args:
        key: Secret as bytes; its SHA-256 digest is used as the AES key.
        source: Plaintext bytes.
        encode: When true, return base64 text; otherwise return raw bytes.

    Returns:
        The random IV followed by the ciphertext, as a base64 ``str`` when
        ``encode`` is true, else as ``bytes``.
    """
    aes_key = SHA256.new(key).digest()  # digest gives a proper-sized AES key
    iv = Random.new().read(AES.block_size)  # fresh IV on every call
    cipher = AES.new(aes_key, AES.MODE_CBC, iv)
    # PKCS#7: append N bytes of value N (a whole block when already aligned).
    pad_len = AES.block_size - len(source) % AES.block_size
    padded = source + bytes([pad_len]) * pad_len  # Python 2.x: chr(pad_len) * pad_len
    payload = iv + cipher.encrypt(padded)  # the IV travels in front of the ciphertext
    if encode:
        return base64.b64encode(payload).decode("latin-1")
    return payload
def decrypt(key, source, decode=True):
    """Decrypt data produced by ``encrypt`` and remove its PKCS#7 padding.

    Args:
        key: Secret as bytes; its SHA-256 digest is used as the AES key.
        source: IV followed by ciphertext, as base64 text when ``decode`` is
            true, otherwise as raw bytes.
        decode: Whether ``source`` must be base64-decoded first.

    Returns:
        The plaintext bytes without padding.

    Raises:
        ValueError: If no ciphertext follows the IV or the padding is
            malformed.

    NOTE: nothing here authenticates the ciphertext; the padding check only
    detects some corruption.
    """
    if decode:
        source = base64.b64decode(source.encode("latin-1"))
    key = SHA256.new(key).digest()  # same derivation as encrypt()
    IV = source[:AES.block_size]  # the IV was stored in front of the ciphertext
    decryptor = AES.new(key, AES.MODE_CBC, IV)
    data = decryptor.decrypt(source[AES.block_size:])
    if not data:
        # Without this guard data[-1] would raise IndexError instead.
        raise ValueError("Invalid padding...")
    padding = data[-1]  # Python 2.x: ord(data[-1])
    # A valid pad length is 1..block_size; 0 or anything larger is corrupt.
    if not 1 <= padding <= AES.block_size:
        raise ValueError("Invalid padding...")
    if data[-padding:] != bytes([padding]) * padding:  # Python 2.x: chr(padding) * padding
        raise ValueError("Invalid padding...")
    return data[:-padding]
It's set to work with bytes data, so if you want to encrypt strings or use string passwords make sure you encode() them with a proper codec before passing them to the methods. If you leave the encode parameter to True the encrypt() output will be base64 encoded string, and decrypt() source should be also base64 string.
Now if you test it as:
# Demo: encrypt and decrypt the same data twice with the same password.
my_password = b"secret_AES_key_string_to_encrypt/decrypt_with"
my_data = b"input_string_to_encrypt/decrypt"
print("key: {}".format(my_password))
print("data: {}".format(my_data))
# First round trip.
encrypted = encrypt(my_password, my_data)
print("\nenc: {}".format(encrypted))
decrypted = decrypt(my_password, encrypted)
print("dec: {}".format(decrypted))
print("\ndata match: {}".format(my_data == decrypted))
# Second round trip: encrypt() draws a new random IV, so the printed
# ciphertext is expected to differ from the first round.
print("\nSecond round....")
encrypted = encrypt(my_password, my_data)
print("\nenc: {}".format(encrypted))
decrypted = decrypt(my_password, encrypted)
print("dec: {}".format(decrypted))
print("\ndata match: {}".format(my_data == decrypted))
your output would be similar to:
key: b'secret_AES_key_string_to_encrypt/decrypt_with'
data: b'input_string_to_encrypt/decrypt'
enc: 7roSO+P/4eYdyhCbZmraVfc305g5P8VhDBOUDGrXmHw8h5ISsS3aPTGfsTSqn9f5
dec: b'input_string_to_encrypt/decrypt'
data match: True
Second round....
enc: BQm8FeoPx1H+bztlZJYZH9foI+IKAorCXRsMjbiYQkqLWbGU3NU50OsR+L9Nuqm6
dec: b'input_string_to_encrypt/decrypt'
data match: True
Proving that same key and same data still produce different ciphertext each time.
Now, this is much better than ECB but... if you're going to use this for communication - don't! This is more to explain how it should be constructed, not really to be used in a production environment and especially not for communication as its missing a crucial ingredient - message authentication. Feel free to play with it, but you should not roll your own crypto, there are well vetted protocols that will help you avoid the common pitfalls and you should use those.
Based on zwer's answer, but shows an example attempt to deal with the case where the source text is exactly a multiple of 16 (AES.block_size). However, @zwer explains in a comment how this code will BREAK THE ENCRYPTION of your text by not padding your source text appropriately, making your pipeline insecure.
Code:
from builtins import bytes
import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random
def encrypt(string, password):
    """
    Encrypt ``string`` so that it can be decrypted with ``password`` alone.

    The result is the freshly generated IV followed by the AES-CBC
    ciphertext of the padded input.
    """
    aes_key = password_to_key(password)
    iv = make_initialization_vector()
    cipher = AES.new(aes_key, AES.MODE_CBC, iv)
    ciphertext = cipher.encrypt(pad_string(string))
    # the IV is stored in front so decrypt() can recover it
    return iv + ciphertext
def decrypt(string, password):
    """
    Reverse ``encrypt``: split off the leading IV, decrypt the remainder
    with the key derived from ``password`` and strip the padding.
    """
    aes_key = password_to_key(password)
    # the first block is the IV, everything after it is ciphertext
    iv, ciphertext = string[:AES.block_size], string[AES.block_size:]
    cipher = AES.new(aes_key, AES.MODE_CBC, iv)
    return unpad_string(cipher.decrypt(ciphertext))
def password_to_key(password):
    """
    Derive the AES key from ``password``.

    Returns the SHA-256 digest of ``password`` (256 bits), which is a
    proper-sized AES key.
    """
    hasher = SHA256.new(password)
    return hasher.digest()
def make_initialization_vector():
    """
    Return ``AES.block_size`` fresh random bytes to use as the IV.

    A random IV makes repeated encryptions under the same key produce
    different ciphertexts, so an observer cannot relate messages to
    each other.
    """
    rng = Random.new()
    return rng.read(AES.block_size)
def pad_string(string, chunk_size=AES.block_size):
    """
    Frame ``string`` so its total length is a multiple of ``chunk_size``.

    Layout: one leading byte holding the number of padding bytes, then the
    original data, then that many zero bytes.
    """
    assert chunk_size <= 256, 'We are using one byte to represent padding'
    framed_len = len(string) + 1  # +1 for the leading length byte
    to_pad = -framed_len % chunk_size
    return bytes([to_pad]) + string + bytes([0] * to_pad)
def unpad_string(string):
    """
    Undo ``pad_string``: the first byte gives the number of trailing
    padding bytes; return the data between that byte and the padding.
    """
    to_pad = string[0]
    if to_pad == 0:
        # string[1:-0] is string[1:0], i.e. empty, so handle "no padding"
        # explicitly to avoid losing the whole message.
        return string[1:]
    return string[1:-to_pad]
def encode(string):
    """
    Return the base64 representation of the bytes ``string`` as text.

    Base64 lets binary data pass through storage or transports that only
    handle text.
    """
    b64_bytes = base64.b64encode(string)
    return b64_bytes.decode("latin-1")
def decode(string):
    """Return the bytes represented by the base64 text ``string``."""
    b64_bytes = string.encode("latin-1")
    return base64.b64decode(b64_bytes)
Tests:
def random_text(length):
    """Return ``length`` random lowercase ASCII letters as bytes."""
    # randint was used without ever being imported; import it locally so the
    # helper is self-contained.
    from random import randint

    def rand_lower():
        return chr(randint(ord('a'), ord('z')))

    letters = ''.join(rand_lower() for _ in range(length))
    return bytes(letters, encoding='utf-8')
def test_encoding():
    """Base64 encoding changes the data and decodes back to the original."""
    original = random_text(100)
    encoded = encode(original)
    assert encoded != original
    assert decode(encoded) == original
def test_padding():
    """Padded length is (input length + 1) rounded up to a multiple of 16."""
    cases = ((14, 16), (15, 16), (16, 32))
    for text_len, padded_len in cases:
        assert len(pad_string(random_text(text_len))) == padded_len
def test_encryption():
    """Encryption changes the data and decrypts back to the original."""
    plaintext = random_text(100)
    secret = random_text(20)
    assert encrypt(plaintext, secret) != plaintext
    roundtrip = decrypt(encrypt(plaintext, secret), secret)
    assert roundtrip == plaintext
If you are going to use mentioned database to authorise users, you should use hashes or message digests of user's passwords, instead of 2 way encryption algorithms, that would make your data hard to use even in case of db leakage.
You cannot use above method to protect data that needs to be decrypted at some point, but even then you can use more secure way than just encrypting user passwords using some fixed key (which is the worst method). Take a look at OWASP's Password Storage Cheat Sheet.
As you wrote "I want to be able to encrypt/decrypt the message", I'm attaching a simple python source (tested under 2.7) for encr/decr using Blowfish.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import os
from Crypto.Cipher import Blowfish # pip install pycrypto
BS = 8  # padding granularity; used with Blowfish below


def pad(s):
    """Append N copies of chr(N), N in 1..BS, so len(result) is a multiple of BS."""
    fill = BS - len(s) % BS
    return s + fill * chr(fill)


def unpad(s):
    """Drop the trailing padding; the last character encodes its length."""
    fill = ord(s[-1])
    return s[0:-fill]
def doEncrypt(phrase, key):
    """Pad ``phrase`` and encrypt it with Blowfish in ECB mode under ``key``."""
    # NOTE: ECB has no IV, so equal plaintext blocks give equal ciphertext blocks.
    return Blowfish.new(key, Blowfish.MODE_ECB).encrypt(pad(phrase))
def doDecrypt(phrase, key):
    """Decrypt ``phrase`` with Blowfish in ECB mode under ``key`` and unpad it."""
    padded = Blowfish.new(key, Blowfish.MODE_ECB).decrypt(phrase)
    return unpad(padded)
def testing123(phrase, key):
    """Encrypt then decrypt ``phrase`` and report that the round trip matched.

    Raises AssertionError when the decrypted text differs from ``phrase``.
    """
    roundtrip = doDecrypt(doEncrypt(phrase, key), key)
    assert phrase == roundtrip, "Blowfish ECB enc/dec verification failed"
    print ("Blowfish ECB enc/dec verified ok")
    # key.encode('hex') is the Python 2 way to hex-dump a byte string.
    print ('phrase/key(hex)/enc+dec: {}/{}/{}'.format(phrase, key.encode('hex'), roundtrip))
if __name__ == "__main__":
    # Demo: round-trip a sample phrase under a random 32-byte key.
    phrase, key = 'Ala ma kota, a kot ma AIDS.', os.urandom(32)
    testing123(phrase, key)
You can do it by using two of the built-in functions on the standard Python library. The first one is the function ord( ), which takes a unicode string character as single input parameter and converts it to its corresponding unicode code (an integer). Two simple examples of the usage of this function are provided:
>>> ord('a')
97
>>> ord('b')
98
Then, you also have the inverse function of ord(): chr( ). And as you can imagine it works all the way around: it has a unicode code as an input (integer) and gets the corresponding unicode character (string):
>>> chr(97)
'a'
>>> chr(98)
'b'
Then you can do a simple encription by adding or substracting by some arbitrary integer... in this case, the number 2:
NOTE: Be careful not to use very large values, or you'll get an error if, for example, you reach a negative number.
def encrypt(message):
    """Return ``message`` with every character's code point shifted up by 2."""
    return ''.join(chr(ord(ch) + 2) for ch in message)
print(encrypt('hello world'))
And getting as a result:
jgnnq"yqtnf
Now you can copy and past the same function and generate the decrypt function. In this case, it requires, obviously, to substract by 2:
def decrypt(message):
    """Return ``message`` with every character's code point shifted down by 2."""
    return ''.join(chr(ord(ch) - 2) for ch in message)
print(decrypt('jgnnq"yqtnf'))
And the result will be the original message again:
'hello world'
This would be a great way to encrypt messages to non programmers. However, anyone with a little of programming knowledge could write a program that varied the integer we used until they found we have just added (2) to the unicode characters to encrypt the code...
In order to avoid that, I would propose two more complex alternatives.
1. The first one is the simplest: it consists in applying a different sum value to the chr function depending on the position of the character (for example, adding 2 to each unicode code when it occupies an even position in the string and substracting 3 when sits on an odd position).
2. The second one will generate the maximum security. It will consist on adding or substracting every unicode code for a number that will be randomly generated for each character. It will require to store an array of values to decript back the message. Make sure, then, this array of values is not available to third parties.
There it goes a possible solution for 1.:
def encryptHard(message):
    """Shift characters at even positions by +2 and at odd positions by -3."""
    shifted = []
    for position, ch in enumerate(message):
        offset = 2 if position % 2 == 0 else -3
        shifted.append(chr(ord(ch) + offset))
    return ''.join(shifted)
print(encryptHard('hello world'))
And the result would be:
jbniqyltif
With the information provided here the decrypting script is obvious, so I won't bother you with copying, pasting and changing two values.
Finally, let's go into an in-depth analysis of the second, more complex alternative. With this one we can say that the encryption will be almost undefeatable. The idea is to vary the value we add or subtract to each unicode code by a random number between 0 and 255 (this is the range of numbers the chr( ) function admits, so do not try to play with other numbers or you will definitely get an error).
In this case, my proposal also randomizes the operation (sum or subtract), and avoids the final number being 0 (i.e. we would get an original character). Finally, it also returns a list with the numbers that were added or subtracted, something you will need in order to decrypt the message back.
The chances that you get the same encrypted message if you call this function twice with the same message of length n are somewhere near 1 in 255^n... So don't worry. (I say "somewhere near" because the algorithm would actually generate more repeated values at the low or high end of the range if, for example, the most frequent characters were not centered in this Unicode character set (from 0 to 255), which is the case.) However, the program, though not perfect, works flawlessly and protects the information.
import random as r
def encryptSuperHard(message):
    """Shift every character of *message* by its own random offset.

    For each character a coin flip decides whether a random amount is
    subtracted from or added to its code point. The signed offsets are
    returned alongside the transformed text; subtracting offset ``i``
    from the code point of output character ``i`` restores the input.

    NOTE: the offsets come from the ``random`` module (imported as ``r``),
    which is not a cryptographically secure generator, and the returned
    offset list acts as the key, so it must be kept private.

    Args:
        message: The text to transform.

    Returns:
        A ``(text, offsets)`` tuple: the transformed string and a list with
        one signed integer offset per character, in order of appearance.
    """
    pieces = []
    l_trans = []
    for car in message:
        code = ord(car)
        subtract = r.choice([True, False])
        # Guard the edges of the range: randint(0, code - 1) is empty for
        # code 0 and randint(code + 1, 255) is empty for code >= 255, so
        # force the only branch that has a valid range.
        if code == 0:
            subtract = False
        elif code >= 255:
            subtract = True
        if subtract:
            transpose = r.randint(0, code - 1)
            pieces.append(chr(code - transpose))
            l_trans.append(-transpose)
        else:
            transpose = r.randint(code + 1, 255)
            pieces.append(chr(code + transpose))
            l_trans.append(transpose)
    # Join once instead of concatenating inside the loop.
    return ''.join(pieces), l_trans
print(encryptSuperHard('hello world'))
In this case, the random encrypting script I've made has returned this two-value tuple, where the first value is the encrypted message and the second one is the list of values by which each character has been "transposed", in order of appearance.
('A0ŤłY\x10řG;,à', [-39, -53, 248, 214, -22, -16, 226, -40, -55, -64, 124])
Decrypting, in this case, requires taking the encrypted message and the list and proceeding as follows:
def decryptSuperHard(encriptedS, l):
    """Recover the original text from shifted text and its offset list.

    The code point of character ``i`` in *encriptedS* has offset ``l[i]``
    subtracted from it.

    Args:
        encriptedS: The transformed string.
        l: Signed integer offsets, one per character to decode.

    Returns:
        The decoded string, ``len(l)`` characters long.

    Raises:
        IndexError: If *l* has more entries than *encriptedS* has
            characters.
    """
    # Index by position in ``l`` so that only len(l) characters are
    # decoded; join once instead of concatenating inside a loop.
    return ''.join(
        chr(ord(encriptedS[index]) - shift) for index, shift in enumerate(l)
    )
print(decryptSuperHard('A0ŤłY\x10řG;,à', [-39,-53,248,214,-22,-16,226,-40,-55,-64,124]))
And the result goes back to:
hello world
# Decode the sample ciphertext with its offset list.
print(decryptSuperHard('A0ŤłY\x10řG;,à', [-39, -53, 248, 214, -22, -16, 226, -40, -55, -64, 124]))
Have you considered using the cryptography package? Here’s a simple example using Fernet encryption from their README:
# Minimal Fernet round trip: make a key, encrypt bytes, decrypt the token.
from cryptography.fernet import Fernet
# The same key is needed later to decrypt, so it must be stored somewhere safe.
key = Fernet.generate_key()
f = Fernet(key)
# encrypt() is given a bytes message; its result is kept as ``token``.
token = f.encrypt(b"A secret message")
# decrypt() is given the token produced above (result is discarded here).
f.decrypt(token)
Based on this answer, the following AES256-GCM solution is even safer, although it requires a nonce:
# AES-256-GCM round trip; the nonce is stored in front of the ciphertext.
import secrets
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
# Generate a random secret key (AES256 needs 32 bytes)
key = secrets.token_bytes(32)
# Encrypt a message; the 12-byte nonce is prepended so the receiver can
# split it off again before decrypting.
nonce = secrets.token_bytes(12) # GCM mode needs 12 fresh bytes every time
ciphertext = nonce + AESGCM(key).encrypt(nonce, b"Message", b"")
# Decrypt (raises InvalidTag if using wrong key or corrupted ciphertext):
# the first 12 bytes are the nonce, the remainder is the encrypted payload.
msg = AESGCM(key).decrypt(ciphertext[:12], ciphertext[12:], b"")
To install the cryptography package:
pip install cryptography
Cheers,
Cocco
Here is my solution for anyone who may be interested:
from Crypto.Cipher import AES # pip install pycrypto
import base64
def cypher_aes(secret_key, msg_text, encrypt=True):
    """Encrypt or decrypt *msg_text* with AES in ECB mode.

    The key is right-padded with spaces up to the next multiple of 16 and
    then cut to at most 32 bytes. The text is right-padded with spaces up
    to the next multiple of 16 (a full 16 is added when it is already
    aligned). Both padded values are printed.

    Args:
        secret_key: Key material as a byte string.
        msg_text: Plaintext when encrypting; base64 text when decrypting.
        encrypt: ``True`` to encrypt (default), ``False`` to decrypt.

    Returns:
        When encrypting, the base64-encoded ciphertext. When decrypting,
        the plaintext with surrounding whitespace stripped.
    """
    # an AES key must be either 16, 24, or 32 bytes long;
    # pad to the next 16-byte boundary, then keep at most 32 bytes
    key_width = (len(secret_key) // 16 + 1) * 16
    modified_key = secret_key.ljust(key_width)[:32]
    print(modified_key)

    # input strings must be a multiple of 16 in length
    text_width = (len(msg_text) // 16 + 1) * 16
    modified_text = msg_text.ljust(text_width)
    print(modified_text)

    # use of ECB mode in enterprise environments is very much frowned upon
    cipher = AES.new(modified_key, AES.MODE_ECB)
    if not encrypt:
        raw_bytes = base64.b64decode(modified_text)
        return cipher.decrypt(raw_bytes).strip()
    ciphertext = cipher.encrypt(modified_text)
    return base64.b64encode(ciphertext).strip()
encrypted = cypher_aes(b'secret_AES_key_string_to_encrypt/decrypt_with', b'input_string_to_encrypt/decrypt', encrypt=True)
print(encrypted)
print()
print(cypher_aes(b'secret_AES_key_string_to_encrypt/decrypt_with', encrypted, encrypt=False))
Result:
b'secret_AES_key_string_to_encrypt'
b'input_string_to_encrypt/decrypt '
b'+IFU4e4rFWEkUlOU6sd+y8JKyyRdRbPoT/FvDBCFeuY='
b'secret_AES_key_string_to_encrypt'
b'+IFU4e4rFWEkUlOU6sd+y8JKyyRdRbPoT/FvDBCFeuY= '
b'input_string_to_encrypt/decrypt'

How to make Python Crypto AES faster

I'm looking for a very fast way for encrypting and decrypting short text snippets. Security is secondary in my use-case. Light encryption with a constant IV is fine. I'm currently doing this:
BS = 16


def pad(s):
    """Extend *s* to a multiple of BS using PKCS#7-style padding.

    Between 1 and BS characters are appended, each being ``chr(n)`` where
    ``n`` is the number of characters appended.
    """
    fill = BS - len(s) % BS
    return s + fill * chr(fill)


def unpad(s):
    """Drop the padding added by ``pad``.

    The code of the last character says how many trailing characters to
    remove.
    """
    return s[:-ord(s[-1:])]
import base64
from Crypto.Cipher import AES
iv = '0123456789012345'
def encrypt(raw, key):
    """Pad *raw*, encrypt it with AES-CBC under *key*, and base64-encode it.

    Uses the module-level ``iv`` as the initialization vector.
    """
    padded = pad(raw)
    cbc = AES.new(key, AES.MODE_CBC, iv)
    ciphertext = cbc.encrypt(padded)
    return base64.b64encode(ciphertext)
def decrypt(enc, key):
    """Base64-decode *enc*, decrypt it with AES-CBC under *key*, and unpad.

    Uses the module-level ``iv`` as the initialization vector.
    """
    ciphertext = base64.b64decode(enc)
    cbc = AES.new(key, AES.MODE_CBC, iv)
    padded = cbc.decrypt(ciphertext)
    return unpad(padded)
enc_text = encrypt('Hello World!','xyz1234567890abc')
print decrypt(enc_text, 'xyz1234567890abc')
How can I make this faster? Maybe by using another AES mode (MODE_CBC?), or is there a faster padding function, a faster way of hex-converting the output?
Counter mode (AES.MODE_CTR) will be faster for multi-block messages, as it can be parallelized for both encryption and decryption. CBC is serial on encryption because the resulting output of each block-cipher operation is fed as input to be XOR with the plaintext of the next block before encrypting. Because CTR generates a keystream by encrypting each (sequential) counter value with the key, it does not rely on the output of any previous block operation and can perform the tasks in parallel.
In addition, because CTR operates as a stream cipher, no message padding is required, so you'll save time on that operation in and out.
Note: Don't re-use counters. You mentioned that confidentiality is a secondary concern here, but while re-using IVs in CBC mode is "bad", re-using counters in CTR mode is end of the world bad. Just use a sequential counter (literally i++) combined with 64 bits of the ms since epoch start and you'll be fine. (See Stream Reuse or Many Time Pad Attack for good examples of why).

PyCrypto - How does the Initialization Vector work?

I'm trying to understand how PyCrypto works to use in a project but I'm not fully understanding the significance of the Initialization Vector (IV). I've found that I can use the wrong IV when decoding a string and I still seem to get the message back except for the first 16 bytes (the block size). Am I simply using it wrong or not understanding something?
Here's a sample code to demonstrate:
import Crypto
import Crypto.Random
from Crypto.Cipher import AES
def pad_data(data):
    """Pad *data* to a multiple of 16 bytes with a 0x80 marker plus zeros.

    A single ``0x80`` byte is appended, followed by as many ``0x00`` bytes
    as are needed to reach the next 16-byte boundary. Padding is always
    added, even when the input is already block-aligned (a whole extra
    block in that case). Without it, a message whose real content ends in
    ``0x80`` or ``0x00`` bytes cannot be told apart from a padded one when
    the padding is removed.

    Args:
        data: The bytes to pad.

    Returns:
        A ``bytes`` object whose length is a non-zero multiple of 16.
    """
    databytes = bytearray(data)
    # One marker byte plus zero_count zero bytes completes the last block.
    zero_count = 15 - (len(databytes) % 16)
    databytes.extend(b'\x80')
    databytes.extend(b'\x00' * zero_count)
    return bytes(databytes)
def unpad_data(data):
    """Remove trailing zero bytes and a final 0x80 marker from *data*.

    Args:
        data: Decrypted bytes, possibly ending in ``0x80`` followed by
            ``0x00`` bytes.

    Returns:
        *data* unchanged when it is empty or ``None``; otherwise *data*
        with trailing ``0x00`` bytes removed and, if the remainder ends in
        ``0x80``, that marker byte removed as well.
    """
    if not data:
        return data
    stripped = data.rstrip(b'\x00')
    # endswith() is safe when stripping leaves nothing behind (input made
    # only of zero bytes), where indexing the last byte would raise.
    if stripped.endswith(b'\x80'):
        return stripped[:-1]
    return stripped
def generate_aes_key():
    """Return ``AES.block_size`` random bytes.

    Uses ``Crypto.Random.get_random_bytes`` rather than reaching into the
    POSIX-specific ``Crypto.Random.OSRNG.posix`` module, so the function
    does not depend on the platform.
    """
    return Crypto.Random.get_random_bytes(AES.block_size)
def encrypt(key, iv, data):
    """Pad *data* and encrypt it with AES-CBC using *key* and *iv*."""
    cbc = AES.new(key, AES.MODE_CBC, iv)
    return cbc.encrypt(pad_data(data))
def decrypt(key, iv, data):
    """Decrypt *data* with AES-CBC using *key* and *iv*, then unpad it."""
    cbc = AES.new(key, AES.MODE_CBC, iv)
    return unpad_data(cbc.decrypt(data))
def test_crypto():
    """Encrypt with one random IV, decrypt with another, print the result."""
    msg = b"This is some super secret message. Please don't tell anyone about it or I'll have to shoot you."
    key = generate_aes_key()
    # Random value used as the IV for encryption.
    encrypt_iv = generate_aes_key()
    ciphertext = encrypt(key, encrypt_iv, msg)
    # A second, unrelated random value used as the IV for decryption.
    decrypt_iv = generate_aes_key()
    print(decrypt(key, decrypt_iv, ciphertext))
if __name__ == '__main__':
test_crypto()
I'm using Python 3.3.
Output will vary on execution, but I get something like this: b"1^,Kp}Vl\x85\x8426M\xd2b\x1aer secret message. Please don't tell anyone about it or I'll have to shoot you."
The behavior you see is specific to the CBC mode. With CBC, decryption can be visualized in the following way (from wikipedia):
You can see that IV only contributes to the first 16 bytes of plaintext. If the IV is corrupted while it is in transit to the receiver, CBC will still correctly decrypt all blocks but the first one. In CBC, the purpose of the IV is to enable you to encrypt the same message with the same key, and still get a totally different ciphertext each time (even though the message length may give something away).
Other modes are less forgiving. If you get the IV wrong, the whole message is garbled at decryption. Take CTR mode for instance, where nonce takes almost the same meaning of IV:
The developer for PyCrypto pulled the specification for AES CBC Mode from NIST:
AES Mode_CBC -> referencing NIST 800-38a (The Recommendation for Cipher Mode Operations)
From that, page 8:
5.3 Initialization Vectors
The input to the encryption processes of the CBC, CFB, and OFB modes includes, in addition to the plaintext, a data block called the initialization vector (IV), denoted IV. The IV is used in an initial step in the encryption of a message and in the corresponding decryption of the message.
The IV need not be secret; however, for the CBC and CFB modes, the IV for any particular
execution of the encryption process must be unpredictable, and, for the OFB mode, unique IVs must be used for each execution of the encryption process. The generation of IVs is discussed in Appendix C.
Thing to remember, you need to use a random IV every time you compose a message, this adds a 'salt' to the message therefore making the message unique; even with the 'salt' being out in the open, it will not help break the encryption if the AES encryption key is unknown. If you do not use a randomized IV, say, you use the same 16 bytes each message, your messages, if you repeat yourself, will look the same going across the wire and you could be subject to frequency and/or replay attacks.
A test for the results of random IVs vs static:
def test_crypto():
    """Print ciphertexts for a fixed IV versus fresh random IVs.

    The same message is encrypted twice with one static IV and then twice
    with a newly generated IV each time. Every round prints the ciphertext
    as hex followed by the decrypted message.
    """
    # Local import: hexlify works on byte strings in both Python 2 and 3,
    # whereas ``bytes.encode('hex')`` only exists on Python 2.
    import binascii

    def show_round_trip(key, iv, msg):
        """Encrypt *msg*, print the hex ciphertext, then the decryption."""
        code = encrypt(key, iv, msg)
        print(binascii.hexlify(code).decode('ascii'))
        print(decrypt(key, iv, code))

    msg = b"This is some super secret message. Please don't tell anyone about it or I'll have to shoot you."

    print("Same IVs same key:")
    key = generate_aes_key()
    iv = b"1234567890123456"
    for _ in range(2):
        show_round_trip(key, iv, msg)

    print("Different IVs same key:")
    for _ in range(2):
        show_round_trip(key, generate_aes_key(), msg)
Hope this helps!

How AES in CTR works for Python with PyCrypto?

I am using python 2.7.1
I want to encrypt sth using AES in CTR mode. I installed PyCrypto library for python. I wrote the following code:
secret = os.urandom(16)
crypto = AES.new(os.urandom(32), AES.MODE_CTR, counter=lambda: secret)
encrypted = crypto.encrypt("asdk")
print crypto.decrypt(encrypted)
i have to run crypto.decrypt as many times as the byte size of my plaintext in order to get correctly the decrypted data. I.e:
encrypted = crypto.encrypt("test")
print crypto.decrypt(encrypted)
print crypto.decrypt(encrypted)
print crypto.decrypt(encrypted)
print crypto.decrypt(encrypted)
The last call to decrypt will give me the plaintext back. The other outputs from decrypt are some gibberish strings .
I am wondering if this is normal or not? Do i have to include into a loop with size equal of my plaintext every time or i have gotten sth wrong?
I'm going to elaborate on @gertvdijk's explanation of why the cipher behaved the way it did in the original question (my edit was rejected), but also point out that setting up the counter to return a static value is a major flaw and show how to set it up correctly.
Reset the counter for new operations
The reason why this behaves as you described in the question is because your plain text (4 bytes / 32 bits) is four times as small as the size of the key stream blocks that the CTR cipher outputs for encryption (16 bytes/128 bits).
Because you're using the same fixed value over and over instead of an actual counter, the cipher keeps spitting out the same 16 byte blocks of keystream. You can observe this by encrypting 16 null bytes repeatedly:
>>> crypto.encrypt('\x00'*16)
'?\\-\xdc\x16`\x05p\x0f\xa7\xca\x82\xdbE\x7f/'
>>> crypto.encrypt('\x00'*16)
'?\\-\xdc\x16`\x05p\x0f\xa7\xca\x82\xdbE\x7f/'
You also don't reset the cipher's state before performing decryption, so the 4 bytes of ciphertext are decrypted against the next 4 bytes of XOR key from the first output stream block. This can also be observed by encrypting and decrypting null bytes:
>>> crypto.encrypt('\x00' * 4)
'?\\-\xdc'
>>> crypto.decrypt('\x00' * 4)
'\x16`\x05p'
If this were to work the way you wanted, the result of both of those operations should be the same. Instead, you can see the first four bytes of the 16 byte block in the first result, and the second four bytes in the second result.
After you've used up the 16 byte block of XOR key by performing four operations on four-byte values (for a 16 byte total), a new block of XOR key is generated. The first four bytes (as well as all the others) of each XOR key block are the same, so when you call decrypt this time, it gives you back the plaintext.
This is really bad! You should not use AES-CTR this way - it's equivalent to simple XOR encryption with a 16 byte repeating key, which can be broken pretty easily.
Solution
You have to reset the state of the cipher before performing an operation on a new stream of data (or another operation on it), as the original instance will no longer be in the correct initial state. Your issue will be solved by instantiating a new crypto object for the decryption, as well as resetting the counter and keystream position.
You also need to use a proper counter function that combines a nonce with a counter value that increases each time a new block of keystream is generated. PyCrypto has a Counter class that can do this for you.
# AES-CTR round trip using PyCrypto's Counter helper instead of a static
# counter value.
from Crypto.Cipher import AES
from Crypto.Util import Counter
from Crypto import Random
# Set up the counter with a nonce.
# 64 bit nonce + 64 bit counter = 128 bit output
nonce = Random.get_random_bytes(8)
countf = Counter.new(64, nonce)
key = Random.get_random_bytes(32) # 256 bits key
# Instantiate a crypto object first for encryption
encrypto = AES.new(key, AES.MODE_CTR, counter=countf)
encrypted = encrypto.encrypt("asdk")
# Reset counter and instantiate a new crypto object for decryption.
# The same nonce is reused here so decryption starts from the same counter
# state that encryption started from.
countf = Counter.new(64, nonce)
decrypto = AES.new(key, AES.MODE_CTR, counter=countf)
print decrypto.decrypt(encrypted) # prints "asdk"
Start with a new crypto object for new operations
The reason why this behaves as you described in the question is because your plain text (4 bytes / 32 bits) is four times as small as the size the cryptographic engine works on for your chosen AES mode (128 bits) and also reusing the same instance of the crypto object. Simply don't reuse the same object if you're performing an operation on a new stream of data (or another operation on it). Your issue will be solved by instantiating a new crypto object for the decryption, like this:
# *NEVER* USE A FIXED COUNTER LIKE THE ONE BELOW IN PRODUCTION CODE. READ THE DOCS.
# The lambda returns the same 16 bytes on every call, so the counter never
# advances.
counter = os.urandom(16)
key = os.urandom(32) # 256 bits key
# Instantiate a crypto object first for encryption
encrypto = AES.new(key, AES.MODE_CTR, counter=lambda: counter)
encrypted = encrypto.encrypt("asdk")
# Instantiate a new crypto object for decryption
decrypto = AES.new(key, AES.MODE_CTR, counter=lambda: counter)
print decrypto.decrypt(encrypted) # prints "asdk"
Why it is not about padding with AES-CTR
This answer started out as a response on the answer by Marcus, in which he initially indicated the use of padding would solve it. While I understand it looks like symptoms of a padding issue, it certainly is not.
The whole point of AES-CTR is that you do not need padding, as it's a stream cipher (unlike ECB/CBC and so on)! Stream ciphers work on streams of data, rather than chunking data in blocks and chaining them in the actual cryptographic computation.
In addition to what Marcus says, the Crypto.Util.Counter class can be used to build your counter block function.
According to @gertvdijk, AES-CTR is a stream cipher which does not need padding. So I've deleted the related code.
Here's something I know.
You have to use a same key(the first parameter in AES.new(...)) in encryption and decryption, and keep the key private.
The encryption/decryption methods are stateful, that means crypto.en(de)crypt("abcd")==crypto.en(de)crypt("abcd") is not always true. In your CTR, your counter callback always returns a same thing, so it becomes stateless when encrypt (I am not 100% sure it is the reason), but we still find that it is somewhat stateful in decryption. As a conclusion, we should always use a new object to do them.
The counter callback function in both encryption and decryption should behave the same. In your case, it is to make both of them return the same secret. Yet I don't think the secret is a "secret". You can use a random generated "secret" and pass it across the communicating peers without any encryption so that the other side can directly use it, as long as the secret is not predictable.
So I would write my cipher like this, hope it will offer some help.
import os
import hashlib
import Crypto.Cipher.AES as AES
class Cipher:
    """AES-CTR helper that sends a random per-message "secret" in the clear.

    ``encrypt`` draws a fresh random block-sized "secret", derives the CTR
    counter blocks from it, and returns ``secret + ciphertext``.
    ``decrypt`` splits the secret off the front and rebuilds the same
    counter sequence.

    NOTE(review): every counter block is a single byte of the secret
    repeated ``BS`` times, chosen by an index that wraps modulo ``BS``. A
    message therefore sees at most ``BS`` distinct counter blocks before
    they repeat, and a counter block is fully determined by one byte. This
    is far weaker than a nonce combined with an incrementing counter (see
    ``Crypto.Util.Counter``). The construction is left unchanged so that
    existing ciphertexts still decrypt; replace it before any real use.
    """

    @staticmethod
    def md5sum(raw):
        """Return the hexadecimal MD5 digest of *raw*."""
        m = hashlib.md5()
        m.update(raw)
        return m.hexdigest()

    BS = AES.block_size

    @staticmethod
    def pad(s):
        """Return *s* unchanged; padding is not necessary in CTR mode."""
        # Block-mode padding kept for reference:
        # return s + (Cipher.BS - len(s) % Cipher.BS) * chr(Cipher.BS - len(s) % Cipher.BS)
        return s

    @staticmethod
    def unpad(s):
        """Return *s* unchanged; counterpart of ``pad``."""
        # Block-mode unpadding kept for reference:
        # return s[0:-ord(s[-1])]
        return s

    def __init__(self, key):
        """Derive the AES key from *key* and reset the counter state.

        The AES key is the hexadecimal MD5 digest of *key*.
        """
        self.key = Cipher.md5sum(key)
        # the state of the counter callback
        self.cnter_cb_called = 0
        self.secret = None

    def _reset_counter_callback_state(self, secret):
        """Restart the counter sequence for a new message using *secret*."""
        self.cnter_cb_called = 0
        self.secret = secret

    def _counter_callback(self):
        """Return the next counter block; stateful by design.

        Each call advances the call count and returns the secret's element
        at ``count % BS`` repeated ``BS`` times.
        """
        self.cnter_cb_called += 1
        return self.secret[self.cnter_cb_called % Cipher.BS] * Cipher.BS

    def encrypt(self, raw):
        """Encrypt *raw* and return ``secret + ciphertext``."""
        # randomly choose a "secret" which is not actually secret
        secret = os.urandom(Cipher.BS)
        self._reset_counter_callback_state(secret)
        cipher = AES.new(self.key, AES.MODE_CTR, counter=self._counter_callback)
        raw_padded = Cipher.pad(raw)
        enc_padded = cipher.encrypt(raw_padded)
        # the secret travels unencrypted in front of the ciphertext
        return secret + enc_padded

    def decrypt(self, enc):
        """Decrypt a value produced by ``encrypt`` and return the plaintext."""
        secret = enc[:Cipher.BS]
        self._reset_counter_callback_state(secret)
        cipher = AES.new(self.key, AES.MODE_CTR, counter=self._counter_callback)
        # the secret was never encrypted, so it is not decrypted either
        enc_padded = enc[Cipher.BS:]
        raw_padded = cipher.decrypt(enc_padded)
        return Cipher.unpad(raw_padded)
Some test:
>>> from Cipher import Cipher
>>> x = Cipher("this is key")
>>> "a"==x.decrypt(x.encrypt("a"))
True
>>> "b"==x.decrypt(x.encrypt("b"))
True
>>> "c"==x.decrypt(x.encrypt("c"))
True
>>> x.encrypt("a")==x.encrypt("a")
False #though the input is same, the outputs are different
Reference: http://packages.python.org/pycrypto/Crypto.Cipher.blockalgo-module.html#MODE_CTR

Categories

Resources