Python- encode base64 print new line - python

I want to try to encrypt my python code with base64.
But when I use \n I get an error:
print("hello
IndentationError: unexpected indent
This is my code I used:
import base64
def encode(data):
    """Return the standard Base64 encoding of the text *data* as a str.

    *data* is encoded as UTF-8 before being Base64-encoded. If anything
    goes wrong (for example *data* is not a str) an empty string is returned.
    """
    try:
        # Standard Base64 Encoding
        encoded_bytes = base64.b64encode(data.encode("utf-8"))
        return str(encoded_bytes, "utf-8")
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt / SystemExit
        # the way a bare ``except:`` would.
        return ""
def decode(data):
    """Decode the Base64 string *data* and return the result as UTF-8 text.

    Returns an empty string when *data* cannot be Base64-decoded or the
    decoded bytes are not valid UTF-8.
    """
    try:
        message_bytes = base64.b64decode(data)
        return message_bytes.decode('utf-8')
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt / SystemExit
        # the way a bare ``except:`` would.
        return ""
your_code = encode("""
print("hello \n world")
""")
exec(decode(your_code))
I could call print() twice instead of using \n, but is there a way to use \n?
I hope you can help me out

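First, you have to remove the indentation inside the your_code string. Second, you have to replace \n with \\n: inside a normal (non-raw) string literal, \n is converted to a real line break before the text is encoded, so the print("...") call ends up split across two lines.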
import base64
def encode(data):
    """Return the standard Base64 encoding of the text *data* as a str.

    *data* is encoded as UTF-8 before being Base64-encoded. If anything
    goes wrong (for example *data* is not a str) an empty string is returned.
    """
    try:
        # Standard Base64 Encoding
        encoded_bytes = base64.b64encode(data.encode("utf-8"))
        return str(encoded_bytes, "utf-8")
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt / SystemExit
        # the way a bare ``except:`` would.
        return ""
def decode(data):
    """Decode the Base64 string *data* and return the result as UTF-8 text.

    Returns an empty string when *data* cannot be Base64-decoded or the
    decoded bytes are not valid UTF-8.
    """
    try:
        message_bytes = base64.b64decode(data)
        return message_bytes.decode('utf-8')
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt / SystemExit
        # the way a bare ``except:`` would.
        return ""
your_code = encode("""
print("hello \\n world")
""")
exec(decode(your_code))

Related

Remove accents from characters using PySpark

I have accented characters in my data and want to strip the accents from them. Example: Frédér8ic# --> frederic
I am using PySpark.
I tried the below code:
def simplify(text):
    """Return *text* with accents removed, keeping only ASCII characters.

    The text is decomposed with Unicode NFD normalization and every
    non-ASCII code point (including the detached combining marks) is
    dropped.
    """
    import unicodedata

    try:
        # Python 2 only: turn a byte string into unicode. On Python 3 the
        # name ``unicode`` does not exist and the NameError is ignored.
        text = unicode(text, 'utf-8')
    except NameError:
        pass
    text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode("utf-8")
    return str(text)
But getting below error
text = unicode(text, 'utf-8')
TypeError: decoding str is not supported
def make_trans(start=None, stop=None):
    """Build two parallel strings mapping decorated characters to base ones.

    Every code point in ``range(start, stop)`` whose Unicode name contains
    "WITH" (e.g. "LATIN SMALL LETTER A WITH GRAVE") is paired with the
    character named by the part before " WITH" ("LATIN SMALL LETTER A").

    Args:
        start: first code point to inspect; defaults to ``ord(" ")``.
        stop: code point to stop before; defaults to ``sys.maxunicode``.

    Returns:
        A ``(matching_string, replace_string)`` tuple of equal length, where
        ``replace_string[i]`` is the replacement for ``matching_string[i]``.
    """
    # Imported locally so the function does not rely on module-level imports.
    import sys
    import unicodedata

    if start is None:
        start = ord(" ")
    if stop is None:
        stop = sys.maxunicode

    matching_chars = []
    replace_chars = []
    for code_point in range(start, stop):
        char = chr(code_point)
        name = unicodedata.name(char, "")
        if "WITH" in name:
            try:
                base = unicodedata.lookup(name.split(" WITH")[0])
            except KeyError:
                # The undecorated name is not a character of its own.
                continue
            matching_chars.append(char)
            replace_chars.append(base)
    # Join once instead of growing two strings character by character.
    return "".join(matching_chars), "".join(replace_chars)
def clean_text(c):
    """Return column *c* with accents removed, aliased to the same name.

    ``translate`` and ``regexp_replace`` are presumably the functions from
    ``pyspark.sql.functions`` -- confirm against the file's imports.
    """
    matching_string, replace_string = make_trans()
    return translate(
        # Raw string: "\p" is not a valid Python escape sequence. The regex
        # \p{M} removes combining marks before the per-character mapping.
        regexp_replace(c, r"\p{M}", ""),
        matching_string, replace_string
    ).alias(c)

Is tokenize.detect_encoding(readline) only in python3?

In python2.7
AttributeError: 'module' object has no attribute 'detect_encoding'
For Python 2 and 3 compatibility, you can use:
from lib2to3.pgen2 import tokenize
tokenize.detect_encoding(f.readline)[0] # 'utf-8'
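This function isn't available in Python 2.7; you can see that it isn't listed at https://docs.python.org/2.7/library/tokenize.html. That said, I don't see any reason why the Python 3.6 version wouldn't work on Python 2.7 once adapted (for instance, the re.ASCII flag does not exist in Python 2.7, where str patterns are already ASCII-only by default), i.e.: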
import re
from codecs import lookup, BOM_UTF8
# re.ASCII only exists on Python 3. On Python 2 str patterns are already
# ASCII-only by default, so fall back to "no flag" there.
_ASCII = getattr(re, 'ASCII', 0)
# Matches a source-encoding declaration comment and captures the encoding name.
cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', _ASCII)
# Matches a line that is blank or contains only a comment.
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', _ASCII)
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
return "utf-8"
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
return "iso-8859-1"
return orig_enc
def detect_encoding(readline):
    """Detect the source encoding from the first two lines of a file.

    Args:
        readline: a callable returning the next line as bytes; it is called
            at most twice. A StopIteration it raises is treated as end of
            input.

    Returns:
        ``(encoding, lines)`` where *lines* is the list of byte lines that
        were consumed. The encoding comes from a UTF-8 BOM and/or an
        encoding cookie; without either it is 'utf-8'.

    Raises:
        SyntaxError: if a line is not valid UTF-8, if the declared encoding
            is unknown, or if a BOM is combined with a non-UTF-8 cookie.
    """
    try:
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    encoding = None
    default = 'utf-8'

    def read_or_stop():
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode('utf-8')
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if filename is not None:
                msg = '{} for {!r}'.format(msg, filename)
            raise SyntaxError(msg)

        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            # Only used to validate that the codec exists.
            lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if filename is None:
                msg = "unknown encoding: " + encoding
            else:
                msg = "unknown encoding for {!r}: {}".format(filename,
                                                             encoding)
            raise SyntaxError(msg)

        if bom_found:
            if encoding != 'utf-8':
                # This behaviour mimics the Python interpreter
                if filename is None:
                    msg = 'encoding problem: utf-8'
                else:
                    msg = 'encoding problem for {!r}: utf-8'.format(filename)
                raise SyntaxError(msg)
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    # A cookie on the second line only counts when the first line is blank
    # or a comment.
    if not blank_re.match(first):
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]
# Open the file in binary mode (detect_encoding works on bytes) and close it
# again as soon as the first lines have been inspected.
with open("out.txt", 'rb') as source_file:
    coding, lines = detect_encoding(source_file.readline)
print(coding, lines)

Unicode Decode Error in Python with files

so I'm having this trouble with the decode. I found it in other threads how to do it for simple strings, with the u'string'.encode. But I can't find a way to make it work with files.
Any help would be appreciated!
Here's the code.
text = file.read()
text.replace(txt.encode('utf-8'), novo_txt.encode('utf-8'))
file.seek(0) # rewind
file.write(text.encode('utf-8'))
and here's the whole code, should it help.
#!/usr/bin/env python
# coding: utf-8
"""
Script to helps on translate some code's methods from
portuguese to english.
"""
from multiprocessing import Pool
from mock import MagicMock
from goslate import Goslate
import fnmatch
import logging
import os
import re
import urllib2
_MAX_PEERS = 1
try:
os.remove('traducoes.log')
except OSError:
pass
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.FileHandler('traducoes.log')
logger.addHandler(handler)
def fileWalker(ext, dirname, names):
    """
    Find the files with the correct extension

    ext     -- two-item list: ext[0] is the extension to match (e.g. ".py"),
               ext[1] is the list that matching paths are appended to
    dirname -- directory the names belong to
    names   -- file names found in dirname
    """
    pat = "*" + ext[0]
    for f in names:
        if fnmatch.fnmatch(f, pat):
            ext[1].append(os.path.join(dirname, f))
def encontre_text(file):
    """
    Find in the file's text the words which have '_' in them

    The file content is read as bytes and decoded as UTF-8. A word is
    returned only when at least one word character follows an underscore.
    """
    text = file.read().decode('utf-8')
    return re.findall(r"\w+(?<=_)\w+", text)
    #return re.findall(r"\"\w+\"", text)
def traduza_palavra(txt):
"""
Translate the word/phrase to english
"""
try:
# try connect with google
response = urllib2.urlopen('http://google.com', timeout=2)
pass
except urllib2.URLError as err:
print "No network connection "
exit(-1)
if txt[0] != '_':
txt = txt.replace('_', ' ')
txt = txt.replace('media'.decode('utf-8'), 'média'.decode('utf-8'))
gs = Goslate()
#txt = gs.translate(txt, 'en', gs.detect(txt))
txt = gs.translate(txt, 'en', 'pt-br') # garantindo idioma tupiniquim
txt = txt.replace(' en ', ' br ')
return txt.replace(' ', '_') # .lower()
def subistitua(file, txt, novo_txt):
    """
    should rewrite the file with the new text in the future

    file     -- file object opened for reading and writing
    txt      -- text to look for
    novo_txt -- text meant to replace txt
    """
    text = file.read()
    # The value returned by replace() is not stored here.
    text.replace(txt.encode('utf-8'), novo_txt.encode('utf-8'))
    file.seek(0) # rewind
    file.write(text.encode('utf-8'))
def magica(File):
"""
Thread Pool. Every single thread should play around here with
one element from list os files
"""
global _DONE
if _MAX_PEERS == 1: # inviavel em multithread
logger.info('\n---- File %s' % File)
with open(File, "r+") as file:
list_txt = encontre_text(file)
for txt in list_txt:
novo_txt = traduza_palavra(txt)
if txt != novo_txt:
logger.info('%s -> %s [%s]' % (txt, novo_txt, File))
subistitua(file, txt, novo_txt)
file.close()
print File.ljust(70) + '[OK]'.rjust(5)
if __name__ == '__main__':
try:
response = urllib2.urlopen('http://www.google.com.br', timeout=1)
except urllib2.URLError as err:
print "No network connection "
exit(-1)
root = './app'
ex = ".py"
files = []
os.path.walk(root, fileWalker, [ex, files])
print '%d files found to be translated' % len(files)
try:
if _MAX_PEERS > 1:
_pool = Pool(processes=_MAX_PEERS)
result = _pool.map_async(magica, files)
result.wait()
else:
result = MagicMock()
result.successful.return_value = False
for f in files:
pass
magica(f)
result.successful.return_value = True
except AssertionError, e:
print e
else:
pass
finally:
if result.successful():
print 'Translated all files'
else:
print 'Some files were not translated'
Thank you all for the help!
In Python 2, reading from files produces regular (byte) string objects, not unicode objects. There is no need to call .encode() on these; in fact, that'll only trigger an automatic decode to Unicode first, which can fail.
Rule of thumb: use a unicode sandwich. Whenever you read data, you decode to unicode at that stage. Use unicode values throughout your code. Whenever you write data, encode at that point. You can use io.open() to open file objects that encode and decode automatically for you.
That also means you can use unicode literals everywhere; for your regular expressions, for your string literals. So use:
def encontre_text(file):
    """Return the words in the file's text that contain an underscore.

    The file is expected to yield unicode text already, so nothing is
    decoded here.
    """
    text = file.read() # assume `io.open()` was used
    # re.UNICODE gives unicode-aware \w on Python 2 and is the default on
    # Python 3, where the ur"..." literal prefix is a syntax error.
    return re.findall(r"\w+(?<=_)\w+", text, re.UNICODE)
and
def subistitua(file, txt, novo_txt):
    """Rewrite the file with every occurrence of txt replaced by novo_txt.

    file     -- text file object opened for reading and writing
    txt      -- unicode text to look for
    novo_txt -- unicode text to put in its place
    """
    # The caller may already have read the file to its end, so rewind first;
    # otherwise read() would return an empty string.
    file.seek(0)
    text = file.read() # assume `io.open()` was used
    text = text.replace(txt, novo_txt)
    file.seek(0) # rewind
    file.write(text)
    # Drop leftover old content when the new text is shorter than the old.
    file.truncate()
as all string values in the program are already unicode, and
txt = txt.replace(u'media', u'média')
as u'..' unicode string literals don't need decoding anymore.

Python file handling: output is not as expected.

I tried the following program on a file but didn't get the accurate result.
The decoded file is not the exact copy of the original message.
Some letters are eaten up somewhere.
"""
This file of python script works by encrypting the message in a below fashion:
Don't replace the characters in the even places.
Replace the characters in the odd places by their place numbers
. and if they exceed 'z', then again they will start from 'a'.
example: for a message "hello" would be "ieolt" and for message "maya" would be "naba"
import os
import time
import sys
def openfile(filename): # opens file with name 'filename'
    """Open 'filename' in 'a+' (append and read) mode and return the file object."""
    file_to_open = open(filename,'a+')
    return file_to_open
def readfile(filename): # returns a long string with the info of the message in 'filename' file.
time.sleep(0.3)
print "Reading from the file "+filename
reading_file = openfile(filename)
read_msg = reading_file.read()
return read_msg
def decode(msg): # returns decoded message of input message 'msg'.
""" reverse function of encode(msg) """
decoded_message = ""
letters = " abcdefghijklmnopqrstuvwxyz"
time.sleep(0.5)
print " Encoding ...."
print "encoding the message...."
index_of_msg = 0
for char in msg.lower():
if char.isalpha():
if index_of_msg%2 == 0 :
decoded_message += letters[(letters.rfind (char)- (index_of_msg+1))%26] # changed msg.rfind(char) to index_of_msg
else:
decoded_message += char
else:
decoded_message += char
index_of_msg +=1
time.sleep(0.5)
print "decoding completed"
return decoded_message
def encode(msg): # returns encoded message of input message 'msg'.
"""Clean up work must be done here.."""
encoded_message = ""
letters = " abcdefghijklmnopqrstuvwxyz"
time.sleep(0.5)
print " Encoding ...."
print "encoding the message...."
index_of_msg = 0
for char in msg.lower():
if char.isalpha():
if index_of_msg%2 == 0 :
encoded_message += letters[(letters.rfind(char)+ (index_of_msg+1))%26] # changed msg.rfind(char) to index_of_msg
else:
encoded_message += char
else:
encoded_message += char
index_of_msg +=1
time.sleep(0.5)
print "encoding completed"
return encoded_message
def write(msg,filename): # writes the message 'msg' given to it, to the file named 'filename'.
print "Opening the file "+filename
time.sleep(0.4)
file_output = openfile(filename)
print filename + " opened and ready to be written"
time.sleep(0.3)
print "Writing the encoded message to the file "+filename
file_output.write(msg)
file_output.close()
time.sleep(0.4)
print "Writing to the file has completed."
def start(): # Starter main function that incorporates all other functions :)
os.chdir('aaest/')
clear = lambda: os.system('clear')
clear()
print "Hi, Welcome to this Encryption Program. \n"
filename = raw_input("Enter the file name in which you stored the message: ")
print "Opening the file " + filename
time.sleep(0.5)
openfile(filename)
print filename +" opened up and ready, retrieving the message from it."
time.sleep(0.5)
message = readfile(filename)
print "The message of the "+filename+" is retrieved."
time.sleep(0.5)
encoded_msg = encode(message)
time.sleep(0.3)
decoded_msg = decode(encoded_msg)
encoded_file = raw_input("Enter the name of the output file in which encoded message will be saved :")
write(encoded_msg,encoded_file)
decoded_file = raw_input("Enter the name of the output file in which decoded message will be saved :")
write(decoded_msg,decoded_file)
start()
Can anyone please help me with this.
Part of your problem is that your letters strings begin with space rather than 'a'. So if you have a 'y' as the first character of the string, it gets replaced with a space. Then when you try to decode, the space fails your isalpha check and is not replaced.
There are a number of ways this code could be cleaner, but that's the first logical error I see. Unless I'm missing something, letters = "abcdefghijklmnopqrstuvwxyz" should fix that particular error. Or better yet, use string.ascii_lowercase.

Python: 'ascii' codec can't encode character u'\\u2026'

I am trying to use the Bing api in python with the following code:
#!/usr/bin/python
from bingapi import bingapi
import re
import json
import urllib
import cgi
import cgitb
from HTMLParser import HTMLParser
class MLStripper(HTMLParser):
    """HTML parser that collects only the text found between tags."""

    def __init__(self):
        # Run the base initializer (which also resets the parser) rather
        # than calling reset() alone, so all base-class state is set up.
        HTMLParser.__init__(self)
        self.fed = []  # text chunks seen so far

    def handle_data(self, d):
        """Store one chunk of text data reported by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return all collected text joined into one string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of 'html' with all markup removed."""
    s = MLStripper()
    s.feed(html)
    return s.get_data()
def strip_tags2(data):
    """Return 'data' without <...> tags and without the characters &;!#$%^*()."""
    p = re.compile(r'<[^<]*?>')
    q = re.compile(r'[&;!##$%^*()]*')
    data = p.sub('', data)
    return q.sub('', data)
def getUrl(item):
    """Return the 'Url' entry of one search result."""
    return item['Url']
def getContent(item):
    """Return the 'Description' entry of one search result."""
    return item['Description']
def getTitle(item):
    """Return the 'Title' entry of one search result."""
    return item['Title']
def getInfo(qry, siteStr):
    """Query the Bing JSON API and return the list of web results.

    The two arguments are joined with "+" to form the search query, which is
    URL-encoded before being placed in the request URL.
    """
    qryStr = qry + "+" + siteStr
    #qryStr = u"%s" % qryStr.encode('UTF-8')
    query = urllib.urlencode({'q' : qryStr})
    url = 'http://api.bing.net/json.aspx?Appid=<myappid>&Version=2.2&Market=en-US&Query=%s&Sources=web&Web.Count=10&JsonType=raw' % (query)
    search_results = urllib.urlopen(url)
    j = json.loads(search_results.read())
    results = j['SearchResponse']['Web']['Results']
    return results
def updateRecent(qry):
    """Drop the first line of recent.txt and append the query as a new line.

    Queries longer than 50 characters are cut to 50 characters plus '...',
    and markup/special characters are stripped before the query is stored.
    """
    # ``with`` closes the file even if reading or writing fails.
    with open("recent.txt", "r") as f:
        lines = f.readlines()
    lines = lines[1:]
    if len(qry) > 50: #truncate if string too long
        qry = (qry[:50] + '...')
    qry = strip_tags2(qry) #strip out the html if injection try
    lines.append("\n%s" % qry)
    with open("recent.txt", "w") as f:
        f.writelines(lines)
if __name__ == '__main__':
form = cgi.FieldStorage()
qry = form["qry"].value
qry = r'%s' % qry
updateRecent(qry)
siteStr = "(site:answers.yahoo.com OR site:chacha.com OR site:blurtit.com OR site:answers.com OR site:question.com OR site:answerbag.com OR site:stackexchange.com)"
print "Content-type: text/html"
print
header = open("header.html", "r")
contents = header.readlines()
header.close()
for item in contents:
print item
print """
<div id="results">
<center><h1>Results:</h1></center>
"""
for item in getInfo(siteStr, qry):
print "<h3>%s</h3>" % getTitle(item)
print "<br />"
print "%s" % getUrl(item)
print "<br />"
print "<p style=\"color:gray\">%s</p>" % getContent(item)
print "<br />"
print "</div>"
footer = open("footer.html", "r")
contents = footer.readlines()
footer.close()
for thing in contents:
print thing
It prints a few results, and then gives me the following error:
UnicodeEncodeError: 'ascii' codec can't encode character u'\\u2026' in position 72: ordinal not in range(128)
Can someone explain why this is happening? It clearly has something to do with how the URL is getting encoded, but what exactly is wrong? Thanks in advance!
That particular Unicode character is "HORIZONTAL ELLIPSIS". One or more of your getXXXXX() functions are returning Unicode strings, one of which contains a non-ASCII character. I suggest declaring the encoding of your output, for example:
Content-Type: text/html; charset=utf-8
and explicitly encoding your output in that encoding.
We need to know the line number where the exception was thrown, it will be in the backtrace. Anyway, the problem is that you are reading unicode from the files/URLs and then implicitly converting them to US-ASCII, probably in one of the concatenation operations. You should prefix all constant strings with u to indicate that they are unicode strings, like in
u"\n%s" % qry

Categories

Resources