Own implement encoding and decoding base64 files in Python - python

I have a problem with my own implementation of base64 encoding. I have achieved to get the code below. It only works for text files with the English Letters, I suppose. For instance pdf file is encoded and decoded, it differs single characters.
def base64Encode(data):
alphabet = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P","Q","R","S","T","U","V","W","X","Y","Z","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","0","1","2","3","4","5","6","7","8","9","+","/"]
bit_str = ""
base64_str = ""
for char in data:
bin_char = bin(char).lstrip("0b")
bin_char = bin_char.zfill(8)
bit_str += bin_char
brackets = [bit_str[x:x+6] for x in range(0,len(bit_str),6)]
for bracket in brackets:
if(len(bracket) < 6):
bracket = bracket + (6-len(bracket))*"0"
base64_str += alphabet[int(bracket,2)]
# print(brackets[-4:])
#if(bracket[-1:)
#print(len(base64_str))
#if(len(base64_str) != 76):
# base64_str += "="
return base64_str
def base64Decode(text):
alphabet = ["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","0","1","2","3","4","5","6","7","8","9","+","/"]
bit_str = ""
text_str = ""
for char in text:
if char in alphabet:
bin_char = bin(alphabet.index(char)).lstrip("0b")
bin_char = bin_char.zfill(6)
bit_str += bin_char
brackets = [bit_str[x:x+8] for x in range(0,len(bit_str),8)]
for bracket in brackets:
text_str += chr(int(bracket,2))
return text_str.encode("UTF-8")
w = open("encode.txt", "w")
with open("bla.txt", "rb") as f:
byte = f.read(57)
while byte:
w.write(base64Encode(byte))
w.write("\n")
byte = f.read(57)
w.close()
f.close()
w = open("decode.txt", "wb")
with open("encode.txt", "r") as f:
byte = f.read(77)
while byte:
w.write(base64Decode(byte))
byte = f.read(77)
w.close()
f.close()
In my opinion, this line "return text_str.encode (" UTF-8 ")" should be without decoding to UTF-8. However, if you leave only "return text_str", gets error: TypeError: 'str' does not support the buffer interface.
bla.txt:
Phil Mercer reports on Cyclone Pam which has ravaged the Pacific nation of Vanuatu. Video courtesy of YouTube/Isso Nihmei at 350.org
Save the Children's Vanuatu country director Tom Skirrow said on Saturday: "The scene here this morning is complete devastation - houses are destroyed, trees are down, roads are blocked and people are wandering the streets looking for help.
ĄŚĆŹŻÓ
encode.txt
UGhpbCBNZXJjZXIgcmVwb3J0cyBvbiBDeWNsb25lIFBhbSB3aGljaCBoYXMgcmF2YWdlZCB0aGUg
UGFjaWZpYyBuYXRpb24gb2YgVmFudWF0dS4gVmlkZW8gY291cnRlc3kgb2YgWW91VHViZS9Jc3Nv
IE5paG1laSBhdCAzNTAub3JnDQoNClNhdmUgdGhlIENoaWxkcmVuJ3MgVmFudWF0dSBjb3VudHJ5
IGRpcmVjdG9yIFRvbSBTa2lycm93IHNhaWQgb24gU2F0dXJkYXk6ICJUaGUgc2NlbmUgaGVyZSB0
aGlzIG1vcm5pbmcgaXMgY29tcGxldGUgZGV2YXN0YXRpb24gLSBob3VzZXMgYXJlIGRlc3Ryb3ll
ZCwgdHJlZXMgYXJlIGRvd24sIHJvYWRzIGFyZSBibG9ja2VkIGFuZCBwZW9wbGUgYXJlIHdhbmRl
cmluZyB0aGUgc3RyZWV0cyBsb29raW5nIGZvciBoZWxwLg0KDQrEhMWaxIbFucW7w5M
decode.txt
Phil Mercer reports on Cyclone Pam which has ravaged the Pacific nation of Vanuatu. Video courtesy of YouTube/Isso Nihmei at 350.org
Save the Children's Vanuatu country director Tom Skirrow said on Saturday: "The scene here this morning is complete devastation - houses are destroyed, trees are down, roads are blocked and people are wandering the streets looking for help.
ÄÅÄŹŻÃ
The same text encoded by page: http://www.motobit.com/util/base64-decoder-encoder.asp
UGhpbCBNZXJjZXIgcmVwb3J0cyBvbiBDeWNsb25lIFBhbSB3aGljaCBoYXMgcmF2YWdlZCB0aGUg
UGFjaWZpYyBuYXRpb24gb2YgVmFudWF0dS4gVmlkZW8gY291cnRlc3kgb2YgWW91VHViZS9Jc3Nv
IE5paG1laSBhdCAzNTAub3JnDQoNClNhdmUgdGhlIENoaWxkcmVuJ3MgVmFudWF0dSBjb3VudHJ5
IGRpcmVjdG9yIFRvbSBTa2lycm93IHNhaWQgb24gU2F0dXJkYXk6ICJUaGUgc2NlbmUgaGVyZSB0
aGlzIG1vcm5pbmcgaXMgY29tcGxldGUgZGV2YXN0YXRpb24gLSBob3VzZXMgYXJlIGRlc3Ryb3ll
ZCwgdHJlZXMgYXJlIGRvd24sIHJvYWRzIGFyZSBibG9ja2VkIGFuZCBwZW9wbGUgYXJlIHdhbmRl
cmluZyB0aGUgc3RyZWV0cyBsb29raW5nIGZvciBoZWxwLg0KDQrEhMWaxIbFucW7w5M=
It is the same, except "=", which omitted to implement due to the error at the very beginning of the file.
And sample originale file in pdf:
%PDF-1.5
%µµµµ
1 0 obj
<</Type/Catalog/Pages 2 0 R/Lang(pl-PL) /StructTreeRoot 8 0 R/MarkInfo<</Marked true>>>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[ 3 0 R] >>
endobj
3 0 obj
<</Type/Page/Parent 2 0 R/Resources<</Font<</F1 5 0 R>>/ProcSet[/PDF/Text/ImageB/ImageC/ImageI] >>/MediaBox[ 0 0 595.32 841.92] /Contents 4 0 R/Group<</Type/Group/S/Transparency/CS/DeviceRGB>>/Tabs/S/StructParents 0>>
endobj
4 0 obj
<</Filter/FlateDecode/Length 110>>
stream
xœUÌ­
€#ྰï0QËÝ®Èiž?(†kb°hòý«ZD˜4ßÀΨ*;…¡xº ¨#“íªFrÄI!w…˜2ËQ81®D<™ÇS=Ó’léŠ82µ·>^åŒÊO- >[´SÀ
endstream
endobj
5 0 obj
<</Type/Font/Subtype/TrueType/Name/F1/BaseFont/ABCDEE+Calibri/Encoding/WinAnsiEncoding/FontDescriptor 6 0 R/FirstChar 32/LastChar 97/Widths 15 0 R>>
endobj
6 0 obj
<</Type/FontDescriptor/FontName/ABCDEE+Calibri/Flags 32/ItalicAngle 0/Ascent 750/Descent -250/CapHeight 750/AvgWidth 521/MaxWidth 1743/FontWeight 400/XHeight 250/StemV 52/FontBBox[ -503 -250 1240 750] /FontFile2 16 0 R>>
endobj
7 0 obj
And after executing script:
%PDF-1.5
%µµµµ
1 0 obj
<</Type/Catalog/Pages 2 0 R/Lang(pl-PL) /StructTreeRoot 8 0 R/MarkInfo<</Marked true>>>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[ 3 0 R] >>
endobj
3 0 obj
<</Type/Page/Parent 2 0 R/Resources<</Font<</F1 5 0 R>>/ProcSet[/PDF/Text/ImageB/ImageC/ImageI] >>/MediaBox[ 0 0 595.32 841.92] /Contents 4 0 R/Group<</Type/Group/S/Transparency/CS/DeviceRGB>>/Tabs/S/StructParents 0>>
endobj
4 0 obj
<</Filter/FlateDecode/Length 110>>
stream
xUÌ­
#ྰï0QËÝ®Èi?(kb°hòý«ZD4ßÀΨ*;¡xº ¨#íªFrÄI!w2ËQ81®D<ÇS=Ólé82µ·>^åÊO- >[´SÀ
endstream
endobj
5 0 obj
<</Type/Font/Subtype/TrueType/Name/F1/BaseFont/ABCDEE+Calibri/Encoding/WinAnsiEncoding/FontDescriptor 6 0 R/FirstChar 32/LastChar 97/Widths 15 0 R>>
endobj
6 0 obj
<</Type/FontDescriptor/FontName/ABCDEE+Calibri/Flags 32/ItalicAngle 0/Ascent 750/Descent -250/CapHeight 750/AvgWidth 521/MaxWidth 1743/FontWeight 400/XHeight 250/StemV 52/FontBBox[ -503 -250 1240 750] /FontFile2 16 0 R>>
endobj
7 0 obj
The differences are for instance at the beginning of line 15 and 16.
My goal is to load the file and encode it in base64 and then decode and obtain the same file. Fit for use.
I suppose that the error is in the data read or write or encoding. Any suggestions?

My first suggestion is troubleshooting: Determine if you are failing to encode or decode properly or both. Encode the file using a working utility and with your app and compare. Decode a properly encoded file with your app and with a working utility and compare.
Second suggestion: Deal with the data as individual bytes, not text that may be interpreted as UTF-8.
Open the PDF file in binary mode. See Reading binary file in Python and looping over each byte on how to do that. Pass the raw bytes to your base64Encode. Do not use the bin function to convert from string to binary.

I was able to accomplish this task. Replace line .encode("UTF-8") on .encode ("latin-1") and It works at least for pdf files.

I've modified the original code. This works on text, PNG and PDF, I haven't tried other file types, but I expect it will work on them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 16 07:38:19 2019
#author: tracyanne
"""
import os
class Base64():
def __init__(self):
## We only need to do this once
self.b64 = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P","Q","R","S","T","U","V","W","X","Y","Z","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","0","1","2","3","4","5","6","7","8","9","+","/"]
def Encode(self, data):
alphabet = self.b64
bit_str = ""
base64_str = ""
for char in data:
bin_char = bin(char).lstrip("0b")
bin_char = bin_char.zfill(8)
bit_str += bin_char
brackets = [bit_str[x:x+6] for x in range(0,len(bit_str),6)]
for bracket in brackets:
if(len(bracket) < 6):
bracket = bracket + (6-len(bracket))*"0"
base64_str += alphabet[int(bracket,2)]
##Add padding characters to maintain compatibility with forced padding
padding_indicator = len(base64_str) % 4
if padding_indicator == 3:
base64_str += "="
elif padding_indicator == 2:
base64_str += "=="
return base64_str
def Decode(self, text, eof):
alphabet = self.b64
bit_str = ""
text_str = ""
for char in text:
if char in alphabet:
bin_char = bin(alphabet.index(char)).lstrip("0b")
bin_char = bin_char.zfill(6)
bit_str += bin_char
brackets = [bit_str[x:x+8] for x in range(0,len(bit_str),8)]
for bracket in brackets:
## When eof ignore last value in brackets to remove \x00
if eof and brackets[len(brackets) -1] == bracket:
pass
else:
text_str += chr(int(bracket,2))
## encode string as Latin-1 == ISO-8859-1
return text_str.encode("ISO-8859-1")
def base64Encode(self, inFile, outFile):
w = open(outFile, "w")
with open(inFile, "rb") as f:
byte = f.read(57)
while byte:
w.write(self.Encode(byte))
w.write("\n")
byte = f.read(57)
w.close()
f.close()
def base64Decode(self, inFile, outFile):
## Get size of input file for later comparison
fsize = os.path.getsize(inFile)
incsize = 0
eof = False
w = open(outFile, "wb")
with open(inFile, "r") as f:
byte = f.read(77)
while byte:
## keep current dataread and if current data read ==
## input file size set eof True
incsize += len(byte)
if fsize - incsize == 0:
eof = True
## Pass in eof to Decode
w.write(base64.base64Decode(byte, eof))
byte = f.read(77)
w.close()
f.close()

Related

DCA toy cipher for encryption

This is a fuction of encrypion of DCA toy cipher1. I tried so any time but don't know what wrong with my code. Please help me solving it.
SBox = [b'0110', b'0100', b'1100', b'0101', b'0000', b'0111', b'0010', b'1110', b'0001', b'1111', b'0011', b'1101', b'1000', b'1010', b'1001', b'1011']
def dca_tc1_encrypt(plaintext,key):
#Split the 2 bit key into two, 16 bit round keys key_0 = key[:16] key_1 = key[16:]
#XOR the plaintext against k0 xor_key_0 = int(plaintext,2) ^ int(key_0,2)
#get the binary representation and split it into 4 blocks of 4 bits bin_value = f'{bin(xor_key_0)[2:]}'.zfill(16) blocks = [bin_value[:4], bin_value[4:8],bin_value[8:12],bin_value[12:16] ]
#print(blocks) #look at the blocks
#iterate over the blocks and get the value from the SBox look up
sbox = b'' for block in blocks:
sbox = sbox + (SBox[int(block,2)])
#print(sbox) #check the new SBox values
#XOR against k1 ciphertext = int(sbox,2) ^ int(key_1,2) #return the ciphertext as a binary string return f'{bin(ciphertext)[2:]}'.zfill(16)
#Example use plaintext = b'1000101000001011' key = b'01111001101110110101001001110010'
print(f'Starting Plaintext: {plaintext}') ciphertext = dca_tc1_encrypt(plaintext,key) print(f'Ciphertext: {ciphertext}')
The program output should be
Program Output
Starting Plaintext: b'1001001001000101'
Ciphertext: 0001001000010100

Print specific lines from a text file in python

I have a text file which I'm trying to print the lines that don't start with a number but the output is not readable.
This is a part of what my code returns:
{\rtf1\ansi\ansicpg1252\cocoartf1671\cocoasubrtf600
{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
{\colortbl;\red255\green255\blue255;\red4\green4\blue4;\red247\green247\blue247;\red0\green0\blue0;
\red255\green255\blue255;\red77\green77\blue77;}
{\*\expandedcolortbl;;\cssrgb\c1176\c1176\c1176;\cssrgb\c97647\c97647\c97647;\cssrgb\c0\c0\c0;
\cssrgb\c100000\c100000\c100000;\cssrgb\c37647\c37647\c37647;}
\paperw11900\paperh16840\margl1440\margr1440\vieww10800\viewh8400\viewkind0
\deftab720
\pard\pardeftab720\partightenfactor0
\f0\fs26 \cf2 \cb3 Since the start of digital video in 1988, new video formats are developed every year\cf4 \cb5 \
\pard\pardeftab720\partightenfactor0
\cf6 \cb3 00:14\cb5 \
And this is my code:
numbers = ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")
aFile = open("/Users/maira/Desktop/text.rtf")
lines = aFile.readlines()
for line in lines:
if not line.startswith((numbers)):
print(line)
aFile.close()
This is an example of the original text:
Since the start of digital video in 1988, new video formats are developed every year
00:14
in an attempt to provide improvements in quality, file size and video playback.
00:18
The popularity of video continues to grow rapidly, with 78% of people watching at least
00:24
one digital video on one of their devices every single day; However video formats and
00:29
how they work is still a subject of much confusion for most people.
I've seen some questions similar to mine but I can't get to a solution.
I appreciate any advices and if there's also a way of deleting the blank lines in between lines, I'd be very thankful.
Thank you.
I used a very complete function provided on this answer to strip all the rtf text, and after that i use a regex to remove the format numbers HH:MM. Maybe this will help you.
def striprtf(text):
pattern = re.compile(r"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", re.I)
# control words which specify a "destionation".
destinations = frozenset((
'aftncn','aftnsep','aftnsepc','annotation','atnauthor','atndate','atnicn','atnid',
'atnparent','atnref','atntime','atrfend','atrfstart','author','background',
'bkmkend','bkmkstart','blipuid','buptim','category','colorschememapping',
'colortbl','comment','company','creatim','datafield','datastore','defchp','defpap',
'do','doccomm','docvar','dptxbxtext','ebcend','ebcstart','factoidname','falt',
'fchars','ffdeftext','ffentrymcr','ffexitmcr','ffformat','ffhelptext','ffl',
'ffname','ffstattext','field','file','filetbl','fldinst','fldrslt','fldtype',
'fname','fontemb','fontfile','fonttbl','footer','footerf','footerl','footerr',
'footnote','formfield','ftncn','ftnsep','ftnsepc','g','generator','gridtbl',
'header','headerf','headerl','headerr','hl','hlfr','hlinkbase','hlloc','hlsrc',
'hsv','htmltag','info','keycode','keywords','latentstyles','lchars','levelnumbers',
'leveltext','lfolevel','linkval','list','listlevel','listname','listoverride',
'listoverridetable','listpicture','liststylename','listtable','listtext',
'lsdlockedexcept','macc','maccPr','mailmerge','maln','malnScr','manager','margPr',
'mbar','mbarPr','mbaseJc','mbegChr','mborderBox','mborderBoxPr','mbox','mboxPr',
'mchr','mcount','mctrlPr','md','mdeg','mdegHide','mden','mdiff','mdPr','me',
'mendChr','meqArr','meqArrPr','mf','mfName','mfPr','mfunc','mfuncPr','mgroupChr',
'mgroupChrPr','mgrow','mhideBot','mhideLeft','mhideRight','mhideTop','mhtmltag',
'mlim','mlimloc','mlimlow','mlimlowPr','mlimupp','mlimuppPr','mm','mmaddfieldname',
'mmath','mmathPict','mmathPr','mmaxdist','mmc','mmcJc','mmconnectstr',
'mmconnectstrdata','mmcPr','mmcs','mmdatasource','mmheadersource','mmmailsubject',
'mmodso','mmodsofilter','mmodsofldmpdata','mmodsomappedname','mmodsoname',
'mmodsorecipdata','mmodsosort','mmodsosrc','mmodsotable','mmodsoudl',
'mmodsoudldata','mmodsouniquetag','mmPr','mmquery','mmr','mnary','mnaryPr',
'mnoBreak','mnum','mobjDist','moMath','moMathPara','moMathParaPr','mopEmu',
'mphant','mphantPr','mplcHide','mpos','mr','mrad','mradPr','mrPr','msepChr',
'mshow','mshp','msPre','msPrePr','msSub','msSubPr','msSubSup','msSubSupPr','msSup',
'msSupPr','mstrikeBLTR','mstrikeH','mstrikeTLBR','mstrikeV','msub','msubHide',
'msup','msupHide','mtransp','mtype','mvertJc','mvfmf','mvfml','mvtof','mvtol',
'mzeroAsc','mzeroDesc','mzeroWid','nesttableprops','nextfile','nonesttables',
'objalias','objclass','objdata','object','objname','objsect','objtime','oldcprops',
'oldpprops','oldsprops','oldtprops','oleclsid','operator','panose','password',
'passwordhash','pgp','pgptbl','picprop','pict','pn','pnseclvl','pntext','pntxta',
'pntxtb','printim','private','propname','protend','protstart','protusertbl','pxe',
'result','revtbl','revtim','rsidtbl','rxe','shp','shpgrp','shpinst',
'shppict','shprslt','shptxt','sn','sp','staticval','stylesheet','subject','sv',
'svb','tc','template','themedata','title','txe','ud','upr','userprops',
'wgrffmtfilter','windowcaption','writereservation','writereservhash','xe','xform',
'xmlattrname','xmlattrvalue','xmlclose','xmlname','xmlnstbl',
'xmlopen',
))
# Translation of some special characters.
specialchars = {
'par': '\n',
'sect': '\n\n',
'page': '\n\n',
'line': '\n',
'tab': '\t',
'emdash': u'\u2014',
'endash': u'\u2013',
'emspace': u'\u2003',
'enspace': u'\u2002',
'qmspace': u'\u2005',
'bullet': u'\u2022',
'lquote': u'\u2018',
'rquote': u'\u2019',
'ldblquote': u'\201C',
'rdblquote': u'\u201D',
}
stack = []
ignorable = False # Whether this group (and all inside it) are "ignorable".
ucskip = 1 # Number of ASCII characters to skip after a unicode character.
curskip = 0 # Number of ASCII characters left to skip
out = [] # Output buffer.
for match in pattern.finditer(text):
word,arg,hex,char,brace,tchar = match.groups()
if brace:
curskip = 0
if brace == '{':
# Push state
stack.append((ucskip,ignorable))
elif brace == '}':
# Pop state
ucskip,ignorable = stack.pop()
elif char: # \x (not a letter)
curskip = 0
if char == '~':
if not ignorable:
out.append(u'\xA0')
elif char in '{}\\':
if not ignorable:
out.append(char)
elif char == '*':
ignorable = True
elif word: # \foo
curskip = 0
if word in destinations:
ignorable = True
elif ignorable:
pass
elif word in specialchars:
out.append(specialchars[word])
elif word == 'uc':
ucskip = int(arg)
elif word == 'u':
c = int(arg)
if c < 0: c += 0x10000
if c > 127: out.append(unichr(c))
else: out.append(chr(c))
curskip = ucskip
elif hex: # \'xx
if curskip > 0:
curskip -= 1
elif not ignorable:
c = int(hex,16)
if c > 127: out.append(unichr(c))
else: out.append(chr(c))
elif tchar:
if curskip > 0:
curskip -= 1
elif not ignorable:
out.append(tchar)
return ''.join(out)
with open('/Users/maira/Desktop/text.rtf', 'r') as file:
rtf = file.read()
text = striprtf(rtf)
text = re.sub('(0[0-9]|1[0-9]|2[0-3]):[0-5][0-9]', '', text)
print(text)
file.close()

Python Iterating New Data into nested dictionary

I have been working on a Python Role-Playing game and I have a function to import item data from a text file. The text file is structured as follows:
WEAPON 3 sword_of_eventual_obsolescence 6 10 2 0 10
WEAPON 4 dagger_of_bluntness 2 5 3 1 0
WEAPON 5 sword_of_extreme_flimsiness 3 8 3 7 0
The data importing goes like this:
def items_get():
import os
global items
items = {
"weapon":{},
"armour":{},
"potion":{},
"misc":{}
}
file_dir = ( os.getcwd() + '\Code\items.txt' )
file_in = open( file_dir, 'r')
for each_line in file_in:
line = file_in.readline()
line = line.split(' ')
if line[0] == "WEAPON":
weapon_id = line[1]
name = line[2]
attack_min = line[3]
attack_max = line[4]
range = line[5]
weight = line[6]
value = line[7]
weapon_data = {
"name": name.replace('_', ' '),
"atk_min": attack_min,
"atk_max": attack_max,
"rng": range,
"wt": weight,
"val": value,
}
items["weapon"][weapon_id] = {}
items["weapon"][weapon_id].update(weapon_data)
However, when I print items["weapon"], I get this:
{'4': {'wt': '1', 'atk_min': '2', 'atk_max': '5', 'val': '0', 'name': 'dagger of bluntness', 'rng': '3'}}
As you can see, there is only 1 item there. On other occasions I have had two even though I actually have 3 items listed. Why is this happening, and how do I get all 3 items in the dictionary?
Thanks!
:P
EDIT: Here is the data for the potions, in case you were wondering.
elif line.split()[0] == "POTION":
_, id, name, hp_bonus, atk_bonus, range_bonus, ac_bonus, str_bonus, con_bonus, dex_bonus, int_bonus, wis_bonus, cha_bonus, wt, val = line.split()
A healing potion looks like this in the file:
POTION 1 potion_of_healing 20 0 0 0 0 0 0 0 0 0 0.1 2
for each_line in file_in:
line = file_in.readline()
each_line already contains the next line, because iterating through a file-like object (say, with a for loop) causes it to go by lines.
On each iteration of the loop, the file pointer is advanced by one line (file-like objects, though rewindable, keep track of their last-accessed position), and then before anything is done it gets advanced once more by the readline(), so the only line that doesn't get skipped entirely is the middle one (4).
To fix this, use the loop variable (each_line) within the loop body directly and nix the file_in.readline().
#noname1014, I know you know this but I want to point out few of the problems with your code (that may occur in some special cases, e.g if you change your file name items.txt to new_items.txt, rare_fruits.txt etc.) and some suggestions.
Do not use \ as path separators in Windows. Use \\ otherwise you may get into problems. \Code\time_items.txt will be evaluated as \Code imeitems.txt because \t is TAB here.
Using \ only works in few cases if \ followed by any character A, p, n, t, ", ' etc. does not construct escape sequences like \n, \t, \f, \r, \b etc.
Have a look at the below example for clarification.
>>> import os
>>>
>>> print(os.getcwd() + '\Code\timeitems.txt')
E:\Users\Rishikesh\Python3\Practice\Code imeitems.txt
>>>
>>> print(os.getcwd() + '\Code\\timeitems.txt')
E:\Users\Rishikesh\Python3\Practice\Code\timeitems.txt
>>>
>>> print(os.getcwd() + '\Code\newitems.txt')
E:\Users\Rishikesh\Python3\Practice\Code
ewitems.txt
>>>
>>> print(os.getcwd() + '\\Code\\newitems.txt')
E:\Users\Rishikesh\Python3\Practice\Code\newitems.txt
>>>
>>> # Do not use it as it may work only in some cases if \ followed by any character does not construct escape sequences.
...
>>> os.getcwd() + '\Code\items.txt'
'E:\\Users\\Rishikesh\\Python3\\Practice\\Code\\items.txt'
>>>
>>> # Use \\ as path separators
...
>>> os.getcwd() + '\\Code\\items.txt'
'E:\\Users\\Rishikesh\\Python3\\Practice\\Code\\items.txt'
>>>
>>> print(os.getcwd() + '\Code\items.txt')
E:\Users\Rishikesh\Python3\Practice\Code\items.txt
>>>
>>> print(os.getcwd() + '\\Code\\items.txt')
E:\Users\Rishikesh\Python3\Practice\Code\items.txt
>>>
If your dictionay is huge and you are facing any issue while looking into its items, pretty it using json module, it has a function called dumps() which is used to pretty print list and dictionary objects.
It is ok to place import statements inside function but placing it on the top is a Pythonic way (https://www.python.org/dev/peps/pep-0008/#imports). It is good for large applications with multiple functions in the same module.
Use with statement for opening files, in this case you do not need to close files.
Your code is fine, I have just modified it as below.
import os
global items
import json
def items_get():
items = {
"weapon":{},
"armour":{},
"potion":{},
"misc":{}
}
# Do not use \ as path separators in Windows. Use \\ (\t, \n, \' have speacial meanings)
file_dir = ( os.getcwd() + '\\Code\\items.txt' )
with open( file_dir, 'r') as file_in:
lines = file_in.readlines();
# ['WEAPON 3 sword_of_eventual_obsolescence 6 10 2 0 10\n', 'WEAPON 4 dagger_of_bluntness 2 5 3 1 0\n', 'WEAPON 5 sword_of_extreme_flimsiness 3 8 3 7 0']
for each_line in lines:
# Use strip() to remove any leading/trailing whitespaces (\n, \t, spaces etc.)
line = each_line.strip().split(' ');
if line[0] == "WEAPON":
weapon_id = line[1]
name = line[2]
attack_min = line[3]
attack_max = line[4]
range = line[5]
weight = line[6]
value = line[7]
weapon_data = {
"name": name.replace('_', ' '),
"atk_min": attack_min,
"atk_max": attack_max,
"rng": range,
"wt": weight,
"val": value,
}
items["weapon"][weapon_id] = {}
items["weapon"][weapon_id].update(weapon_data)
return items
# Calling items_get() to get dictionary
items = items_get();
# Pretty printing dictionary using json.dumps()
print(json.dumps(items, indent=4))
» Output
{
"weapon": {
"3": {
"name": "sword of eventual obsolescence",
"atk_min": "6",
"atk_max": "10",
"rng": "2",
"wt": "0",
"val": "10"
},
"4": {
"name": "dagger of bluntness",
"atk_min": "2",
"atk_max": "5",
"rng": "3",
"wt": "1",
"val": "0"
},
"5": {
"name": "sword of extreme flimsiness",
"atk_min": "3",
"atk_max": "8",
"rng": "3",
"wt": "7",
"val": "0"
}
},
"armour": {},
"potion": {},
"misc": {}
}

Biopython translate() error

I have a file that looks as so:
Type Variant_class ACC_NUM dbsnp genomic_coordinates_hg18 genomic_coordinates_hg19 HGVS_cdna HGVS_protein gene disease sequence_context_hg18 sequence_context_hg19 codon_change codon_number intron_number site location location_reference_point author journal vol page year pmid entrezid sift_score sift_prediction mutpred_score
1 DM CM920001 rs1800433 null chr12:9232351:- NM_000014.4 NP_000005.2:p.C972Y A2M Chronicobstructivepulmonarydisease null CACAAAATCTTCTCCAGATGCCCTATGGCT[G/A]TGGAGAGCAGAATATGGTCCTCTTTGCTCC TGT TAT 972 null null 2 null Poller HUMGENET 88 313 1992 1370808 2 0 DAMAGING 0.594315245478036
1 DM CM004784 rs74315453 null chr22:43089410:- NM_017436.4 NP_059132.1:p.M183K A4GALT Pksynthasedeficiency(pphenotype) null TGCTCTCCGACGCCTCCAGGATCGCACTCA[T/A]GTGGAAGTTCGGCGGCATCTACCTGGACAC ATG AAG 183 null null 2 null Steffensen JBC 275 16723 2000 10747952 53947 0 DAMAGING 0.787878787878788
I want to translate the information from column 13 and 14 to their corresponding amino acids. Here is the script that I've generated:
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
InFile = open("disease_mut_splitfinal.txt", 'rU')
InFile.readline()
OriginalSeq_list = []
MutSeq_list = []
import csv
with open("disease_mut_splitfinal.txt") as f:
reader = csv.DictReader(f, delimiter= "\t")
for row in reader:
OriginalSeq = row['codon_change']
MutSeq = row['codon_number']
region = row["genomic_coordinates_hg19"]
gene = row["gene"]
OriginalSeq_list.append(OriginalSeq)
MutSeq_list.append(MutSeq)
OutputFileName = "Translated.txt"
OutputFile = open(OutputFileName, 'w')
OutputFile.write(''+region+'\t'+gene+'\n')
for i in range(0, len(OriginalSeq_list)):
OrigSeq = OriginalSeq_list[i]
MutSEQ = MutSeq_list[i]
print OrigSeq
translated_original = OrigSeq.translate()
translated_mut= MutSEQ.translate()
OutputFile.write("\n" + OriginalSeq_list[i]+ "\t" + str(translated_original) + "\t" +MutSeq_list[i] + "\t" + str(translated_mut)+ "\n")
However, I keep getting this error:
TypeError: translate expected at least 1 arguments, got 0
I'm kind of at a loss for what I'm doing wrong. Any suggestions?
https://www.dropbox.com/s/cd8chtacj3glb8d/disease_mut_splitfinal.txt?dl=0
(File should still be downloadable even if you don't have a dropbox)
You are using the string method "translate" instead of the biopython seq object method translate, which is what I assume you want to do. You need to convert the string into a seq object and then translate that. Try
from Bio import Seq
OrigSeq = Seq.Seq(OriginalSeq_list[i])
translated_original = OrigSeq.translate()
Alternatively
from Bio.Seq import Seq
OrigSeq = Seq(OriginalSeq_list[i])
translated_original = OrigSeq.translate()

how to make return values of raw_input() hexidecimal in python?

python returns the values from raw_iput as strings. i want those string converted as hexidecimal characters. So:
example = '\x05\x06\x40\x00\x02\x05'
tx = raw_input("\nTX: ") #user enters 05 06 40 00 02 05
what can i do that tx == example?
my code so far:
import base64
import serial
import crcmod
import binascii
s_port = 'COM1'
b_rate = 2400
#method for reading incoming bytes on serial
def read_serial(ser):
buf = ''
while True:
inp = ser.read(size=1) #read a byte
buf = buf + inp #accumalate the response
if '\xff' == inp: #if the incoming byte is 0xff
print buf.encode("hex") # never here
break
return buf.encode("hex")
#method to calc the checksum
def calc_crc(hstr):
crc16 = crcmod.predefined.mkCrcFun('crc-16')
hstr = hstr.replace(' ','')
data = base64.b16decode(hstr)
chsum = hex(crc16(data))
return chsum
#create a serial opening
ser = serial.Serial(
port=s_port,
baudrate=b_rate,
timeout=0.1
)
while True:
example = '\x05\x06\x40\x00\x02\x05\xF6\x5C' #last 2 bytes are CRC
tx = raw_input("\nTX: ") #user enters 05 06 40 00 02 05
crc = calc_crc(tx) #checksum is calculated as 0xf65c, correct
tx = binascii.hexlify(tx.replace(' ', '')) #convert ascii string into hex as is but how???????????
print tx #gives me 303530363430303030323035
cmd = tx + crc # concatenate tx and crc so the command is complete
ser.write(cmd)
rx = read_serial(ser)
print "RX: " + str(rx)
With the following one liner I get True for ==example:
''.join([chr(int(x,16)) for x in tx.split()])
The long form is:
Split the input by space and and make a list by iterating over the splitted input and convert every number in the input to a int with respect to base 16 and the resulting int to the respective character with chr. Finally join the list of characters together to a string.
Although OP uses Python 2.x, in Python 3 there is a built-in method bytes.fromhex to do this:
example = b'\x05\x06\x40\x00\x02\x05'
tx = input("\nTX: ")
result = bytes.fromhex(tx)
assert result == example

Categories

Resources