Python 2.7 split and .col concatenate - python

I am using Python 2.7.
I am trying to extract the field names of an array column using Python.
The array column is defined as shown below:
`col` array<struct< columnname:string,columnname1:int,columnname2:decimal(10,0),
columnname3:decimal(9,2)>>
What I tried so far:
import re
# NOTE(review): the post targets Python 2.7, where input() evaluates what is
# typed; raw_input() would return the text unchanged. The name `str` also
# shadows the builtin from here on.
str=input("enter any string:")
# Splitting on ',' also cuts inside "decimal(10,0)" and "decimal(9,2)", which
# produces pieces such as "0)" that hold no column name.
fields=str.split(",")
for x in fields:
# name[0] is the text before the first ':' (the whole piece if it has no ':').
name=x.split(":")
seminame=name[0]+','
# find()/rfind() return -1 when the piece contains no back-quote, so for those
# pieces the slice below is seminame[0:-1].
firstname=seminame.find('`')
lastname=seminame.rfind('`')
fullname=seminame[(firstname+1):lastname]
# Every replace() starts again from fullname, so replacename1..replacename4
# are never used; only the '0' removal in replacename5 reaches finalname.
replacename1=fullname.replace(')', '')
replacename2=fullname.replace('2', '')
replacename3=fullname.replace('9', '')
replacename4=fullname.replace('10', '')
replacename5=fullname.replace('0', '')
# The prefix is a bare '.', not the back-quoted column name.
finalname='.'+replacename5
print(finalname)
Input:
'`col` array<struct< columnname:string,columnname1:int,columnname2:decimal(10,0),
columnname3:decimal(9,2)>>'
I want the output as
Actual output
.col,
.columnname1,
.columnname2,
.),
Expected output
col.columnname,
col.columnname1,
col.columnname2,
col.columnname3

Why not use re to do the same?
import re


def qualified_column_names(ddl):
    """Return ``column.field`` names found in a Hive-style column definition.

    ``ddl`` is text such as ``"`col` array<struct< a:string,b:int>>"``. The
    back-quoted identifier is used as the column name and every word directly
    followed by ``:`` is treated as a struct field.

    Returns a list of ``"column.field"`` strings in order of appearance; the
    list is empty when the text contains no ``name:`` pairs.
    """
    # Capture the identifier between the back-quotes directly, so no second
    # pass is needed to strip the quotes.
    column = "".join(re.findall(r"`(\w+)`", ddl))
    # The capture group already excludes the ':' itself.
    return ["%s.%s" % (column, field) for field in re.findall(r"(\w+):", ddl)]


ddl = "'`col` array<struct< columnname:string,columnname1:int,columnname2:decimal(10,0),columnname3:decimal(9,2)>>'"
for qualified in qualified_column_names(ddl):
    # A parenthesised single-argument print works on Python 2.7 and Python 3.
    print("%s," % qualified)
Output :
col.columnname,
col.columnname1,
col.columnname2,
col.columnname3,
To read from file you can use open(filename,mode) method
import re


def print_column_names(path="test.txt"):
    """Print one ``column.field,`` line per struct field defined in *path*.

    The file is expected to hold a column definition such as
    ``"`col` array<struct< a:string,b:int>>"``. The back-quoted identifier is
    the column name; every word directly followed by ``:`` is a field.

    Returns the list of lines that were printed (without newlines).
    Raises IOError/OSError if *path* cannot be opened.
    """
    with open(path, "r") as handle:
        ddl = handle.read()
    column = "".join(re.findall(r"`(\w+)`", ddl))
    lines = ["%s.%s," % (column, field) for field in re.findall(r"(\w+):", ddl)]
    for line in lines:
        # A parenthesised single-argument print works on Python 2.7 and 3.
        print(line)
    return lines


if __name__ == "__main__":
    print_column_names()
To Write to File:
import re


def write_column_names(src="test.txt", dst="output.dat"):
    """Append one ``column.field,`` line per struct field in *src* to *dst*.

    *src* is expected to hold a column definition such as
    ``"`col` array<struct< a:string,b:int>>"``. *dst* is opened in append
    mode, so repeated calls add to the file instead of replacing it.

    Returns the list of lines written (without newlines).
    Raises IOError/OSError if either file cannot be opened.
    """
    with open(src, "r") as reader:
        ddl = reader.read()
    column = "".join(re.findall(r"`(\w+)`", ddl))
    lines = ["%s.%s," % (column, field) for field in re.findall(r"(\w+):", ddl)]
    # The with-blocks close both files; no explicit close() calls are needed.
    with open(dst, "a") as writer:
        writer.writelines(line + "\n" for line in lines)
    return lines


if __name__ == "__main__":
    write_column_names()

Related

How to get hex value according to skinid

I want to find and get hex value keyword from SkinId input.
Download example file for read by myself : Example file
import os
import re

ACTOR_INFO_PATH = "infos/116_JingKe_actorinfo.bytes"

# One entry runs from the b'JTPri' marker to the first 'LOD<n>' or 'Show<n>'
# suffix. The earlier pattern put a literal 'LOD' after a (?=LOD|Show)
# lookahead, so an entry could never end in 'Show'.
# NOTE(review): assumes every entry of interest ends in LOD<n> or Show<n>, as
# in the desired output; not verified against the real data file.
ENTRY_PATTERN = re.compile(rb"JTPri.*?(?:LOD|Show)\d*", re.DOTALL)


def find_keyword(skinId, path=ACTOR_INFO_PATH):
    """Print and return the entries in *path* that mention the given skin.

    Args:
        skinId: skin identifier as typed by the user, e.g. ``"11614"``.
        path: binary file to scan; defaults to the JingKe actor-info file.

    Returns:
        A list of ``bytes`` objects, one per matching entry, in file order.
        Each was matched by ENTRY_PATTERN and contains
        ``b"116_JingKe/<skinId>"``.

    Raises:
        IOError/OSError if *path* cannot be opened.
    """
    key_word = f"116_JingKe/{skinId}".encode('utf-8')
    with open(path, "rb") as f:
        byte_string = f.read()
    matches = [entry for entry in ENTRY_PATTERN.findall(byte_string)
               if key_word in entry]
    for match in matches:
        print(match)
    return matches


def main():
    """Prompt for skin ids until the user enters 'x'."""
    os.system('color 4')
    while True:
        skinId = input(" >>> SKIN ID :")
        # Check the exit sentinel before searching, so 'x' is not looked up.
        if skinId == 'x':
            break
        find_keyword(skinId)
        os.system('pause')
        os.system('cls')


if __name__ == "__main__":
    main()
Result : https://i.stack.imgur.com/zOGNL.png
Desired result :
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD1'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD2'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringC\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_LOD3'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show1'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show2'
b'JTPri\x19\x00\x00\x00\x08\x00\x00\x00TypeSystem.StringD\x00\x00\x00\x05\x00\x00\x00VPrefab_Characters/Prefab_Hero/116_JingKe/11614_JingKe_Show3'

How can I find the largest number from the given string in Python?

I have two strings, e.g. 'This is a test as146634546576 string 12312523' and 'This is a test as576 string 12344612523'.
Now I want to print the largest number in each string, i.e. 146634546576 and 12344612523 respectively. I have written the following code, but it prints 146634546576 and 576, whereas the second result should be 12344612523 instead of 576!
# Question's attempt: scan `text` for runs of digits and return the largest.
# NOTE(review): indentation was lost in this copy, so the nesting described
# in the comments below is inferred from the statements themselves.
def findLargestNumber(text):
# `front` is the index where the current digit run started, or -1 when the
# scan is not inside a run.
front = -1
li = []
li1 = []
for i in range(len(text)):
if front == -1:
if text[i].isdigit():
front = i
else:
continue
else:
if text[i].isdigit():
continue
else:
# A run is only recorded when a non-digit character follows it, so a number
# that ends the string is never appended. That is consistent with the
# reported result of 576 for the second sample. The slice also includes the
# non-digit at index i; int() tolerates that only when it is whitespace.
li.append(int(text[front:i+1]))
front = -1
# NOTE(review): everything after this return looks unreachable; confirm
# against the original indentation.
return max(li)
#print max(li)
# int(w) would raise ValueError for any non-numeric word such as 'This'.
for w in text.split():
li1.append(int(w))
return max(li1)
#print max(li1)
if max(li)>max(li1):
return max(li)
else:
return max(li1)
print findLargestNumber('This is a test as146634546576 string 12312523')
print findLargestNumber('This is a test as576 string 12344612523')
Use max() with re.findall:
import re
a = 'This is a test as576 string 12344612523'
print(max(map(int, re.findall(r'\d+', a))))
# 12344612523
import re
a = 'This is a test as146634546576 string 12312523'
b = 'This is a test as576 string 12344612523'
num_in_a = re.findall(r'[\d]+', a)
num_in_b = re.findall(r'[\d]+', b)
print(max(map(int, num_in_a)))
print(max(map(int, num_in_b)))
Output:
146634546576
12344612523
import re

# Compiled once at import; matches each maximal run of decimal digits.
pa = re.compile(r'(\d+)')


def findLargestNumber(text):
    """Return the largest run of decimal digits in *text* as an int.

    Digit runs embedded in words count too, so ``'as576'`` contributes 576.
    The value is returned rather than printed, so the caller decides what to
    do with it.

    Raises:
        ValueError: if *text* contains no digits at all.
    """
    numbers = [int(digits) for digits in pa.findall(text)]
    if not numbers:
        # max() on an empty list raises a ValueError that does not mention
        # the input; raise one that does.
        raise ValueError("no digits found in %r" % (text,))
    return max(numbers)


print(findLargestNumber('This is a test as576 string 12344612523'))
print(findLargestNumber('This is a test as146634546576 string 12312523'))

Python : How to translate?

The program should work like this: when the user inputs "8#15#23###23#1#19###9#20",
the output should be "HOW WAS IT".
However, it does not work: the space (###) is not decoded.
enter code here
ABSTRACT ={"A":"1","B":"2","C":"3","D":"4","E":"5","F":"6","G":"7","H":"8","I":"9", "J":"10","K":"11","L":"12","M":"13","N":"14","O":"15","P":"16","Q":"17","R":"18","S":"19","T":"20","U":"21","V":"22","W":"23", "X":"24","Y":"25","Z":"26",
" ":"###","":"#" }
ABSTRACT_SHIFTED = {value:key for key,value in ABSTRACT.items()}
# Question's attempt: decode a string of numeric codes back into letters via
# ABSTRACT_SHIFTED (the code -> character table built above).
def from_abstract(s):
result = ''
# Splits on '*', but the sample input separates codes with '#', so the whole
# input arrives here as a single "word".
for word in s.split('*'):
# .get() without a default returns None for an unknown word, and adding None
# to a str raises TypeError.
result = result +ABSTRACT_SHIFTED.get(word)
return result
This would do the trick:
#!/usr/bin/env python


def decode_message(encoded):
    """Decode a '#'-separated string of letter codes (1-26) into text.

    A single ``#`` separates two letters; ``###`` stands for a space between
    words.

    Raises:
        ValueError: if a field between separators is not an integer.
    """
    # Collapsing "###" to "##" leaves exactly one empty field per space once
    # the string is split on the single-'#' separator.
    fields = encoded.replace("###", "##").split("#")
    # chr(n + 64) maps 1 -> 'A' ... 26 -> 'Z'.
    return "".join(" " if field == "" else chr(int(field) + 64)
                   for field in fields)


InputString = "8#15#23###23#1#19###9#20"
DecodedMessage = decode_message(InputString)
print(DecodedMessage)
Prints:
HOW WAS IT
you can also use a regex
import re

replacer = {"A": "1", "B": "2", "C": "3", "D": "4", "E": "5", "F": "6",
            "G": "7", "H": "8", "I": "9", "J": "10", "K": "11", "L": "12",
            "M": "13", "N": "14", "O": "15", "P": "16", "Q": "17", "R": "18",
            "S": "19", "T": "20", "U": "21", "V": "22", "W": "23", "X": "24",
            "Y": "25", "Z": "26",
            " ": "###", "": "#"}
# code -> character. Not named `reversed`, so the builtin stays usable.
decoder = {value: key for key, value in replacer.items()}

target = '8#15#23###23#1#19###9#20'

# Regex alternation takes the first alternative that matches, so longer codes
# must come first: '###' before '#', '15' before '1'. Sorting by length makes
# that explicit instead of relying on dict order, and re.escape keeps every
# code a literal.
pattern = '|'.join(re.escape(code)
                   for code in sorted(decoder, key=len, reverse=True))


def repl(match):
    """Return the character for the code that was matched."""
    return decoder[match.group(0)]


decoded = re.sub(pattern, repl, target)
print(decoded)
And prints:
HOW WAS IT
With a couple minimal changes to your code it works.
1) split on '#', not '*'
2) retrieve ' ' by default if a match isn't found
3) use '##' instead of '###'
def from_abstract(s):
    """Decode a '#'-separated code string using ABSTRACT_SHIFTED.

    "###" (a space) is first collapsed to "##", so splitting on '#' yields one
    empty field per space. An empty field is not a key of ABSTRACT_SHIFTED and
    therefore falls back to the ' ' default, as does any unknown code.
    """
    codes = s.replace('###', '##').split('#')
    return ''.join(ABSTRACT_SHIFTED.get(code, ' ') for code in codes)
Swap the key-value pairs of ABSTRACT and use simple split + join on input
ip = "8#15#23###23#1#19###9#20"
# Invert the table so that it maps code -> character.
ABSTRACT = dict((v, k) for k, v in ABSTRACT.items())
# "###" splits into two empty fields, each decoded as ' ' by the default, so
# every word gap comes out as two spaces; collapse them to one.
''.join(ABSTRACT.get(i, ' ') for i in ip.split('#')).replace('  ', ' ')
#'HOW WAS IT'
The biggest challenge here is that "#" is used as a token separator and as the space character, you have to know the context to tell which you've got at any given time, and that makes it difficult to simply split the string. So write a simple parser. This one will accept anything as the first character in a token and then grab everything until it sees the next "#".
ABSTRACT = {"A": "1", "B": "2", "C": "3", "D": "4", "E": "5", "F": "6",
            "G": "7", "H": "8", "I": "9", "J": "10", "K": "11", "L": "12",
            "M": "13", "N": "14", "O": "15", "P": "16", "Q": "17", "R": "18",
            "S": "19", "T": "20", "U": "21", "V": "22", "W": "23", "X": "24",
            "Y": "25", "Z": "26",
            " ": "###", "": "#"}
# code -> character
ABSTRACT_SHIFTED = {value: key for key, value in ABSTRACT.items()}
user_input = "8#15#23###23#1#19###9#20"


def from_abstract(s):
    """Decode a string of '#'-separated codes into text.

    The input is scanned left to right:
    * "###" is the code for a space,
    * any other single "#" is a separator and produces nothing,
    * everything else up to the next "#" (or the end of the string) is one
      token, looked up in ABSTRACT_SHIFTED; unknown tokens decode to ' '.

    Returns the decoded string ('' for empty input).
    """
    result = []
    pos = 0
    while pos < len(s):
        if s.startswith("###", pos):
            result.append(ABSTRACT_SHIFTED["###"])
            pos += 3
        elif s[pos] == "#":
            pos += 1
        else:
            end = s.find("#", pos)
            # The last token has no terminating '#', so take it through the
            # end of the string. Stopping at len(s) - 1 would drop its final
            # character.
            if end == -1:
                end = len(s)
            result.append(ABSTRACT_SHIFTED.get(s[pos:end], ' '))
            pos = end
    return ''.join(result)


print(from_abstract(user_input))

Python: split row with multiple delimiters and return in specific format

I have a txt file with the following tuple format
ABC-01 name1,10
DEF-02 name2,11
GHI-03 name3,12
JKH-04 name4,13
I may not be able to use import re. Need to do without re.
I need to split the tuples at the delimiters(ABC-01 and others are one word and I need to keep the hyphen). My output needs to be as follows
Format of the needed result
Out[]:
[(u'name1', u'ABC-01 10'),
(u'name2', u'DEF-02 11'),
(u'name3', u'GHI-03 12'),
(u'name4', u'JKH-04 13')]
Here's what I have tried till now and the output I get
Solution 1:
def split_func(line):
line_mod = line.split(' ')
line_mod1 = line_mod.split(',')
print line_mod1
Result
Attribute Error : list object has no attribute split
Solution 2:
def split_func(line):
line_mod = line.split(' ')
a,b,c = str(line_mod).split(',')
return (b,a + " " + c)
Result
[(" u'name1", "[u'ABC-01' 10]"),
(" u'name2", "[u'DEF-02' 11]"),
(" u'name3", "[u'GHI-03' 12]"),
(" u'name4", "[u'JKL-04' 13]")]
How can I get the exact format that I am trying to get?
Here is a re example below.
import re

# One or more whitespace characters and/or commas between fields.
_FIELD_SEPARATOR = re.compile(r"[\s,]+")


def main():
    """Read test.txt and print the reformatted rows as a list of tuples."""
    result = []
    with open("test.txt") as f:
        for line in f:
            line = line.strip()
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of failing to unpack them.
            if line:
                result.append(split_func(line))
    print(result)


def split_func(line):
    """Turn ``"ABC-01 name1,10"`` into ``("name1", "ABC-01 10")``.

    Raises:
        ValueError: if the line does not contain exactly three fields.
    """
    a, b, c = _FIELD_SEPARATOR.split(line.strip())
    return b, a + " " + c


if __name__ == '__main__':
    main()
OR
Here is one without re
def main():
    """Read test.txt and print the reformatted rows as a list of tuples."""
    result = []
    with open("test.txt") as f:
        for line in f:
            line = line.strip()
            # Skip blank lines instead of failing to unpack them.
            if line:
                result.append(split_func(line))
    print(result)


def split_func(line):
    """Turn ``"ABC-01 name1,10"`` into ``("name1", "ABC-01 10")`` without re.

    Raises:
        ValueError: if the line lacks the whitespace or the comma separator.
    """
    # split(None, 1) splits on any run of whitespace, so repeated spaces or a
    # tab between the code and the name do not break the unpacking.
    a, b = line.strip().split(None, 1)
    b, c = b.split(',', 1)
    return b, a + " " + c


if __name__ == '__main__':
    main()
With the output looking like this
[('name1', 'ABC-01 10'), ('name2', 'DEF-02 11'), ('name3', 'GHI-03 12'), ('name4', 'JKH-04 13')]
You can do something like
def split_func(line):
    """Turn ``"ABC-01 name1,10"`` into ``("name1", "ABC-01 10")``.

    Raises:
        ValueError: if the line lacks the whitespace or the comma separator.
    """
    # Split once on any run of whitespace so extra spaces or tabs are
    # tolerated, then once on the comma.
    a, b = line.split(None, 1)
    c, d = b.split(',', 1)
    # strip() drops a trailing newline when the raw file line is passed in.
    return c, ' '.join([a, d.strip()])
Your Solution 1 does not work because split() returns a list and you can't use split() on a list.
For Solution2
x = ['ab', 'cd']
str(x) gives "['ab', 'cd']"
What you need is join() function.

Regular expression for this function

I want to simplify this function, using regular expressions.
the sample input can be
text =' At&T, " < I am > , At&T so < < & & '
My code:
def replaceentity(text):
    """Escape ``&``, ``<`` and ``>`` in *text* without double-escaping entities.

    Steps:
    1. Numeric entities listed in ``replace_values`` are rewritten to their
       named form.
    2. Every whitespace-delimited word that already is one of the known
       entities is swapped for a unique placeholder.
    3. The remaining text is escaped (quotes are left alone).
    4. The placeholders are swapped back for the original entities.

    The intermediate and final text are printed, and the final text is also
    returned.
    """
    import uuid
    try:
        # cgi.escape was removed in Python 3.8; html.escape replaces it.
        from html import escape
    except ImportError:  # Python 2
        from cgi import escape

    # NOTE(review): the entity strings below were reconstructed. The copy
    # this came from had them HTML-decoded ('&':'&', '"': """), which is not
    # valid Python. Confirm replace_values against the original post.
    invalid_chars_map = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;'}
    replace_values = {'&#60;': '&lt;', '&#62;': '&gt;'}
    replaced_dict = {}

    for key, value in replace_values.items():
        text = text.replace(key, value)
    print("after replace >>>>>> " + text)

    known_entities = set(invalid_chars_map.values())
    for word in text.split():
        if word in known_entities:
            print(word)
            # A UUID contains no character that escape() touches, so it
            # survives step 3 unchanged.
            uid = str(uuid.uuid4())
            text = text.replace(word, uid)
            replaced_dict[uid] = word

    # quote=False keeps the old cgi.escape(text) default of not escaping
    # quote characters.
    text = escape(text, quote=False)
    for uid, entity in replaced_dict.items():
        text = text.replace(uid, entity)
    print(text)
    return text
Is this what you want?
>>> from cgi import escape
>>> escaped = escape("""'At&T, " < I am > , At&T so < &lt """)
>>> escaped
'\'At&T, " < I am > , At&T so &#60; &lt '

Categories

Resources