How to read a specific part of a text file (Py 3x) - python

Other questions don't seem to be getting answered or are not getting answered for Python. I'm trying to get it to find the keyword "name", set the position to there, then set a variable to that specific line, and then have it use only that piece of text as a variable. In shorter terms, I'm trying to locate a variable in the .txt file based on "name" or "HP" which will always be there.
I hope that makes sense...
I've tried to use different variables like currentplace instead of namePlace but neither works.
import os
def savetest():
save = open("nametest_text.txt", "r")
print("Do you have a save?")
conf = input(": ")
if conf == "y" or conf == "Y" or conf == "Yes" or conf == "yes":
text = save.read()
namePlace = text.find("name")
currentText = namePlace + 7
save.seek(namePlace)
nameLine = save.readline()
username = nameLine[currentText:len(nameLine)]
print(username)
hpPlace = text.find("HP")
currentText = hpPlace + 5
save.seek(hpPlace)
hpLine = save.readline()
playerHP = hpLine[currentText:len(hpLine)]
print(playerHP)
os.system("pause")
save.close()
savetest()
My text file is simply:
name = Wubzy
HP = 100
I want it to print out whatever is put after the equals sign at name and the same for HP, but not name and HP itself.
So it should just print
Wubzy
100
Press any key to continue . . .
But it instead prints
Wubzy
Press any key to continue . . .

This looks like a good job for a regex. Regexes can match and capture patterns in text, which seems to be exactly what you are trying to do.
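For example, the regex ^name\s*=\s*(\w+)$ will match lines that have the exact text "name", followed by 0 or more whitespace characters, an '=', and then another 0 or more whitespace characters, then one or more word characters (letters, digits or underscores). It will capture the word group at the end. Note that ^ and $ match at the start and end of each line only when the pattern is compiled with the re.MULTILINE flag; without it they match only at the start and end of the whole text.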
The regex ^HP\s*=\s*(\d+)$ will match lines that have the exact text "HP", followed by 0 or more whitespace characters, an '=', and then another 0 or more whitespace characters then one or more digits. It will capture the number group at the end.
# This is the regex library
import re
# This might be easier to use if you're getting more information in the future.
# Each pattern is searched against the whole file contents, so re.MULTILINE
# is required: without it ^ and $ only match at the very start and end of the
# text, and a key on any other line of the file is never found.
reg_dict = {
    "name": re.compile(r"^name\s*=\s*(\w+)$", re.MULTILINE),
    "HP": re.compile(r"^HP\s*=\s*(\d+)$", re.MULTILINE),
}
def savetest():
    """Ask whether a save exists and, if so, print the saved name and HP.

    Reads "nametest_text.txt" and extracts both values with the compiled
    patterns stored in ``reg_dict``. If either value is missing from the
    file, a message is printed and the function returns early.
    """
    print("Do you have a save?")
    conf = input(": ")
    # Instead of checking each spelling individually, normalise the case and
    # check whether conf is within a much smaller set of valid answers.
    if conf.lower() in ("y", "yes"):
        # The with block closes the file as soon as it has been read, so the
        # early returns below cannot leave the handle open.
        with open("nametest_text.txt", "r") as save:
            text = save.read()

        # Find the name.
        # .search returns the first match in the text, or None if there are
        # no occurrences.
        match = reg_dict["name"].search(text)
        if match is None:
            print("The text file does not contain a username.")
            return
        # With match groups, group(0) is the entire match and group(1) is
        # what was captured in the first set of parentheses.
        username = match.group(1)
        print(username)

        # Find the HP.
        match = reg_dict["HP"].search(text)
        if match is None:
            print("The text file does not contain a HP.")
            return
        player_hp = match.group(1)
        print(player_hp)

    # Using system calls to pause output is not a great idea for a variety
    # of reasons, such as cross-OS compatibility, so a plain input() is used
    # instead of os.system("pause").
    input("Press enter to continue...")
savetest()

Use a regex to extract based on a pattern:
'(?:name|HP) = (.*)'
This captures anything that follows an equal to sign preceded by either name or HP.
Code:
import re

# Compiled once, outside the loop. The pattern is applied with .match(),
# which anchors it at the start of the line, so keys that merely end in
# "name" or "HP" (for example "username = x") are not picked up.
value_re = re.compile(r'(?:name|HP) = (.*)')

with open("nametest_text.txt", "r") as f:
    for line in f:
        m = value_re.match(line.strip())
        if m:
            # group(1) is everything after the " = " separator.
            print(m.group(1))

Simplest way may be to use str.split() and then print everything after the '=' character:
with open("nametest_text.txt", "r") as f:
    for line in f:
        # partition() never raises: sep is empty when the line contains no
        # ' = ' (blank or malformed lines are skipped), and everything after
        # the first ' = ' is kept even if the value itself contains ' = '.
        _key, sep, value = line.strip().partition(' = ')
        if sep:
            print(value)
output:
Wubzy
100

Instead of trying to create and parse a proprietary format (you will most likely hit limitations at some point and will need to change your logic and/or file format), better stick to a well-known and well-defined file format that comes with the required writers and parsers, like yaml, json, cfg, xml, and many more.
This saves a lot of pain; consider the following quick example of a class that holds a state and that can be serialized to a key-value-mapped file format (I'm using yaml here, but you can easily exchange it for json, or others):
#!/usr/bin/python
import os
import yaml
class GameState:
    """Game state that can be written to and restored from a YAML file.

    Every attribute lives in the instance ``__dict__``; that mapping is what
    gets serialized by ``save`` and what ``from_savegame`` feeds back into
    the constructor.
    """

    def __init__(self, name, **kwargs):
        """Create a state for ``name`` with 100 health.

        Any extra keyword arguments are stored as attributes and override
        the defaults (e.g. ``health=98``).
        """
        self.name = name
        self.health = 100
        self.__dict__.update(kwargs)

    @staticmethod
    def from_savegame(path):
        """Load the YAML mapping stored at ``path`` and build a GameState."""
        with open(path, 'r') as savegame:
            args = yaml.safe_load(savegame)
        return GameState(**args)

    def save(self, path, overwrite=False):
        """Write the state to ``path`` as YAML.

        Raises IOError if ``path`` is an existing file and ``overwrite`` is
        false.
        """
        if os.path.exists(path) and os.path.isfile(path) and not overwrite:
            raise IOError('Savegame exists; refusing to overwrite.')
        with open(path, 'w') as savegame:
            savegame.write(yaml.dump(self.__dict__))

    def __str__(self):
        """Return a multi-line listing of all attributes, sorted by name."""
        # .items() works on Python 2 and 3 (iteritems() is Python 2 only);
        # sorting gives a stable, alphabetical listing.
        return (
            'GameState(\n{}\n)'
            .format(
                '\n'.join([
                    ' {}: {}'.format(k, v)
                    for k, v in sorted(self.__dict__.items())
                ]))
        )
Using this simple class exemplarily:
SAVEGAMEFILE = 'savegame_01.yml'
new_gs = GameState(name='jbndlr')
print(new_gs)
new_gs.health = 98
print(new_gs)
new_gs.save(SAVEGAMEFILE, overwrite=True)
old_gs = GameState.from_savegame(SAVEGAMEFILE)
print(old_gs)
... yields:
GameState(
health: 100
name: jbndlr
)
GameState(
health: 98
name: jbndlr
)
GameState(
health: 98
name: jbndlr
)

Related

How to change numeric values and only matches word as the key inside files?

I am new at python and trying to make a simple config editor app.
I have a .txt file that contains a bunch of count settings like:
...
...
max_count=1000
count=123
host_count=000
...
...
In my case, I want to change only the exact word matches with count= as the key to count=0 and whatever number on that (after = sign), I want it to be replaced by a value that user has given from the input field while ignoring the other counts like max_count=, host_count=, etc. Is it possible to do that?
For instance:
When user typed 0 on the input field, the result would be count=0
When user typed 1 on the input field, the result would be count=1
The other xxx_count= or count_xxx= will be ignored
I tried to do it like below but all counts are replaced instead of only the matches word count= it self.
files = Finder(self.path, self.name).find()
for file in files:
with open(file) as target:
content = target.read()
if self.target in content:
print(content)
content = content.replace(self.target, self.value)
with open(file, "w") as target:
target.write(content)
else:
print('{} not found in {}'.format(self.target, file))
please help.
UPDATE
here is my Finder class (it only used to find the files).
import os
class Finder:
    """Find files below a directory whose file name contains a given text."""

    def __init__(self, path, name):
        """Create new instance.

        :param {string} path: the directory of the target file.
        :param {string} name: the name of the file.
        """
        self.path = path
        self.name = name
        # Kept per instance: a class-level list would be shared by every
        # Finder and keep growing with each search.
        self.result = []

    def find(self):
        """Find files in the given path.

        Returns the list of matching file paths (also stored in
        ``self.result``), or None after printing a message when ``path`` is
        not a directory.
        """
        if not os.path.isdir(self.path):
            # Message is Indonesian for "Cannot find file {} in folder: {}."
            print('Tidak dapat menemukan file {} di folder: {}.'.format(self.name, self.path))
            return None

        # Start from a fresh list so repeated calls do not duplicate entries.
        found = []
        for root, dirs, files in os.walk(self.path):
            for file in files:
                # Substring test: "SMSGW.ini" also matches "old_SMSGW.ini".
                if self.name in file:
                    found.append(os.path.join(root, file))
        self.result = found
        return self.result
and it is the full version of my Modifier class:
from modules.Finder import Finder
from pprint import pprint
class Modifier(Finder):
"""
Create new instance.
:param target: target to be modified.
:param value: value to be used on target.
"""
def __init__(self, path, name, target, value):
self.target = target
self.value = value
Finder.__init__(self, path, name)
def modify(self):
files = Finder(self.path, self.name).find()
if not files:
return files
for file in files:
with open(file) as target:
content = target.read()
if self.target in content:
print(content)
content = content.replace(self.target, self.value)
with open(file, "w") as target:
target.write(content)
else:
print('{} not found in {}'.format(self.target, file))
UPDATE 2
Just to make sure everybody understand what I want to do.. This is my App.py file which controls the program.
from pprint import pprint
from modules.Modifier import Modifier
SMSGW = Modifier('D:\\Smsgw', 'SMSGW.ini', 'count=', 'count=0')
settings = SMSGW.modify()
pprint(settings)
Use regular expression ^count=\d+$ to replace the exact match count=somenumber
Considering:
user_input is the input entered by user and content is the data read from file
import re
# re.MULTILINE makes ^ and $ match at the start and end of every line of the
# file contents rather than only at the start and end of the whole string.
# re.sub returns the new text, so the result is assigned back to content.
content = re.sub(r'^count=\d+$', 'count={}'.format(user_input), content, flags=re.MULTILINE)
I see that in most cases you use the in statement to check strings. Like here:
if self.name in file:
or here
if self.target in content:
I think this is not what you need. The in operator checks whether a string is contained in another string, so for example 'a' in 'bac' returns True. Likewise, 'count=' in 'max_count=' returns True. So the problem may arise from here.
I guess you need to check for exact equality, so you should change these lines to:
if self.name == file:
or
if self.target == content:
Now to use this solution you need to separate the value from the target. Use the split method.
For example, if content is "count=123" you can do:
sepcontent = content.split('=')
this will create a list sepcontent which is ['count', '123']. You can check for equality on the first element: if self.target == sepcontent[0].
And you definitely need to iterate line by line when doing this. So do not use target.read(), which will create a single string of the full file.
In the end you should have something like:
wfile = 'tempfile.txt'
with open(file) as target, open(wfile, 'w') as wtarget:
    for content in target:
        # Split on the first '=' only; sep is empty for lines without '='.
        key, sep, _old_value = content.partition("=")
        if sep and key == "count":  # or your self.target
            # Rebuild the line from the key and the new value (self.value is
            # just the value here, e.g. "0"), keeping the original line
            # ending so the following line is not glued onto this one.
            line_end = "\n" if content.endswith("\n") else ""
            content = "{}={}{}".format(key, self.value, line_end)
        # Every line is written, changed or not.
        wtarget.write(content)
In the end you will have all your edits in a file called tempfile.txt. You may simply rename this file to substitute the original.
I am not following your code exactly; however, the solution to your main question seems to be to use a regex.
If your string is s = 'x_count=1\ncount=0\ncount_xx=0' or
x_count=1
count=0
count_xx=0
you can use regex to find and replace the number following count=
import re
s = 'x_count=1\ncount=0\ncount_xx=0'
## user inputs the number 1, so we replace the current number with 1
# (?m) makes ^ and $ match at every line, so a count= entry on the very
# first line is found too; \d+ consumes the whole old number rather than
# only its first digit.
new_s = re.sub(r'(?m)^count=\d+$', 'count=1', s)
let me know if this is helpful or not!
edit:
import re
user_input = 1
# Build the replacement once; the % operator fills in the number.
replacement = 'count=%d' % user_input
for file_name in list_of_file_names:
    with open(file_name) as f:
        # (?m) lets ^ and $ match at each line, so only a line that is
        # exactly count=<digits> is replaced (max_count=, count_xx= and
        # multi-digit values are handled correctly).
        new_file_content = re.sub(r'(?m)^count=\d+$', replacement, f.read())
    with open(file_name, "w") as f:
        f.write(new_file_content)

Python 3.6 new line with user input

Made a program that allows the user to enter some text and have a text file be created with that text inside it. However, when the user puts \n it doesn't start a new line. Finding it very difficult to use my program to create text files as it writes all the text on one line lol
Thanks
EDIT - Sorry. Here is my code (Just the part we are concerned with).
class ReadWriteEdit:
    """Small helpers for reading, writing and appending to text files.

    The methods are called on the class itself, e.g.
    ``ReadWriteEdit.Write(path, text)``, so they are static methods.
    """

    @staticmethod
    def Read(File):
        """Print the entire contents of File."""
        with open(File, "r") as ReadFile:
            print(ReadFile.read())

    @staticmethod
    def Write(File, Text):
        """Replace the contents of File with Text."""
        # The with block closes the file, which flushes Text to disk.
        with open(File, "w") as WriteFile:
            WriteFile.write(Text)

    @staticmethod
    def Edit(File, Text):
        """Append Text to the end of File."""
        with open(File, "a") as EditFile:
            EditFile.write(Text)
def Write():
print("WRITE")
print("Enter a file location / name")
FileInput = input("-:")
print("Enter some text")
TextInput = input("-:")
ReadWriteEdit.Write(FileInput, TextInput)
As you have found, special escaped characters are not interpolated in strings read from input, because normally we want to preserve characters, not give them special meanings.
You need to do some adjustment after the input, for example:
>>> s=input()
hello\nworld
>>> s
'hello\\nworld'
>>> s = s.replace('\\n', '\n')
>>> s
'hello\nworld'
>>> print(s)
hello
world
You can just add your input to the variable. You didn't provide any code so I'll just have to improvise:
# The with block closes (and flushes) the file even if input() or write()
# raises, so no explicit data.close() is needed.
with open('output.txt', 'w') as data:
    a = input('>>')
    # input() strips the trailing newline, so add it back when writing.
    data.write(a + '\n')
But the better solution would be, like the comment below me mentioned, to use sys.stdin.readline()
import sys
data.write(sys.stdin.readline())
The easiest (dirtiest?) way IMHO is to print the message and then use input() without a prompt (or with the last part of it)
print("Are we REALLY sure?\n")
answer = input("[Y/n]")

Locating and extracting a string from multiple text files in Python

I am just picking up and learning Python, For work i go through a lot of pdfs and so I found a PDFMINER tool that converts a directory to a text file. I then made the below code to tell me whether the pdf file is an approved claim or a denied claim. I dont understand how I can say find me the string that starts with "Tracking Identification Number..." AND is the 18 characters after that and stuff it into an array?
import os
import glob
import csv
def check(filename):
if 'DELIVERY NOTIFICATION' in open(filename).read():
isDenied = True
print ("This claim was Denied")
print (isDenied)
elif 'Dear Customer:' in open(filename).read():
isDenied = False
print("This claim was Approved")
print (isDenied)
else:
print("I don't know if this is approved or denied")
def iterate():
path = 'text/'
for infile in glob.glob(os.path.join(path, '*.txt')):
print ('current file is:' + infile)
filename = infile
check(filename)
iterate()
Any help would be appreciated. this is what the text file looks like
Shipper Number............................577140Pickup Date....................................06/27/17
Number of Parcels........................1Weight.............................................1 LBS
Shipper Invoice Number..............30057010Tracking Identification Number...1Z000000YW00000000
Merchandise..................................1 S NIKE EQUALS EVERYWHERE T BK B
WE HAVE BEEN UNABLE TO PROVIDE SATISFACTORY PROOF OF DELIVERY FOR THE ABOVE
SHIPMENT. WE APOLOGIZE FOR THE INCONVENIENCE THIS CAUSES.
NPT8AEQ:000A0000LDI 07
----------------Page (1) Break----------------
update: Many helpful answers, here is the route I took, and is working quite nicely if I do say so myself. this is gonna save tons of time!! Here is my the entire code for any future viewers.
import csv
import glob
import os

# Tracking numbers of denied claims, filled in by check().
arrayDenied = []
# "<tracking number> - <claim number>" strings of approved claims, filled in
# by check().
arrayApproved = []

_TRACKING_MARKER = "Tracking Identification Number..."
_CLAIM_MARKER = "Claim Number "


def iterate():
    """Run check() on every .txt file in the 'text/' directory."""
    path = 'text/'
    for infile in glob.glob(os.path.join(path, '*.txt')):
        print('current file is:' + infile)
        check(infile)


def _text_after(text, marker, length):
    """Return the `length` characters following the first `marker` in `text`.

    Raises ValueError (from str.index) when `marker` does not occur.
    """
    start = text.index(marker) + len(marker)
    return text[start:start + length]


def check(filename):
    """Classify one converted claim file and record its numbers.

    A file containing 'DELIVERY NOTIFICATION' is treated as denied and its
    18-character tracking number is appended to arrayDenied. A file
    containing 'Dear Customer:' is treated as approved and its tracking
    number plus 11-character claim number are appended to arrayApproved.
    Anything else only prints a message.

    Raises ValueError if a recognised file lacks an expected marker.
    """
    # Read the file once; both branches test the same text.
    with open(filename, 'rt') as file_contents:
        myText = file_contents.read()

    if 'DELIVERY NOTIFICATION' in myText:
        myNumber = _text_after(myText, _TRACKING_MARKER, 18)
        print("Denied: " + myNumber)
        arrayDenied.append(myNumber)
    elif 'Dear Customer:' in myText:
        print("This claim was Approved")
        myNumber = _text_after(myText, _TRACKING_MARKER, 18)
        myClaimNumber = _text_after(myText, _CLAIM_MARKER, 11)
        arrayApproved.append(myNumber + " - " + myClaimNumber)
    else:
        print("I don't know if this is approved or denied")
iterate()
with open('Approved.csv', "w") as output:
writer = csv.writer(output, lineterminator='\n')
for val in arrayApproved:
writer.writerow([val])
with open('Denied.csv', "w") as output:
writer = csv.writer(output, lineterminator='\n')
for val in arrayDenied:
writer.writerow([val])
print(arrayDenied)
print(arrayApproved)
Update: Added the rest of my finished code, Writes the lists to a CSV file where i go execute some =left()'s and such and boom I have 1000 tracking numbers in a matter of minutes. This is why programming is great.
If your goal is just to find the "Tracking Identification Number..." string and the subsequent 18 characters; you can just find the index of that string, then reach where it ends, and slice from that point until the end of the subsequent 18 characters.
# Read the text file into memory:
with open(filename, 'rt') as txt_file:
myText = txt_file.read()
if 'DELIVERY NOTIFICATION' in myText:
# Find the desired string and get the subsequent 18 characters:
start = myText.index("Tracking Identification Number...") + len("Tracking Identification Number...")
myNumber = myText[start : start+18]
arrayDenied.append(myNumber)
You can also modify the append line into arrayDenied.append(myText + ' ' + myNumber) or things like that.
Regular expressions are the way to go for your task. Here is a way to modify your code to search for the pattern.
import re
pattern = r"(?<=Tracking Identification Number)(?:(\.+))[A-Z-a-z0-9]{18}"
def check(filename):
    """Print whether the claim in `filename` was denied or approved.

    For a denied claim, every tracking number found by `pattern` is printed
    as well.
    """
    # Read the file once and close it before doing any matching.
    with open(filename, 'r') as handle:
        file_contents = handle.read()

    if 'DELIVERY NOTIFICATION' in file_contents:
        isDenied = True
        print("This claim was Denied")
        print(isDenied)
        # Search the text that was just read. Each match still carries the
        # filler dots in front of the number, so they are stripped off.
        for match in re.finditer(pattern, file_contents):
            print("Tracking Number = %s" % match.group().strip("."))
    elif 'Dear Customer:' in file_contents:
        isDenied = False
        print("This claim was Approved")
        print(isDenied)
    else:
        print("I don't know if this is approved or denied")
Explanation:
r"(?<=Tracking Identification Number)(?:(\.+))[A-Z-a-z0-9]{18}"
(?<=Tracking Identification Number) Looks behind the capturing group to find the string "Tracking Identification Number"
(?:(\.+)) matches one or more dots (.) (we strip these out after)
[A-Z-a-z0-9]{18} matches 18 instances of (capital or lowercase) letters or numbers
More on Regex.
I think this solves your issue, just turn it into a function.
import re
string = 'Tracking Identification Number...1Z000000YW00000000'
# Removes all dots from the string (a plain replace needs no regex).
no_dots = string.replace('.', '')
# Matches anything after the "Tracking Identification Number" prefix.
matchObj = re.search(r'^Tracking Identification Number(.*)', no_dots)
# re.search returns None when nothing matches, so test for that directly
# instead of catching every possible exception.
if matchObj:
    print(matchObj.group(1))
else:
    print("No match!")
If you want to read the documentation it is here: https://docs.python.org/3/library/re.html#re.search

Multiple Word Search not Working Correctly (Python)

I am working on a project that requires me to be able to search for multiple keywords in a file. For example, if I had a file with 100 occurrences of the word "Tomato", 500 for the word "Bread", and 20 for "Pickle", I would want to be able to search the file for "Tomato" and "Bread" and get the number of times it occurs in the file. I was able to find people with the same issue/question, but for other languages on this site.
I have a working program that allows me to search for the column name and tally how many times something shows up in that column, but I want to make something a bit more precise. Here is my code:
def start():
location = raw_input("What is the folder containing the data you like processed located? ")
#location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
if os.path.exists(location) == True: #Tests to see if user entered a valid path
file_extension = raw_input("What is the file type (.txt for example)? ")
search_for(location,file_extension)
else:
print "I'm sorry, but the file location you have entered does not exist. Please try again."
start()
def search_for(location,file_extension):
querylist = []
n = 5
while n == 5:
search_query = raw_input("What would you like to search for in each file? Use'Done' to indicate that you have finished your request. ")
#list = ["CD90-N5722-15C", "CD90-NB810-4C", "CP90-N2475-8", "CD90-VN530-22B"]
if search_query == "Done":
print "Your queries are:",querylist
print ""
content = os.listdir(location)
run(content,file_extension,location,querylist)
n = 0
else:
querylist.append(search_query)
continue
def run(content,file_extension,location,querylist):
for item in content:
if item.endswith(file_extension):
search(location,item,querylist)
quit()
def search(location,item,querylist):
with open(os.path.join(location,item), 'r') as f:
countlist = []
for search in querylist: #any search value after the first one is incorrectly reporting "0"
countsearch = 0
for line in f:
if search in line:
countsearch = countsearch + 1
countlist.append(search)
countlist.append(countsearch) #mechanism to update countsearch is not working for any value after the first
print item, countlist
start()
If I use that code, the last part (def search) is not working correctly. Any time I put a search in, any search after the first one I enter in returns "0", despite there being up to 500,000 occurrences of the search word in a file.
I was also wondering, since I have to index 5 files with 1,000,000 lines each, if there was a way I could write either an additional function or something to count how many times "Lettuce" occurs over all the files.
I cannot post the files here due to their size and content. Any help would be greatly appreciated.
Edit
I also have this piece of code here. If I use this, I get the correct count of each, but it would be much better to have a user be able to enter as many searches as they want:
def check_start():
#location = raw_input("What is the folder containing the data you like processed located? ")
location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
content = os.listdir(location)
for item in content:
if item.endswith("processed"):
countcol1 = 0
countcol2 = 0
countcol3 = 0
countcol4 = 0
#print os.path.join(currentdir,item)
with open(os.path.join(location,item), 'r') as f:
for line in f:
if "CD90-N5722-15C" in line:
countcol1 = countcol1 + 1
if "CD90-NB810-4C" in line:
countcol2 = countcol2 + 1
if "CP90-N2475-8" in line:
countcol3 = countcol3 + 1
if "CD90-VN530-22B" in line:
countcol4 = countcol4 + 1
print item, "CD90-N5722-15C", countcol1, "CD90-NB810-4C", countcol2, "CP90-N2475-8", countcol3, "CD90-VN530-22B", countcol4
You are trying to iterate over your file more than once. After the first time, the file pointer is at the end so subsequent searches will fail because there's nothing left to read.
If you add the line:
f.seek(0), this will reset the pointer before every read:
def search(location,item,querylist):
with open(os.path.join(location,item), 'r') as f:
countlist = []
for search in querylist: #any search value after the first one is incorrectly reporting "0"
countsearch = 0
for line in f:
if search in line:
countsearch = countsearch + 1
countlist.append(search)
countlist.append(countsearch) #mechanism to update countsearch is not working for any value after the first
f.seek(0)
print item, countlist
PS. I've guessed at the indentation... You really shouldn't use tabs.
I'm not sure I get your question completely, but how about something like this?
def check_start():
    """Count, per file, how many lines contain each user-supplied term.

    Prompts for a comma-separated list of search terms, then scans every
    file in the data folder whose name ends with "processed" and prints the
    file name followed by one "term count" line per search term.

    Uses raw_input, i.e. it targets Python 2 like the question; on
    Python 3 replace it with input().
    """
    raw_search_terms = raw_input('Enter search terms seperated by a comma:')
    # Strip the spaces people type after commas and drop empty entries: an
    # empty string is "in" every line and would count the whole file.
    search_term_list = [s.strip() for s in raw_search_terms.split(',') if s.strip()]
    #location = raw_input("What is the folder containing the data you like processed located? ")
    location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
    content = os.listdir(location)
    for item in content:
        if item.endswith("processed"):
            # create a dictionary of search terms with their counts (initialized to 0)
            search_term_count_dict = dict.fromkeys(search_term_list, 0)
            # Single pass over the file: every line is tested against all
            # terms, so the file never has to be rewound.
            with open(os.path.join(location, item), 'r') as f:
                for line in f:
                    for s in search_term_list:
                        if s in line:
                            search_term_count_dict[s] += 1
            print(item)
            for key, value in search_term_count_dict.items():
                print("{} {}".format(key, value))

Reading a file and adding a specific match to a dictionary in python

Id like to read a file for a specific match in the following style "word = word", specifically Im looking to find files with usernames and passwords in them. These files would be scripts created by admins using bad practices with clear credentials being used in logonscripts etc.
The code I have created so far does the job but its very messy and prints an entire line if the match is found (I cant help but think there is a more elegant way to do this). This creates ugly output, id like to print only the match in the line. I cant seem to find a way to do that. If I can create the correct regex for a match of something like the below match, is it possible to only print the match found in the line rather than the entire line?
(I am going to try describe the type of match im looking for)
Key
* = wildcard
- = space
^ = anycharacter until a space
Match
*(U|u)ser^-=-^
dirt = "/dir/path/"
def get_files():
for root, dirs, files in os.walk(dirt):
for filename in files:
if filename.endswith(('.bat', '.vbs', '.ps', '.txt')):
readfile = open(os.path.join(root, filename), "r")
for line in readfile:
if re.match("(.*)(U|u)ser(.*)", line) and re.match("(.*)(=)(.*)", line) or re.match("(.*)(P|p)ass(.*)", line) and re.match("(.*)(=)(.*)", line):
print line
TEST SCRIPT
strComputer = "atl-ws-01"
strNamespace = “root\cimv2”
strUser = "Administrator"
strPassword = "4rTGh2#1"
user = AnotherUser #Test
pass = AnotherPass #test
Set objWbemLocator = CreateObject("WbemScripting.SWbemLocator")
Set objWMIService = objwbemLocator.ConnectServer _
(strComputer, strNamespace, strUser, strPassword)
objWMIService.Security_.authenticationLevel = WbemAuthenticationLevelPktPrivacy
Set colItems = objWMIService.ExecQuery _
("Select * From Win32_OperatingSystem")
For Each objItem in ColItems
Wscript.Echo strComputer & ": " & objItem.Caption
Next
Latest Code after taking on board the responses
This is the latest code I am using. It seems to be doing the job as expected, apart from the output isnt managed as well as Id like. Id like to add the items into a dictionary. Key being the file name. And two vaules, the username and password. Although this will be added as a separate question.
Thanks all for the help
dirt = "~/Desktop/tmp"
def get_files():
regs = ["(.*)((U|u)ser(.*))(\s=\s\W\w+\W)", "(.*)((U|u)ser(.*))(\s=\s\w+)", "(.*)((P|p)ass(.*))\s=\s(\W(.*)\W)", "(.*)((P|p)ass(.*))(\s=\s\W\w+\W)"]
combined = "(" + ")|(".join(regs) + ")"
results = dict()
for root, dirs, files in os.walk(dirt):
for filename in files:
if filename.endswith(('.bat', '.vbs', '.ps', '.txt')):
readfile = open(os.path.join(root, filename), "r")
for line in readfile:
m = re.match(combined, line)
if m:
print os.path.join(root, filename)
print m.group(0)
Latest Code output
~/Desktop/tmp/Domain.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts/Logon/logonscript1.vbs
strUser = "guytom"
~/Desktop/tmp/DLsec.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts /Logon/logonscript1.vbs
strPassword = "P#ssw0rd1"
~/Desktop/tmp/DLsec.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts/Logon/logonscript2.bat
strUsername = "guytom2"
~/Desktop/tmp/DLsec.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts/Logon/logonscript2.bat
strPass = "SECRETPASSWORD"
https://docs.python.org/2/library/re.html
group([group1, ...])
Returns one or more subgroups of the match. If there is a single argument, the result is a single string; if there are multiple arguments, the result is a tuple with one item per argument. Without arguments, group1 defaults to zero (the whole match is returned). If a groupN argument is zero, the corresponding return value is the entire matching string;
match.group(0)
Since you can have many object=value you need to use regular expressions. Here is some sample code for you.
import re

line1 = " someuser = bob "
line2 = " bob'spasswd= secretpassword"
#re.I will do case insensitive search
# group(1) of the user pattern is the first run of non-space characters
# after the '='; group(1) of the password pattern is the rest of the line.
userMatchObj = re.search(r'.*user.*=\s*([\S]*).*', line1, re.I)
pwdMatchObj = re.search(r'.*pass.*=\s*(.*)', line2, re.I)
# print(...) with a single argument behaves the same on Python 2 and 3.
if userMatchObj:
    print("user=" + userMatchObj.group(1))
if pwdMatchObj:
    print("password=" + pwdMatchObj.group(1))
output:
user=bob
password=secretpassword
References: https://docs.python.org/2/library/re.html , http://www.tutorialspoint.com/python/python_reg_expressions.htm
Thanks all for the help. Below is my working code (needs further work on the output but the matching is working well)
dirt = "~/Desktop/tmp"
def get_files():
regs = ["(.*)((U|u)ser(.*))(\s=\s\W\w+\W)", "(.*)((U|u)ser(.*))(\s=\s\w+)", "(.*)((P|p)ass(.*))\s=\s(\W(.*)\W)", "(.*)((P|p)ass(.*))(\s=\s\W\w+\W)"]
combined = "(" + ")|(".join(regs) + ")"
results = dict()
for root, dirs, files in os.walk(dirt):
for filename in files:
if filename.endswith(('.bat', '.vbs', '.ps', '.txt')):
readfile = open(os.path.join(root, filename), "r")
for line in readfile:
m = re.match(combined, line)
if m:
print os.path.join(root, filename)
print m.group(0)
Latest Code output
~/Desktop/tmp/Domain.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts/Logon/logonscript1.vbs
strUser = "guytom"
~/Desktop/tmp/DLsec.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts/Logon/logonscript1.vbs
strPassword = "P#ssw0rd1"
~/Desktop/tmp/DLsec.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts/Logon/logonscript2.bat
strUsername = "guytom2"
~/Desktop/tmp/DLsec.local/Policies/{31B2F340-016D-11D2-945F-00C04FB984F9}/USER/Scripts/Logon/logonscript2.bat
strPass = "SECRETPASSWORD"

Categories

Resources