I wrote some code to extract email and IP addresses from bulk text. However, the code extracts only the email addresses. (The original text, which I would like to make understandable, is a typical log file). I don't know why the generated file does not give me back the IP addresses.
import os
import re
# 1
filename = 'errors.txt'
newfilename = 'emaillist-rev.txt'
# 2
if os.path.exists(filename):
data = open(filename,'r')
bulkemails = data.read()
else:
print "File not found."
raise SystemExit
# 3
r = re.compile(r'[\w\.-]+#[\w\.-]+')
results = r.findall(bulkemails)
emails = ""
for x in results:
emails += str(x)+"\n"
# 4
ip = re.compile('^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
result = ip.findall(bulkemails)
ip =""
for y in result:
ip += str(y)+"\n"
# 5
def writefile():
f = open(newfilename, 'w')
f.write(emails + ip)
f.close()
print "File written."
# 6
def overwrite_ok():
response = raw_input("Are you sure you want to overwrite "+str(newfilename)+"? Yes or No\n")
if response == "Yes":
writefile()
elif response == "No":
print "Aborted."
else:
print "Please enter Yes or No."
overwrite_ok()
# 7
if os.path.exists(newfilename):
overwrite_ok()
else:
writefile()
When declaring the ip regex, replace the anchors with word boundaries and mind you need to use a raw string literal.
ip = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b')
Related
I wrote some code to extract email and IP addresses from bulk text. However, the code extracts only the email addresses. (The original text, which I would like to make understandable, is a typical log file). I don't know why the generated file does not give me back the IP addresses.
import os
import re
# 1
filename = 'errors.txt'
newfilename = 'emaillist-rev.txt'
# 2
if os.path.exists(filename):
data = open(filename,'r')
bulkemails = data.read()
else:
print "File not found."
raise SystemExit
# 3
r = re.compile(r'[\w\.-]+#[\w\.-]+')
results = r.findall(bulkemails)
emails = ""
for x in results:
emails += str(x)+"\n"
# 4
ip = re.compile('^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
result = ip.findall(bulkemails)
ip =""
for y in result:
ip += str(y)+"\n"
# 5
def writefile():
f = open(newfilename, 'w')
f.write(emails + ip)
f.close()
print "File written."
# 6
def overwrite_ok():
response = raw_input("Are you sure you want to overwrite "+str(newfilename)+"? Yes or No\n")
if response == "Yes":
writefile()
elif response == "No":
print "Aborted."
else:
print "Please enter Yes or No."
overwrite_ok()
# 7
if os.path.exists(newfilename):
overwrite_ok()
else:
writefile()
When declaring the ip regex, replace the anchors with word boundaries and mind you need to use a raw string literal.
ip = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b')
I am trying to extract IPv4 addresses from a text file and save them as a list to a new file, however, I can not use regex to parse the file, Instead, I have check the characters individually. Not really sure where to start with that, everything I find seems to have import re as the first line.
So far this is what I have,
#Opens and prints wireShark txt file
fileObject = open("wireShark.txt", "r")
data = fileObject.read()
print(data)
#Save IP adresses to new file
with open('wireShark.txt') as fin, open('IPAdressess.txt', 'wt') as fout:
list(fout.write(line) for line in fin if line.rstrip())
#Opens and prints IPAdressess txt file
fileObject = open("IPAdressess.txt", "r")
data = fileObject.read()
print(data)
#Close Files
fin.close()
fout.close()
So I open the file, and I have created the file that I will put the extracted IP's in, I just don't know ow to pull them without using REGEX.
Thanks for the help.
Here is a possible solution.
The function find_first_digit, position the index at the next digit in the text if any and return True. Else return False
The functions get_dot and get_num read a number/dot and, lets the index at the position just after the number/dot and return the number/dot as str. If one of those functions fails to get the number/dot raise an MissMatch exception.
In the main loop, find a digit, save the index and then try to get an ip.
If sucess, write it to output file.
If any of the called functions raises a MissMatch exception, set the current index to the saved index plus one and start over.
class MissMatch(Exception):pass
INPUT_FILE_NAME = 'text'
OUTPUT_FILE_NAME = 'ip_list'
def find_first_digit():
while True:
c = input_file.read(1)
if not c: # EOF found!
return False
elif c.isdigit():
input_file.seek(input_file.tell() - 1)
return True
def get_num():
num = input_file.read(1) # 1st digit
if not num.isdigit():
raise MissMatch
if num != '0':
for i in range(2): # 2nd 3th digits
c = input_file.read(1)
if c.isdigit():
num += c
else:
input_file.seek(input_file.tell() - 1)
break
return num
def get_dot():
if input_file.read(1) == '.':
return '.'
else:
raise MissMatch
with open(INPUT_FILE_NAME) as input_file, open(OUTPUT_FILE_NAME, 'w') as output_file:
while True:
ip = ''
if not find_first_digit():
break
saved_position = input_file.tell()
try:
ip = get_num() + get_dot() \
+ get_num() + get_dot() \
+ get_num() + get_dot() \
+ get_num()
except MissMatch:
input_file.seek(saved_position + 1)
else:
output_file.write(ip + '\n')
I'm a newbie, and this is my first question on stackoverflow, so with that said, here's my question
CLIENT CODE
import socket
cs = socket.socket()
ADDR = ('192.168.29.139',9999)
cs.connect(ADDR)
l = int(cs.recv(2048).decode())
data = cs.recv(l).decode()
data = eval(data)
cont = data["file"]["cont"]
f = open(data['file']['name'] + data['file']['ext'], "wb")
f.write(cont)
f.close()
SERVER CODE
## SERVER SIDE
import socket
ss = socket.socket()
ADDR = ('192.168.29.139',9999)
ss.bind(ADDR)
ss.listen()
conn, addr = ss.accept()
msg = input("Enter message: ")
filepath = input("Enter filepath: ")
fileName = input("Enter filename : ")
fileExt = input("Enter fileExt:" )
f = open(filepath,"rb")
r = f.read()
f.close()
fileDict = {"name": fileName, "ext": fileExt, "cont": r}
msg_dict = {"msg":msg, "file": fileDict}
msg_dict = str(msg_dict).encode()
conn.send(str(len(msg_dict)).encode())
conn.send(msg_dict)
This method works totally fine when I transfer files within the same computer (even bigger files, like several GBs, in this test I was using the windows 7 test video, that was about 25MB) but when i use it on LAN to transfer the same file, between two computers within the same network it shows an error
right during this statement
data = eval(data)
the error was, after printing so many lines of characters like \xo... and empty lines
EOL while scanning string literal ^
using pickle also gave error
Thank you for reading... Please HELP!
I hope the title wasn't too confusing, but you'll see what I meant by that in a bit. In the meantime, some backstory-- I'm working on a function that generates random usernames and passwords and writes them in a text file as username:password for another program that collects the username:password line as:
string = line.split(":")
username = string[0]
pwd = string[1]
Why does this matter? Well, when I run my function:
Code:
# To generate users and passwords for the password file:
"""
Usage: count-- how many accounts to generate
file-- where to dump the accounts
method-- dict is where it loops through words
and chooses random ones as users and passwords,
and brute (not implemented yet) is where it chooses
random characters and strings them together as users
and passwords.
users-- if you want any filled in users, put them in here.
passes-- if you want any filled in passes, put them in here.
"""
def genAccts(count, file, method="dict", users=[], passes=[]):
try:
f = open(file, "w")
if method == "dict":
dictionary = "Dictionary.txt"#input("[*] Dictionary file: ")
d = open(dictionary, "r")
words = d.readlines()
d.close()
accts = []
for b in range(0, count):
global user
global pwd
user = random.choice(words)
pwd = random.choice(words)
if b < len(users)-1:
user = users[b]
if b < len(passes)-1:
pwd = passes[b]
acct = [user, pwd]
accts.append(acct)
print("[+] Successfully generated",count,"accounts")
for acct in accts:
combined = acct[0]+":"+acct[1]
print(combined)
f.write(combined)
f.close()
print("[+] Successfully wrote",count,"accounts in",file+"!")
except Exception as error:
return str(error)
genAccts(50, "brute.txt")
In my password file brute.txt, I get an output like
quainter
:slightest
litany
:purples
reciprocal
:already
delicate
:four
and so I'm wondering why is a \n added after the username?
You can fix this by replacing:
words = d.readlines()
with:
words = [x.strip() for x in d.readlines()]
words = d.readlines()
The above function returns a list which contains each line as an item. Every word will contain \n character at the end. So to get the required output, you have to trim the white space characters for username.
user = random.choice(words).strip()
Above line will solve your issue!
Use this:
def genAccts(count, file, method="dict", users=[], passes=[]):
try:
f = open(file, "w")
if method == "dict":
dictionary = "Dictionary.txt"#input("[*] Dictionary file: ")
d = open(dictionary, "r")
words = d.readlines().strip()
d.close()
accts = []
for b in range(0, count):
global user
global pwd
user = random.choice(words)
pwd = random.choice(words)
if b < len(users)-1:
user = users[b]
if b < len(passes)-1:
pwd = passes[b]
acct = [user, pwd]
accts.append(acct)
print("[+] Successfully generated",count,"accounts")
for acct in accts:
combined = acct[0]+":"+acct[1]
print(combined)
f.write(combined)
f.close()
print("[+] Successfully wrote",count,"accounts in",file+"!")
except Exception as error:
return str(error)
genAccts(50, "brute.txt")
I'm trying to extract only the IPs from a file, organize them numerically and put the result in another file.
The data looks like this:
The Spammer (and all his/her info):
Username: user
User ID Number: 0
User Registration IP Address: 77.123.134.132
User IP Address for Selected Post: 177.43.168.35
User Email: email#address.com
Here is my code, which does not sort the IPs correctly (i.e. it lists 177.43.168.35 before 77.123.134.132):
import re
spammers = open('spammers.txt', "r")
ips = []
for text in spammers.readlines():
text = text.rstrip()
print text
regex = re.findall(r'(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})$',text)
if regex is not None and regex not in ips:
ips.append(regex)
for ip in ips:
OrganizedIPs = open("Organized IPs.txt", "a")
addy = "".join(ip)
if addy is not '':
print "IP: %s" % (addy)
OrganizedIPs.write(addy)
OrganizedIPs.write("\n")
spammers.close()
OrganizedIPs.close()
organize = open("Organized IPs.txt", "r")
ips = organize.readlines();
ips = list(set(ips))
print ips
for i in range(len(ips)):
ips[i] = ips[i].replace('\n', '')
print ips
ips.sort()
finish = open('organized IPs.txt', 'w')
finish.write('\n'.join(ips))
finish.close()
clean = open('spammers.txt', 'w')
clean.close()
I had tried using this IP sorter code but it needs a string were as the regex returns a list.
Or this (saving you string formatting cost):
def ipsort (ip):
return tuple (int (t) for t in ip.split ('.') )
ips = ['1.2.3.4', '100.2.3.4', '62.1.2.3', '62.1.22.4']
print (sorted (ips, key = ipsort) )
import re
LOG = "spammers.txt"
IPV4 = re.compile(r"(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})")
RESULT = "organized_ips.txt"
def get_ips(fname):
with open(fname) as inf:
return IPV4.findall(inf.read())
def numeric_ip(ip):
return [int(i) for i in ip.split(".")]
def write_to(fname, iterable, fmt):
with open(fname, "w") as outf:
for i in iterable:
outf.write(fmt.format(i))
def main():
ips = get_ips(LOG)
ips = list(set(ips)) # uniquify
ips.sort(key=numeric_ip)
write_to(RESULT, ips, "IP: {}\n")
if __name__=="__main__":
main()
Try this:
sorted_ips = sorted(ips, key=lambda x: '.'.join(["{:>03}".format(octet) for octet in x.split(".")])