I wrote some code to extract email and IP addresses from bulk text. However, the code extracts only the email addresses. (The original text, which I would like to make understandable, is a typical log file). I don't know why the generated file does not give me back the IP addresses.
import os
import re
# 1 -- input log and output report
filename = 'errors.txt'
newfilename = 'emaillist-rev.txt'

# 2 -- read the whole log into memory, or stop when it is missing
if os.path.exists(filename):
    # 'with' closes the handle as soon as the text has been read.
    with open(filename, 'r') as data:
        bulkemails = data.read()
else:
    print("File not found.")
    raise SystemExit

# 3 -- every token of the form <name>#<domain>, one per output line
r = re.compile(r'[\w\.-]+#[\w\.-]+')
results = r.findall(bulkemails)
emails = "".join(str(x) + "\n" for x in results)

# 4 -- IP addresses, one per output line
# NOTE(review): without re.MULTILINE, '^' and '$' anchor the pattern to the
# start and the end of the whole text, so findall() returns a match only
# when the entire text is a single address. The pattern is also not a raw
# string.
ip = re.compile('^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
result = ip.findall(bulkemails)
# From here on ``ip`` is the output text, no longer the compiled pattern.
ip = "".join(str(y) + "\n" for y in result)


# 5
def writefile():
    """Write the collected e-mail lines followed by the IP lines to newfilename."""
    with open(newfilename, 'w') as f:
        f.write(emails + ip)
    print("File written.")


# 6
def overwrite_ok():
    """Ask before replacing an existing output file.

    Keeps asking until the answer is exactly "Yes" (the file is written)
    or "No" (nothing is written).
    """
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3
    while True:
        response = ask("Are you sure you want to overwrite "+str(newfilename)+"? Yes or No\n")
        if response == "Yes":
            writefile()
            return
        if response == "No":
            print("Aborted.")
            return
        print("Please enter Yes or No.")


# 7 -- only ask for confirmation when the output file already exists
if os.path.exists(newfilename):
    overwrite_ok()
else:
    writefile()
When declaring the ip regex, replace the anchors (^ and $) with word boundaries (\b), and make sure to use a raw string literal:
# \b (word boundary) lets the pattern match an address anywhere in the text;
# the r'' prefix keeps the backslashes exactly as written.
ip = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b')
Related
I'm a newbie, and this is my first question on stackoverflow, so with that said, here's my question
CLIENT CODE
import socket
# Connect to the sender.
ADDR = ('192.168.29.139', 9999)
cs = socket.socket()
cs.connect(ADDR)

# The first message is the payload size in bytes, sent as decimal text.
size = int(cs.recv(2048).decode())

# NOTE(review): recv(n) returns at most n bytes and may return fewer, so a
# large payload can arrive truncated here -- confirm against the socket docs.
data = cs.recv(size).decode()

# NOTE(review): eval() runs whatever text the peer sent as Python code; this
# is unsafe on data received from a network and fails on truncated text.
data = eval(data)

# Save the transferred file under the name and extension the sender supplied.
file_info = data["file"]
cont = file_info["cont"]
with open(file_info['name'] + file_info['ext'], "wb") as f:
    f.write(cont)
SERVER CODE
## SERVER SIDE
import socket
# Wait for a single client on the fixed address.
ADDR = ('192.168.29.139', 9999)
ss = socket.socket()
ss.bind(ADDR)
ss.listen()
conn, addr = ss.accept()

# Ask the operator what to send.
msg = input("Enter message: ")
filepath = input("Enter filepath: ")
fileName = input("Enter filename : ")
fileExt = input("Enter fileExt:" )

# Load the whole file into memory as bytes.
with open(filepath, "rb") as f:
    r = f.read()

# The message is the text form of a dict, encoded to bytes.
fileDict = {"name": fileName, "ext": fileExt, "cont": r}
msg_dict = str({"msg": msg, "file": fileDict}).encode()

# Send the size first, then the payload itself.
# NOTE(review): send() may transmit only part of a large buffer and returns
# the number of bytes actually sent; nothing here separates the size from the
# payload on the wire -- confirm against the socket docs.
conn.send(str(len(msg_dict)).encode())
conn.send(msg_dict)
This method works fine when I transfer files within the same computer, even bigger files of several GB (in this test I was using the Windows 7 sample video, which is about 25 MB). But when I use it over a LAN to transfer the same file between two computers on the same network, it shows an error
right during this statement
data = eval(data)
The error, shown after many printed lines of characters like \xo... and empty lines, was:
EOL while scanning string literal ^
using pickle also gave error
Thank you for reading... Please HELP!
I hope the title wasn't too confusing, but you'll see what I meant by that in a bit. In the meantime, some backstory-- I'm working on a function that generates random usernames and passwords and writes them in a text file as username:password for another program that collects the username:password line as:
# Split the "username:password" line on the colon: field 0 is the user
# name, field 1 the password.
string = line.split(":")
username, pwd = string[0], string[1]
Why does this matter? Well, when I run my function:
Code:
# To generate users and passwords for the password file:
def genAccts(count, file, method="dict", users=[], passes=[]):
    """Generate accounts and dump them to a file as user:password.

    Usage:
        count  -- how many accounts to generate
        file   -- where to dump the accounts (opened for writing)
        method -- "dict" picks random lines of Dictionary.txt as users and
                  passwords; "brute" (random characters strung together)
                  is not implemented yet
        users  -- if you want any filled in users, put them in here
        passes -- if you want any filled in passes, put them in here

    Returns None on success, or the text of the exception on failure.
    """
    # Kept from the original: the last generated pair is also left in the
    # module-level names ``user`` and ``pwd``.
    global user, pwd
    try:
        # 'with' closes both files even when an error interrupts the run.
        with open(file, "w") as f:
            if method == "dict":
                dictionary = "Dictionary.txt"#input("[*] Dictionary file: ")
                with open(dictionary, "r") as d:
                    # NOTE(review): readlines() keeps the trailing "\n" of
                    # every line, so each word chosen below still ends with
                    # a newline when it is written out.
                    words = d.readlines()
                accts = []
                for b in range(0, count):
                    # Supplied names are used for as long as they last; the
                    # original "len(...) - 1" bound never reached the last
                    # supplied entry.
                    user = users[b] if b < len(users) else random.choice(words)
                    pwd = passes[b] if b < len(passes) else random.choice(words)
                    accts.append([user, pwd])
                print("[+] Successfully generated",count,"accounts")
                for acct in accts:
                    combined = acct[0]+":"+acct[1]
                    print(combined)
                    f.write(combined)
        if method == "dict":
            print("[+] Successfully wrote",count,"accounts in",file+"!")
    except Exception as error:
        return str(error)
genAccts(50, "brute.txt")
In my password file brute.txt, I get an output like
quainter
:slightest
litany
:purples
reciprocal
:already
delicate
:four
and so I'm wondering why is a \n added after the username?
You can fix this by replacing:
words = d.readlines()
with:
# strip() removes the trailing "\n" that readlines() leaves on every line.
words = [x.strip() for x in d.readlines()]
words = d.readlines()
readlines() returns a list that contains each line of the file as an item, and every item still ends with the \n character. So, to get the required output, you have to strip the whitespace from the username:
# Strip the newline from the chosen word before using it as the user name.
user = random.choice(words).strip()
Above line will solve your issue!
Use this:
def genAccts(count, file, method="dict", users=[], passes=[]):
    """Generate ``count`` accounts and write them to ``file``, one ``user:password`` per line.

    Args:
        count: How many accounts to generate.
        file: Path of the output file; it is opened for writing.
        method: Only "dict" is handled: names and passwords are drawn at
            random from the non-blank lines of Dictionary.txt. Any other
            value leaves the output file empty.
        users: Optional user names; account ``i`` uses ``users[i]`` for as
            long as entries last. The list is only read.
        passes: Optional passwords, applied the same way as ``users``.

    Returns:
        None on success, or the text of the exception if anything fails.
    """
    # Kept from the original: the last generated pair is also left in the
    # module-level names ``user`` and ``pwd``.
    global user, pwd
    try:
        # 'with' closes both files even when an error interrupts the run.
        with open(file, "w") as f:
            if method == "dict":
                dictionary = "Dictionary.txt"#input("[*] Dictionary file: ")
                with open(dictionary, "r") as d:
                    # readlines() returns a list, which has no strip();
                    # strip each line instead and skip blank ones.
                    words = [w.strip() for w in d if w.strip()]
                accts = []
                for b in range(0, count):
                    # Supplied names are used for as long as they last; the
                    # original "len(...) - 1" bound never reached the last
                    # supplied entry.
                    user = users[b] if b < len(users) else random.choice(words)
                    pwd = passes[b] if b < len(passes) else random.choice(words)
                    accts.append([user, pwd])
                print("[+] Successfully generated",count,"accounts")
                for acct in accts:
                    combined = acct[0]+":"+acct[1]
                    print(combined)
                    # The words no longer carry a newline, so end each
                    # account line explicitly.
                    f.write(combined + "\n")
        if method == "dict":
            print("[+] Successfully wrote",count,"accounts in",file+"!")
    except Exception as error:
        return str(error)
genAccts(50, "brute.txt")
I wrote some code to extract email and IP addresses from bulk text. However, the code extracts only the email addresses. (The original text, which I would like to make understandable, is a typical log file). I don't know why the generated file does not give me back the IP addresses.
import os
import re
# 1 -- input log and output report
filename = 'errors.txt'
newfilename = 'emaillist-rev.txt'

# 2 -- read the whole log into memory, or stop when it is missing
if os.path.exists(filename):
    # 'with' closes the handle as soon as the text has been read.
    with open(filename, 'r') as data:
        bulkemails = data.read()
else:
    print("File not found.")
    raise SystemExit

# 3 -- every token of the form <name>#<domain>, one per output line
r = re.compile(r'[\w\.-]+#[\w\.-]+')
results = r.findall(bulkemails)
emails = "".join(str(x) + "\n" for x in results)

# 4 -- IP addresses, one per output line
# NOTE(review): without re.MULTILINE, '^' and '$' anchor the pattern to the
# start and the end of the whole text, so findall() returns a match only
# when the entire text is a single address. The pattern is also not a raw
# string.
ip = re.compile('^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
result = ip.findall(bulkemails)
# From here on ``ip`` is the output text, no longer the compiled pattern.
ip = "".join(str(y) + "\n" for y in result)


# 5
def writefile():
    """Write the collected e-mail lines followed by the IP lines to newfilename."""
    with open(newfilename, 'w') as f:
        f.write(emails + ip)
    print("File written.")


# 6
def overwrite_ok():
    """Ask before replacing an existing output file.

    Keeps asking until the answer is exactly "Yes" (the file is written)
    or "No" (nothing is written).
    """
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3
    while True:
        response = ask("Are you sure you want to overwrite "+str(newfilename)+"? Yes or No\n")
        if response == "Yes":
            writefile()
            return
        if response == "No":
            print("Aborted.")
            return
        print("Please enter Yes or No.")


# 7 -- only ask for confirmation when the output file already exists
if os.path.exists(newfilename):
    overwrite_ok()
else:
    writefile()
When declaring the ip regex, replace the anchors (^ and $) with word boundaries (\b), and make sure to use a raw string literal:
# \b (word boundary) lets the pattern match an address anywhere in the text;
# the r'' prefix keeps the backslashes exactly as written.
ip = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b')
I have a text file called sample_ips.txt containing some random IP addresses as follows:-
182.0.0.15
182.0.0.16
182.0.0.17
I give an IP address as input and want to check whether that IP exists in the text file: the program should print true if it does and false otherwise.
Here is my code snippet:-
# NOTE(review): on Python 2, input() evaluates the typed text as a Python
# expression before str() ever sees it, so a bare 182.0.0.15 is rejected as
# invalid syntax; raw_input() returns the typed text unchanged.
ip_input = str(input("Enter IP:"))
# The data file is sample_ips.txt; the extension was missing here.
with open("sample_ips.txt", "r") as ip:
    data = ip.readlines()
# One verdict is printed for every line of the file.
for ips in data:
    ips = ips.strip("\n")
    if ip_input in ips:
        print("true")
    else:
        print("false")
It's throwing a syntax error
File "<string>", line 1
182.0.0.15
^
SyntaxError: invalid syntax
I think it is not able to take the input as a string despite me declaring it as a string in my code. Any help?
Thanks
# Python 2.x: raw_input() returns the typed text as a string. With plain
# input() the address would have to be typed in quotes ("182.0.0.15").
input_ip = raw_input("Enter IP:").strip()
#input_ip = input("Enter IP:").strip() for python 3.x
# Read the file named in the question, one address per line.
with open("sample_ips.txt", "r") as ip:
    known_ips = set(line.strip() for line in ip if line.strip())
# Compare whole addresses: a substring test would report 182.0.0.1 as
# present because it is the beginning of 182.0.0.15. One verdict is
# printed for the whole file.
if input_ip in known_ips:
    print("true")
else:
    print("false")
Well, this works:
# raw_input() (Python 2) returns the typed text as a string. It is kept in
# its own name so that the built-in input() is not shadowed.
ip_input = raw_input("Enter IP:").strip()
# 'with' closes the file once the addresses have been read.
with open("sample_ips.txt", "r") as ip:
    known_ips = set(line.strip() for line in ip if line.strip())
# Compare whole addresses: a substring test would report 182.0.0.1 as
# present because it is the beginning of 182.0.0.15. One verdict is
# printed for the whole file.
if ip_input in known_ips:
    print("true")
else:
    print("false")
:edit:
Python3 version
# The typed text is kept in its own name so that the built-in input() is
# not shadowed.
ip_input = input("Enter IP:").strip()
# 'with' closes the file once the addresses have been read.
with open("sample_ips.txt", "r") as ip:
    known_ips = {line.strip() for line in ip if line.strip()}
# Compare whole addresses: a substring test would report 182.0.0.1 as
# present because it is the beginning of 182.0.0.15. One verdict is
# printed for the whole file.
if ip_input in known_ips:
    print("true")
else:
    print("false")
I'm trying to extract only the IPs from a file, organize them numerically and put the result in another file.
The data looks like this:
The Spammer (and all his/her info):
Username: user
User ID Number: 0
User Registration IP Address: 77.123.134.132
User IP Address for Selected Post: 177.43.168.35
User Email: email#address.com
Here is my code, which does not sort the IPs correctly (i.e. it lists 177.43.168.35 before 77.123.134.132):
import re
# Both the intermediate and the final list live in this one file. The
# original wrote the sorted result to 'organized IPs.txt' (lower-case o),
# which is a different file on case-sensitive file systems.
ORGANIZED = "Organized IPs.txt"

# Collect the address that ends a line, once per distinct address.
ips = []
with open('spammers.txt', "r") as spammers:
    for text in spammers:
        text = text.rstrip()
        print(text)
        regex = re.findall(r'(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})$', text)
        # findall() returns a list (empty when nothing matched), never None.
        if regex and regex not in ips:
            ips.append(regex)

# Append mode: addresses written by earlier runs stay in the file. The file
# is opened once instead of once per address.
with open(ORGANIZED, "a") as OrganizedIPs:
    for ip in ips:
        addy = "".join(ip)
        # Compare by value; 'is not' tests object identity.
        if addy != '':
            print("IP: %s" % (addy))
            OrganizedIPs.write(addy)
            OrganizedIPs.write("\n")

# Read everything back and drop duplicates across runs.
with open(ORGANIZED, "r") as organize:
    ips = organize.readlines()
ips = list(set(ips))
print(ips)
ips = [entry.replace('\n', '') for entry in ips]
print(ips)

# NOTE(review): sort() compares the addresses as text, character by
# character, so 177.43.168.35 is placed before 77.123.134.132.
ips.sort()

# End every line with a newline so that the next run's append starts on a
# fresh line.
with open(ORGANIZED, 'w') as finish:
    finish.write(''.join(entry + '\n' for entry in ips))

# Empty the input file now that its addresses have been harvested.
open('spammers.txt', 'w').close()
I had tried using this IP sorter code, but it needs a string, whereas the regex returns a list.
Or this (saving you string formatting cost):
def ipsort(ip):
    """Return the dotted address ``ip`` as a tuple of ints.

    Tuples of ints compare number by number, so using this as a sort key
    orders addresses numerically instead of as text.
    """
    return tuple(int(t) for t in ip.split('.'))


ips = ['1.2.3.4', '100.2.3.4', '62.1.2.3', '62.1.22.4']
print(sorted(ips, key=ipsort))
import re
LOG = "spammers.txt"
IPV4 = re.compile(r"(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})")
RESULT = "organized_ips.txt"


def get_ips(fname):
    """Return every IPV4 match found in the file ``fname``.

    Matches are returned in order of appearance, duplicates included.
    """
    with open(fname) as inf:
        return IPV4.findall(inf.read())


def numeric_ip(ip):
    """Return the dot-separated parts of ``ip`` as a list of ints (a numeric sort key)."""
    return [int(i) for i in ip.split(".")]


def write_to(fname, iterable, fmt):
    """Write ``fmt.format(item)`` to the file ``fname`` for every item of ``iterable``."""
    with open(fname, "w") as outf:
        outf.writelines(fmt.format(i) for i in iterable)


def main():
    """Extract the addresses from LOG and write them, unique and numerically sorted, to RESULT."""
    # set() removes duplicates; the key makes the order numeric, not textual.
    ips = sorted(set(get_ips(LOG)), key=numeric_ip)
    write_to(RESULT, ips, "IP: {}\n")


if __name__ == "__main__":
    main()
Try this:
# Zero-pad every octet to three digits so that plain string comparison
# orders the addresses numerically. (The closing parenthesis of sorted()
# was missing.)
sorted_ips = sorted(
    ips,
    key=lambda x: '.'.join(["{:>03}".format(octet) for octet in x.split(".")]),
)