I'm trying to extract only the IPs from a file, organize them numerically and put the result in another file.
The data looks like this:
The Spammer (and all his/her info):
Username: user
User ID Number: 0
User Registration IP Address: 77.123.134.132
User IP Address for Selected Post: 177.43.168.35
User Email: email#address.com
Here is my code, which does not sort the IPs correctly (i.e. it lists 177.43.168.35 before 77.123.134.132):
import re
# Python 2 script: collect the IP addresses found in spammers.txt, append them
# to "Organized IPs.txt", then de-duplicate, sort and rewrite them.
# NOTE(review): the indentation of this snippet was lost, so the nesting of the
# loops and conditionals below cannot be confirmed from the text alone.
spammers = open('spammers.txt', "r")
ips = []
for text in spammers.readlines():
text = text.rstrip()
print text
# The trailing `$` means only an address at the end of the stripped line matches.
regex = re.findall(r'(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})$',text)
# re.findall returns a list (possibly empty), never None, so the first test is
# always true; `ips` therefore becomes a list of lists.
if regex is not None and regex not in ips:
ips.append(regex)
for ip in ips:
OrganizedIPs = open("Organized IPs.txt", "a")
# Each `ip` is a findall result list; joining it yields '' for a line with no match.
addy = "".join(ip)
# NOTE(review): `is not ''` is an identity test, not an equality test.
if addy is not '':
print "IP: %s" % (addy)
OrganizedIPs.write(addy)
OrganizedIPs.write("\n")
spammers.close()
OrganizedIPs.close()
# `organize` is opened here and never closed in this snippet.
organize = open("Organized IPs.txt", "r")
ips = organize.readlines();
ips = list(set(ips))
print ips
for i in range(len(ips)):
ips[i] = ips[i].replace('\n', '')
print ips
# The elements are strings, so this sort is lexicographic: '177.43.168.35'
# is placed before '77.123.134.132' because '1' < '7'.
ips.sort()
# NOTE(review): this name starts with a lowercase 'o', unlike "Organized IPs.txt" above.
finish = open('organized IPs.txt', 'w')
finish.write('\n'.join(ips))
finish.close()
# Opening with 'w' and closing immediately truncates spammers.txt to empty.
clean = open('spammers.txt', 'w')
clean.close()
I had tried using this IP sorter code, but it needs a string, whereas the regex returns a list.
Or this (saving you string formatting cost):
def ipsort(ip):
    """Return a numeric sort key for a dotted-quad IPv4 string.

    Args:
        ip: Address such as ``"62.1.22.4"``.

    Returns:
        A tuple of ints, one per dot-separated part, so that comparing keys
        orders addresses numerically rather than character by character.

    Raises:
        ValueError: If any dot-separated part is not an integer.
    """
    return tuple(int(t) for t in ip.split('.'))


# Demo: numeric ordering puts '62.1.2.3' before '100.2.3.4'.
ips = ['1.2.3.4', '100.2.3.4', '62.1.2.3', '62.1.22.4']
print(sorted(ips, key=ipsort))
# Extract the unique IPv4 addresses from a log file and write them out
# sorted numerically, one "IP: <address>" line per address.
import re

LOG = "spammers.txt"
# Word boundaries stop the pattern from matching inside a longer run of
# digits (e.g. "1234.5.6.7" must not yield "234.5.6.7").
IPV4 = re.compile(r"\b\d{1,3}(?:\.\d{1,3}){3}\b")
RESULT = "organized_ips.txt"


def get_ips(fname):
    """Return every IPv4-looking string in the file *fname*, in file order."""
    with open(fname) as inf:
        return IPV4.findall(inf.read())


def numeric_ip(ip):
    """Return the dotted-quad string *ip* as a list of ints, for use as a sort key."""
    return [int(i) for i in ip.split(".")]


def write_to(fname, iterable, fmt):
    """Write each item of *iterable* to *fname*, formatted with ``fmt.format(item)``.

    The file is opened in "w" mode, so any existing content is replaced.
    """
    with open(fname, "w") as outf:
        for i in iterable:
            outf.write(fmt.format(i))


def main():
    """Read LOG, de-duplicate its addresses, sort them numerically, write RESULT."""
    unique_ips = set(get_ips(LOG))  # uniquify
    write_to(RESULT, sorted(unique_ips, key=numeric_ip), "IP: {}\n")


if __name__ == "__main__":
    main()
Try this:
# Zero-pad every octet to three digits so that plain string comparison orders
# the addresses numerically (e.g. "077.123.134.132" < "177.043.168.035").
sorted_ips = sorted(
    ips,
    key=lambda x: '.'.join(octet.zfill(3) for octet in x.split(".")),
)
Related
I wrote some code to extract email and IP addresses from bulk text. However, the code extracts only the email addresses. (The original text, which I would like to make understandable, is a typical log file). I don't know why the generated file does not give me back the IP addresses.
import os
import re
# Python 2 script: pull e-mail-like and IP-like strings out of errors.txt and
# write them to emaillist-rev.txt, asking before overwriting an existing file.
# NOTE(review): the indentation of this snippet was lost; nesting is inferred.
# 1
filename = 'errors.txt'
newfilename = 'emaillist-rev.txt'
# 2
if os.path.exists(filename):
# The handle `data` is never closed in this snippet.
data = open(filename,'r')
bulkemails = data.read()
else:
print "File not found."
raise SystemExit
# 3
# NOTE(review): the pattern separates the two halves with '#'; presumably '@'
# was intended for e-mail addresses - confirm against the real input.
r = re.compile(r'[\w\.-]+#[\w\.-]+')
results = r.findall(bulkemails)
emails = ""
for x in results:
emails += str(x)+"\n"
# 4
# Without re.MULTILINE, ^ and $ anchor to the start and end of the whole
# string, so findall over the full file text finds nothing unless the file is
# a single address. The pattern is also not a raw string literal.
ip = re.compile('^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
result = ip.findall(bulkemails)
# `ip` is rebound here from the compiled pattern to the output string.
ip =""
for y in result:
ip += str(y)+"\n"
# 5
# Write the collected e-mails followed by the collected IPs to newfilename.
def writefile():
f = open(newfilename, 'w')
f.write(emails + ip)
f.close()
print "File written."
# 6
# Ask for confirmation; only the exact answers "Yes" and "No" are accepted,
# any other answer re-prompts by calling overwrite_ok() again.
def overwrite_ok():
response = raw_input("Are you sure you want to overwrite "+str(newfilename)+"? Yes or No\n")
if response == "Yes":
writefile()
elif response == "No":
print "Aborted."
else:
print "Please enter Yes or No."
overwrite_ok()
# 7
if os.path.exists(newfilename):
overwrite_ok()
else:
writefile()
When declaring the ip regex, replace the anchors with word boundaries and mind you need to use a raw string literal.
# \b word boundaries (instead of ^/$ anchors) let the pattern match an address
# anywhere inside the text; the raw string keeps the backslashes intact.
ip = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b')
I have to read a text file, extract the most frequent IP address, and count how many times it comes up.
# Attempt at counting the most frequent entry in a log file.
# NOTE(review): the indentation of this snippet was lost, so which statements
# belong to which function cannot be confirmed from the text alone.
def anaylse_log(parameter):
myfile = open("sample_log_1 test.txt", "r")
iPdata = myfile.readlines()
mydict = {}
ipAddress = []
item_list = []
result_file = []
counter = ()
def extract_log(myfile):
#split the file line by line
for line in myfile:
splitData = line.split()
# These two assignments bind `ipAddress` and `numbers` to strings taken from
# the line, so the .append() calls below are made on str objects, which have
# no append method.
ipAddress = splitData[0]
numbers = splitData[1]
ipAddress.append(ipAddress)
numbers.append(numbers)
# Counts are keyed on `numbers` (the second field), not on `ipAddress`.
if numbers in mydict:
#if numbers is already a key in the dictionary
#increase the count
mydict[numbers] += 1
else:
# Otherwise if it's not yet in the dictionary
# Initialise it to 1
mydict[numbers] = 1
return numbers
# NOTE(review): if this close() sits in the same function as the return above,
# it is unreachable.
myfile.close()
def find_most_frequent(maximum,iPdata):
# NOTE(review): this file name differs from "sample_log_1 test.txt" opened above.
with open("sample_log_1 text", "r") as myfile:
for text in myfile:
if str(maximum) in text:
return maximum
with open("resultss.csv", "w") as file:
# `maximum` is called here as if it were a function.
file.write(maximum(maximum))
#This will put the dictionary into tuples and give each key a value
item_list = [(k, v) for k, v in mydict.items()]
#This will sort the list by v
item_list.sort(key=lambda x:x[1], reverse=True)
# `mydict` is called here; if it is the dict created above, a dict is not callable.
maximum = mydict()
def main(myfile,mydict,iPdata):
result_file = open("resultss.csv", "w")
# main is declared with three parameters but is called with none.
main()
I had to fix the spacing so the code could be posted; I hope that is OK and that you are able to run it. I have been stuck on this for a while, and I thought I was calling the functions too.
Suppose your log file is like
15.25.7.3
25.25.2.5
25.25.2.5
115.25.7.3
215.25.7.3
25.25.2.5
Here is a simple way to count ips
# Tally how many times each IP address appears in ip.log (one address per line).
ip_count_dict = {}
with open('ip.log', 'r') as f:
    ip_file = f.read()

# if separated by comma
# ip_list = ip_file.split(',')
# if separated by \n new line
ip_list = ip_file.splitlines()

for ip in ip_list:
    ip = ip.strip()
    if not ip:
        # Blank lines would otherwise be counted as an address named ''.
        continue
    ip_count_dict[ip] = ip_count_dict.get(ip, 0) + 1

print(ip_count_dict)
Output: {'15.25.7.3': 1, '25.25.2.5': 3, '115.25.7.3': 1, '215.25.7.3': 1}
Instead of manually counting IPs as you loop through your log, try this:
from collections import Counter
# Count how often each IP (first comma-separated field) occurs in resultss.csv.
# The file is read inside a `with` block so the handle is closed afterwards.
with open("resultss.csv") as log_file:
    log_entries = log_file.read().split("\n")
# Skip blank lines (such as the one after a trailing newline) so they are not
# counted as an address named ''.
ip_list = [log.split(",")[0] for log in log_entries if log.strip()]
counts = Counter(ip_list)
print(counts)
This works with a CSV file format like:
10.10.10.1,asdf,31
5.9.7.11,aajbczxz,54
5.9.7.11,zzzzz,2
I have text file which contains lines of text and IPs with port number and I want to remove port number and print just IP.
Example text file:
77.55.211.77:8080
NoIP
79.127.57.42:80
Desired output:
77.55.211.77
79.127.57.42
My code:
import re
# Read IPs.txt line by line and print each line with a trailing ":<port>" removed.
# NOTE(review): the indentation of this snippet was lost; nesting is inferred.
with open('IPs.txt', 'r') as infile:
for ip in infile:
ip = ip.strip('\n')
# re.sub returns the input unchanged when the pattern does not match, so a
# line such as "NoIP" is passed through as-is.
IP_without_port_number = re.sub(r'((?::))(?:[0-9]+)$', "", ip)
# This match runs against `ip` (still carrying its port) and its result is
# never used, so it does not filter what is printed below.
re_for_IP = re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$',ip)
print(IP_without_port_number)
I do not understand why every line is printed when I print "IP_without_port_number" to the console.
All you need is the second match:
import re
# Print the IPv4 address found at the start of each line of IPs.txt; lines
# that do not begin with an address (e.g. "NoIP") are skipped.
ip_at_line_start = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
with open('IPs.txt', 'r') as infile:
    for ip in infile:
        # .match() only matches at the beginning of the line, so the ":port"
        # suffix is simply left out of the match.
        re_for_IP = ip_at_line_start.match(ip)
        if re_for_IP:
            # .group(0) is the whole match; unlike re_for_IP[0] it does not
            # require Python 3.6+.
            print(re_for_IP.group(0))
Output:
77.55.211.77
79.127.57.42
One-liner:
import re
# Collect the IPv4 address at the start of every line of IPs.txt into `ips`;
# lines that do not begin with an address are dropped.
ip_at_line_start = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
with open('IPs.txt', 'r') as infile:
    # Distinct names for line and match avoid reusing `ip` for both.
    matches = (ip_at_line_start.match(line) for line in infile)
    ips = [m.group(0) for m in matches if m]
print(ips)
You don't need regex, use the split function on the : character when reading the line. Then you would be left with an array with two positions, the first containing only the IP address and the other containing the port.
Try this:
import re
# Strict IPv4 pattern: four dot-separated octets, each in the range 0-255.
# Built from adjacent raw string literals so that no newline ends up inside
# the pattern (a newline inside a group becomes part of its first alternative
# and stops octets 250-255 from matching).
regex = (
    r'^(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
    r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
    r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
    r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)$'
)
# Print every line of IP.txt that is a valid IPv4 address once anything from
# the first ':' onwards (the port) has been removed.
with open('IP.txt', 'r') as infile:
    for ip in infile:
        ip = ip.strip('\n')
        IP_without_port_number = re.sub(r':.*$', "", ip)
        # `regex` is the strict IPv4 pattern defined just above this loop.
        if re.search(regex, IP_without_port_number):
            print(IP_without_port_number)
Output:
77.55.211.77
79.127.57.42
I came up with this regex code; it works for me and it's easy.
import re
# Prompt for a line of text and print every dotted group of four numbers in it.
dotted_quad = re.compile(r'\d+\.\d+\.\d+\.\d+')
text = input("Input text: ")
pattern = dotted_quad.findall(text)  # list of the matched substrings
print(pattern)
I have the script below, which checks the countries of domain names, and I want to put the results for each country code in its own file.
For example :
FR ==> FR.txt
US ==> US.TXT
The script output is like this :
domain.com [xx.xx.xx.xx]: US
Script:
#!/usr/bin/python
import socket
from geolite2 import geolite2
# Print one result line in the form "<domain> [<ip>]: <country code>".
# domain_str is stripped of surrounding whitespace before printing.
def origin(ip, domain_str, result):
print("{0} [{1}]: {2}".format(domain_str.strip(), ip, result))
# Resolve domain_str to an IP address, look that address up with geolite2 and
# pass the resulting country ISO code to origin().
# socket.gethostbyname raises socket.error when the name cannot be resolved.
def getip(domain_str):
ip = socket.gethostbyname(domain_str.strip())
# A reader is requested from geolite2 on every call.
reader = geolite2.reader()
output = reader.get(ip)
# NOTE(review): presumably reader.get can return no record for an unknown
# address, in which case this subscript would fail - confirm against the
# library documentation.
result = output['country']['iso_code']
origin(ip, domain_str, result)
# Look up every host name listed in hostnames.txt; when a name does not
# resolve, retry with everything after its first dot.
# NOTE(review): the indentation of this snippet was lost; the retry logic is
# presumably nested inside the `except socket.error` handler - confirm.
with open("hostnames.txt", "r") as ins:
for domain_str in ins:
try:
getip(domain_str)
# `msg` is bound but not used in this snippet.
except socket.error as msg:
print("{0} [could not resolve]".format(domain_str.strip()))
if len(domain_str) > 2:
# split('.', 1)[1] is the part after the first dot; a line containing no dot
# has no element at index 1.
subdomain = domain_str.split('.', 1)[1]
try:
getip(subdomain)
# The bare except silently skips any failure of the retry.
except:
continue
geolite2.close()
I appreciate any help provided
def origin(ip, domain_str, result):
    """Append *domain_str* to the file named after its country code.

    Args:
        ip: Resolved IP address (accepted for call compatibility; not written).
        domain_str: Domain name; surrounding whitespace is stripped.
        result: Country ISO code, used as the file name (``US`` -> ``US.txt``).
    """
    file_name = "{}.txt".format(result)
    # Append mode keeps earlier entries; the newline puts each domain on its
    # own line instead of running them together.
    with open(file_name, 'a') as f:
        f.write("{}\n".format(domain_str.strip()))
I wrote some code to extract email and IP addresses from bulk text. However, the code extracts only the email addresses. (The original text, which I would like to make understandable, is a typical log file). I don't know why the generated file does not give me back the IP addresses.
import os
import re
# Python 2 script: pull e-mail-like and IP-like strings out of errors.txt and
# write them to emaillist-rev.txt, asking before overwriting an existing file.
# NOTE(review): the indentation of this snippet was lost; nesting is inferred.
# 1
filename = 'errors.txt'
newfilename = 'emaillist-rev.txt'
# 2
if os.path.exists(filename):
# The handle `data` is never closed in this snippet.
data = open(filename,'r')
bulkemails = data.read()
else:
print "File not found."
raise SystemExit
# 3
# NOTE(review): the pattern separates the two halves with '#'; presumably '@'
# was intended for e-mail addresses - confirm against the real input.
r = re.compile(r'[\w\.-]+#[\w\.-]+')
results = r.findall(bulkemails)
emails = ""
for x in results:
emails += str(x)+"\n"
# 4
# Without re.MULTILINE, ^ and $ anchor to the start and end of the whole
# string, so findall over the full file text finds nothing unless the file is
# a single address. The pattern is also not a raw string literal.
ip = re.compile('^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
result = ip.findall(bulkemails)
# `ip` is rebound here from the compiled pattern to the output string.
ip =""
for y in result:
ip += str(y)+"\n"
# 5
# Write the collected e-mails followed by the collected IPs to newfilename.
def writefile():
f = open(newfilename, 'w')
f.write(emails + ip)
f.close()
print "File written."
# 6
# Ask for confirmation; only the exact answers "Yes" and "No" are accepted,
# any other answer re-prompts by calling overwrite_ok() again.
def overwrite_ok():
response = raw_input("Are you sure you want to overwrite "+str(newfilename)+"? Yes or No\n")
if response == "Yes":
writefile()
elif response == "No":
print "Aborted."
else:
print "Please enter Yes or No."
overwrite_ok()
# 7
if os.path.exists(newfilename):
overwrite_ok()
else:
writefile()
When declaring the ip regex, replace the anchors with word boundaries and mind you need to use a raw string literal.
# \b word boundaries (instead of ^/$ anchors) let the pattern match an address
# anywhere inside the text; the raw string keeps the backslashes intact.
ip = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b')