Reconstruct HTTP Webpage from libpcap python script - python

I am trying to reconstruct a webpage from a libpcap file from a python script. I have all the packets so the goal I guess is to have a libpcap file as input and you find all the necessary packets and somehow have a webpage file as output with all pictures and data from that page. Can anyone get me started off in the right direction. I think I will need dkpt and/or scaPY.
Update 1: Code is below! Here is the code I have come up so far with in Python. It is suppose to grab the first set of packets from a single HTTP session beginning with a packet with the SYN and ACK flags set to 1 and ends with a packet that has the FIN flag set to 1.
Assuming there is only one website visited during the packet capture does this code append all the necessary packets needed to reconstruct the visited webpage?
Assuming I have all the necessary packets how do I reconstruct the webpage?
import scaPy
pktList = list() #create a list to store the packets we want to keep
pcap = rdpcap('myCapture.pcap') #returns a packet list with every packet in the pcap
count = 0 #will store the index of the syn-ack packet in pcap
for pkt in pcap: #loops through packet list named pcap one packet at a time
count = count + 1 #increments by 1
if pkt[TCP].flags == 0x12 and pkt[TCP].sport == 80: #if it is a SYN-ACK packet session has been initiated as http
break #breaks out of the for loop
currentPkt = count #loop from here
while pcap[currentPkt].flags&0x01 != 0x01: #while the FIN bit is set to 0 keep loops stop when it is a 1
if pcap[currentPkt].sport == 80 and pcap[currentPkt].dport == pcap[count].dport and pcap[currentPkt].src == pcap[count].src and pcap[currentPkt].dst == pcap[count].dst:
#if the src, dst ports and IP's are the same as the SYN-ACK packet then the http packets belong to this session and we want to keep them
pktList.append(pcap[currentPkt])
#once the loop exits we have hit the packet with the FIN flag set and now we need to reconstruct the packets from this list.
currentPkt = currentPkt + 1

Perhaps something like tcpick -r your.pcap -wRS does the job for you.
http://tcpick.sourceforge.net/?t=1&p=OPTIONS

This python script will extract all unencrypted HTTP webpages that are in a PCAP File and output them as HTML Files. It uses scaPY to work with the individual packets (another good python module is dpkt).
from scapy.all import *
from operator import *
import sys
def sorting(pcap):
newerList = list()
#remove everything not HTTP (anything not TCP or anything TCP and not HTTP (port 80)
#count = 0 #dont need this it was for testing
for x in pcap:
if x.haslayer(TCP) and x.sport == 80 and bin(x[TCP].flags)!="0b10100":
newerList.append(x);
newerList = sorted(newerList, key=itemgetter("IP.src","TCP.dport"))
wrpcap("sorted.pcap", newerList)
return newerList
def extract(pcap,num, count):
listCounter = count
counter = 0
#print listCounter
#Exit if we have reached the end of the the list of packets
if count >= len(pcap):
sys.exit()
#Create a new file and find the packet with the payload containing the beginning HTML code and write it to file
while listCounter != len(pcap):
thisFile = "file" + str(num) + ".html"
file = open(thisFile,"a")
s = str(pcap[listCounter][TCP].payload)
#print "S is: ", s
x,y,z = s.partition("<")
s = x + y + z #before was y+z
if s.find("<html") != -1:
file.write(s)
listCounter = listCounter + 1
break
listCounter = listCounter + 1
#Continue to loop through packets and write their contents until we find the close HTML tag and
#include that packet as well
counter = listCounter
while counter != len(pcap):
s = str(pcap[counter][TCP].payload)
if s.find("</html>") != -1:
file.write(s)
file.close
break
else:
file.write(s)
counter = counter + 1
#Recursively call the function incrementing the file name by 1
#and giving it the last spot in the PCAP we were in so we continue
#at the next PCAP
extract(pcap, num+1, counter)
if __name__ == "__main__":
#Read in file from user
f = raw_input("Please enter the name of your pcap file in this directory. Example: myFile.pcap")
pcapFile = rdpcap(f)
print "Filtering Pcap File of non HTTP Packets and then sorting packets"
#Sort and Filter the PCAP
pcapFile = sorting(pcapFile)
print "Sorting Complete"
print "Extracting Data"
#Extract the Data
extract(pcapFile,1,0)
Print "Extracting Complete"

Related

Need help creating a mac filter because I am stuck making a while loop

Alright so for a school assignment me and my classmate decided to make a macfiler/ids script in python3. We got pretty far but we are stuck at 1 thing. We need the scan to go on infinite and break when a new connection in the network is found and continue when the new connection is blocked or not. Could someone help us in the right direction?
from scapy.all import ARP, Ether, srp
import os
import netifaces
#Ask for the subnet mask and calculate it to CIDR notation
netmask = input("What is the subnetmask of your network? (xxx.xxx.xxx.xxx:)")
CIDR = sum(bin(int(x)).count('1') for x in netmask.split('.'))
#Find the defaultgateway ip
gateways = netifaces.gateways()
default_gateway = gateways['default'][netifaces.AF_INET][0]
#Conversions to make the code easier to read
target_ip = default_gateway + "/" + str(CIDR)
arp = ARP(pdst=target_ip)
ether = Ether(dst="ff:ff:ff:ff:ff:ff")
packet = ether/arp
result = srp(packet, timeout=3, verbose=0)[0]
#Empty list for the found clients in the network
clients = []
#Empty list for the blocked addresses
blockedMac = []
#This needs an infinite loop and break when a new connection is in the network is found
for sent, received in result:
clients.append({'ip': received.psrc, 'mac': received.hwsrc})
#if new connection is found do this
print("Available devices in the network:\n")
print("IP" + " "*18+"MAC")
for client in clients:
print("{:16} {}".format(client['ip'], client['mac']))
blockedConnectionStatus = input("\nWould you like to block a connection? Y/n:")
if blockedConnectionStatus == 'Y' or blockedConnectionStatus == 'y':
macAdress = input("\nEnter the MAC Address to block: \n")
#Open rules.v4 and read if the mac address is allready blocked
with open("/etc/iptables/rules.v4", "r") as f:
data = f.read()
if macAdress.upper() in data:
blockedMac.append(macAdress) #Append the blocked mac address to list blockedMac
print("Allready blocked!")
else:
print(macAdress + " is now being blocked!")
os.system("iptables -A INPUT -p ALL -m mac --mac-source " + macAdress + " -j DROP")
os.system("iptables-save > /etc/iptables/rules.v4")
blockedMac.append(macAdress) #Append the blocked mac address to list blockedMac
#Go back to infinite loop```
From my understanding, you want a while loop that only runs while something is true you could do something like this
loop = 'True'
while loop == 'True':
print('Worked!')
loop = 'False'
Let me know if it is not.

python retrieving web data

I am new at Python and I have been trying to figure out the following exercise.
Exercise 5: (Advanced) Change the socket program so that it only shows data after the headers and a blank line have been received. Remember that recv is receiving characters (newlines and all), not lines.
I attached below the code I came up with, unfortunately I don't think it is working:
import socket
mysocket=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
mysocket.connect(('data.pr4e.org', 80))
mysocket.send('GET http://data.pr4e.org/romeo.txt HTTP/1.0\r\n\r\n'.encode())
count=0
while True:
data = mysocket.recv(200)
if (len(data) < 1): break
count=count+len(data.decode().strip())
print(len(data),count)
if count >=399:
print(data.decode(),end="")
mysocket.close()
Instead of counting the number of lines received, just grab all the data you get and then split on the first double CRLF you find.
resp = []
while True:
data = mysocket.recv(200)
if not data: break
resp.append(data.decode())
mysocket.close()
resp = "".join(resp)
body = resp.partition('\r\n\r\n')[2]
print(body)

"IndexError: string index out of range" when loop is already ended! - Python 3

I'm recently studiying sockets trying to make them work inside a Python script (Python3), inside Windows.
Here the Python script of the server side.
import socket
import time
MSGLEN = 2048
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('localhost', 8000))
server.listen(1)
while 1:
#accept connections from outside
(clientsocket, address) = server.accept()
chunks = []
bytes_recd = 0
while bytes_recd < MSGLEN:
chunk = clientsocket.recv(min(MSGLEN - bytes_recd, 2048)) #should enough this row without checks if transmission guaranteed inside buffer dimension
#print(chunk)
#i=0
chunk = chunk.decode()
bytes_recd = bytes_recd + len(chunk)
chunks.append(chunk)
for i in range(bytes_recd):
if(chunk[i] == "_"):
print("Breaking(_Bad?)")
break
buff_str = chunk[:i]
print(buff_str)
if chunk == '':
print("Server notification: connection broken")
break
mex = ''.join(chunks)
print("Server notification: \n\tIncoming data: " + mex)
i=1;
while i==1:
chunk = clientsocket.recv(128)
chunk = chunk.decode()
if chunk == '':
i = 0
totalsent = 0
msg = "Server notification: data received"
while totalsent < MSGLEN:
sent = clientsocket.send(bytes(msg[totalsent:], 'UTF-8'))
if sent == 0 :
print ("Server notification: end transmitting")
break
totalsent = totalsent + sent
I'm checking when a "_" is received and make some decision in it. This because I'm using blocking sockets. You should forget the very last part and the whole program functionality since I'm working on it and the incriminated part is here:
for i in range(bytes_recd):
if(chunk[i] == "_"):
print("Breaking(_Bad?)")
break
buff_str = chunk[:i]
Something weird happens: the check works fine and break the loop by printing the rest at the right index value. BUT! This wild and apparently non-sense error appears:
>>>
Breaking(_Bad?), i: 2
13
Traceback (most recent call last):
File "C:\Users\TheXeno\Dropbox\Firmwares\Altri\server.py", line 24, in <module>
if(chunk[i] == "_"):
IndexError: string index out of range
As you can see from the console output, it finds the number before the "_", in this case is the string "13" and is located at i = 2, which is compliant with the receiving string format form the socket: "charNumber_String". But seems to keep counting until exit from bounds.
EDIT: I will not rename the variables, but next time, better use improved names, and not "chunk" and "chunks".
Let's look at this block of code:
while bytes_recd < MSGLEN:
chunk = clientsocket.recv(min(MSGLEN - bytes_recd, 2048))
chunk = chunk.decode()
bytes_recd = bytes_recd + len(chunk)
chunks.append(chunk)
for i in range(bytes_recd):
if(chunk[i] == "_"):
print("Breaking(_Bad?)")
break
Lets say you read 100 bytes, and lets assume that the decoded chunk is the same length as the encoded chunk. bytes_recd will be 100, and your for loop goes from zero to 99, and all is well.
Now you read another 100 bytes. chunk is again 100 bytes long, and chunks (with an "s") is 200 bytes. bytes_recd is now 200. Your for loop now goes from 0 to 199, and you're checking chunk[i]. chunk is only 100 bytes long, so when i gets past 99, you get the error.
Maybe you meant to compare chunks[i] (with an "s")?
try:
for i, chunk in enumerate(chunks):
if(chunk == "_"):
print("Breaking(_Bad?)")
break
This way you never go out of bounds. So one error less :)

How to receive data from a socket, process, and send back data in python?

i have the following code:
import socket # Import socket module
import sys
s = socket.socket() # Create a socket object
host = '' # Get local machine name
port = 1234 # Reserve a port for your service.
s.connect((host, port))
while 1:
data = s.recv(1024)
print ' data ' , data
d = data.split('?') # parsing values from server
if len(d) < 2:
# if does not contain ?, do nothing
continue
else:
a = d[0]
b = d[1].replace('\n', '')
# check how a compares to b, and send response accordingly
if (a > b):
s.send('1')
elif (a == b):
s.send('2')
else:
s.send('3')
s.close() # Close the socket when done
Without the processing code I have, it works fine if I just send a random value. But with the code above, I can only parse the first set of line, and then it stops. (I assume it closes the socket or something?)
The data coming from the socket looks like '1 ? 23' or '23 ? 1' , etc. it expects a response that determines how the two numbers relate.
In comparison, if I have this code:
import socket # Import socket module
import sys
s = socket.socket() # Create a socket object
host = '' # Get local machine name
port = 1234 # Reserve a port for your service.
s.connect((host, port))
backlog = ''
while 1:
data = s.recv(1024)
sp = data.split('\n')
if len(sp) < 2:
backlog += data
continue
line = backlog + sp[0]
backlog = sp[1]
data = line
print ' data ' , data
if not data:
break
s.send ('2')
s.close() # Close the socket when done
This code will yield a server response of either 'Correct!' or 'Incorrect...try again!' depending on whether it's right or wrong.
You seem to assume that you always get a full line with each read() call. That is wrong.
You should split your input into lines, and only if you have a full line, you proceed.
backlog = ''
while 1:
data = s.recv(1024)
# do we have a line break?
sp = data.split('\n')
if len(sp) < 2:
# no, we haven't...
backlog += data
continue
# yes, we have.
line = backlog + sp[0] # first part is the now complete line.
backlog = sp[1] # 2nd part is the start of the new line.
print ' line ' , line
d = line.split('?') # parsing values from server
if len(d) < 2:
# if does not contain ?, do nothing
continue
else:
a = int(d[0]) # we want to compare numbers, not strings.
b = int(d[1])
# check how a compares to b, and send response accordingly
if (a > b):
s.send('1')
elif (a == b):
s.send('2')
else:
s.send('3')
Try out what happens now.
Another question which occurs to me is what exactly does the server expect? Really only one byte? Or rather '1\n', '2\n', '3\n'?

Python checking for serial string in If statement

As a newbie to python, I'm trying to use it to read a file and write each line of the file to the RS-232 port. My code bellow seems to work for the most part, except for my listen and react segments. From poking around, it seems that my if statements can't read if I've received a "Start\r", or "End\r" string from my device (RS-232). Can anyone provide feedback on what is missing?
import serial
import time
port = "/dev/ttyS0"
speed = 9600
print("\n\n\n\nScript Starting\n\n\n")
ser = serial.Serial(port, speed, timeout=0)
ser.flushInput() #flush input buffer, discarding all its contents
ser.flushOutput()#flush output buffer, aborting current output and discard all that is in buffer
text_file = open("my.file", "r")
lines = text_file.read().split('\n')
i = 0
counter = 0
while i<len(lines):
response = ser.readline()
if (counter == 0):
print("\n\nProbing With Off Data\n")
ser.write('FFF')
ser.write('\r')
counter+=1
if (response == 'Start'):
ser.write('FFF')
ser.write('\r')
if (response == 'End'):
print("\nString Transmitted:")
print lines
make_list_a_string = ''.join(map(str, lines))
ser.write(make_list_a_string)
ser.write('\r')
print("\n")
i+=1
text_file.close()
exit(0)
Try using strip() to get rid of any trailing or preceding '\r's:
if (response.strip() == 'Start'):

Categories

Resources