forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
http-parse-complete.py
298 lines (255 loc) · 10.6 KB
/
http-parse-complete.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
#!/usr/bin/python
#
#Bertrone Matteo - Polytechnic of Turin
#November 2015
#
#eBPF application that parses HTTP packets
#and extracts (and prints on screen) the URL contained in the GET/POST request.
#
#eBPF program http_filter is used as SOCKET_FILTER attached to eth0 interface.
#Only IP/TCP packets containing an HTTP GET/POST are returned to userspace; all others are dropped.
#
#python script uses bcc BPF Compiler Collection by iovisor (https://github.com/iovisor/bcc)
#and prints on stdout the first line of the HTTP GET/POST request containing the url
from __future__ import print_function
from bcc import BPF
from ctypes import *
from struct import *
from sys import argv
import sys
import socket
import os
import struct
import binascii
import time
CLEANUP_N_PACKETS = 50 #cleanup() is run once every CLEANUP_N_PACKETS received packets
MAX_URL_STRING_LEN = 8192 #max length of a reassembled url string (usually 8K)
MAX_AGE_SECONDS = 30 #max age, in seconds, of an entry in the bpf_sessions map
#convert a bin string into a string of hex char
#helper function to print raw packet in hex
def toHex(s):
    """Return the lowercase hexadecimal representation of a byte string.

    Every input byte is rendered as exactly two hex digits, so the result
    is always twice as long as the input. An empty input yields "".

    s -- bytes / bytearray (Python 2 str is bytes), or a text string, in
         which case each character's code point is formatted instead.
    """
    if isinstance(s, (bytes, bytearray)):
        #bytearray() yields integers on both Python 2 and Python 3,
        #whereas iterating bytes yields 1-char strings on 2 and ints on 3
        return ''.join('%02x' % byte for byte in bytearray(s))
    #text string: fall back to the code point of each character
    return ''.join('%02x' % ord(ch) for ch in s)
#print str until CR+LF
def printUntilCRLF(str):
    """Print the part of `str` that precedes the first CR+LF, then a newline.

    If no CR+LF is present the whole input is printed. The CR+LF itself is
    never printed.

    str -- text string, or raw bytes / bytearray as read from the socket.
           Raw bytes are decoded as latin-1, which maps every byte value to
           one character and therefore cannot fail.
    """
    #the parameter shadows the builtin `str`, so the native text type is
    #obtained through type("") instead
    if isinstance(str, (bytes, bytearray)) and not isinstance(str, type("")):
        text = bytes(str).decode("latin-1")
    else:
        text = str
    #partition() returns the full string as first item when CR+LF is absent
    first_line = text.partition("\r\n")[0]
    print(first_line)
    return
#cleanup function
def cleanup():
    """Age out stale entries of the bpf_sessions map.

    For every entry currently in the map:
      - timestamp == 0               --> stamp it with the current time
      - age > MAX_AGE_SECONDS        --> delete it
    A failure on one entry is reported and does not stop the sweep.
    """
    #get current time in seconds
    current_time = int(time.time())
    #walk a snapshot of the entries, because the map is modified during the loop
    for key, _ in list(bpf_sessions.items()):
        try:
            #re-read the leaf through the map before acting on it
            current_leaf = bpf_sessions[key]
            if current_leaf.timestamp == 0:
                #entry has no timestamp yet: set it to the current time
                bpf_sessions[key] = bpf_sessions.Leaf(current_time)
            elif current_time - current_leaf.timestamp > MAX_AGE_SECONDS:
                #delete older entries
                del bpf_sessions[key]
        except Exception:
            #Exception (not a bare except) so that Ctrl-C / SystemExit
            #still terminate the program
            print("cleanup exception.")
    return
#args
def usage():
    """Print a short usage line to stdout and terminate the program.

    Raises SystemExit (default exit status) after printing.
    """
    print("USAGE: %s [-i <if_name>]" % argv[0])
    print("")
    print("Try '%s -h' for more options." % argv[0])
    #sys.exit() rather than the interactive-only exit() helper installed by site
    sys.exit()
#help
def help():
    """Print the full help text to stdout and terminate the program.

    Raises SystemExit (default exit status) after printing.
    """
    print("USAGE: %s [-i <if_name>]" % argv[0])
    print("")
    print("optional arguments:")
    print(" -h print this help")
    print(" -i if_name select interface if_name. Default is eth0")
    print("")
    print("examples:")
    print(" http-parse # bind socket to eth0")
    print(" http-parse -i wlan0 # bind socket to wlan0")
    #sys.exit() rather than the interactive-only exit() helper installed by site
    sys.exit()
#arguments
#default capture interface, used when no -i option is given
interface = "eth0"
arg_count = len(argv)
if arg_count > 3:
    #too many arguments
    usage()
elif arg_count == 3:
    #two arguments: must be "-i <if_name>"
    if str(argv[1]) != '-i':
        usage()
    else:
        interface = argv[2]
elif arg_count == 2:
    #one argument: only "-h" is accepted
    if str(argv[1]) != '-h':
        usage()
    else:
        help()
print("binding socket to '%s'" % interface)
# initialize BPF - load the eBPF C source code from http-parse-complete.c
bpf = BPF(src_file = "http-parse-complete.c",debug = 0)
#load eBPF program http_filter of type SOCKET_FILTER into the kernel eBPF vm
#more info about eBPF program types
#https://man7.org/linux/man-pages/man2/bpf.2.html
function_http_filter = bpf.load_func("http_filter", BPF.SOCKET_FILTER)
#create a raw socket, bind it to the selected interface
#and attach the bpf program to the socket just created
BPF.attach_raw_socket(function_http_filter, interface)
#get the file descriptor of the socket previously created inside BPF.attach_raw_socket
#(this descriptor is the one the receive loop reads packets from)
socket_fd = function_http_filter.sock
#create a python socket object from the file descriptor
sock = socket.fromfd(socket_fd,socket.PF_PACKET,socket.SOCK_RAW,socket.IPPROTO_IP)
#set it as a blocking socket
sock.setblocking(True)
#get a handle to the bpf map named "sessions"
#(described as a hash map by the original author; the map is declared in the C source)
bpf_sessions = bpf.get_table("sessions")
#counter of packets received so far; drives the periodic cleanup
packet_count = 0
#dictionary containing the association <key(ipsrc,ipdst,portsrc,portdst),payload_string>
#if the url is not entirely contained in a single packet, its first part is saved in this local dict
#when \r\n is found in a following packet, the parts are appended and the whole url is printed
local_dictionary = {}
#ethernet header length
ETH_HLEN = 14
#shortest legal IPv4 header (IHL = 5) and TCP header (Data Offset = 5), in bytes
MIN_IP_HLEN = 20
MIN_TCP_HLEN = 20
#CR + LF (substring to find)
#bytes literal: os.read() returns bytes on Python 3 (and str == bytes on Python 2)
crlf = b"\r\n"
#payload prefixes that mark the first packet of an HTTP request / response
HTTP_START_TOKENS = (b"GET", b"POST", b"HTTP", b"PUT", b"DELETE", b"HEAD")


def _parse_packet(packet_str):
    """Split a raw ethernet frame into its TCP/IP addressing and payload.

    Returns (ip_src, ip_dst, port_src, port_dst, payload_string), with the
    addresses and ports as integers read in network byte order, or None when
    the frame is too short to hold the IP and TCP headers it announces.
    """
    packet_bytearray = bytearray(packet_str)
    if len(packet_bytearray) < ETH_HLEN + MIN_IP_HLEN:
        return None

    #IP HEADER
    #https://tools.ietf.org/html/rfc791
    # 0                   1                   2                   3
    # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    # |Version|  IHL  |Type of Service|          Total Length         |
    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    #
    #IHL : Internet Header Length is the length of the internet header
    #value to multiply * 4 byte
    #e.g. IHL = 5 ; IP Header Length = 5 * 4 byte = 20 byte
    ip_header_length = (packet_bytearray[ETH_HLEN] & 0x0F) << 2
    tcp_offset = ETH_HLEN + ip_header_length
    if ip_header_length < MIN_IP_HLEN:
        return None
    if len(packet_bytearray) < tcp_offset + MIN_TCP_HLEN:
        return None

    #ip source is at offset 12..15, ip dest at offset 16..19 of the IP header
    ip_src, ip_dst = struct.unpack("!II", packet_str[ETH_HLEN + 12:ETH_HLEN + 20])

    #TCP HEADER
    #https://www.rfc-editor.org/rfc/rfc793.txt
    #  12              13              14              15
    #  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    # |  Data |           |U|A|P|R|S|F|                               |
    # | Offset| Reserved  |R|C|S|S|Y|I|            Window             |
    # |       |           |G|K|H|T|N|N|                               |
    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    #
    #Data Offset: This indicates where the data begins.
    #The TCP header is an integral number of 32 bits long.
    #value to multiply * 4 byte
    #e.g. DataOffset = 5 ; TCP Header Length = 5 * 4 byte = 20 byte
    #mask bits 4..7, then SHR 4 ; SHL 2 -> SHR 2
    tcp_header_length = (packet_bytearray[tcp_offset + 12] & 0xF0) >> 2

    #port source is in the first two bytes of the TCP header, port dest in the next two
    port_src, port_dst = struct.unpack("!HH", packet_str[tcp_offset:tcp_offset + 4])

    #payload_string contains only the packet payload
    payload_offset = tcp_offset + tcp_header_length
    payload_string = packet_str[payload_offset:]
    return ip_src, ip_dst, port_src, port_dst, payload_string


def _print_first_line(payload):
    """Print the first line of payload through printUntilCRLF()."""
    #printUntilCRLF() compares against text characters: give it a native string.
    #latin-1 maps every byte to one character, so decoding cannot fail.
    if not isinstance(payload, str):
        payload = payload.decode("latin-1")
    printUntilCRLF(payload)


def _drop_session(key, dict_key, error_message):
    """Delete key from bpf_sessions and, if dict_key is given, from local_dictionary.

    error_message is printed when the bpf map deletion fails. The local
    dictionary entry is removed even in that case, so no stale partial url
    is left behind.
    """
    try:
        del bpf_sessions[key]
    except Exception:
        print(error_message)
    if dict_key is not None:
        local_dictionary.pop(dict_key, None)


def _handle_payload(current_Key, payload_string):
    """Print the url once complete, buffering it while it spans several packets."""
    #local_dictionary is indexed by the hex dump of the bpf key structure
    dict_key = binascii.hexlify(current_Key)

    #looking for the first packet of an HTTP message
    if payload_string.startswith(HTTP_START_TOKENS):
        if crlf in payload_string:
            #url entirely contained in first packet -> print it all
            _print_first_line(payload_string)
            #url already printed: the session is not useful anymore
            _drop_session(current_Key, None, "error during delete from bpf map ")
        else:
            #url NOT entirely contained in first packet: save this first part
            local_dictionary[dict_key] = payload_string
        return

    #not a first packet: only packets of a session saved in bpf_sessions matter
    if current_Key not in bpf_sessions:
        return

    if dict_key not in local_dictionary:
        #first part of the url was never saved:
        #bpf_sessions contains an invalid entry -> delete it
        _drop_session(current_Key, None, "error del bpf_session")
        return

    #append current payload to the part(s) received so far
    prev_payload_string = local_dictionary[dict_key] + payload_string
    #search the joined string, so a CR+LF split across two packets is found too
    if crlf in prev_payload_string:
        #last packet of the url: print it and clean bpf_sessions & local_dictionary
        _print_first_line(prev_payload_string)
        _drop_session(current_Key, dict_key, "error deleting from map or dictionary")
    elif len(prev_payload_string) > MAX_URL_STRING_LEN:
        #size exceeded (usually url < 8K): give up and do NOT store it again
        print("url too long")
        _drop_session(current_Key, dict_key, "error deleting from map or dict")
    else:
        #NOT last packet: update dictionary and wait for the next part
        local_dictionary[dict_key] = prev_payload_string


#receive loop: runs until the process is interrupted
while 1:
    #retrieve raw packet from socket
    packet_str = os.read(socket_fd, 4096) #set packet length to max packet length on the interface
    packet_count += 1

    #DEBUG - print raw packet in hex format
    #packet_hex = toHex(packet_str)
    #print ("%s" % packet_hex)

    parsed = _parse_packet(packet_str)
    if parsed is not None:
        ip_src, ip_dst, port_src, port_dst, payload_string = parsed
        #current_Key contains ip source/dest and port source/dest
        #useful for direct bpf_sessions map access
        current_Key = bpf_sessions.Key(ip_src, ip_dst, port_src, port_dst)
        _handle_payload(current_Key, payload_string)

    #check if dirty entries are present in bpf_sessions
    if packet_count % CLEANUP_N_PACKETS == 0:
        cleanup()