#!/usr/bin/perl
# (C) 2006 - 2009 Mark Boddington, http://www.badpenguin.co.uk
# Portions (C) 2009 Damien Mascord, http://www.tusker.org
# Licensed Under the GNU GPL Version 2.
#
# pcap-util Version 2.0
# This is a utility to reduce pcap files down to manageable sizes. 
# You can either extract packets captured between specified times,  
# split the dump into smaller files or extract packets matching a
# tcpdump filter.
#
# NEW in 2.0 - You can search for strings within the application 
# data using regular expressions. Either extract matching packets 
# using "search", or replace matches using "modify".
#
# Modify is great for stripping out sensitive data from your 
# captures, before sending them into technical support. PCI-DSS
# friendly! Thanks to Damien for the new "modify" feature.

use Net::Pcap;
use Switch;
use Time::Local;
use NetPacket::Ethernet qw(:ALL);   # parse packets
use NetPacket::IP;                  # parse packets
use NetPacket::IP qw/ :protos /;
use NetPacket::TCP;
use NetPacket::UDP;

# need to store these globally so that the processMatch function can see these
#my $regexmatch, $regexreplace, $outfile, $dump_out;

# Size of a pcap packet header. I'm guessing ;-)
use constant PCAP_PKTHDR => 18;

# Packets can arrive out of order. How long shall we wait for a late packet?
# Value is in seconds and we will keep processing until endtime + this value.
use constant PCAP_LATE_PKTS => 2;

# =============================================================================
# Begin Subroutine: printUsage
# =============================================================================
# Print the script usage information to the standard error
# =============================================================================
# Input  : NULL
# Output : NULL
# =============================================================================
sub printUsage
{
	print STDERR "\n";
	print STDERR "This utility will take a pcap file from a packet capture program like tcpdump\n"; 
	print STDERR "and split it into smaller parts to aid analysis. There are four options.\n\n";
	print STDERR " 1. You can split the file into several smaller ones of x bytes in length\n";
	print STDERR " 2. You can extract packets that fall within a specified time period\n";
	print STDERR " 3. You can extract packets that match a libpcap filter string.\n";
	print STDERR " 4. You can extract or modify packets that contain data matching a regular expression.\n\n";
	print STDERR "============================================================\n";
	print STDERR "   Simple Splitting Options \n";
	print STDERR "============================================================\n";
	print STDERR "Split into smaller files\n------------------------\n";
	print STDERR "$0 split <infile> <outfile prefix> <size in MB>\n\n";
	print STDERR "Extract packets from time period\n--------------------------------\n";
	print STDERR "$0 time <infile> <outfile> <Start time> <End time>\n\n";
	print STDERR "** NOTE: Time format should be YYYY-MM-DD:hh:mm:ss **\n\n";
	print STDERR "============================================================\n";
	print STDERR "   Connection Data Options (Layers 2-4)  \n";
	print STDERR "============================================================\n";
	print STDERR "Extract packets using libpcap filter language\n";
	print STDERR "---------------------------------------------\n";
	print STDERR "$0 filter <infile> <outfile> \"libpcap filter string\"\n\n";
	print STDERR "============================================================\n";
	print STDERR "   Application Layer (Layer 7) Matching Options \n";
	print STDERR "============================================================\n";
	print STDERR "Extract only the packets which contain data matching a regex\n";
	print STDERR "------------------------------------------------------------\n";
	print STDERR "$0 search <infile> <outfile> \"regex match string\"\n\n";
	print STDERR "Modify packets using a regex\n";
	print STDERR "----------------------------\n";
	print STDERR "$0 modify <infile> <outfile> \"regex match string\" \"regex replace string\"\n\n";
}

# =============================================================================
# Begin Subroutine: openOutFile
# =============================================================================
# Open the output file via the pcap library dump_open and return the file
# pointer to the caller.
# =============================================================================
# Input  : $outfile - the name of the output file to write
# Input  : $packets - the packet capture descriptor
# Output : $dump_out - return the file descriptor for the new savefile
# =============================================================================
sub openOutFile
{
	my ( $outfile, $packets ) = @_;
	my $dump_out;
	if ( ! ( $dump_out = Net::Pcap::dump_open($packets, $outfile) ) )
	{
		$error = Net::Pcap::geterr($packets);
		die("Failed to open output file : $error\n");
	}
	return $dump_out;
}

# =============================================================================
# Begin Subroutine: processTime
# =============================================================================
# process the input file writing the packets that fall between starttime and
# endtime to the outfile. Packets are read from the packet capture descriptor.
# =============================================================================
# Input  : $packets - the packet capture descriptor
# Input  : $starttime - the start of the period to extract packets from
# Input  : $endtime - the end of the period to extract packets from
# Input  : $outfile - the name of the output file to open via OpenOutFile
# Output : NULL
# =============================================================================
sub processTime
{
	my ( $packets, $starttime, $endtime, $outfile ) = @_;
	my %header;
	my $index=0;
	my $found=0;
	my $dump_out = openOutFile( $outfile, $packets);
	my $curpkt = Net::Pcap::next( $packets, \%header );

	my @st = split(/[:-]/,$starttime);
	my @et = split(/[:-]/,$endtime);
	my $st = timelocal(@st[5],@st[4],@st[3],@st[2],@st[1]-1,@st[0]);
	my $et = timelocal(@et[5],@et[4],@et[3],@et[2],@et[1]-1,@et[0]);
	
	while ( %header )
	{
		$index++;
		if ( %header->{'tv_sec'} >= $st and %header->{'tv_sec'} <= $et )
		{
			Net::Pcap::dump($dump_out, \%header, $curpkt);
			$found++;
		} elsif ( %header->{'tv_sec'} > ( $et + PCAP_LATE_PKTS ) ) {
			last;
		}
		if ( $index % 1000 == 0 ) { 
			print "\rPackets processed: $index, found: $found, last timestamp: " . %header->{'tv_sec'}; 
		}
		undef %header;
		$curpkt = Net::Pcap::next($packets,\%header);
	}
	print "\n====> Done <==== \n";
	Net::Pcap::dump_close($dump_out);
}

# =============================================================================
# Begin Subroutine: processModify
# =============================================================================
# process the input file writing and modifying the packets that match the regex
# =============================================================================
# Input  : $packets - the packet capture descriptor
# Input  : $regexmatch - regex to do the match
# Input  : $regexreplace - replace the match with this string
# Input  : $outfile - the name of the output file to open via OpenOutFile
# Output : NULL
# =============================================================================
sub processModify
{
	my $packets;
	( $packets, $regexmatch, $regexreplace, $outfile ) = @_;
	my %header;
	my $index=0;
	my $found=0;
	$dump_out = openOutFile( $outfile, $packets);
	
	$curpkt = Net::Pcap::next($packets,\%header);
	while ( %header )
	{
		$index++;
		
		# Decode the layer 2 data
    		$eth_obj = NetPacket::Ethernet->decode($curpkt);

		if ($eth_obj->{type} != ETH_TYPE_IP) {
			print "Not handling ethertype: " . $eth_obj->{type} . "\n";
		} else {

			# Decode the packet to get IP data
			$ip_obj = NetPacket::IP->decode($eth_obj->{data});

			if ( $ip_obj->{data} =~ m/$regexmatch/ )
			{
				# print "Match! \n";
				
				# Print some information
				#print "Packet: $pkt_cnt\t";
				#print localtime($header->{tv_sec}) . "\t";
				#print $header->{len} . " bytes\t";
				#print "Src: " . $ip_obj->{src_ip} . "\t";
				#print "Dst: " . $ip_obj->{dest_ip} . "\t";
				# print "Proto: " . $ip_obj->{proto} . "\t";
				# print "Data: " . $ip_obj->{data} . "\n";
				
				if ( $ip_obj->{proto} == IP_PROTO_TCP ) {

					$proto = NetPacket::TCP->decode( $ip_obj->{'data'} );
					# print "TCP data:" .  $proto->{'data'} . "\n";
					
					$proto->{'data'} =~ s/$regexmatch/$regexreplace/g;
				
					# print "Modded Data: " . $proto->{'data'} . "\n";

					#re-encode the packet
					$ip_obj->{'data'} = $proto->encode($ip_obj);
					$new_packet_data = $ip_obj->encode;
					
					#print "ip obj: $ip_obj\n";
					#print "new packet: $new_packet\n";
					#print "old packet: $curpkt\n";
					# 
					#$new_packet->{'header'} = $curpkt->{'header'};
					#print "new packet: $new_packet\n";
					# HOW?!
					#$curpkt =~ s/$regexmatch/$regexreplace/g;
					
					# manually unpack our current packet
					($dm_hi, $dm_lo, $sm_hi, $sm_lo, $packet_type, $packet_data) = unpack('NnNnna*' , $curpkt);

					# manually repack with the same ethernet details
					$newpkt = pack('NnNnna*', $dm_hi, $dm_lo, $sm_hi, $sm_lo, $eth_obj->{'type'}, $new_packet_data);
					
					# $new_eth_obj = NetPacket::Ethernet->decode($newpkt);
					# $new_ip_obj = NetPacket::IP->decode($new_eth_obj->{data});
					# $new_proto = NetPacket::TCP->decode( $new_ip_obj->{'data'} );
					
					# print "Modded Data: " . $new_proto->{'data'} . "\n";
					
					#print "old packet: " . $curpkt . "\n";
					#print "new packet: " . $newpkt . "\n";
					
					#print "writing modified packet...\n";
					Net::Pcap::dump($dump_out, \%header, $newpkt);

				} elsif ( $ip_obj->{proto} == IP_PROTO_UDP ) {

					$proto = NetPacket::UDP->decode( $ip_obj->{'data'} );
					print "UDP data:" .  $proto->{'data'} . "\n";

				} else {
					#   Unsupported network packet protocol - Currently, only TCP 
					#   and UDP packets
					#   are decoded with all other packet types silently dropped by this 
					#   accounting process.

				}

			} else {
				#print "writing unmodified packet...\n";
				Net::Pcap::dump($dump_out, \%header, $curpkt);
			}
			
		}
		undef %header;
		$curpkt = Net::Pcap::next($packets,\%header);
	}
		
	print "\n====> Done <==== \n";
	Net::Pcap::dump_close($dump_out);
}

# =============================================================================
# Begin Subroutine: processSearch
# =============================================================================
# process the input file writing packets with data matching the regex only
# =============================================================================
# Input  : $packets - the packet capture descriptor
# Input  : $regexmatch - regex to do the match
# Input  : $outfile - the name of the output file to open via OpenOutFile
# Output : NULL
# =============================================================================
sub processSearch
{
	my $packets;
	( $packets, $regexmatch, $outfile ) = @_;
	my %header;
	my $index=0;
	my $found=0;
	$dump_out = openOutFile( $outfile, $packets);
	
	$curpkt = Net::Pcap::next($packets,\%header);
	while ( %header )
	{
		$index++;
		
		# Decode the layer 2 data
    		$eth_obj = NetPacket::Ethernet->decode($curpkt);

		if ($eth_obj->{type} != ETH_TYPE_IP) {
			print "Not handling ethertype: " . $eth_obj->{type} . "\n";
		} else {

			# Decode the packet to get IP data
			$ip_obj = NetPacket::IP->decode($eth_obj->{data});

			if ( $ip_obj->{data} =~ m/$regexmatch/ )
			{
				Net::Pcap::dump($dump_out, \%header, $curpkt);
			}
		}
		undef %header;
		$curpkt = Net::Pcap::next($packets,\%header);
	}
		
	print "\n====> Done <==== \n";
	Net::Pcap::dump_close($dump_out);

}

# =============================================================================
# Begin Subroutine: processSize
# =============================================================================
# Process the input file, writing packets to the output file until they exceed
# the filesize. Then open a new output file and continue writing.
# =============================================================================
# Input  : $packets - the packet capture descriptor.
# Input  : $filesize - the maximum size of our output files.
# Input  : $outfile - the file prefix for our output files.
# Output : NULL
# =============================================================================
sub processSize
{
	my ($packets, $filesize, $outfile) = @_;
	my %header;
	my $cursize=0;
	my $fileindex=0;
	
	my $dump_out = openOutFile( "$outfile.$fileindex.tcpdump", $packets);
	my $curpkt = Net::Pcap::next( $packets, \%header );
	print "Writing file $outfile.$fileindex.tcpdump\n";

	while ( %header )
	{
		$cursize += %header->{'caplen'} + PCAP_PKTHDR;
		Net::Pcap::dump($dump_out, \%header, $curpkt);
		if ( $cursize > $filesize )
		{
			Net::Pcap::dump_close($dump_out);
			$fileindex++;
			$cursize = 0;
			$dump_out = openOutFile( "$outfile.$fileindex.tcpdump", $packets);
			print "Writing file $outfile.$fileindex.tcpdump\n";
		}
		undef %header;
		$curpkt = Net::Pcap::next($packets,\%header);
	}
	print "\n====> Done <==== \n";
	Net::Pcap::dump_close($dump_out);
}

# =============================================================================
# Begin Subroutine: processFilter
# =============================================================================
# Process the input file, writing packets to the output file that match the pcap
# filter string.
# =============================================================================
# Input  : $packets - the packet capture descriptor.
# Input  : $filterStr - the filter string.
# Input  : $outfile - the file prefix for our output files.
# Output : NULL
# =============================================================================
sub processFilter
{
	my ( $packets, $filterStr, $outfile) = @_;
	my $filter;
	my $optimise = 0;
	my $netmask = "255.255.255.255";
	my $dump_out = openOutFile( $outfile, $packets);

	if ( Net::Pcap::compile($packets, \$filter, "$filterStr", $optimise, $netmask) == -1 )
	{
		print STDERR "Failed to compile the filter string: $filterStr\n";
		return(-1);
	}
	
	Net::Pcap::setfilter($packets, $filter);

	print "Writing packets matching \"$filterStr\" to $outfile\n";
	my $curpkt = Net::Pcap::next( $packets, \%header );
	while ( %header )
	{
		Net::Pcap::dump($dump_out, \%header, $curpkt);
		undef %header;
		$curpkt = Net::Pcap::next($packets,\%header);
	}
	print "\n====> Done <==== \n";
	Net::Pcap::dump_close($dump_out);

}

# We either need 4 or 5 arguments, if we else print the usage and exit.
if ( @ARGV < 4 )
{
	printUsage;
	exit;
}

my $command = $ARGV[0];
my $file = $ARGV[1];
my $outfile = $ARGV[2];
my $error;
my $packets; 

# Open the capture file we are processing. If we can't print an error and exit.
if ( ! ( $packets = Net::Pcap::open_offline($file, \$error) ) )
{
	die("Failed to open input file : $error\n");
}

switch ($command)
{
	case "time" 
	{ 
		my $starttime  = $ARGV[3];
		my $endtime = $ARGV[4];
		processTime($packets, $starttime, $endtime, $outfile);
		Net::Pcap::close($packets);
		last; 
	} 
	case "split"
	{
		my $filesize = $ARGV[3];
		$filesize = $filesize * 1024 * 1024;
		processSize($packets, $filesize, $outfile);
		Net::Pcap::close($packets);
		last;
	} 
	case "filter"
	{
		my $filterStr = $ARGV[3];
		processFilter($packets, $filterStr, $outfile);
		Net::Pcap::close($packets);
		last;
	}
	case "modify"
	{
		my $regexmatch = $ARGV[3];
		my $regexreplace = $ARGV[4];
		processModify($packets, $regexmatch, $regexreplace, $outfile);
		Net::Pcap::close($packets);
		last;
	}
	case "search"
	{
		my $regexmatch = $ARGV[3];
		processSearch($packets, $regexmatch, $outfile);
		Net::Pcap::close($packets);
		last;
	}
	else
	{
		# no valid command given. print the usage information.
		printUsage();
	}
} 



