Index: dist/pf/share/man/man4/pfsync.4
===================================================================
RCS file: dist/pf/share/man/man4/pfsync.4
diff -N dist/pf/share/man/man4/pfsync.4
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ dist/pf/share/man/man4/pfsync.4	1 Aug 2009 14:51:54 -0000
@@ -0,0 +1,245 @@
+.\"	$OpenBSD: pfsync.4,v 1.25 2007/05/31 19:19:51 jmc Exp $
+.\"
+.\" Copyright (c) 2002 Michael Shalayeff
+.\" Copyright (c) 2003-2004 Ryan McBride
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF MIND,
+.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd $Mdocdate: May 31 2007 $
+.Dt PFSYNC 4
+.Os
+.Sh NAME
+.Nm pfsync
+.Nd packet filter state table logging interface
+.Sh SYNOPSIS
+.Cd "pseudo-device pfsync"
+.Sh DESCRIPTION
+The
+.Nm
+interface is a pseudo-device which exposes certain changes to the state
+table used by
+.Xr pf 4 .
+State changes can be viewed by invoking
+.Xr tcpdump 8
+on the
+.Nm
+interface.
+If configured with a physical synchronisation interface,
+.Nm
+will also send state changes out on that interface using IP multicast,
+and insert state changes received on that interface from other systems
+into the state table.
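+.Pp
+For example, state change messages passing over the
+.Nm
+interface can be watched with a command such as:
+.Bd -literal -offset indent
+# tcpdump -i pfsync0
+.Ed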
+.Pp
+By default, all local changes to the state table are exposed via
+.Nm .
+However, state changes from packets received by
+.Nm
+over the network are not rebroadcast.
+States created by a rule marked with the
+.Ar no-sync
+keyword are omitted from the
+.Nm
+interface (see
+.Xr pf.conf 5
+for details).
+.Pp
+The
+.Nm
+interface will attempt to collapse multiple updates of the same
+state into one message where possible.
+The maximum number of times this can be done before the update is sent out
+is controlled by the
+.Ar maxupd
+parameter to ifconfig
+(see
+.Xr ifconfig 8
+and the example below for more details).
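+For example, a command such as the following would allow up to 64
+consecutive updates to a single state to be collapsed into one message:
+.Bd -literal -offset indent
+# ifconfig pfsync0 maxupd 64
+.Ed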
+.Pp
+Each packet retrieved on this interface has a header associated
+with it of length
+.Dv PFSYNC_HDRLEN .
+The header indicates the version of the protocol, address family,
+action taken on the following states, and the number of state
+table entries attached in this packet.
+This structure is defined in
+.Aq Pa net/if_pfsync.h
+as:
+.Bd -literal -offset indent
+struct pfsync_header {
+	u_int8_t version;
+	u_int8_t af;
+	u_int8_t action;
+	u_int8_t count;
+};
+.Ed
+.Sh NETWORK SYNCHRONISATION
+States can be synchronised between two or more firewalls using this
+interface, by specifying a synchronisation interface using
+.Xr ifconfig 8 .
+For example, the following command sets fxp0 as the synchronisation
+interface:
+.Bd -literal -offset indent
+# ifconfig pfsync0 syncdev fxp0
+.Ed
+.Pp
+By default, state change messages are sent out on the synchronisation
+interface using IP multicast packets.
+The protocol is IP protocol 240, PFSYNC, and the multicast group
+used is 224.0.0.240.
+When a peer address is specified using the
+.Ic syncpeer
+keyword, the peer address is used as a destination for the pfsync traffic,
+and the traffic can then be protected using
+.Xr ipsec 4 .
+In such a configuration, the syncdev should be set to the
+.Xr enc 4
+interface, as this is where the traffic arrives when it is decapsulated,
+e.g.:
+.Bd -literal -offset indent
+# ifconfig pfsync0 syncpeer 10.0.0.2 syncdev enc0
+.Ed
+.Pp
+It is important that the pfsync traffic be well secured
+as there is no authentication on the protocol and it would
+be trivial to spoof packets which create states, bypassing the pf ruleset.
+Either run the pfsync protocol on a trusted network \- ideally a network
+dedicated to pfsync messages, such as a crossover cable between two
+firewalls \- or specify a peer address and protect the traffic with
+.Xr ipsec 4 .
+.Pp
+There is a one-to-one correspondence between packets seen by
+.Xr bpf 4
+on the
+.Nm
+interface, and packets sent out on the synchronisation interface, i.e.\&
+a packet with 4 state deletion messages on
+.Nm
+means that the same 4 deletions were sent out on the synchronisation
+interface.
+However, the actual packet contents may differ as the messages
+sent over the network are "compressed" where possible, containing
+only the necessary information.
+.Sh EXAMPLES
+.Nm
+and
+.Xr carp 4
+can be used together to provide automatic failover of a pair of firewalls
+configured in parallel.
+One firewall handles all traffic \- if it dies or
+is shut down, the second firewall takes over automatically.
+.Pp
+Both firewalls in this example have three
+.Xr sis 4
+interfaces.
+sis0 is the external interface, on the 10.0.0.0/24 subnet; sis1 is the
+internal interface, on the 192.168.0.0/24 subnet; and sis2 is the
+.Nm
+interface, using the 192.168.254.0/24 subnet.
+A crossover cable connects the two firewalls via their sis2 interfaces.
+On all three interfaces, firewall A uses the .254 address, while firewall B
+uses .253.
+The interfaces are configured as follows (firewall A unless otherwise
+indicated):
+.Pp
+.Pa /etc/hostname.sis0 :
+.Bd -literal -offset indent
+inet 10.0.0.254 255.255.255.0 NONE
+.Ed
+.Pp
+.Pa /etc/hostname.sis1 :
+.Bd -literal -offset indent
+inet 192.168.0.254 255.255.255.0 NONE
+.Ed
+.Pp
+.Pa /etc/hostname.sis2 :
+.Bd -literal -offset indent
+inet 192.168.254.254 255.255.255.0 NONE
+.Ed
+.Pp
+.Pa /etc/hostname.carp0 :
+.Bd -literal -offset indent
+inet 10.0.0.1 255.255.255.0 10.0.0.255 vhid 1 pass foo
+.Ed
+.Pp
+.Pa /etc/hostname.carp1 :
+.Bd -literal -offset indent
+inet 192.168.0.1 255.255.255.0 192.168.0.255 vhid 2 pass bar
+.Ed
+.Pp
+.Pa /etc/hostname.pfsync0 :
+.Bd -literal -offset indent
+up syncdev sis2
+.Ed
+.Pp
+.Xr pf 4
+must also be configured to allow
+.Nm
+and
+.Xr carp 4
+traffic through.
+The following should be added to the top of
+.Pa /etc/pf.conf :
+.Bd -literal -offset indent
+pass quick on { sis2 } proto pfsync
+pass on { sis0 sis1 } proto carp
+.Ed
+.Pp
+If it is preferable that one firewall handle the traffic,
+the
+.Ar advskew
+on the backup firewall's
+.Xr carp 4
+interfaces should be set to something higher than
+the primary's.
+For example, if firewall B is the backup, its
+.Pa /etc/hostname.carp1
+would look like this:
+.Bd -literal -offset indent
+inet 192.168.0.1 255.255.255.0 192.168.0.255 vhid 2 pass bar \e
+	advskew 100
+.Ed
+.Pp
+The following must also be added to
+.Pa /etc/sysctl.conf :
+.Bd -literal -offset indent
+net.inet.carp.preempt=1
+.Ed
+.Sh SEE ALSO
+.Xr bpf 4 ,
+.Xr carp 4 ,
+.Xr enc 4 ,
+.Xr inet 4 ,
+.Xr inet6 4 ,
+.Xr ipsec 4 ,
+.Xr netintro 4 ,
+.Xr pf 4 ,
+.Xr hostname.if 5 ,
+.Xr pf.conf 5 ,
+.Xr protocols 5 ,
+.Xr ifconfig 8 ,
+.Xr ifstated 8 ,
+.Xr tcpdump 8
+.Sh HISTORY
+The
+.Nm
+device first appeared in
+.Ox 3.3 .
Index: distrib/sets/lists/man/mi
===================================================================
RCS file: /cvsroot/src/distrib/sets/lists/man/mi,v
retrieving revision 1.1149
diff -u -r1.1149 mi
--- distrib/sets/lists/man/mi	27 Jul 2009 12:34:12 -0000	1.1149
+++ distrib/sets/lists/man/mi	1 Aug 2009 14:52:02 -0000
@@ -1278,6 +1278,7 @@
 ./usr/share/man/cat4/pdcsata.0			man-sys-catman		.cat
 ./usr/share/man/cat4/pf.0			man-pf-catman		pf,.cat
 ./usr/share/man/cat4/pflog.0			man-pf-catman		pf,.cat
+./usr/share/man/cat4/pfsync.0			man-pf-catman		pf,.cat
 ./usr/share/man/cat4/phy.0			man-sys-catman		.cat
 ./usr/share/man/cat4/piixide.0			man-sys-catman		.cat
 ./usr/share/man/cat4/piixpcib.0			man-sys-catman		.cat
@@ -3835,6 +3836,7 @@
 ./usr/share/man/html4/pdcsata.html		man-sys-htmlman		html
 ./usr/share/man/html4/pf.html			man-pf-htmlman		pf,html
 ./usr/share/man/html4/pflog.html		man-pf-htmlman		pf,html
+./usr/share/man/html4/pfsync.html		man-pf-htmlman		pf,html
 ./usr/share/man/html4/phy.html			man-sys-htmlman		html
 ./usr/share/man/html4/piixide.html		man-sys-htmlman		html
 ./usr/share/man/html4/piixpcib.html		man-sys-htmlman		html
@@ -6270,6 +6272,7 @@
 ./usr/share/man/man4/pdcsata.4			man-sys-man		.man
 ./usr/share/man/man4/pf.4			man-pf-man		pf,.man
 ./usr/share/man/man4/pflog.4			man-pf-man		pf,.man
+./usr/share/man/man4/pfsync.4			man-pf-man		pf,.man
 ./usr/share/man/man4/phy.4			man-sys-man		.man
 ./usr/share/man/man4/piixide.4			man-sys-man		.man
 ./usr/share/man/man4/piixpcib.4			man-sys-man		.man
Index: sbin/ifconfig/Makefile.inc
===================================================================
RCS file: /cvsroot/src/sbin/ifconfig/Makefile.inc,v
retrieving revision 1.3
diff -u -r1.3 Makefile.inc
--- sbin/ifconfig/Makefile.inc	26 May 2009 21:58:31 -0000	1.3
+++ sbin/ifconfig/Makefile.inc	1 Aug 2009 14:52:50 -0000
@@ -19,3 +19,6 @@
 SRCS+= tunnel.c
 SRCS+= util.c
 SRCS+= vlan.c
+
+CPPFLAGS+=-I ${.CURDIR}/../../sys/dist/pf/
+SRCS+= pfsync.c
Index: sbin/ifconfig/ifconfig.8
===================================================================
RCS file: /cvsroot/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.99
diff -u -r1.99 ifconfig.8
--- sbin/ifconfig/ifconfig.8	29 Jul 2009 21:47:11 -0000	1.99
+++ sbin/ifconfig/ifconfig.8	1 Aug 2009 14:52:52 -0000
@@ -1,4 +1,4 @@
-.\"	$NetBSD: ifconfig.8,v 1.99 2009/07/29 21:47:11 hubertf Exp $
+.\"	$NetBSD: ifconfig.8,v 1.98 2009/07/02 18:43:47 dyoung Exp $
 .\"
 .\" Copyright (c) 1983, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
@@ -722,6 +722,37 @@
 .It Cm -tso6
 Disable hardware-assisted TCP/IPv6 segmentation on interfaces that
 support it.
+.It Cm maxupd Ar n
+If the driver is a
+.Xr pfsync 4
+pseudo-device, indicate the maximum number
+of updates for a single state which can be collapsed into one.
+This is an 8-bit number; the default value is 128.
+.It Cm syncdev Ar iface
+If the driver is a
+.Xr pfsync 4
+pseudo-device, use the specified interface
+to send and receive pfsync state synchronisation messages.
+.It Fl syncdev
+If the driver is a
+.Xr pfsync 4
+pseudo-device, stop sending pfsync state
+synchronisation messages over the network.
+.It Cm syncpeer Ar peer_address
+If the driver is a
+.Xr pfsync 4
+pseudo-device, make the pfsync link point-to-point rather than using
+multicast to broadcast the state synchronisation messages.
+The
+.Ar peer_address
+is the IP address of the other host taking part in
+the pfsync cluster.
+With this option,
+.Xr pfsync 4
+traffic can be protected using
+.Xr ipsec 4 .
+.It Fl syncpeer
+If the driver is a
+.Xr pfsync 4
+pseudo-device, stop sending updates to a specific peer and revert to
+using multicast for the state synchronisation messages.
 .El
 .Pp
 .Nm
@@ -819,7 +850,7 @@
 .Pp
 .Ic ifconfig sip0 link 00:11:22:33:44:55
 .Pp
-Add and activate a link-layer (MAC) address:
+Add and activate a link-layer address:
 .Pp
 .Ic ifconfig sip0 link 00:11:22:33:44:55 active
 .Sh DIAGNOSTICS
@@ -832,6 +863,7 @@
 .Xr carp 4 ,
 .Xr ifmedia 4 ,
 .Xr netintro 4 ,
+.Xr pfsync 4 ,
 .Xr vlan 4 ,
 .Xr ifconfig.if 5 ,
 .\" .Xr eon 5 ,
Index: sbin/ifconfig/pfsync.c
===================================================================
RCS file: sbin/ifconfig/pfsync.c
diff -N sbin/ifconfig/pfsync.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sbin/ifconfig/pfsync.c	1 Aug 2009 14:52:52 -0000
@@ -0,0 +1,202 @@
+#include <sys/cdefs.h>
+#ifndef lint
+__RCSID("$NetBSD$");
+#endif /* not lint */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <err.h>
+#include <errno.h>
+#include <util.h>
+
+#include "env.h"
+#include "parse.h"
+#include "extern.h"
+
+static status_func_t status;
+static usage_func_t usage;
+static cmdloop_branch_t branch;
+
+static void pfsync_constructor(void) __attribute__((constructor));
+static void pfsync_status(prop_dictionary_t, prop_dictionary_t);
+static int setpfsync_maxupd(prop_dictionary_t, prop_dictionary_t);
+static int setpfsync_peer(prop_dictionary_t, prop_dictionary_t);
+static int setpfsyncdev(prop_dictionary_t, prop_dictionary_t);
+
+struct pinteger parse_maxupd = PINTEGER_INITIALIZER1(&parse_maxupd, "maxupd",
+    0, 255, 10, setpfsync_maxupd, "maxupd", &command_root.pb_parser);
+
+struct piface pfsyncdev = PIFACE_INITIALIZER(&pfsyncdev, "syncdev", setpfsyncdev,
+    "syncdev", &command_root.pb_parser);
+
+struct paddr parse_sync_peer = PADDR_INITIALIZER(&parse_sync_peer, "syncpeer",
+    setpfsync_peer, "syncpeer", NULL, NULL, NULL, &command_root.pb_parser);
+
+static const struct kwinst pfsynckw[] = {
+	  {.k_word = "maxupd", .k_nextparser = &parse_maxupd.pi_parser}
+	, {.k_word = "syncdev", .k_nextparser = &pfsyncdev.pif_parser}
+	, {.k_word = "-syncdev", .k_key = "syncdev", .k_type = KW_T_STR,
+	   .k_str = "", .k_exec = setpfsyncdev,
+	   .k_nextparser = &command_root.pb_parser}
+	, {.k_word = "syncpeer", .k_nextparser = &parse_sync_peer.pa_parser}
+	, {.k_word = "-syncpeer", .k_key = "syncpeer", .k_type = KW_T_STR,
+	   .k_str = "", .k_exec = setpfsync_peer,
+	   .k_nextparser = &command_root.pb_parser}
+};
+
+struct pkw pfsync = PKW_INITIALIZER(&pfsync, "pfsync", NULL, NULL,
+    pfsynckw, __arraycount(pfsynckw), NULL);
+
+static void
+pfsync_set(prop_dictionary_t env, struct pfsyncreq *pfsyncr)
+{
+	if (indirect_ioctl(env, SIOCSETPFSYNC, pfsyncr) == -1)
+		err(EXIT_FAILURE, "SIOCSETPFSYNC");
+}
+
+static int
+pfsync_get1(prop_dictionary_t env, struct pfsyncreq *pfsyncr)
+{
+	memset(pfsyncr, 0, sizeof(*pfsyncr));
+
+	return indirect_ioctl(env, SIOCGETPFSYNC, pfsyncr);
+}
+
+static void
+pfsync_get(prop_dictionary_t env, struct pfsyncreq *pfsyncr)
+{
+	if (pfsync_get1(env, pfsyncr) == -1)
+		err(EXIT_FAILURE, "SIOCGETPFSYNC");
+}
+
+static void
+pfsync_status(prop_dictionary_t env, prop_dictionary_t oenv)
+{
+	struct pfsyncreq pfsyncr;
+
+	if (pfsync_get1(env, &pfsyncr) == -1)
+		return;
+
+	if (pfsyncr.pfsyncr_syncdev[0] != '\0') {
+		printf("\tpfsync: syncdev: %s ", pfsyncr.pfsyncr_syncdev);
+		if (pfsyncr.pfsyncr_syncpeer.s_addr != INADDR_PFSYNC_GROUP)
+			printf("syncpeer: %s ",
+					inet_ntoa(pfsyncr.pfsyncr_syncpeer));
+		printf("maxupd: %d\n", pfsyncr.pfsyncr_maxupdates);
+	}
+}
+
+/* ARGSUSED */
+static int
+setpfsync_maxupd(prop_dictionary_t env, prop_dictionary_t oenv)
+{
+	struct pfsyncreq pfsyncr;
+	uint8_t maxupd;
+
+	if (!prop_dictionary_get_uint8(env, "maxupd", &maxupd)) {
+		errno = ENOENT;
+		return -1;
+	}
+
+	pfsync_get(env, &pfsyncr);
+
+	pfsyncr.pfsyncr_maxupdates = maxupd;
+
+	pfsync_set(env, &pfsyncr);
+	return 0;
+}
+
+/* ARGSUSED */
+static int
+setpfsyncdev(prop_dictionary_t env, prop_dictionary_t oenv)
+{
+	struct pfsyncreq pfsyncr;
+	const char *dev;
+
+	if (!prop_dictionary_get_cstring_nocopy(env, "syncdev", &dev)) {
+		errno = ENOENT;
+		return -1;
+	}
+
+	pfsync_get(env, &pfsyncr);
+
+	strlcpy(pfsyncr.pfsyncr_syncdev, dev, sizeof(pfsyncr.pfsyncr_syncdev));
+
+	pfsync_set(env, &pfsyncr);
+	return 0;
+}
+
+/* ARGSUSED */
+static int
+setpfsync_peer(prop_dictionary_t env, prop_dictionary_t oenv)
+{
+	struct pfsyncreq pfsyncr;
+	prop_data_t data;
+	const struct paddr_prefix *peerpfx;
+	const struct sockaddr_in *s;
+
+	data = (prop_data_t)prop_dictionary_get(env, "syncpeer");
+	if (data == NULL) {
+		errno = ENOENT;
+		return -1;
+	}
+
+	pfsync_get(env, &pfsyncr);
+
+	peerpfx = prop_data_data_nocopy(data);
+
+	if (peerpfx != NULL) {
+		/* Only AF_INET is supported for now. */
+		if (peerpfx->pfx_addr.sa_family != AF_INET) {
+			errno = ENOENT;
+			return -1;
+		}
+
+		s = (const struct sockaddr_in *)&peerpfx->pfx_addr;
+
+		memcpy(&pfsyncr.pfsyncr_syncpeer.s_addr, &s->sin_addr,
+		    MIN(sizeof(pfsyncr.pfsyncr_syncpeer.s_addr),
+		    peerpfx->pfx_addr.sa_len));
+	} else {
+		memset(&pfsyncr.pfsyncr_syncpeer.s_addr, 0,
+		    sizeof(pfsyncr.pfsyncr_syncpeer.s_addr));
+	}
+
+	pfsync_set(env, &pfsyncr);
+
+	return 0;
+}
+
+static void
+pfsync_usage(prop_dictionary_t env)
+{
+	fprintf(stderr,
+	    "\t[ maxupd n ] [ syncdev iface ] [ syncpeer peer_addr ]\n");
+}
+
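+/*
+ * Hook the pfsync keyword parsers and the status/usage handlers into
+ * ifconfig's command loop at program startup.
+ */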
+static void
+pfsync_constructor(void)
+{
+	cmdloop_branch_init(&branch, &pfsync.pk_parser);
+	register_cmdloop_branch(&branch);
+	status_func_init(&status, pfsync_status);
+	usage_func_init(&usage, pfsync_usage);
+	register_status(&status);
+	register_usage(&usage);
+}
Index: sys/dist/pf/net/if_pfsync.c
===================================================================
RCS file: sys/dist/pf/net/if_pfsync.c
diff -N sys/dist/pf/net/if_pfsync.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/dist/pf/net/if_pfsync.c	1 Aug 2009 14:53:12 -0000
@@ -0,0 +1,1780 @@
+/*	$OpenBSD: if_pfsync.c,v 1.83 2007/06/26 14:44:12 mcbride Exp $	*/
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <net/bpf.h>
+#include <netinet/in.h>
+#ifndef __NetBSD__
+#include <netinet/if_ether.h>
+#else
+#include <net/if_ether.h>
+#endif /* __NetBSD__ */
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+
+#ifdef	INET
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#endif
+
+#ifdef INET6
+#include <netinet6/nd6.h>
+#endif /* INET6 */
+
+#include "carp.h"
+#if NCARP > 0
+#include <netinet/ip_carp.h>
+#endif
+
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+
+#ifdef __NetBSD__
+#include <sys/conf.h>
+#include <sys/lwp.h>
+#include <sys/kauth.h>
+#endif /* __NetBSD__ */
+
+#include "bpfilter.h"
+#include "pfsync.h"
+
+#define PFSYNC_MINMTU	\
+    (sizeof(struct pfsync_header) + sizeof(struct pf_state))
+
+#ifdef PFSYNCDEBUG
+#define DPRINTF(x)    do { if (pfsyncdebug) printf x ; } while (0)
+int pfsyncdebug;
+#else
+#define DPRINTF(x)
+#endif
+
+extern int ifqmaxlen; /* XXX */
+
+struct pfsync_softc	*pfsyncif = NULL;
+struct pfsyncstats	 pfsyncstats;
+
+void	pfsyncattach(int);
+int	pfsync_clone_create(struct if_clone *, int);
+int	pfsync_clone_destroy(struct ifnet *);
+void	pfsync_setmtu(struct pfsync_softc *, int);
+int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
+	    struct pf_state_peer *);
+int	pfsync_insert_net_state(struct pfsync_state *, u_int8_t);
+void	pfsync_update_net_tdb(struct pfsync_tdb *);
+int	pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *,
+	    struct rtentry *);
+int	pfsyncioctl(struct ifnet *, u_long, void *);
+void	pfsyncstart(struct ifnet *);
+
+struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
+int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
+int	pfsync_sendout(struct pfsync_softc *);
+int	pfsync_tdb_sendout(struct pfsync_softc *);
+int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
+void	pfsync_timeout(void *);
+void	pfsync_tdb_timeout(void *);
+void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
+void	pfsync_bulk_update(void *);
+void	pfsync_bulkfail(void *);
+
+int	pfsync_sync_ok;
+
+struct if_clone	pfsync_cloner =
+    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
+
+void
+pfsyncattach(int npfsync)
+{
+	if_clone_attach(&pfsync_cloner);
+}
+
+int
+pfsync_clone_create(struct if_clone *ifc, int unit)
+{
+	struct ifnet *ifp;
+
+	if (unit != 0)
+		return (EINVAL);
+
+	pfsync_sync_ok = 1;
+	if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL)
+		return (ENOMEM);
+	bzero(pfsyncif, sizeof(*pfsyncif));
+	pfsyncif->sc_mbuf = NULL;
+	pfsyncif->sc_mbuf_net = NULL;
+	pfsyncif->sc_mbuf_tdb = NULL;
+	pfsyncif->sc_statep.s = NULL;
+	pfsyncif->sc_statep_net.s = NULL;
+	pfsyncif->sc_statep_tdb.t = NULL;
+	pfsyncif->sc_maxupdates = 128;
+	pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
+	pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
+	pfsyncif->sc_ureq_received = 0;
+	pfsyncif->sc_ureq_sent = 0;
+	pfsyncif->sc_bulk_send_next = NULL;
+	pfsyncif->sc_bulk_terminator = NULL;
+	ifp = &pfsyncif->sc_if;
+	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
+	ifp->if_softc = pfsyncif;
+	ifp->if_ioctl = pfsyncioctl;
+	ifp->if_output = pfsyncoutput;
+	ifp->if_start = pfsyncstart;
+	ifp->if_type = IFT_PFSYNC;
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	ifp->if_hdrlen = PFSYNC_HDRLEN;
+	pfsync_setmtu(pfsyncif, ETHERMTU);
+
+	callout_init(&pfsyncif->sc_tmo, 0);
+	callout_init(&pfsyncif->sc_tdb_tmo, 0);
+	callout_init(&pfsyncif->sc_bulk_tmo, 0);
+	callout_init(&pfsyncif->sc_bulkfail_tmo, 0);
+	callout_setfunc(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif);
+	callout_setfunc(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif);
+	callout_setfunc(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif);
+	callout_setfunc(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif);
+
+	if_attach(ifp);
+	if_alloc_sadl(ifp);
+
+#if NCARP > 0
+	if_addgroup(ifp, "carp");
+#endif
+
+#if NBPFILTER > 0
+	bpfattach(&pfsyncif->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
+#endif
+
+	return (0);
+}
+
+int
+pfsync_clone_destroy(struct ifnet *ifp)
+{
+#if NBPFILTER > 0
+	bpfdetach(ifp);
+#endif
+	if_detach(ifp);
+	free(pfsyncif, M_DEVBUF);
+	pfsyncif = NULL;
+	return (0);
+}
+
+/*
+ * Start output on the pfsync interface.
+ */
+void
+pfsyncstart(struct ifnet *ifp)
+{
+	struct mbuf *m;
+	int s;
+
+	for (;;) {
+		s = splnet();
+		IF_DROP(&ifp->if_snd);
+		IF_DEQUEUE(&ifp->if_snd, m);
+		splx(s);
+
+		if (m == NULL)
+			return;
+		else
+			m_freem(m);
+	}
+}
+
+int
+pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
+    struct pf_state_peer *d)
+{
+	if (s->scrub.scrub_flag && d->scrub == NULL) {
+		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
+		if (d->scrub == NULL)
+			return (ENOMEM);
+		bzero(d->scrub, sizeof(*d->scrub));
+	}
+
+	return (0);
+}
+
+int
+pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
+{
+	struct pf_state	*st = NULL;
+	struct pf_state_key *sk = NULL;
+	struct pf_rule *r = NULL;
+	struct pfi_kif	*kif;
+
+	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
+		printf("pfsync_insert_net_state: invalid creator id:"
+		    " %08x\n", ntohl(sp->creatorid));
+		return (EINVAL);
+	}
+
+	kif = pfi_kif_get(sp->ifname);
+	if (kif == NULL) {
+		if (pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync_insert_net_state: "
+			    "unknown interface: %s\n", sp->ifname);
+		/* skip this state */
+		return (0);
+	}
+
+	/*
+	 * If the ruleset checksums match, it's safe to associate the state
+	 * with the rule of that number.
+	 */
+	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag &&
+	    ntohl(sp->rule) <
+	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
+		r = pf_main_ruleset.rules[
+		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
+	else
+		r = &pf_default_rule;
+
+	if (!r->max_states || r->states < r->max_states)
+		st = pool_get(&pf_state_pl, PR_NOWAIT);
+	if (st == NULL) {
+		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+		return (ENOMEM);
+	}
+	bzero(st, sizeof(*st));
+
+	if ((sk = pf_alloc_state_key(st)) == NULL) {
+		pool_put(&pf_state_pl, st);
+		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+		return (ENOMEM);
+	}
+
+	/* allocate memory for scrub info */
+	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
+	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) {
+		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+		if (st->src.scrub)
+			pool_put(&pf_state_scrub_pl, st->src.scrub);
+		pool_put(&pf_state_pl, st);
+		pool_put(&pf_state_key_pl, sk);
+		return (ENOMEM);
+	}
+
+	st->rule.ptr = r;
+	/* XXX get pointers to nat_rule and anchor */
+
+	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
+	r->states++;
+
+	/* fill in the rest of the state entry */
+	pf_state_host_ntoh(&sp->lan, &sk->lan);
+	pf_state_host_ntoh(&sp->gwy, &sk->gwy);
+	pf_state_host_ntoh(&sp->ext, &sk->ext);
+
+	pf_state_peer_ntoh(&sp->src, &st->src);
+	pf_state_peer_ntoh(&sp->dst, &st->dst);
+
+	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
+	st->creation = time_second - ntohl(sp->creation);
+	st->expire = ntohl(sp->expire) + time_second;
+
+	sk->af = sp->af;
+	sk->proto = sp->proto;
+	sk->direction = sp->direction;
+	st->log = sp->log;
+	st->timeout = sp->timeout;
+	st->allow_opts = sp->allow_opts;
+
+	bcopy(sp->id, &st->id, sizeof(st->id));
+	st->creatorid = sp->creatorid;
+	st->sync_flags = PFSTATE_FROMSYNC;
+
+	if (pf_insert_state(kif, st)) {
+		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
+		r->states--;
+		if (st->dst.scrub)
+			pool_put(&pf_state_scrub_pl, st->dst.scrub);
+		if (st->src.scrub)
+			pool_put(&pf_state_scrub_pl, st->src.scrub);
+		pool_put(&pf_state_pl, st);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
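+/*
+ * Handle an incoming pfsync packet: validate the headers, then dispatch
+ * on the action code to apply the carried state messages to the local
+ * state table or to service bulk update requests.
+ */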
+void
+pfsync_input(struct mbuf *m, ...)
+{
+	struct ip *ip = mtod(m, struct ip *);
+	struct pfsync_header *ph;
+	struct pfsync_softc *sc = pfsyncif;
+	struct pf_state *st;
+	struct pf_state_key *sk;
+	struct pf_state_cmp id_key;
+	struct pfsync_state *sp;
+	struct pfsync_state_upd *up;
+	struct pfsync_state_del *dp;
+	struct pfsync_state_clr *cp;
+	struct pfsync_state_upd_req *rup;
+	struct pfsync_state_bus *bus;
+#ifdef IPSEC
+	struct pfsync_tdb *pt;
+#endif
+	struct in_addr src;
+	struct mbuf *mp;
+	int iplen, action, error, i, s, count, offp, sfail, stale = 0;
+	u_int8_t chksum_flag = 0;
+
+	pfsyncstats.pfsyncs_ipackets++;
+
+	/* verify that we have a sync interface configured */
+	if (!sc || !sc->sc_sync_ifp || !pf_status.running)
+		goto done;
+
+	/* verify that the packet came in on the right interface */
+	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
+		pfsyncstats.pfsyncs_badif++;
+		goto done;
+	}
+
+	/* verify that the IP TTL is 255.  */
+	if (ip->ip_ttl != PFSYNC_DFLTTL) {
+		pfsyncstats.pfsyncs_badttl++;
+		goto done;
+	}
+
+	iplen = ip->ip_hl << 2;
+
+	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
+		pfsyncstats.pfsyncs_hdrops++;
+		goto done;
+	}
+
+	if (iplen + sizeof(*ph) > m->m_len) {
+		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
+			pfsyncstats.pfsyncs_hdrops++;
+			goto done;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	ph = (struct pfsync_header *)((char *)ip + iplen);
+
+	/* verify the version */
+	if (ph->version != PFSYNC_VERSION) {
+		pfsyncstats.pfsyncs_badver++;
+		goto done;
+	}
+
+	action = ph->action;
+	count = ph->count;
+
+	/* make sure it's a valid action code */
+	if (action >= PFSYNC_ACT_MAX) {
+		pfsyncstats.pfsyncs_badact++;
+		goto done;
+	}
+
+	/* Cheaper to grab this now than having to mess with mbufs later */
+	src = ip->ip_src;
+
+	if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
+		chksum_flag++;
+
+	switch (action) {
+	case PFSYNC_ACT_CLR: {
+		struct pf_state *nexts;
+		struct pf_state_key *nextsk;
+		struct pfi_kif *kif;
+		u_int32_t creatorid;
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    sizeof(*cp), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
+		creatorid = cp->creatorid;
+
+		s = splsoftnet();
+		if (cp->ifname[0] == '\0') {
+			for (st = RB_MIN(pf_state_tree_id, &tree_id);
+			    st; st = nexts) {
+				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
+				if (st->creatorid == creatorid) {
+					st->sync_flags |= PFSTATE_FROMSYNC;
+					pf_unlink_state(st);
+				}
+			}
+		} else {
+			if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
+				splx(s);
+				return;
+			}
+			for (sk = RB_MIN(pf_state_tree_lan_ext,
+			    &pf_statetbl_lan_ext); sk; sk = nextsk) {
+				nextsk = RB_NEXT(pf_state_tree_lan_ext,
+				    &pf_statetbl_lan_ext, sk);
+				TAILQ_FOREACH(st, &sk->states, next) {
+					if (st->creatorid == creatorid) {
+						st->sync_flags |=
+						    PFSTATE_FROMSYNC;
+						pf_unlink_state(st);
+					}
+				}
+			}
+		}
+		splx(s);
+
+		break;
+	}
+	case PFSYNC_ACT_INS:
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    count * sizeof(*sp), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+
+		s = splsoftnet();
+		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
+		    i < count; i++, sp++) {
+			/* check for invalid values */
+			if (sp->timeout >= PFTM_MAX ||
+			    sp->src.state > PF_TCPS_PROXY_DST ||
+			    sp->dst.state > PF_TCPS_PROXY_DST ||
+			    sp->direction > PF_OUT ||
+			    (sp->af != AF_INET && sp->af != AF_INET6)) {
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync_insert: PFSYNC_ACT_INS: "
+					    "invalid value\n");
+				pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+
+			if ((error = pfsync_insert_net_state(sp,
+			    chksum_flag))) {
+				if (error == ENOMEM) {
+					splx(s);
+					goto done;
+				}
+				continue;
+			}
+		}
+		splx(s);
+		break;
+	case PFSYNC_ACT_UPD:
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    count * sizeof(*sp), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+
+		s = splsoftnet();
+		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
+		    i < count; i++, sp++) {
+			int flags = PFSYNC_FLAG_STALE;
+
+			/* check for invalid values */
+			if (sp->timeout >= PFTM_MAX ||
+			    sp->src.state > PF_TCPS_PROXY_DST ||
+			    sp->dst.state > PF_TCPS_PROXY_DST) {
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync_insert: PFSYNC_ACT_UPD: "
+					    "invalid value\n");
+				pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+
+			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
+			id_key.creatorid = sp->creatorid;
+
+			st = pf_find_state_byid(&id_key);
+			if (st == NULL) {
+				/* insert the update */
+				if (pfsync_insert_net_state(sp, chksum_flag))
+					pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+			sk = st->state_key;
+			sfail = 0;
+			if (sk->proto == IPPROTO_TCP) {
+				/*
+				 * The state should never go backwards except
+				 * for syn-proxy states.  Neither should the
+				 * sequence window slide backwards.
+				 */
+				if (st->src.state > sp->src.state &&
+				    (st->src.state < PF_TCPS_PROXY_SRC ||
+				    sp->src.state >= PF_TCPS_PROXY_SRC))
+					sfail = 1;
+				else if (SEQ_GT(st->src.seqlo,
+				    ntohl(sp->src.seqlo)))
+					sfail = 3;
+				else if (st->dst.state > sp->dst.state) {
+					/* There might still be useful
+					 * information about the src state here,
+					 * so import that part of the update,
+					 * then "fail" so we send the updated
+					 * state back to the peer who is
+					 * missing out on what we know. */
+					pf_state_peer_ntoh(&sp->src, &st->src);
+					/* XXX do anything with timeouts? */
+					sfail = 7;
+					flags = 0;
+				} else if (st->dst.state >= TCPS_SYN_SENT &&
+				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
+					sfail = 4;
+			} else {
+				/*
+				 * Non-TCP protocol state machines always go
+				 * forwards.
+				 */
+				if (st->src.state > sp->src.state)
+					sfail = 5;
+				else if (st->dst.state > sp->dst.state)
+					sfail = 6;
+			}
+			if (sfail) {
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync: %s stale update "
+					    "(%d) id: %016llx "
+					    "creatorid: %08x\n",
+					    (sfail < 7 ?  "ignoring"
+					     : "partial"), sfail,
+					    be64toh(st->id),
+					    ntohl(st->creatorid));
+				pfsyncstats.pfsyncs_badstate++;
+
+				if (!(sp->sync_flags & PFSTATE_STALE)) {
+					/* we have a better state, send it */
+					if (sc->sc_mbuf != NULL && !stale)
+						pfsync_sendout(sc);
+					stale++;
+					if (!st->sync_flags)
+						pfsync_pack_state(
+						    PFSYNC_ACT_UPD, st, flags);
+				}
+				continue;
+			}
+			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
+			pf_state_peer_ntoh(&sp->src, &st->src);
+			pf_state_peer_ntoh(&sp->dst, &st->dst);
+			st->expire = ntohl(sp->expire) + time_second;
+			st->timeout = sp->timeout;
+		}
+		if (stale && sc->sc_mbuf != NULL)
+			pfsync_sendout(sc);
+		splx(s);
+		break;
+	/*
+	 * It's not strictly necessary for us to support the "uncompressed"
+	 * delete action, but it's relatively simple and maintains consistency.
+	 */
+	case PFSYNC_ACT_DEL:
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    count * sizeof(*sp), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+
+		s = splsoftnet();
+		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
+		    i < count; i++, sp++) {
+			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
+			id_key.creatorid = sp->creatorid;
+
+			st = pf_find_state_byid(&id_key);
+			if (st == NULL) {
+				pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+			st->sync_flags |= PFSTATE_FROMSYNC;
+			pf_unlink_state(st);
+		}
+		splx(s);
+		break;
+	case PFSYNC_ACT_UPD_C: {
+		int update_requested = 0;
+
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    count * sizeof(*up), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+
+		s = splsoftnet();
+		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
+		    i < count; i++, up++) {
+			/* check for invalid values */
+			if (up->timeout >= PFTM_MAX ||
+			    up->src.state > PF_TCPS_PROXY_DST ||
+			    up->dst.state > PF_TCPS_PROXY_DST) {
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync_insert: "
+					    "PFSYNC_ACT_UPD_C: "
+					    "invalid value\n");
+				pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+
+			bcopy(up->id, &id_key.id, sizeof(id_key.id));
+			id_key.creatorid = up->creatorid;
+
+			st = pf_find_state_byid(&id_key);
+			if (st == NULL) {
+				/* We don't have this state. Ask for it. */
+				error = pfsync_request_update(up, &src);
+				if (error == ENOMEM) {
+					splx(s);
+					goto done;
+				}
+				update_requested = 1;
+				pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+			sk = st->state_key;
+			sfail = 0;
+			if (sk->proto == IPPROTO_TCP) {
+				/*
+				 * The state should never go backwards except
+				 * for syn-proxy states.  Neither should the
+				 * sequence window slide backwards.
+				 */
+				if (st->src.state > up->src.state &&
+				    (st->src.state < PF_TCPS_PROXY_SRC ||
+				    up->src.state >= PF_TCPS_PROXY_SRC))
+					sfail = 1;
+				else if (st->dst.state > up->dst.state)
+					sfail = 2;
+				else if (SEQ_GT(st->src.seqlo,
+				    ntohl(up->src.seqlo)))
+					sfail = 3;
+				else if (st->dst.state >= TCPS_SYN_SENT &&
+				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
+					sfail = 4;
+			} else {
+				/*
+				 * Non-TCP protocol state machines always go
+				 * forwards.
+				 */
+				if (st->src.state > up->src.state)
+					sfail = 5;
+				else if (st->dst.state > up->dst.state)
+					sfail = 6;
+			}
+			if (sfail) {
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync: ignoring stale update "
+					    "(%d) id: %016llx "
+					    "creatorid: %08x\n", sfail,
+					    be64toh(st->id),
+					    ntohl(st->creatorid));
+				pfsyncstats.pfsyncs_badstate++;
+
+				/* we have a better state, send it out */
+				if ((!stale || update_requested) &&
+				    sc->sc_mbuf != NULL) {
+					pfsync_sendout(sc);
+					update_requested = 0;
+				}
+				stale++;
+				if (!st->sync_flags)
+					pfsync_pack_state(PFSYNC_ACT_UPD, st,
+					    PFSYNC_FLAG_STALE);
+				continue;
+			}
+			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
+			pf_state_peer_ntoh(&up->src, &st->src);
+			pf_state_peer_ntoh(&up->dst, &st->dst);
+			st->expire = ntohl(up->expire) + time_second;
+			st->timeout = up->timeout;
+		}
+		if ((update_requested || stale) && sc->sc_mbuf)
+			pfsync_sendout(sc);
+		splx(s);
+		break;
+	}
+	case PFSYNC_ACT_DEL_C:
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    count * sizeof(*dp), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+
+		s = splsoftnet();
+		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
+		    i < count; i++, dp++) {
+			bcopy(dp->id, &id_key.id, sizeof(id_key.id));
+			id_key.creatorid = dp->creatorid;
+
+			st = pf_find_state_byid(&id_key);
+			if (st == NULL) {
+				pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+			st->sync_flags |= PFSTATE_FROMSYNC;
+			pf_unlink_state(st);
+		}
+		splx(s);
+		break;
+	case PFSYNC_ACT_INS_F:
+	case PFSYNC_ACT_DEL_F:
+		/* not implemented */
+		break;
+	case PFSYNC_ACT_UREQ:
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    count * sizeof(*rup), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+
+		s = splsoftnet();
+		if (sc->sc_mbuf != NULL)
+			pfsync_sendout(sc);
+		for (i = 0,
+		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
+		    i < count; i++, rup++) {
+			bcopy(rup->id, &id_key.id, sizeof(id_key.id));
+			id_key.creatorid = rup->creatorid;
+
+			if (id_key.id == 0 && id_key.creatorid == 0) {
+				sc->sc_ureq_received = time_uptime;
+				if (sc->sc_bulk_send_next == NULL)
+					sc->sc_bulk_send_next =
+					    TAILQ_FIRST(&state_list);
+				sc->sc_bulk_terminator = sc->sc_bulk_send_next;
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync: received "
+					    "bulk update request\n");
+				pfsync_send_bus(sc, PFSYNC_BUS_START);
+				callout_schedule(&sc->sc_bulk_tmo, 1 * hz);
+			} else {
+				st = pf_find_state_byid(&id_key);
+				if (st == NULL) {
+					pfsyncstats.pfsyncs_badstate++;
+					continue;
+				}
+				if (!st->sync_flags)
+					pfsync_pack_state(PFSYNC_ACT_UPD,
+					    st, 0);
+			}
+		}
+		if (sc->sc_mbuf != NULL)
+			pfsync_sendout(sc);
+		splx(s);
+		break;
+	case PFSYNC_ACT_BUS:
+		/* If we're not waiting for a bulk update, who cares. */
+		if (sc->sc_ureq_sent == 0)
+			break;
+
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    sizeof(*bus), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
+		switch (bus->status) {
+		case PFSYNC_BUS_START:
+			callout_schedule(&sc->sc_bulkfail_tmo,
+			    pf_pool_limits[PF_LIMIT_STATES].limit /
+			    (PFSYNC_BULKPACKETS * sc->sc_maxcount));
+			if (pf_status.debug >= PF_DEBUG_MISC)
+				printf("pfsync: received bulk "
+				    "update start\n");
+			break;
+		case PFSYNC_BUS_END:
+			if (time_uptime - ntohl(bus->endtime) >=
+			    sc->sc_ureq_sent) {
+				/* that's it, we're happy */
+				sc->sc_ureq_sent = 0;
+				sc->sc_bulk_tries = 0;
+				callout_stop(&sc->sc_bulkfail_tmo);
+#if NCARP > 0
+				if (!pfsync_sync_ok)
+					carp_group_demote_adj(&sc->sc_if, -1);
+#endif
+				pfsync_sync_ok = 1;
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync: received valid "
+					    "bulk update end\n");
+			} else {
+				if (pf_status.debug >= PF_DEBUG_MISC)
+					printf("pfsync: received invalid "
+					    "bulk update end: bad timestamp\n");
+			}
+			break;
+		}
+		break;
+#ifdef IPSEC
+	case PFSYNC_ACT_TDB_UPD:
+		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+		    count * sizeof(*pt), &offp)) == NULL) {
+			pfsyncstats.pfsyncs_badlen++;
+			return;
+		}
+		s = splsoftnet();
+		for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
+		    i < count; i++, pt++)
+			pfsync_update_net_tdb(pt);
+		splx(s);
+		break;
+#endif
+	}
+
+done:
+	if (m)
+		m_freem(m);
+}
+
+int
+pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+	struct rtentry *rt)
+{
+	m_freem(m);
+	return (0);
+}
+
+/* ARGSUSED */
+int
+pfsyncioctl(struct ifnet *ifp, u_long cmd, void *data)
+{
+	struct lwp *l = curlwp;
+	struct pfsync_softc *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct ip_moptions *imo = &sc->sc_imo;
+	struct pfsyncreq pfsyncr;
+	struct ifnet    *sifp;
+	int s, error;
+
+	switch (cmd) {
+	case SIOCSIFADDR:
+	case SIOCAIFADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCSIFFLAGS:
+		if (ifp->if_flags & IFF_UP)
+			ifp->if_flags |= IFF_RUNNING;
+		else
+			ifp->if_flags &= ~IFF_RUNNING;
+		break;
+	case SIOCSIFMTU:
+		if (ifr->ifr_mtu < PFSYNC_MINMTU)
+			return (EINVAL);
+		if (ifr->ifr_mtu > MCLBYTES)
+			ifr->ifr_mtu = MCLBYTES;
+		s = splnet();
+		if (ifr->ifr_mtu < ifp->if_mtu)
+			pfsync_sendout(sc);
+		pfsync_setmtu(sc, ifr->ifr_mtu);
+		splx(s);
+		break;
+	case SIOCGETPFSYNC:
+		if ((error = kauth_authorize_network(l->l_cred,
+		    KAUTH_NETWORK_INTERFACE,
+		    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, ifp, (void *)cmd,
+		    NULL)) != 0)
+			return (error);
+		bzero(&pfsyncr, sizeof(pfsyncr));
+		if (sc->sc_sync_ifp)
+			strlcpy(pfsyncr.pfsyncr_syncdev,
+			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
+		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
+		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
+		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
+			return (error);
+		break;
+	case SIOCSETPFSYNC:
+		if ((error = kauth_authorize_network(l->l_cred,
+		    KAUTH_NETWORK_INTERFACE,
+		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
+		    NULL)) != 0)
+			return (error);
+		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
+			return (error);
+
+		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
+			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
+		else
+			sc->sc_sync_peer.s_addr =
+			    pfsyncr.pfsyncr_syncpeer.s_addr;
+
+		if (pfsyncr.pfsyncr_maxupdates > 255)
+			return (EINVAL);
+		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
+
+		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
+			sc->sc_sync_ifp = NULL;
+			if (sc->sc_mbuf_net != NULL) {
+				/* Don't keep stale pfsync packets around. */
+				s = splnet();
+				m_freem(sc->sc_mbuf_net);
+				sc->sc_mbuf_net = NULL;
+				sc->sc_statep_net.s = NULL;
+				splx(s);
+			}
+			if (imo->imo_num_memberships > 0) {
+				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+				imo->imo_multicast_ifp = NULL;
+			}
+			break;
+		}
+
+		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
+			return (EINVAL);
+
+		s = splnet();
+		if (sifp->if_mtu < sc->sc_if.if_mtu ||
+		    (sc->sc_sync_ifp != NULL &&
+		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
+		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
+			pfsync_sendout(sc);
+		sc->sc_sync_ifp = sifp;
+
+		pfsync_setmtu(sc, sc->sc_if.if_mtu);
+
+		if (imo->imo_num_memberships > 0) {
+			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+			imo->imo_multicast_ifp = NULL;
+		}
+
+		if (sc->sc_sync_ifp &&
+		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+			struct in_addr addr;
+
+			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
+				sc->sc_sync_ifp = NULL;
+				splx(s);
+				return (EADDRNOTAVAIL);
+			}
+
+			addr.s_addr = INADDR_PFSYNC_GROUP;
+
+			if ((imo->imo_membership[0] =
+			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
+				sc->sc_sync_ifp = NULL;
+				splx(s);
+				return (ENOBUFS);
+			}
+			imo->imo_num_memberships++;
+			imo->imo_multicast_ifp = sc->sc_sync_ifp;
+			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
+			imo->imo_multicast_loop = 0;
+		}
+
+		if (sc->sc_sync_ifp ||
+		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
+			/* Request a full state table update. */
+			sc->sc_ureq_sent = time_uptime;
+#if NCARP > 0
+			if (pfsync_sync_ok)
+				carp_group_demote_adj(&sc->sc_if, 1);
+#endif
+			pfsync_sync_ok = 0;
+			if (pf_status.debug >= PF_DEBUG_MISC)
+				printf("pfsync: requesting bulk update\n");
+			callout_schedule(&sc->sc_bulkfail_tmo, 5 * hz);
+			error = pfsync_request_update(NULL, NULL);
+			if (error == ENOMEM) {
+				splx(s);
+				return (ENOMEM);
+			}
+			pfsync_sendout(sc);
+		}
+		splx(s);
+
+		break;
+
+	default:
+		return ifioctl_common(ifp, cmd, data);
+	}
+
+	return (0);
+}
+
+void
+pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
+{
+	int mtu;
+
+	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
+		mtu = sc->sc_sync_ifp->if_mtu;
+	else
+		mtu = mtu_req;
+
+	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
+	    sizeof(struct pfsync_state);
+	if (sc->sc_maxcount > 254)
+		sc->sc_maxcount = 254;
+	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
+	    sc->sc_maxcount * sizeof(struct pfsync_state);
+}
+
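+/*
+ * Allocate an mbuf large enough for the given action type, initialise
+ * the pfsync header, and arm the transmit timeout.
+ */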
+struct mbuf *
+pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
+{
+	struct pfsync_header *h;
+	struct mbuf *m;
+	int len;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL) {
+		sc->sc_if.if_oerrors++;
+		return (NULL);
+	}
+
+	switch (action) {
+	case PFSYNC_ACT_CLR:
+		len = sizeof(struct pfsync_header) +
+		    sizeof(struct pfsync_state_clr);
+		break;
+	case PFSYNC_ACT_UPD_C:
+		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
+		    sizeof(struct pfsync_header);
+		break;
+	case PFSYNC_ACT_DEL_C:
+		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
+		    sizeof(struct pfsync_header);
+		break;
+	case PFSYNC_ACT_UREQ:
+		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
+		    sizeof(struct pfsync_header);
+		break;
+	case PFSYNC_ACT_BUS:
+		len = sizeof(struct pfsync_header) +
+		    sizeof(struct pfsync_state_bus);
+		break;
+	case PFSYNC_ACT_TDB_UPD:
+		len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
+		    sizeof(struct pfsync_header);
+		break;
+	default:
+		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
+		    sizeof(struct pfsync_header);
+		break;
+	}
+
+	if (len > MHLEN) {
+		MCLGET(m, M_DONTWAIT);
+		if ((m->m_flags & M_EXT) == 0) {
+			m_free(m);
+			sc->sc_if.if_oerrors++;
+			return (NULL);
+		}
+		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
+	} else
+		MH_ALIGN(m, len);
+
+	m->m_pkthdr.rcvif = NULL;
+	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
+	h = mtod(m, struct pfsync_header *);
+	h->version = PFSYNC_VERSION;
+	h->af = 0;
+	h->count = 0;
+	h->action = action;
+	if (action != PFSYNC_ACT_TDB_UPD)
+		bcopy(&pf_status.pf_chksum, &h->pf_chksum,
+		    PF_MD5_DIGEST_LENGTH);
+
+	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
+	if (action == PFSYNC_ACT_TDB_UPD)
+		callout_schedule(&sc->sc_tdb_tmo, hz);
+	else
+		callout_schedule(&sc->sc_tmo, hz);
+	return (m);
+}
+
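+/*
+ * Append a message for the given state and action to the pending pfsync
+ * mbuf, collapsing repeated updates for the same state and, where
+ * appropriate, building the compressed form used on the wire; the
+ * packet is flushed once it is full or the update limit is reached.
+ */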
+int
+pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
+{
+	struct ifnet *ifp = NULL;
+	struct pfsync_softc *sc = pfsyncif;
+	struct pfsync_header *h, *h_net;
+	struct pfsync_state *sp = NULL;
+	struct pfsync_state_upd *up = NULL;
+	struct pfsync_state_del *dp = NULL;
+	struct pf_state_key *sk = st->state_key;
+	struct pf_rule *r;
+	u_long secs;
+	int s, ret = 0;
+	u_int8_t i = 255, newaction = 0;
+
+	if (sc == NULL)
+		return (0);
+	ifp = &sc->sc_if;
+
+	/*
+	 * If a packet falls in the forest and there's nobody around to
+	 * hear, does it make a sound?
+	 */
+	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+		/* Don't leave any stale pfsync packets hanging around. */
+		if (sc->sc_mbuf != NULL) {
+			m_freem(sc->sc_mbuf);
+			sc->sc_mbuf = NULL;
+			sc->sc_statep.s = NULL;
+		}
+		return (0);
+	}
+
+	if (action >= PFSYNC_ACT_MAX)
+		return (EINVAL);
+
+	s = splnet();
+	if (sc->sc_mbuf == NULL) {
+		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
+		    (void *)&sc->sc_statep.s)) == NULL) {
+			splx(s);
+			return (ENOMEM);
+		}
+		h = mtod(sc->sc_mbuf, struct pfsync_header *);
+	} else {
+		h = mtod(sc->sc_mbuf, struct pfsync_header *);
+		if (h->action != action) {
+			pfsync_sendout(sc);
+			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
+			    (void *)&sc->sc_statep.s)) == NULL) {
+				splx(s);
+				return (ENOMEM);
+			}
+			h = mtod(sc->sc_mbuf, struct pfsync_header *);
+		} else {
+			/*
+			 * If it's an update, look in the packet to see if
+			 * we already have an update for the state.
+			 */
+			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
+				struct pfsync_state *usp =
+				    (void *)((char *)h + PFSYNC_HDRLEN);
+
+				for (i = 0; i < h->count; i++) {
+					if (!memcmp(usp->id, &st->id,
+					    PFSYNC_ID_LEN) &&
+					    usp->creatorid == st->creatorid) {
+						sp = usp;
+						sp->updates++;
+						break;
+					}
+					usp++;
+				}
+			}
+		}
+	}
+
+	secs = time_second;
+
+	st->pfsync_time = time_uptime;
+
+	if (sp == NULL) {
+		/* not a "duplicate" update */
+		i = 255;
+		sp = sc->sc_statep.s++;
+		sc->sc_mbuf->m_pkthdr.len =
+		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
+		h->count++;
+		bzero(sp, sizeof(*sp));
+
+		bcopy(&st->id, sp->id, sizeof(sp->id));
+		sp->creatorid = st->creatorid;
+
+		strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
+		pf_state_host_hton(&sk->lan, &sp->lan);
+		pf_state_host_hton(&sk->gwy, &sp->gwy);
+		pf_state_host_hton(&sk->ext, &sp->ext);
+
+		bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
+
+		sp->creation = htonl(secs - st->creation);
+		pf_state_counter_hton(st->packets[0], sp->packets[0]);
+		pf_state_counter_hton(st->packets[1], sp->packets[1]);
+		pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
+		pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
+		if ((r = st->rule.ptr) == NULL)
+			sp->rule = htonl(-1);
+		else
+			sp->rule = htonl(r->nr);
+		if ((r = st->anchor.ptr) == NULL)
+			sp->anchor = htonl(-1);
+		else
+			sp->anchor = htonl(r->nr);
+		sp->af = sk->af;
+		sp->proto = sk->proto;
+		sp->direction = sk->direction;
+		sp->log = st->log;
+		sp->allow_opts = st->allow_opts;
+		sp->timeout = st->timeout;
+
+		if (flags & PFSYNC_FLAG_STALE)
+			sp->sync_flags |= PFSTATE_STALE;
+	}
+
+	pf_state_peer_hton(&st->src, &sp->src);
+	pf_state_peer_hton(&st->dst, &sp->dst);
+
+	if (st->expire <= secs)
+		sp->expire = htonl(0);
+	else
+		sp->expire = htonl(st->expire - secs);
+
+	/* do we need to build "compressed" actions for network transfer? */
+	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
+		switch (action) {
+		case PFSYNC_ACT_UPD:
+			newaction = PFSYNC_ACT_UPD_C;
+			break;
+		case PFSYNC_ACT_DEL:
+			newaction = PFSYNC_ACT_DEL_C;
+			break;
+		default:
+			/* by default we just send the uncompressed states */
+			break;
+		}
+	}
+
+	if (newaction) {
+		if (sc->sc_mbuf_net == NULL) {
+			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
+			    (void *)&sc->sc_statep_net.s)) == NULL) {
+				splx(s);
+				return (ENOMEM);
+			}
+		}
+		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
+
+		switch (newaction) {
+		case PFSYNC_ACT_UPD_C:
+			if (i != 255) {
+				up = (void *)((char *)h_net +
+				    PFSYNC_HDRLEN + (i * sizeof(*up)));
+				up->updates++;
+			} else {
+				h_net->count++;
+				sc->sc_mbuf_net->m_pkthdr.len =
+				    sc->sc_mbuf_net->m_len += sizeof(*up);
+				up = sc->sc_statep_net.u++;
+
+				bzero(up, sizeof(*up));
+				bcopy(&st->id, up->id, sizeof(up->id));
+				up->creatorid = st->creatorid;
+			}
+			up->timeout = st->timeout;
+			up->expire = sp->expire;
+			up->src = sp->src;
+			up->dst = sp->dst;
+			break;
+		case PFSYNC_ACT_DEL_C:
+			sc->sc_mbuf_net->m_pkthdr.len =
+			    sc->sc_mbuf_net->m_len += sizeof(*dp);
+			dp = sc->sc_statep_net.d++;
+			h_net->count++;
+
+			bzero(dp, sizeof(*dp));
+			bcopy(&st->id, dp->id, sizeof(dp->id));
+			dp->creatorid = st->creatorid;
+			break;
+		}
+	}
+
+	if (h->count == sc->sc_maxcount ||
+	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
+		ret = pfsync_sendout(sc);
+
+	splx(s);
+	return (ret);
+}
+
+/* This must be called in splnet() */
+int
+pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
+{
+	struct ifnet *ifp = NULL;
+	struct pfsync_header *h;
+	struct pfsync_softc *sc = pfsyncif;
+	struct pfsync_state_upd_req *rup;
+	int ret = 0;
+
+	if (sc == NULL)
+		return (0);
+
+	ifp = &sc->sc_if;
+	if (sc->sc_mbuf == NULL) {
+		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
+		    (void *)&sc->sc_statep.s)) == NULL)
+			return (ENOMEM);
+		h = mtod(sc->sc_mbuf, struct pfsync_header *);
+	} else {
+		h = mtod(sc->sc_mbuf, struct pfsync_header *);
+		if (h->action != PFSYNC_ACT_UREQ) {
+			pfsync_sendout(sc);
+			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
+			    (void *)&sc->sc_statep.s)) == NULL)
+				return (ENOMEM);
+			h = mtod(sc->sc_mbuf, struct pfsync_header *);
+		}
+	}
+
+	if (src != NULL)
+		sc->sc_sendaddr = *src;
+	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
+	h->count++;
+	rup = sc->sc_statep.r++;
+	bzero(rup, sizeof(*rup));
+	if (up != NULL) {
+		bcopy(up->id, rup->id, sizeof(rup->id));
+		rup->creatorid = up->creatorid;
+	}
+
+	if (h->count == sc->sc_maxcount)
+		ret = pfsync_sendout(sc);
+
+	return (ret);
+}
+
+int
+pfsync_clear_states(u_int32_t creatorid, char *ifname)
+{
+	struct ifnet *ifp = NULL;
+	struct pfsync_softc *sc = pfsyncif;
+	struct pfsync_state_clr *cp;
+	int s, ret;
+
+	if (sc == NULL)
+		return (0);
+
+	ifp = &sc->sc_if;
+	s = splnet();
+	if (sc->sc_mbuf != NULL)
+		pfsync_sendout(sc);
+	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
+	    (void *)&sc->sc_statep.c)) == NULL) {
+		splx(s);
+		return (ENOMEM);
+	}
+	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
+	cp = sc->sc_statep.c;
+	cp->creatorid = creatorid;
+	if (ifname != NULL)
+		strlcpy(cp->ifname, ifname, IFNAMSIZ);
+
+	ret = (pfsync_sendout(sc));
+	splx(s);
+	return (ret);
+}
+
+void
+pfsync_timeout(void *v)
+{
+	struct pfsync_softc *sc = v;
+	int s;
+
+	s = splnet();
+	pfsync_sendout(sc);
+	splx(s);
+}
+
+void
+pfsync_tdb_timeout(void *v)
+{
+	struct pfsync_softc *sc = v;
+	int s;
+
+	s = splnet();
+	pfsync_tdb_sendout(sc);
+	splx(s);
+}
+
+/* This must be called in splnet() */
+void
+pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
+{
+	struct pfsync_state_bus *bus;
+
+	if (sc->sc_mbuf != NULL)
+		pfsync_sendout(sc);
+
+	if (pfsync_sync_ok &&
+	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
+	    (void *)&sc->sc_statep.b)) != NULL) {
+		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
+		bus = sc->sc_statep.b;
+		bus->creatorid = pf_status.hostid;
+		bus->status = status;
+		bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
+		pfsync_sendout(sc);
+	}
+}
+
+void
+pfsync_bulk_update(void *v)
+{
+	struct pfsync_softc *sc = v;
+	int s, i = 0;
+	struct pf_state *state;
+
+	s = splnet();
+	if (sc->sc_mbuf != NULL)
+		pfsync_sendout(sc);
+
+	/*
+	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
+	 * been sent since the latest request was made.
+	 */
+	state = sc->sc_bulk_send_next;
+	if (state)
+		do {
+			/* send state update if syncable and not already sent */
+			if (!state->sync_flags
+			    && state->timeout < PFTM_MAX
+			    && state->pfsync_time <= sc->sc_ureq_received) {
+				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
+				i++;
+			}
+
+			/* figure next state to send */
+			state = TAILQ_NEXT(state, entry_list);
+
+			/* wrap to start of list if we hit the end */
+			if (!state)
+				state = TAILQ_FIRST(&state_list);
+		} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
+		    state != sc->sc_bulk_terminator);
+
+	if (!state || state == sc->sc_bulk_terminator) {
+		/* we're done */
+		pfsync_send_bus(sc, PFSYNC_BUS_END);
+		sc->sc_ureq_received = 0;
+		sc->sc_bulk_send_next = NULL;
+		sc->sc_bulk_terminator = NULL;
+		callout_stop(&sc->sc_bulk_tmo);
+		if (pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync: bulk update complete\n");
+	} else {
+		/* look again for more in a bit */
+		callout_schedule(&sc->sc_bulk_tmo, 1);
+		sc->sc_bulk_send_next = state;
+	}
+	if (sc->sc_mbuf != NULL)
+		pfsync_sendout(sc);
+	splx(s);
+}
+
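+/*
+ * Callout handler for a bulk update that did not complete: retry the
+ * update request up to PFSYNC_MAX_BULKTRIES times, then give up and
+ * behave as if the transfer had succeeded.
+ */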
+void
+pfsync_bulkfail(void *v)
+{
+	struct pfsync_softc *sc = v;
+	int s, error;
+
+	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
+		/* Try again in a bit */
+		callout_schedule(&sc->sc_bulkfail_tmo, 5 * hz);
+		s = splnet();
+		error = pfsync_request_update(NULL, NULL);
+		if (error == ENOMEM) {
+			if (pf_status.debug >= PF_DEBUG_MISC)
+				printf("pfsync: cannot allocate mbufs for "
+				    "bulk update\n");
+		} else
+			pfsync_sendout(sc);
+		splx(s);
+	} else {
+		/* Pretend like the transfer was ok */
+		sc->sc_ureq_sent = 0;
+		sc->sc_bulk_tries = 0;
+#if NCARP > 0
+		if (!pfsync_sync_ok)
+			carp_group_demote_adj(&sc->sc_if, -1);
+#endif
+		pfsync_sync_ok = 1;
+		if (pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync: failed to receive "
+			    "bulk update status\n");
+		callout_stop(&sc->sc_bulkfail_tmo);
+	}
+}
+
+/* This must be called in splnet() */
+int
+pfsync_sendout(struct pfsync_softc *sc)
+{
+#if NBPFILTER > 0
+	struct ifnet *ifp = &sc->sc_if;
+#endif
+	struct mbuf *m;
+
+	callout_stop(&sc->sc_tmo);
+
+	if (sc->sc_mbuf == NULL)
+		return (0);
+	m = sc->sc_mbuf;
+	sc->sc_mbuf = NULL;
+	sc->sc_statep.s = NULL;
+
+#if NBPFILTER > 0
+	if (ifp->if_bpf)
+		bpf_mtap(ifp->if_bpf, m);
+#endif
+
+	if (sc->sc_mbuf_net) {
+		m_freem(m);
+		m = sc->sc_mbuf_net;
+		sc->sc_mbuf_net = NULL;
+		sc->sc_statep_net.s = NULL;
+	}
+
+	return pfsync_sendout_mbuf(sc, m);
+}
+
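+/* As pfsync_sendout(), but for the mbuf carrying queued TDB updates. */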
+int
+pfsync_tdb_sendout(struct pfsync_softc *sc)
+{
+#if NBPFILTER > 0
+	struct ifnet *ifp = &sc->sc_if;
+#endif
+	struct mbuf *m;
+
+	callout_stop(&sc->sc_tdb_tmo);
+
+	if (sc->sc_mbuf_tdb == NULL)
+		return (0);
+	m = sc->sc_mbuf_tdb;
+	sc->sc_mbuf_tdb = NULL;
+	sc->sc_statep_tdb.t = NULL;
+
+#if NBPFILTER > 0
+	if (ifp->if_bpf)
+		bpf_mtap(ifp->if_bpf, m);
+#endif
+
+	return pfsync_sendout_mbuf(sc, m);
+}
+
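+/*
+ * Prepend an IPv4 header and hand the packet to ip_output().
+ * The packet is dropped if neither a sync interface nor a unicast
+ * sync peer has been configured.
+ */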
+int
+pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
+{
+	struct sockaddr sa;
+	struct ip *ip;
+
+	if (sc->sc_sync_ifp ||
+	    sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
+		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+		if (m == NULL) {
+			pfsyncstats.pfsyncs_onomem++;
+			return (0);
+		}
+		ip = mtod(m, struct ip *);
+		ip->ip_v = IPVERSION;
+		ip->ip_hl = sizeof(*ip) >> 2;
+		ip->ip_tos = IPTOS_LOWDELAY;
+		ip->ip_len = htons(m->m_pkthdr.len);
+		ip->ip_id = htons(ip_randomid(0));
+		ip->ip_off = htons(IP_DF);
+		ip->ip_ttl = PFSYNC_DFLTTL;
+		ip->ip_p = IPPROTO_PFSYNC;
+		ip->ip_sum = 0;
+
+		bzero(&sa, sizeof(sa));
+		ip->ip_src.s_addr = INADDR_ANY;
+
+		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
+			m->m_flags |= M_MCAST;
+		ip->ip_dst = sc->sc_sendaddr;
+		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
+
+		pfsyncstats.pfsyncs_opackets++;
+
+		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
+			pfsyncstats.pfsyncs_oerrors++;
+	} else
+		m_freem(m);
+
+	return (0);
+}
+
+#ifdef IPSEC
+/* Update an in-kernel tdb. Silently fail if no tdb is found. */
+void
+pfsync_update_net_tdb(struct pfsync_tdb *pt)
+{
+	struct tdb		*tdb;
+	int			 s;
+
+	/* check for invalid values */
+	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
+	    (pt->dst.sa.sa_family != AF_INET &&
+	     pt->dst.sa.sa_family != AF_INET6))
+		goto bad;
+
+	s = spltdb();
+	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
+	if (tdb) {
+		pt->rpl = ntohl(pt->rpl);
+		pt->cur_bytes = betoh64(pt->cur_bytes);
+
+		/* Neither replay nor byte counter should ever decrease. */
+		if (pt->rpl < tdb->tdb_rpl ||
+		    pt->cur_bytes < tdb->tdb_cur_bytes) {
+			splx(s);
+			goto bad;
+		}
+
+		tdb->tdb_rpl = pt->rpl;
+		tdb->tdb_cur_bytes = pt->cur_bytes;
+	}
+	splx(s);
+	return;
+
+ bad:
+	if (pf_status.debug >= PF_DEBUG_MISC)
+		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
+		    "invalid value\n");
+	pfsyncstats.pfsyncs_badstate++;
+	return;
+}
+
+/* One of our local tdbs has been updated; sync its rpl with the other hosts. */
+int
+pfsync_update_tdb(struct tdb *tdb, int output)
+{
+	struct ifnet *ifp = NULL;
+	struct pfsync_softc *sc = pfsyncif;
+	struct pfsync_header *h;
+	struct pfsync_tdb *pt = NULL;
+	int s, i, ret = 0;
+
+	if (sc == NULL)
+		return (0);
+
+	ifp = &sc->sc_if;
+	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+		/* Don't leave any stale pfsync packets hanging around. */
+		if (sc->sc_mbuf_tdb != NULL) {
+			m_freem(sc->sc_mbuf_tdb);
+			sc->sc_mbuf_tdb = NULL;
+			sc->sc_statep_tdb.t = NULL;
+		}
+		return (0);
+	}
+
+	s = splnet();
+	if (sc->sc_mbuf_tdb == NULL) {
+		if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD,
+		    (void *)&sc->sc_statep_tdb.t)) == NULL) {
+			splx(s);
+			return (ENOMEM);
+		}
+		h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+	} else {
+		h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+		if (h->action != PFSYNC_ACT_TDB_UPD) {
+			/*
+			 * XXX will never happen as long as there's
+			 * only one "TDB action".
+			 */
+			pfsync_tdb_sendout(sc);
+			sc->sc_mbuf_tdb = pfsync_get_mbuf(sc,
+			    PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t);
+			if (sc->sc_mbuf_tdb == NULL) {
+				splx(s);
+				return (ENOMEM);
+			}
+			h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+		} else if (sc->sc_maxupdates) {
+			/*
+			 * If it's an update, look in the packet to see if
+			 * we already have an update for the state.
+			 */
+			struct pfsync_tdb *u =
+			    (void *)((char *)h + PFSYNC_HDRLEN);
+
+			for (i = 0; !pt && i < h->count; i++) {
+				if (tdb->tdb_spi == u->spi &&
+				    tdb->tdb_sproto == u->sproto &&
+				    !bcmp(&tdb->tdb_dst, &u->dst,
+				    SA_LEN(&u->dst.sa))) {
+					pt = u;
+					pt->updates++;
+				}
+				u++;
+			}
+		}
+	}
+
+	if (pt == NULL) {
+		/* not a "duplicate" update */
+		pt = sc->sc_statep_tdb.t++;
+		sc->sc_mbuf_tdb->m_pkthdr.len =
+		    sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb);
+		h->count++;
+		bzero(pt, sizeof(*pt));
+
+		pt->spi = tdb->tdb_spi;
+		memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst);
+		pt->sproto = tdb->tdb_sproto;
+	}
+
+	/*
+	 * When a failover happens, the master's rpl is probably above
+	 * what we see here (we may be up to a second late), so
+	 * increase it a bit for outbound tdbs to manage most such
+	 * situations.
+	 *
+	 * For now, just add an offset that is likely to be larger
+	 * than the number of packets we can see in one second. The RFC
+	 * just says the next packet must have a higher seq value.
+	 *
+	 * XXX What is a good algorithm for this? We could use
+	 * a rate-determined increase, but to know it, we would have
+	 * to extend struct tdb.
+	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
+	 * will soon be replaced anyway. For now, just don't handle
+	 * this edge case.
+	 */
+#define RPL_INCR 16384
+	pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0));
+	pt->cur_bytes = htobe64(tdb->tdb_cur_bytes);
+
+	if (h->count == sc->sc_maxcount ||
+	    (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates)))
+		ret = pfsync_tdb_sendout(sc);
+
+	splx(s);
+	return (ret);
+}
+#endif
Index: sys/dist/pf/net/if_pfsync.h
===================================================================
RCS file: sys/dist/pf/net/if_pfsync.h
diff -N sys/dist/pf/net/if_pfsync.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/dist/pf/net/if_pfsync.h	1 Aug 2009 14:53:12 -0000
@@ -0,0 +1,284 @@
+/*	$OpenBSD: if_pfsync.h,v 1.31 2007/05/31 04:11:42 mcbride Exp $	*/
+
+/*
+ * Copyright (c) 2001 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFSYNC_H_
+#define _NET_IF_PFSYNC_H_
+
+#define  INADDR_PFSYNC_GROUP     __IPADDR(0xe00000f0)    /* 224.0.0.240 */
+
+#define PFSYNC_ID_LEN	sizeof(u_int64_t)
+
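+/* PFSYNC_ACT_TDB_UPD message body: IPsec SA replay counter update. */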
+struct pfsync_tdb {
+	u_int32_t	spi;
+	union sockaddr_union dst;
+	u_int32_t	rpl;
+	u_int64_t	cur_bytes;
+	u_int8_t	sproto;
+	u_int8_t	updates;
+	u_int8_t	pad[2];
+} __packed;
+
+struct pfsync_state_upd {
+	u_int32_t		id[2];
+	struct pfsync_state_peer	src;
+	struct pfsync_state_peer	dst;
+	u_int32_t		creatorid;
+	u_int32_t		expire;
+	u_int8_t		timeout;
+	u_int8_t		updates;
+	u_int8_t		pad[6];
+} __packed;
+
+struct pfsync_state_del {
+	u_int32_t		id[2];
+	u_int32_t		creatorid;
+	struct {
+		u_int8_t	state;
+	} src;
+	struct {
+		u_int8_t	state;
+	} dst;
+	u_int8_t		pad[2];
+} __packed;
+
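+/* PFSYNC_ACT_UREQ message body: request an uncompressed state update. */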
+struct pfsync_state_upd_req {
+	u_int32_t		id[2];
+	u_int32_t		creatorid;
+	u_int32_t		pad;
+} __packed;
+
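+/* PFSYNC_ACT_CLR message body: clear states for a creator id/interface. */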
+struct pfsync_state_clr {
+	char			ifname[IFNAMSIZ];
+	u_int32_t		creatorid;
+	u_int32_t		pad;
+} __packed;
+
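+/* PFSYNC_ACT_BUS message body: bulk update status. */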
+struct pfsync_state_bus {
+	u_int32_t		creatorid;
+	u_int32_t		endtime;
+	u_int8_t		status;
+#define PFSYNC_BUS_START	1
+#define PFSYNC_BUS_END		2
+	u_int8_t		pad[7];
+} __packed;
+
+#ifdef _KERNEL
+
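+/* Write cursor into the current output mbuf, typed per pfsync action. */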
+union sc_statep {
+	struct pfsync_state	*s;
+	struct pfsync_state_upd	*u;
+	struct pfsync_state_del	*d;
+	struct pfsync_state_clr	*c;
+	struct pfsync_state_bus	*b;
+	struct pfsync_state_upd_req	*r;
+};
+
+union sc_tdb_statep {
+	struct pfsync_tdb	*t;
+};
+
+extern int	pfsync_sync_ok;
+
+struct pfsync_softc {
+	struct ifnet		 sc_if;
+	struct ifnet		*sc_sync_ifp;
+
+	struct ip_moptions	 sc_imo;
+	struct callout		 sc_tmo;
+	struct callout		 sc_tdb_tmo;
+	struct callout		 sc_bulk_tmo;
+	struct callout		 sc_bulkfail_tmo;
+	struct in_addr		 sc_sync_peer;
+	struct in_addr		 sc_sendaddr;
+	struct mbuf		*sc_mbuf;	/* current cumulative mbuf */
+	struct mbuf		*sc_mbuf_net;	/* current cumulative mbuf */
+	struct mbuf		*sc_mbuf_tdb;	/* ditto, for TDB updates */
+	union sc_statep		 sc_statep;
+	union sc_statep		 sc_statep_net;
+	union sc_tdb_statep	 sc_statep_tdb;
+	u_int32_t		 sc_ureq_received;
+	u_int32_t		 sc_ureq_sent;
+	struct pf_state		*sc_bulk_send_next;
+	struct pf_state		*sc_bulk_terminator;
+	int			 sc_bulk_tries;
+	int			 sc_maxcount;	/* number of states in mtu */
+	int			 sc_maxupdates;	/* number of updates/state */
+};
+
+extern struct pfsync_softc	*pfsyncif;
+#endif
+
+
+struct pfsync_header {
+	u_int8_t version;
+#define	PFSYNC_VERSION	3
+	u_int8_t af;
+	u_int8_t action;
+#define	PFSYNC_ACT_CLR		0	/* clear all states */
+#define	PFSYNC_ACT_INS		1	/* insert state */
+#define	PFSYNC_ACT_UPD		2	/* update state */
+#define	PFSYNC_ACT_DEL		3	/* delete state */
+#define	PFSYNC_ACT_UPD_C	4	/* "compressed" state update */
+#define	PFSYNC_ACT_DEL_C	5	/* "compressed" state delete */
+#define	PFSYNC_ACT_INS_F	6	/* insert fragment */
+#define	PFSYNC_ACT_DEL_F	7	/* delete fragments */
+#define	PFSYNC_ACT_UREQ		8	/* request "uncompressed" state */
+#define PFSYNC_ACT_BUS		9	/* Bulk Update Status */
+#define PFSYNC_ACT_TDB_UPD	10	/* TDB replay counter update */
+#define	PFSYNC_ACT_MAX		11
+	u_int8_t count;
+	u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
+} __packed;
+
+#define PFSYNC_BULKPACKETS	1	/* # of packets per timeout */
+#define PFSYNC_MAX_BULKTRIES	12
+#define PFSYNC_HDRLEN	sizeof(struct pfsync_header)
+#define	PFSYNC_ACTIONS \
+	"CLR ST", "INS ST", "UPD ST", "DEL ST", \
+	"UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \
+	"UPD REQ", "BLK UPD STAT", "TDB UPD"
+
+#define PFSYNC_DFLTTL		255
+
+struct pfsyncstats {
+	u_int64_t	pfsyncs_ipackets;	/* total input packets, IPv4 */
+	u_int64_t	pfsyncs_ipackets6;	/* total input packets, IPv6 */
+	u_int64_t	pfsyncs_badif;		/* not the right interface */
+	u_int64_t	pfsyncs_badttl;		/* TTL is not PFSYNC_DFLTTL */
+	u_int64_t	pfsyncs_hdrops;		/* packets shorter than hdr */
+	u_int64_t	pfsyncs_badver;		/* bad (incl unsupp) version */
+	u_int64_t	pfsyncs_badact;		/* bad action */
+	u_int64_t	pfsyncs_badlen;		/* data length does not match */
+	u_int64_t	pfsyncs_badauth;	/* bad authentication */
+	u_int64_t	pfsyncs_stale;		/* stale state */
+	u_int64_t	pfsyncs_badval;		/* bad values */
+	u_int64_t	pfsyncs_badstate;	/* insert/lookup failed */
+
+	u_int64_t	pfsyncs_opackets;	/* total output packets, IPv4 */
+	u_int64_t	pfsyncs_opackets6;	/* total output packets, IPv6 */
+	u_int64_t	pfsyncs_onomem;		/* no memory for an mbuf */
+	u_int64_t	pfsyncs_oerrors;	/* ip output error */
+};
+
+/*
+ * Configuration structure for SIOCSETPFSYNC / SIOCGETPFSYNC
+ */
+struct pfsyncreq {
+	char		 pfsyncr_syncdev[IFNAMSIZ];
+	struct in_addr	 pfsyncr_syncpeer;
+	int		 pfsyncr_maxupdates;
+	int		 pfsyncr_authlevel;
+};
+
+
+/* for copies to/from network */
+#define pf_state_peer_hton(s,d) do {		\
+	(d)->seqlo = htonl((s)->seqlo);		\
+	(d)->seqhi = htonl((s)->seqhi);		\
+	(d)->seqdiff = htonl((s)->seqdiff);	\
+	(d)->max_win = htons((s)->max_win);	\
+	(d)->mss = htons((s)->mss);		\
+	(d)->state = (s)->state;		\
+	(d)->wscale = (s)->wscale;		\
+	if ((s)->scrub) {						\
+		(d)->scrub.pfss_flags = 				\
+		    htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP);	\
+		(d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl;		\
+		(d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
+		(d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;	\
+	}								\
+} while (0)
+
+#define pf_state_peer_ntoh(s,d) do {		\
+	(d)->seqlo = ntohl((s)->seqlo);		\
+	(d)->seqhi = ntohl((s)->seqhi);		\
+	(d)->seqdiff = ntohl((s)->seqdiff);	\
+	(d)->max_win = ntohs((s)->max_win);	\
+	(d)->mss = ntohs((s)->mss);		\
+	(d)->state = (s)->state;		\
+	(d)->wscale = (s)->wscale;		\
+	if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && 	\
+	    (d)->scrub != NULL) {					\
+		(d)->scrub->pfss_flags =				\
+		    ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP;	\
+		(d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl;		\
+		(d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
+	}								\
+} while (0)
+
+#define pf_state_host_hton(s,d) do {				\
+	bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr));	\
+	(d)->port = (s)->port;					\
+} while (0)
+
+#define pf_state_host_ntoh(s,d) do {				\
+	bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr));	\
+	(d)->port = (s)->port;					\
+} while (0)
+
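+/* Convert a 64-bit counter to/from two 32-bit words in network byte order. */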
+#define pf_state_counter_hton(s,d) do {				\
+	d[0] = htonl((s>>32)&0xffffffff);			\
+	d[1] = htonl(s&0xffffffff);				\
+} while (0)
+
+#define pf_state_counter_ntoh(s,d) do {				\
+	d = ntohl(s[0]);					\
+	d = d<<32;						\
+	d += ntohl(s[1]);					\
+} while (0)
+
+#ifdef _KERNEL
+void pfsync_input(struct mbuf *, ...);
+int pfsync_clear_states(u_int32_t, char *);
+int pfsync_pack_state(u_int8_t, struct pf_state *, int);
+#define pfsync_insert_state(st)	do {				\
+	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||	\
+	    (st->state_key->proto == IPPROTO_PFSYNC))			\
+		st->sync_flags |= PFSTATE_NOSYNC;		\
+	else if (!st->sync_flags)				\
+		pfsync_pack_state(PFSYNC_ACT_INS, (st), 	\
+		    PFSYNC_FLAG_COMPRESS);			\
+	st->sync_flags &= ~PFSTATE_FROMSYNC;			\
+} while (0)
+#define pfsync_update_state(st) do {				\
+	if (!st->sync_flags)					\
+		pfsync_pack_state(PFSYNC_ACT_UPD, (st), 	\
+		    PFSYNC_FLAG_COMPRESS);			\
+	st->sync_flags &= ~PFSTATE_FROMSYNC;			\
+} while (0)
+#define pfsync_delete_state(st) do {				\
+	if (!st->sync_flags)					\
+		pfsync_pack_state(PFSYNC_ACT_DEL, (st),		\
+		    PFSYNC_FLAG_COMPRESS);			\
+} while (0)
+#ifdef NOTYET
+int pfsync_update_tdb(struct tdb *, int);
+#endif /* NOTYET */
+#endif
+
+#endif /* _NET_IF_PFSYNC_H_ */
Index: sys/dist/pf/net/pf.c
===================================================================
RCS file: /cvsroot/src/sys/dist/pf/net/pf.c,v
retrieving revision 1.56
diff -u -r1.56 pf.c
--- sys/dist/pf/net/pf.c	28 Jul 2009 18:15:26 -0000	1.56
+++ sys/dist/pf/net/pf.c	1 Aug 2009 14:53:14 -0000
@@ -42,11 +42,7 @@
 #include "bpfilter.h"
 #include "pflog.h"
 
-#ifndef __NetBSD__
 #include "pfsync.h"
-#else
-#define NPFSYNC	0
-#endif /* __NetBSD__ */
 
 #include <sys/param.h>
 #include <sys/systm.h>
Index: sys/dist/pf/net/pf_ioctl.c
===================================================================
RCS file: /cvsroot/src/sys/dist/pf/net/pf_ioctl.c,v
retrieving revision 1.35
diff -u -r1.35 pf_ioctl.c
--- sys/dist/pf/net/pf_ioctl.c	28 Jul 2009 18:15:26 -0000	1.35
+++ sys/dist/pf/net/pf_ioctl.c	1 Aug 2009 14:53:15 -0000
@@ -44,11 +44,7 @@
 #include "opt_pfil_hooks.h"
 #endif
 
-#ifndef __NetBSD__
 #include "pfsync.h"
-#else
-#define NPFSYNC 0
-#endif /* __NetBSD__ */
 
 #include <sys/param.h>
 #include <sys/systm.h>
Index: sys/net/files.pf
===================================================================
RCS file: /cvsroot/src/sys/net/files.pf,v
retrieving revision 1.4
diff -u -r1.4 files.pf
--- sys/net/files.pf	18 Jun 2008 09:06:28 -0000	1.4
+++ sys/net/files.pf	1 Aug 2009 14:53:16 -0000
@@ -2,10 +2,10 @@
 
 defpseudo pf:		ifnet
 defpseudo pflog:	ifnet
-#defpseudo pfsync:	ifnet
+defpseudo pfsync:	ifnet
 
 file	dist/pf/net/if_pflog.c		pflog		needs-flag
-#file	dist/pf/net/if_pfsync.c		pfsync		needs-flag
+file	dist/pf/net/if_pfsync.c		pfsync		needs-flag
 
 file	dist/pf/net/pf.c		pf		needs-flag
 file	dist/pf/net/pf_if.c		pf
Index: sys/netinet/in.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in.h,v
retrieving revision 1.85
diff -u -r1.85 in.h
--- sys/netinet/in.h	17 Jul 2009 22:02:54 -0000	1.85
+++ sys/netinet/in.h	1 Aug 2009 14:53:17 -0000
@@ -103,6 +103,7 @@
 #define	IPPROTO_IPCOMP		108		/* IP Payload Comp. Protocol */
 #define	IPPROTO_VRRP		112		/* VRRP RFC 2338 */
 #define	IPPROTO_CARP		112		/* Common Address Resolution Protocol */
+#define	IPPROTO_PFSYNC		240		/* PFSYNC */
 #define	IPPROTO_RAW		255		/* raw IP packet */
 #define	IPPROTO_MAX		256
 
Index: sys/netinet/in_proto.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_proto.c,v
retrieving revision 1.97
diff -u -r1.97 in_proto.c
--- sys/netinet/in_proto.c	28 Feb 2009 18:31:12 -0000	1.97
+++ sys/netinet/in_proto.c	1 Aug 2009 14:53:17 -0000
@@ -144,6 +144,12 @@
 #include <netinet/ip_carp.h>
 #endif
 
+#include "pfsync.h"
+#if NPFSYNC > 0
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+#endif
+
 #include "etherip.h"
 #if NETHERIP > 0
 #include <netinet/ip_etherip.h>
@@ -358,6 +364,17 @@
 	.pr_usrreq = rip_usrreq,
 },
 #endif /* NCARP > 0 */
+#if NPFSYNC > 0
+{	.pr_type = SOCK_RAW,
+	.pr_domain = &inetdomain,
+	.pr_protocol = IPPROTO_PFSYNC,
+	.pr_flags	 = PR_ATOMIC|PR_ADDR,
+	.pr_input	 = pfsync_input,
+	.pr_output	 = rip_output,
+	.pr_ctloutput = rip_ctloutput,
+	.pr_usrreq	 = rip_usrreq,
+},
+#endif /* NPFSYNC > 0 */
 {	.pr_type = SOCK_RAW,
 	.pr_domain = &inetdomain,
 	.pr_protocol = IPPROTO_IGMP,
Index: sys/rump/librump/rumpnet/opt/pfsync.h
===================================================================
RCS file: sys/rump/librump/rumpnet/opt/pfsync.h
diff -N sys/rump/librump/rumpnet/opt/pfsync.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/rump/librump/rumpnet/opt/pfsync.h	1 Aug 2009 14:53:18 -0000
@@ -0,0 +1,2 @@
+/*	$NetBSD: pfsync.h Exp $	*/
+
Index: sys/sys/sockio.h
===================================================================
RCS file: /cvsroot/src/sys/sys/sockio.h,v
retrieving revision 1.28
diff -u -r1.28 sockio.h
--- sys/sys/sockio.h	11 Jan 2009 02:45:55 -0000	1.28
+++ sys/sys/sockio.h	1 Aug 2009 14:53:18 -0000
@@ -129,4 +129,7 @@
 #define	SIOCZIFDATA	_IOWR('i', 134, struct ifdatareq) /* get if_data then
 							     zero ctrs*/
 
+#define	SIOCSETPFSYNC	_IOW('i', 247, struct ifreq)
+#define	SIOCGETPFSYNC	_IOWR('i', 248, struct ifreq)
+
 #endif /* !_SYS_SOCKIO_H_ */
Index: usr.sbin/pf/man/man4/Makefile
===================================================================
RCS file: /cvsroot/src/usr.sbin/pf/man/man4/Makefile,v
retrieving revision 1.1
diff -u -r1.1 Makefile
--- usr.sbin/pf/man/man4/Makefile	14 Nov 2004 11:26:48 -0000	1.1
+++ usr.sbin/pf/man/man4/Makefile	1 Aug 2009 14:53:22 -0000
@@ -4,6 +4,6 @@
 
 .PATH:	${NETBSDSRCDIR}/dist/pf/share/man/man4
 
-MAN=	pf.4 pflog.4
+MAN=	pf.4 pflog.4 pfsync.4
 
 .include <bsd.man.mk>
