diff -urN linux-2.4.20/Documentation/Configure.help linux-2.4.20-ipvs-1.0.9/Documentation/Configure.help
--- linux-2.4.20/Documentation/Configure.help	Wed May 21 11:09:33 2003
+++ linux-2.4.20-ipvs-1.0.9/Documentation/Configure.help	Wed May 21 11:59:22 2003
@@ -3056,6 +3056,190 @@
   If you want to compile it as a module, say M here and read
   <file:Documentation/modules.txt>.  If unsure, say `N'.
 
+IP: virtual server support
+CONFIG_IP_VS
+  IP Virtual Server support will let you build a high-performance
+  virtual server based on a cluster of two or more real servers. This
+  option must be enabled for at least one of the clustered computers
+  that will take care of intercepting incoming connections to a
+  single IP address and scheduling them to real servers.
+
+  Three request dispatching techniques are implemented: virtual
+  server via NAT, virtual server via tunneling and virtual server
+  via direct routing. Several scheduling algorithms can be used to
+  choose which server a connection is directed to, so that load
+  balancing can be achieved among the servers.  For more
+  information and the administration program, please visit the
+  following URL:
+	http://www.linuxvirtualserver.org/
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IP virtual server debugging
+CONFIG_IP_VS_DEBUG
+  Say Y here if you want to get additional messages useful in
+  debugging the IP virtual server code. You can change the debug
+  level in /proc/sys/net/ipv4/vs/debug_level
+
+IPVS connection hash table size (the Nth power of 2)
+CONFIG_IP_VS_TAB_BITS
+  The IPVS connection hash table uses the chaining scheme to handle
+  hash collisions. Using a big IPVS connection hash table will greatly
+  reduce conflicts when there are hundreds of thousands of connections
+  in the hash table.
+
+  Note the table size must be a power of 2. The table size will be
+  2 raised to the power of the number you input. The number to choose
+  is from 8 to 20; the default number is 12, which means the table
+  size is 4096. Don't choose too small a number, otherwise you will
+  lose performance. You can adapt the table size yourself, according
+  to your virtual server application. It is good to set the table size
+  not far less than the number of connections per second multiplied by
+  the average lifetime of a connection in the table.  For example, if
+  your virtual server gets 200 connections per second and a connection
+  lasts for 200 seconds on average in the connection table, the table
+  size should be not far less than 200x200, so it is good to set the
+  table size to 32768 (2**15).
+
+  Also note that each connection effectively occupies 128 bytes and
+  each hash entry uses 8 bytes, so you can estimate how much memory is
+  needed for your box.
+
+IPVS: round-robin scheduling
+CONFIG_IP_VS_RR
+  The round-robin scheduling algorithm simply directs network
+  connections to different real servers in a round-robin manner.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: weighted round-robin scheduling
+CONFIG_IP_VS_WRR
+  The weighted round-robin scheduling algorithm directs network
+  connections to different real servers based on server weights
+  in a round-robin manner. Servers with higher weights receive
+  new connections before those with lower weights, servers with
+  higher weights get more connections than those with lower
+  weights, and servers with equal weights get equal connections.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: least-connection scheduling
+CONFIG_IP_VS_LC
+  The least-connection scheduling algorithm directs network
+  connections to the server with the least number of active 
+  connections.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: weighted least-connection scheduling
+CONFIG_IP_VS_WLC
+  The weighted least-connection scheduling algorithm directs network
+  connections to the server with the least active connections
+  normalized by the server weight.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: locality-based least-connection scheduling
+CONFIG_IP_VS_LBLC
+  The locality-based least-connection scheduling algorithm is for
+  destination IP load balancing. It is usually used in cache clusters.
+  This algorithm usually directs packets destined for an IP address to
+  its server if the server is alive and not overloaded. If the server
+  is overloaded (its number of active connections is larger than its
+  weight) and there is a server at half load, then the weighted
+  least-connection server is allocated to this IP address.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: locality-based least-connection with replication scheduling
+CONFIG_IP_VS_LBLCR
+  The locality-based least-connection with replication scheduling
+  algorithm is also for destination IP load balancing. It is
+  usually used in cache clusters. It differs from the LBLC scheduling
+  as follows: the load balancer maintains mappings from a target
+  to a set of server nodes that can serve the target. Requests for
+  a target are assigned to the least-connection node in the target's
+  server set. If all the nodes in the server set are overloaded,
+  it picks a least-connection node in the cluster and adds it
+  to the server set for the target. If the server set has not been
+  modified for the specified time, the most loaded node is removed
+  from the server set, in order to avoid a high degree of replication.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: destination hashing scheduling
+CONFIG_IP_VS_DH
+  The destination hashing scheduling algorithm assigns network
+  connections to the servers through looking up a statically assigned
+  hash table by their destination IP addresses.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: source hashing scheduling
+CONFIG_IP_VS_SH
+  The source hashing scheduling algorithm assigns network
+  connections to the servers through looking up a statically assigned
+  hash table by their source IP addresses.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: shortest expected delay scheduling
+CONFIG_IP_VS_SED
+  The shortest expected delay scheduling algorithm assigns network
+  connections to the server with the shortest expected delay. The
+  expected delay that the job will experience is (Ci + 1) / Ui if
+  sent to the ith server, in which Ci is the number of connections
+  on the ith server and Ui is the fixed service rate (weight)
+  of the ith server.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: never queue scheduling
+CONFIG_IP_VS_NQ
+  The never queue scheduling algorithm adopts a two-speed model.
+  When there is an idle server available, the job will be sent to
+  the idle server, instead of waiting for a fast one. When there
+  is no idle server available, the job will be sent to the server
+  that minimizes its expected delay (the Shortest Expected Delay
+  scheduling algorithm).
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
+IPVS: FTP protocol helper
+CONFIG_IP_VS_FTP
+  FTP is a protocol that transfers IP addresses and/or port numbers in
+  the payload. In the virtual server via Network Address Translation,
+  the IP address and port number of real servers cannot be sent to
+  clients in FTP connections directly, so an FTP protocol helper is
+  required for tracking the connection and mangling them back to those
+  of the virtual service.
+
+  If you want to compile it in kernel, say Y. If you want to compile
+  it as a module, say M here and read Documentation/modules.txt. If
+  unsure, say N.
+
 SYN flood protection
 CONFIG_SYN_COOKIES
   Normal TCP/IP networking is open to an attack known as "SYN
diff -urN linux-2.4.20/include/net/ip_vs.h linux-2.4.20-ipvs-1.0.9/include/net/ip_vs.h
--- linux-2.4.20/include/net/ip_vs.h	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/include/net/ip_vs.h	Wed May 21 11:53:39 2003
@@ -0,0 +1,944 @@
+/*
+ *      IP Virtual Server
+ *      data structure and functionality definitions
+ */
+
+#ifndef _IP_VS_H
+#define _IP_VS_H
+
+#include <asm/types.h>          /* For __uXX types */
+
+#define IP_VS_VERSION_CODE            0x010009	/* 1.0.9, one byte per field */
+#define NVERSION(version)  /* yields three comma-separated values */ \
+	(((version) >> 16) & 0xFF), /* major */ \
+	(((version) >> 8) & 0xFF),  /* minor */ \
+	((version) & 0xFF)          /* patch */
+
+/*
+ *      Virtual Service Flags
+ */
+#define IP_VS_SVC_F_PERSISTENT        0x0001    /* persistent port */
+#define IP_VS_SVC_F_HASHED            0x0002    /* hashed entry */
+
+/*
+ *      Destination Server Flags
+ */
+#define IP_VS_DEST_F_AVAILABLE        0x0001    /* Available tag */
+
+/*
+ *      IPVS sync daemon states
+ */
+#define IP_VS_STATE_NONE    0           /* daemon is stopped */
+#define IP_VS_STATE_MASTER  1           /* started as master */
+#define IP_VS_STATE_BACKUP  2           /* started as backup */
+
+/*
+ *      IPVS socket options
+ */
+#define IP_VS_BASE_CTL		(64+1024+64)		/* base */
+
+#define IP_VS_SO_SET_NONE	IP_VS_BASE_CTL	        /* just peek */
+#define IP_VS_SO_SET_INSERT	(IP_VS_BASE_CTL+1)
+#define IP_VS_SO_SET_ADD	(IP_VS_BASE_CTL+2)
+#define IP_VS_SO_SET_EDIT	(IP_VS_BASE_CTL+3)
+#define IP_VS_SO_SET_DEL	(IP_VS_BASE_CTL+4)
+#define IP_VS_SO_SET_FLUSH	(IP_VS_BASE_CTL+5)
+#define IP_VS_SO_SET_LIST	(IP_VS_BASE_CTL+6)
+#define IP_VS_SO_SET_ADDDEST	(IP_VS_BASE_CTL+7)
+#define IP_VS_SO_SET_DELDEST	(IP_VS_BASE_CTL+8)
+#define IP_VS_SO_SET_EDITDEST	(IP_VS_BASE_CTL+9)
+#define IP_VS_SO_SET_TIMEOUTS	(IP_VS_BASE_CTL+10)
+#define IP_VS_SO_SET_STARTDAEMON (IP_VS_BASE_CTL+11)
+#define IP_VS_SO_SET_STOPDAEMON (IP_VS_BASE_CTL+12)
+#define IP_VS_SO_SET_RESTORE    (IP_VS_BASE_CTL+13)
+#define IP_VS_SO_SET_SAVE       (IP_VS_BASE_CTL+14)
+#define IP_VS_SO_SET_ZERO	(IP_VS_BASE_CTL+15)
+#define IP_VS_SO_SET_MAX	IP_VS_SO_SET_ZERO
+
+#define IP_VS_SO_GET_VERSION	IP_VS_BASE_CTL
+#define IP_VS_SO_GET_INFO	(IP_VS_BASE_CTL+1)
+#define IP_VS_SO_GET_SERVICES	(IP_VS_BASE_CTL+2)
+#define IP_VS_SO_GET_SERVICE	(IP_VS_BASE_CTL+3)
+#define IP_VS_SO_GET_DESTS	(IP_VS_BASE_CTL+4)
+#define IP_VS_SO_GET_DEST	(IP_VS_BASE_CTL+5)	/* not used now */
+#define IP_VS_SO_GET_TIMEOUTS	(IP_VS_BASE_CTL+6)
+#define IP_VS_SO_GET_DAEMON	(IP_VS_BASE_CTL+7)
+#define IP_VS_SO_GET_MAX	IP_VS_SO_GET_DAEMON
+
+
+/*
+ *      IPVS Connection Flags
+ */
+#define IP_VS_CONN_F_FWD_MASK         0x0007    /* mask for the fwd methods */
+#define IP_VS_CONN_F_MASQ	      0x0000    /* masquerading */
+#define IP_VS_CONN_F_LOCALNODE	      0x0001    /* local node */
+#define IP_VS_CONN_F_TUNNEL	      0x0002    /* tunneling */
+#define IP_VS_CONN_F_DROUTE           0x0003    /* direct routing */
+#define IP_VS_CONN_F_BYPASS           0x0004    /* cache bypass */
+#define IP_VS_CONN_F_HASHED	      0x0040	/* hashed entry */
+#define IP_VS_CONN_F_NOOUTPUT         0x0080    /* no output packets */
+#define IP_VS_CONN_F_INACTIVE         0x0100    /* not established */
+#define IP_VS_CONN_F_OUT_SEQ          0x0200    /* must do output seq adjust */
+#define IP_VS_CONN_F_IN_SEQ           0x0400    /* must do input seq adjust */
+#define IP_VS_CONN_F_SEQ_MASK         0x0600    /* in/out sequence mask */
+#define IP_VS_CONN_F_NO_CPORT         0x0800    /* no client port set yet */
+
+/* Move it to better place one day, for now keep it unique */
+#define NFC_IPVS_PROPERTY	0x10000
+
+#define IP_VS_SCHEDNAME_MAXLEN         16
+#define IP_VS_IFNAME_MAXLEN            16
+
+struct ip_vs_rule_user {	/* one flat record: global + service + dest options */
+	/* global options */
+	int             tcp_timeout;    /* timeout values */
+	int             tcp_fin_timeout;
+	int             udp_timeout;
+	int             state;          /* sync daemon state (IP_VS_STATE_*) */
+	char            mcast_ifn[IP_VS_IFNAME_MAXLEN];
+					/* multicast interface name */
+
+	/* virtual service options */
+	u_int16_t	protocol;
+	u_int32_t	vaddr;          /* virtual address */
+	u_int16_t	vport;          /* virtual port */
+	u_int32_t       vfwmark;        /* firewall mark of virtual service */
+	char            sched_name[IP_VS_SCHEDNAME_MAXLEN];
+	unsigned	vs_flags;       /* virtual service flags */
+	unsigned        timeout;        /* persistent timeout in ticks */
+	u_int32_t	netmask;        /* persistent netmask */
+
+	/* destination specific options */
+	u_int32_t	daddr;          /* destination address */
+	u_int16_t	dport;          /* destination port */
+	unsigned        conn_flags;     /* destination flags */
+	int             weight;         /* destination weight */
+};
+
+
+/*
+ *	IPVS statistics object (for user space)
+ */
+struct ip_vs_stats_user
+{
+	__u32                   conns;          /* connections scheduled */
+	__u32                   inpkts;         /* incoming packets */
+	__u32                   outpkts;        /* outgoing packets */
+	__u64                   inbytes;        /* incoming bytes */
+	__u64                   outbytes;       /* outgoing bytes */
+
+	__u32			cps;		/* current connection rate */
+	__u32			inpps;		/* current in packet rate */
+	__u32			outpps;		/* current out packet rate */
+	__u32			inbps;		/* current in byte rate */
+	__u32			outbps;		/* current out byte rate */
+};
+
+
+/* The argument to IP_VS_SO_GET_INFO */
+struct ip_vs_getinfo {
+	/* version number */
+	unsigned int	version;
+
+	/* size of connection hash table */
+	unsigned int	size;
+
+	/* number of virtual services */
+	unsigned int	num_services;
+};
+
+/* The argument to IP_VS_SO_GET_SERVICE */
+struct ip_vs_service_user {
+	/* which service: user fills this in */
+	u_int16_t	protocol;
+	u_int32_t	addr;           /* virtual address */
+	u_int16_t	port;           /* virtual port */
+	u_int32_t       fwmark;         /* firewall mark of virtual service */
+
+	/* service options */
+	char            sched_name[IP_VS_SCHEDNAME_MAXLEN]; /* scheduler name */
+	unsigned	flags;          /* virtual service flags */
+	unsigned        timeout;        /* persistent timeout in ticks */
+	u_int32_t	netmask;        /* persistent netmask */
+
+	/* number of real servers */
+	unsigned int    num_dests;
+
+	/* statistics */
+	struct ip_vs_stats_user stats;
+};
+
+struct ip_vs_dest_user {
+	u_int32_t	addr;           /* destination address */
+	u_int16_t	port;
+	unsigned	flags;		/* destination flags */
+	int		weight;         /* destination weight */
+	u_int32_t	activeconns;	/* active connections */
+	u_int32_t	inactconns;	/* inactive connections */
+
+	/* statistics */
+	struct ip_vs_stats_user stats;
+};
+
+/* The argument to IP_VS_SO_GET_DESTS */
+struct ip_vs_get_dests {
+	/* which service: user fills this in */
+	u_int16_t	protocol;
+	u_int32_t	addr;           /* virtual address */
+	u_int16_t	port;           /* virtual port */
+	u_int32_t       fwmark;         /* firewall mark of virtual service */
+
+	/* number of real servers */
+	unsigned int    num_dests;
+
+	/* the real servers (zero-length trailing array; presumably num_dests entries) */
+	struct ip_vs_dest_user entrytable[0];
+};
+
+/* The argument to IP_VS_SO_GET_SERVICES */
+struct ip_vs_get_services {
+	/* number of virtual services */
+	unsigned int num_services;
+
+	/* service table */
+	struct ip_vs_service_user entrytable[0];
+};
+
+/* The argument to IP_VS_SO_GET_TIMEOUTS */
+struct ip_vs_timeout_user {
+	int             tcp_timeout;
+	int             tcp_fin_timeout;
+	int             udp_timeout;
+};
+
+/* The argument to IP_VS_SO_GET_DAEMON */
+struct ip_vs_daemon_user {
+	int	state;				/* sync daemon state */
+	char	mcast_ifn[IP_VS_IFNAME_MAXLEN];	/* multicast interface name */
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/list.h>                 /* for struct list_head */
+#include <linux/spinlock.h>             /* for struct rwlock_t */
+#include <linux/skbuff.h>               /* for struct sk_buff */
+#include <linux/ip.h>                   /* for struct iphdr */
+#include <asm/atomic.h>                 /* for struct atomic_t */
+#include <linux/netdevice.h>		/* for struct neighbour; */
+#include <net/dst.h>			/* for struct dst_entry */
+#include <net/route.h>			/* for ip_route_output */
+#include <net/tcp.h>
+#include <net/udp.h>
+
+
+#ifdef CONFIG_IP_VS_DEBUG	/* debug build: messages gated at run time */
+extern int ip_vs_get_debug_level(void);
+#define IP_VS_DBG(level, msg...)	/* print when level <= current */ \
+    do {						\
+	    if ((level) <= ip_vs_get_debug_level())	\
+		    printk(KERN_DEBUG "IPVS: " msg);	\
+    } while (0)
+#define IP_VS_DBG_RL(msg...)	/* print when net_ratelimit() allows */ \
+    do {						\
+	    if (net_ratelimit())			\
+		    printk(KERN_DEBUG "IPVS: " msg);	\
+    } while (0)
+#else	/* NO DEBUGGING at ALL */
+#define IP_VS_DBG(level, msg...)  do {} while (0)
+#define IP_VS_DBG_RL(msg...)  do {} while (0)
+#endif
+
+#define IP_VS_BUG() BUG()
+#define IP_VS_ERR(msg...) printk(KERN_ERR "IPVS: " msg)
+#define IP_VS_INFO(msg...) printk(KERN_INFO "IPVS: " msg)
+#define IP_VS_WARNING(msg...) \
+	printk(KERN_WARNING "IPVS: " msg)
+#define IP_VS_ERR_RL(msg...)				\
+    do {						\
+	    if (net_ratelimit())			\
+		    printk(KERN_ERR "IPVS: " msg);	\
+    } while (0)
+
+#ifdef CONFIG_IP_VS_DEBUG	/* trace function entry/exit when level <= debug level */
+#define EnterFunction(level)						\
+    do {								\
+	    if ((level) <= ip_vs_get_debug_level())			\
+		    printk(KERN_DEBUG "Enter: %s, %s line %i\n",	\
+			   __FUNCTION__, __FILE__, __LINE__);		\
+    } while (0)
+#define LeaveFunction(level)                                            \
+    do {                                                                \
+	    if ((level) <= ip_vs_get_debug_level())                     \
+			printk(KERN_DEBUG "Leave: %s, %s line %i\n",    \
+			       __FUNCTION__, __FILE__, __LINE__);       \
+    } while (0)
+#else	/* tracing compiled out: both macros expand to an empty statement */
+#define EnterFunction(level)   do {} while (0)
+#define LeaveFunction(level)   do {} while (0)
+#endif
+
+
+/*
+ *      The port number of FTP service (in network order).
+ */
+#define FTPPORT  __constant_htons(21)
+#define FTPDATA  __constant_htons(20)
+
+
+/*
+ *      IPVS sysctl variables under the /proc/sys/net/ipv4/vs/
+ */
+#define NET_IPV4_VS              21
+
+enum {	/* ids of the sysctl entries under /proc/sys/net/ipv4/vs/ */
+	NET_IPV4_VS_DEBUG_LEVEL=1,
+	NET_IPV4_VS_AMEMTHRESH=2,
+	NET_IPV4_VS_AMDROPRATE=3,
+	NET_IPV4_VS_DROP_ENTRY=4,
+	NET_IPV4_VS_DROP_PACKET=5,
+	NET_IPV4_VS_SECURE_TCP=6,
+	NET_IPV4_VS_TO_ES=7,
+	NET_IPV4_VS_TO_SS=8,
+	NET_IPV4_VS_TO_SR=9,
+	NET_IPV4_VS_TO_FW=10,
+	NET_IPV4_VS_TO_TW=11,
+	NET_IPV4_VS_TO_CL=12,
+	NET_IPV4_VS_TO_CW=13,
+	NET_IPV4_VS_TO_LA=14,
+	NET_IPV4_VS_TO_LI=15,
+	NET_IPV4_VS_TO_SA=16,
+	NET_IPV4_VS_TO_UDP=17,
+	NET_IPV4_VS_TO_ICMP=18,
+	NET_IPV4_VS_LBLC_EXPIRE=19,
+	NET_IPV4_VS_LBLCR_EXPIRE=20,
+	NET_IPV4_VS_CACHE_BYPASS=22,	/* NOTE(review): value 21 is not assigned here - confirm this gap is intentional */
+	NET_IPV4_VS_EXPIRE_NODEST_CONN=23,
+	NET_IPV4_VS_SYNC_THRESHOLD=24,
+	NET_IPV4_VS_NAT_ICMP_SEND=25,
+	NET_IPV4_VS_LAST	/* implicit value: one past the previous entry (26) */
+};
+
+
+/*
+ *      IPVS State Values
+ */
+enum {
+	IP_VS_S_NONE = 0,
+	IP_VS_S_ESTABLISHED,
+	IP_VS_S_SYN_SENT,
+	IP_VS_S_SYN_RECV,
+	IP_VS_S_FIN_WAIT,
+	IP_VS_S_TIME_WAIT,
+	IP_VS_S_CLOSE,
+	IP_VS_S_CLOSE_WAIT,
+	IP_VS_S_LAST_ACK,
+	IP_VS_S_LISTEN,
+	IP_VS_S_SYNACK,
+	IP_VS_S_UDP,
+	IP_VS_S_ICMP,
+	IP_VS_S_LAST
+};
+
+
+struct ip_vs_timeout_table {
+	atomic_t refcnt;
+	int scale;
+	int timeout[IP_VS_S_LAST+1];
+};
+
+
+/*
+ *	Transport protocol header
+ */
+union ip_vs_tphdr {
+	unsigned char *raw;
+	struct udphdr *uh;
+	struct tcphdr *th;
+	struct icmphdr *icmph;
+	__u16 *portp;
+};
+
+
+/*
+ *	Delta sequence info structure
+ *	Each ip_vs_conn has 2 (output AND input seq. changes).
+ *      Only used in the VS/NAT.
+ */
+struct ip_vs_seq {
+	__u32           init_seq;       /* Add delta from this seq */
+	__u32           delta;          /* Delta in sequence numbers */
+	__u32           previous_delta; /* Delta in sequence numbers
+					   before last resized pkt */
+};
+
+
+/*
+ *	IPVS statistics object
+ */
+struct ip_vs_stats
+{
+	__u32                   conns;          /* connections scheduled */
+	__u32                   inpkts;         /* incoming packets */
+	__u32                   outpkts;        /* outgoing packets */
+	__u64                   inbytes;        /* incoming bytes */
+	__u64                   outbytes;       /* outgoing bytes */
+
+	__u32			cps;		/* current connection rate */
+	__u32			inpps;		/* current in packet rate */
+	__u32			outpps;		/* current out packet rate */
+	__u32			inbps;		/* current in byte rate */
+	__u32			outbps;		/* current out byte rate */
+
+	spinlock_t              lock;           /* spin lock */
+};
+
+
+/*
+ *	IP_VS structure allocated for each dynamically scheduled connection
+ */
+struct ip_vs_conn {
+	struct list_head        c_list;         /* hashed list heads */
+
+	/* Protocol, addresses and port numbers */
+	__u32                   caddr;          /* client address */
+	__u32                   vaddr;          /* virtual address */
+	__u32                   daddr;          /* destination address */
+	__u16                   cport;
+	__u16                   vport;
+	__u16                   dport;
+	__u16                   protocol;       /* Which protocol (TCP/UDP) */
+
+	/* counter and timer */
+	atomic_t		refcnt;		/* reference count */
+	struct timer_list	timer;		/* Expiration timer */
+	volatile unsigned long	timeout;	/* timeout */
+	struct ip_vs_timeout_table *timeout_table;
+
+	/* Flags and state transition */
+	spinlock_t              lock;           /* lock for state transition */
+	volatile __u16          flags;          /* status flags */
+	volatile __u16          state;          /* state info */
+
+	/* Control members */
+	struct ip_vs_conn       *control;       /* Master control connection */
+	atomic_t                n_control;      /* Number of controlled ones */
+	struct ip_vs_dest       *dest;          /* real server */
+	atomic_t                in_pkts;        /* incoming packet counter */
+
+	/* packet transmitter for different forwarding methods */
+	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp);
+
+	/* Note: we can group the following members into a structure,
+	   in order to save more space, and the following members are
+	   only used in VS/NAT anyway */
+	struct ip_vs_app        *app;           /* bound ip_vs_app object */
+	void                    *app_data;      /* Application private data */
+	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
+	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
+};
+
+
+/*
+ *	The information about the virtual service offered to the net
+ *	and the forwarding entries
+ */
+struct ip_vs_service {
+	struct list_head	s_list;   /* for normal service table */
+	struct list_head	f_list;   /* for fwmark-based service table */
+	atomic_t		refcnt;   /* reference counter */
+	atomic_t		usecnt;   /* use counter */
+
+	__u16			protocol; /* which protocol (TCP/UDP) */
+	__u32			addr;	  /* IP address for virtual service */
+	__u16			port;	  /* port number for the service */
+	__u32                   fwmark;   /* firewall mark of the service */
+	unsigned		flags;	  /* service status flags */
+	unsigned		timeout;  /* persistent timeout in ticks */
+	__u32			netmask;  /* grouping granularity */
+
+	struct list_head	destinations;  /* real server d-linked list */
+	__u32			num_dests;     /* number of servers */
+	struct ip_vs_stats      stats;         /* statistics for the service */
+
+	/* for scheduling */
+	struct ip_vs_scheduler	*scheduler;    /* bound scheduler object */
+	rwlock_t		sched_lock;    /* lock sched_data */
+	void			*sched_data;   /* scheduler application data */
+};
+
+
+/*
+ *	The real server destination forwarding entry
+ *	with ip address, port number, and so on.
+ */
+struct ip_vs_dest {
+	struct list_head	n_list;   /* for the dests in the service */
+	struct list_head	d_list;   /* for table with all the dests */
+
+	__u32			addr;	  /* IP address of real server */
+	__u16			port;	  /* port number of the service */
+	unsigned		flags;	  /* dest status flags */
+	atomic_t		weight;	  /* server weight */
+	atomic_t		conn_flags;	/* flags to copy to conn */
+	atomic_t		activeconns;	/* active connections */
+	atomic_t		inactconns;     /* inactive connections */
+	atomic_t		refcnt;	        /* reference counter */
+	struct ip_vs_stats      stats;          /* statistics */
+
+	/* for destination cache */
+	spinlock_t		dst_lock;	/* lock dst_cache */
+	struct dst_entry	*dst_cache;	/* destination cache entry */
+	u32			dst_rtos;	/* RT_TOS(tos) for dst */
+
+	/* for virtual service */
+	struct ip_vs_service    *svc;     /* service that it belongs to */
+	__u16			protocol; /* which protocol (TCP/UDP) */
+	__u32			vaddr;	  /* IP address for virtual service */
+	__u16			vport;	  /* port number for the service */
+	__u32                   vfwmark;  /* firewall mark of the service */
+};
+
+
+/*
+ *	The scheduler object
+ */
+struct ip_vs_scheduler {
+	struct list_head        n_list;   /* d-linked list head */
+	char			*name;    /* scheduler name */
+	atomic_t                refcnt;   /* reference counter */
+	struct module		*module;  /* THIS_MODULE/NULL */
+
+	/* scheduler initializing service */
+	int (*init_service)(struct ip_vs_service *svc);
+	/* scheduling service finish */
+	int (*done_service)(struct ip_vs_service *svc);
+	/* scheduler updating service */
+	int (*update_service)(struct ip_vs_service *svc);
+
+	/* selecting a server from the given service */
+	struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
+				       struct iphdr *iph);
+};
+
+
+/*
+ *	The application module object
+ */
+struct ip_vs_app
+{
+	struct list_head        n_list;   /* d-linked list head */
+	char                    *name;    /* name of application module */
+	unsigned                type;     /* type = proto<<16 | port
+					     (host byte order)*/
+	struct module		*module;  /* THIS_MODULE/NULL */
+
+	/* ip_vs_app initializer */
+	int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
+	/* ip_vs_app finish */
+	int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *);
+	/* output hook */
+	int (*pkt_out)(struct ip_vs_app *,
+		       struct ip_vs_conn *, struct sk_buff *);
+	/* input hook */
+	int (*pkt_in)(struct ip_vs_app *,
+		      struct ip_vs_conn *, struct sk_buff *);
+};
+
+
+/*
+ *      IPVS core functions
+ *      (from ip_vs_core.c)
+ */
+extern const char *ip_vs_proto_name(unsigned proto);
+extern unsigned int check_for_ip_vs_out(struct sk_buff **skb_p,
+					int (*okfn)(struct sk_buff *));
+
+
+/*
+ *     ip_vs_conn handling functions
+ *     (from ip_vs_conn.c)
+ */
+
+/*
+ *     IPVS connection entry hash table
+ */
+/* Fall back to 12 bits (4096 buckets) when the config gives no value. */
+#ifndef CONFIG_IP_VS_TAB_BITS
+#define CONFIG_IP_VS_TAB_BITS   12
+#endif
+/* Clamp the configured bit count into the supported range [8, 20]. */
+#if CONFIG_IP_VS_TAB_BITS < 8
+#define IP_VS_CONN_TAB_BITS	8
+#elif CONFIG_IP_VS_TAB_BITS > 20
+#define IP_VS_CONN_TAB_BITS	20
+#else
+#define IP_VS_CONN_TAB_BITS	CONFIG_IP_VS_TAB_BITS
+#endif
+/* Bucket count is a power of two, so the index mask is simply size - 1. */
+#define IP_VS_CONN_TAB_SIZE     (1 << IP_VS_CONN_TAB_BITS)
+#define IP_VS_CONN_TAB_MASK     (IP_VS_CONN_TAB_SIZE - 1)
+
+#define VS_STATE_INPUT	        0
+#define VS_STATE_OUTPUT	        4
+#define VS_STATE_INPUT_ONLY	8
+
+extern struct ip_vs_timeout_table vs_timeout_table;
+extern struct ip_vs_timeout_table vs_timeout_table_dos;
+
+extern struct ip_vs_conn *ip_vs_conn_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
+extern struct ip_vs_conn *ip_vs_conn_out_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
+
+/* put back the conn without restarting its timer */
+static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+	atomic_dec(&cp->refcnt);
+}
+extern void ip_vs_conn_put(struct ip_vs_conn *cp);
+
+extern struct ip_vs_conn *
+ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport,
+	       __u32 daddr, __u16 dport, unsigned flags,
+	       struct ip_vs_dest *dest);
+extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
+
+extern const char * ip_vs_state_name(int state);
+extern int ip_vs_set_state(struct ip_vs_conn *cp, int state_off,
+			   struct iphdr *iph, void *tp);
+extern int ip_vs_conn_listen(struct ip_vs_conn *cp);
+extern int ip_vs_check_template(struct ip_vs_conn *ct);
+extern void ip_vs_secure_tcp_set(int on);
+extern void ip_vs_random_dropentry(void);
+extern int ip_vs_conn_init(void);
+extern void ip_vs_conn_cleanup(void);
+
+static inline void ip_vs_control_del(struct ip_vs_conn *cp)
+{	/* Detach cp from its control connection and decrement that connection's n_control. */
+	struct ip_vs_conn *ctl_cp = cp->control;
+	if (!ctl_cp) {	/* cp has no controller: log the misuse and do nothing */
+		IP_VS_ERR("request control DEL for uncontrolled: "
+			  "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
+			  NIPQUAD(cp->caddr),ntohs(cp->cport),
+			  NIPQUAD(cp->vaddr),ntohs(cp->vport));
+		return;
+	}
+	/* debug trace prints the client address:port of cp and of its controller */
+	IP_VS_DBG(7, "DELeting control for: "
+		  "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
+		  NIPQUAD(cp->caddr),ntohs(cp->cport),
+		  NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+	/* the link is cleared even if the counter check below fails */
+	cp->control = NULL;
+	if (atomic_read(&ctl_cp->n_control) == 0) {	/* count is already zero: report it and skip the decrement */
+		IP_VS_ERR("BUG control DEL with n=0 : "
+			  "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
+			  NIPQUAD(cp->caddr),ntohs(cp->cport),
+			  NIPQUAD(cp->vaddr),ntohs(cp->vport));
+		return;
+	}
+	atomic_dec(&ctl_cp->n_control);	/* one fewer connection controlled by ctl_cp */
+}
+
+static inline void
+ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
+{	/* Make ctl_cp the control connection of cp and increment ctl_cp's n_control. */
+	if (cp->control) {	/* already controlled: log it, then detach from the old controller first */
+		IP_VS_ERR("request control ADD for already controlled: "
+			  "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
+			  NIPQUAD(cp->caddr),ntohs(cp->cport),
+			  NIPQUAD(cp->vaddr),ntohs(cp->vport));
+		ip_vs_control_del(cp);
+	}
+	/* debug trace prints the client address:port of cp and of the new controller */
+	IP_VS_DBG(7, "ADDing control for: "
+		  "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
+		  NIPQUAD(cp->caddr),ntohs(cp->cport),
+		  NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+	/* ctl_cp is dereferenced unconditionally, so callers must not pass NULL */
+	cp->control = ctl_cp;
+	atomic_inc(&ctl_cp->n_control);	/* one more connection controlled by ctl_cp */
+}
+
+
+/*
+ *      IPVS application functions
+ *      (from ip_vs_app.c)
+ */
+#define IP_VS_APP_MAX_PORTS  8
+extern int register_ip_vs_app(struct ip_vs_app *mapp,
+			      unsigned short proto, __u16 port);
+extern int unregister_ip_vs_app(struct ip_vs_app *mapp);
+extern struct ip_vs_app * ip_vs_bind_app(struct ip_vs_conn *cp);
+extern int ip_vs_unbind_app(struct ip_vs_conn *cp);
+extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
+extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
+extern int ip_vs_skb_replace(struct sk_buff *skb, int pri,
+			     char *o_buf, int o_len, char *n_buf, int n_len);
+extern int ip_vs_app_init(void);
+extern void ip_vs_app_cleanup(void);
+
+
+/*
+ *      Registering/unregistering scheduler functions
+ *      (from ip_vs_sched.c)
+ */
+extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
+extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
+extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
+				struct ip_vs_scheduler *scheduler);
+extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
+extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
+extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
+
+
+/*
+ *      IPVS control data and functions
+ *      (from ip_vs_ctl.c)
+ */
+extern int sysctl_ip_vs_cache_bypass;
+extern int sysctl_ip_vs_expire_nodest_conn;
+extern int sysctl_ip_vs_sync_threshold;
+extern int sysctl_ip_vs_nat_icmp_send;
+extern atomic_t ip_vs_dropentry;
+extern struct ip_vs_stats ip_vs_stats;
+
+extern struct ip_vs_service *ip_vs_service_get(__u32 fwmark,
+					       __u16 protocol,
+					       __u32 vaddr, __u16 vport);
+static inline void ip_vs_service_put(struct ip_vs_service *svc)
+{
+	atomic_dec(&svc->usecnt);	/* drop one use count on the service */
+}
+
+extern struct ip_vs_dest *
+ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport);
+extern void update_defense_level(void);
+extern void ip_vs_random_dropentry(void);
+extern int ip_vs_control_init(void);
+extern void ip_vs_control_cleanup(void);
+
+
+/*
+ *      IPVS sync daemon data and function prototypes
+ *      (from ip_vs_sync.c)
+ */
+extern volatile int ip_vs_sync_state;
+extern char ip_vs_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+extern int start_sync_thread(int state, char *mcast_ifn);
+extern int stop_sync_thread(void);
+extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+
+
+/*
+ *      IPVS rate estimator prototypes (from ip_vs_est.c)
+ */
+extern int ip_vs_new_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
+
+
+/*
+ *	This is a simple mechanism to ignore packets when
+ *	we are loaded. Just set ip_vs_drop_rate to 'n' and
+ *	we start to drop 1/rate of the packets
+ */
+extern int ip_vs_drop_rate;
+extern int ip_vs_drop_counter;
+
+static __inline__ int ip_vs_todrop(void)
+{
+	if (!ip_vs_drop_rate || --ip_vs_drop_counter > 0)
+		return 0;	/* dropping is off, or the countdown is still running */
+	ip_vs_drop_counter = ip_vs_drop_rate;	/* rearm the countdown */
+	return 1;
+}
+
+
+/*
+ *      Slow timer functions for IPVS
+ *      (from ip_vs_timer.c)
+ */
+extern void add_sltimer(struct timer_list * timer);
+extern int  del_sltimer(struct timer_list * timer);
+extern void mod_sltimer(struct timer_list *timer, unsigned long expires);
+extern void ip_vs_sltimer_init(void);
+extern void ip_vs_sltimer_cleanup(void);
+
+
+/*
+ *      Forwarding-method bits of cp->flags; ip_vs_fwd_tag() maps them to a tag
+ */
+#define IP_VS_FWD_METHOD(cp)  ((cp)->flags & IP_VS_CONN_F_FWD_MASK)
+
+static __inline__ char ip_vs_fwd_tag(struct ip_vs_conn *cp)
+{
+	char fwd;
+	/* header-defined helper: static so an uninlined copy still links */
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		fwd = 'M'; break;
+	case IP_VS_CONN_F_LOCALNODE:
+		fwd = 'L'; break;
+	case IP_VS_CONN_F_TUNNEL:
+		fwd = 'T'; break;
+	case IP_VS_CONN_F_DROUTE:
+		fwd = 'R'; break;
+	case IP_VS_CONN_F_BYPASS:
+		fwd = 'B'; break;
+	default:
+		fwd = '?'; break;	/* unrecognised method bits */
+	}
+	return fwd;
+}
+
+
+/*
+ *	transport layer header checking
+ */
+static inline int ip_vs_header_check(struct sk_buff *skb, int proto, int ihl)
+{
+	int len;	/* ihl plus the fixed transport header size */
+	/* static (not extern) inline: usable even when the call is not inlined */
+	switch (proto) {
+	case IPPROTO_TCP:
+		len = ihl + sizeof(struct tcphdr);
+		/* we don't care about TCP options */
+		break;
+	case IPPROTO_UDP:
+		len = ihl + sizeof(struct udphdr);
+		break;
+	default:
+		len = 0;	/* other protocols: nothing is required */
+	}
+
+	/* returns 0 if the header is available in skb data area, else -1 */
+	if (!pskb_may_pull(skb, len))
+		return -1;
+	else
+		return 0;
+}
+
+
+/*
+ *      Destination cache
+ */
+static inline void
+__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
+{
+	struct dst_entry *prev = dest->dst_cache;
+
+	/* install the new entry and its TOS, then release the replaced one */
+	dest->dst_rtos = rtos;
+	dest->dst_cache = dst;
+	dst_release(prev);
+}
+
+static inline void
+__ip_vs_dst_reset(struct ip_vs_dest *dest)
+{
+	struct dst_entry *prev = dest->dst_cache;
+
+	/* empty the cache slot first, then release what it held */
+	dest->dst_cache = NULL;
+	dst_release(prev);
+}
+
+static inline struct dst_entry *
+__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
+{
+	struct dst_entry *cached = dest->dst_cache;
+
+	if (cached == NULL)
+		return NULL;
+	if ((cached->obsolete || rtos != dest->dst_rtos) &&
+	    cached->ops->check(cached, cookie) == NULL) {
+		dest->dst_cache = NULL;	/* obsolete/other TOS and check failed */
+		return NULL;
+	}
+	dst_hold(cached);
+	return cached;
+}
+
+static inline struct rtable *
+__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct ip_vs_dest *dest = cp->dest;
+	/* Returns the route to use for cp, or NULL if the route lookup fails. */
+	if (dest) {
+		spin_lock(&dest->dst_lock);	/* held across cache check/fill */
+		if (!(rt = (struct rtable *)
+		      __ip_vs_dst_check(dest, rtos, 0))) {
+			if (ip_route_output(&rt, dest->addr, 0, rtos, 0)) {
+				spin_unlock(&dest->dst_lock);
+				IP_VS_DBG_RL("ip_route_output error, "
+					     "dest: %u.%u.%u.%u\n",
+					     NIPQUAD(dest->addr));
+				return NULL;	/* lookup failed */
+			}
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+			IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
+				  NIPQUAD(dest->addr),
+				  atomic_read(&rt->u.dst.__refcnt), rtos);
+		}
+		spin_unlock(&dest->dst_lock);
+	} else {	/* no dest bound: route on cp->daddr, nothing is cached */
+		if (ip_route_output(&rt, cp->daddr, 0, rtos, 0)) {
+			IP_VS_DBG_RL("ip_route_output error, dest: "
+				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
+			return NULL;	/* lookup failed */
+		}
+	}
+
+	return rt;
+}
+
+static inline u16 ip_vs_check_diff(u32 old, u32 new, u16 oldsum)
+{
+	u32 words[2] = { old, new };	/* summed together as one buffer */
+	unsigned int seed = oldsum ^ 0xFFFF;
+
+	return csum_fold(csum_partial((char *) words, sizeof(words), seed));
+}
+
+static inline void ip_vs_fast_check_update(union ip_vs_tphdr *h,
+	u32 oldip, u32 newip, u16 oldport, u16 newport, u8 protocol)
+{
+	/* TCP keeps its checksum in th->check, everything else uses uh->check */
+	u16 *sum = (protocol == IPPROTO_TCP) ? &h->th->check : &h->uh->check;
+	u16 port_fixed;
+
+	port_fixed = ip_vs_check_diff(oldport ^ 0xFFFF, newport, *sum);
+	*sum = ip_vs_check_diff(~oldip, newip, port_fixed);
+
+	/* a UDP checksum that comes out as 0 is stored as 0xFFFF instead */
+	if (!*sum && protocol == IPPROTO_UDP)
+		*sum = 0xFFFF;
+}
+
+static inline int
+ip_vs_skb_cow(struct sk_buff *skb, unsigned int headroom,
+	      struct iphdr **iph_p, unsigned char **t_p)
+{
+	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+
+	if (delta < 0)
+		delta = 0;	/* headroom already >= max(headroom, 16) */
+
+	if (delta || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* skb data changed, update pointers (cast matches *t_p's type) */
+		*iph_p = skb->nh.iph;
+		*t_p = (unsigned char *) (*iph_p) + (*iph_p)->ihl * 4;
+	}
+	return 0;
+}
+
+#endif /* __KERNEL__ */
+
+#endif	/* _IP_VS_H */
diff -urN linux-2.4.20/kernel/ksyms.c linux-2.4.20-ipvs-1.0.9/kernel/ksyms.c
--- linux-2.4.20/kernel/ksyms.c	Wed May 21 11:09:34 2003
+++ linux-2.4.20-ipvs-1.0.9/kernel/ksyms.c	Wed May 21 12:21:53 2003
@@ -127,6 +127,9 @@
 EXPORT_SYMBOL(kmap_prot);
 EXPORT_SYMBOL(kmap_pte);
 #endif
+EXPORT_SYMBOL(buffermem_pages);
+EXPORT_SYMBOL(nr_free_pages);
+EXPORT_SYMBOL(page_cache_size);
 
 /* filesystem internal functions */
 EXPORT_SYMBOL(def_blk_fops);
diff -urN linux-2.4.20/net/Makefile linux-2.4.20-ipvs-1.0.9/net/Makefile
--- linux-2.4.20/net/Makefile	Wed May 21 11:09:34 2003
+++ linux-2.4.20-ipvs-1.0.9/net/Makefile	Wed May 21 11:36:16 2003
@@ -46,6 +46,10 @@
 subdir-$(CONFIG_ECONET)		+= econet
 subdir-$(CONFIG_VLAN_8021Q)           += 8021q
 
+ifeq ($(CONFIG_NETFILTER),y)
+  mod-subdirs += ipv4/ipvs
+  subdir-$(CONFIG_IP_VS) += ipv4/ipvs
+endif
 
 obj-y	:= socket.o $(join $(subdir-y), $(patsubst %,/%.o,$(notdir $(subdir-y))))
 ifeq ($(CONFIG_NET),y)
diff -urN linux-2.4.20/net/ipv4/Config.in linux-2.4.20-ipvs-1.0.9/net/ipv4/Config.in
--- linux-2.4.20/net/ipv4/Config.in	Wed May 21 11:09:34 2003
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/Config.in	Wed May 21 11:36:16 2003
@@ -44,3 +44,6 @@
 if [ "$CONFIG_NETFILTER" != "n" ]; then
    source net/ipv4/netfilter/Config.in
 fi
+if [ "$CONFIG_NETFILTER" != "n" ]; then
+   source net/ipv4/ipvs/Config.in
+fi
diff -urN linux-2.4.20/net/ipv4/ipvs/Config.in linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/Config.in
--- linux-2.4.20/net/ipv4/ipvs/Config.in	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/Config.in	Wed May 21 00:43:47 2003
@@ -0,0 +1,26 @@
+#
+# IP VS configuration
+#
+mainmenu_option next_comment
+comment '  IP: Virtual Server Configuration'
+
+tristate 'virtual server support (EXPERIMENTAL)' CONFIG_IP_VS
+if [ "$CONFIG_IP_VS" != "n" ]; then
+  bool '  IP virtual server debugging' CONFIG_IP_VS_DEBUG
+  int '  IPVS connection table size (the Nth power of 2)' CONFIG_IP_VS_TAB_BITS 12
+  comment 'IPVS scheduler'
+  dep_tristate '  round-robin scheduling' CONFIG_IP_VS_RR $CONFIG_IP_VS
+  dep_tristate '  weighted round-robin scheduling' CONFIG_IP_VS_WRR $CONFIG_IP_VS
+  dep_tristate '  least-connection scheduling' CONFIG_IP_VS_LC $CONFIG_IP_VS
+  dep_tristate '  weighted least-connection scheduling' CONFIG_IP_VS_WLC $CONFIG_IP_VS
+  dep_tristate '  locality-based least-connection scheduling' CONFIG_IP_VS_LBLC $CONFIG_IP_VS
+  dep_tristate '  locality-based least-connection with replication scheduling' CONFIG_IP_VS_LBLCR $CONFIG_IP_VS
+  dep_tristate '  destination hashing scheduling' CONFIG_IP_VS_DH $CONFIG_IP_VS
+  dep_tristate '  source hashing scheduling' CONFIG_IP_VS_SH $CONFIG_IP_VS
+  dep_tristate '  shortest expected delay scheduling' CONFIG_IP_VS_SED $CONFIG_IP_VS
+  dep_tristate '  never queue scheduling' CONFIG_IP_VS_NQ $CONFIG_IP_VS
+  comment 'IPVS application helper'
+  dep_tristate '  FTP protocol helper' CONFIG_IP_VS_FTP $CONFIG_IP_VS
+fi
+
+endmenu
diff -urN linux-2.4.20/net/ipv4/ipvs/Makefile linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/Makefile
--- linux-2.4.20/net/ipv4/ipvs/Makefile	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/Makefile	Wed May 21 00:41:28 2003
@@ -0,0 +1,43 @@
+#
+# Makefile for the IPVS modules on top of IPv4.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET :=	ipvs.o
+
+export-objs :=	ip_vs_core.o ip_vs_app.o
+
+ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
+		ip_vs_timer.o ip_vs_app.o ip_vs_sync.o ip_vs_est.o
+
+ifeq ($(CONFIG_IP_VS),y)
+  obj-y := $(ip_vs-objs)
+else
+  ifeq ($(CONFIG_IP_VS),m)
+    obj-m := ip_vs.o
+  endif
+endif
+
+# IPVS schedulers
+obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
+obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
+obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
+obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
+obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
+obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
+obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
+obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+
+# IPVS application helpers
+obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
+
+include $(TOPDIR)/Rules.make
+
+ip_vs.o: $(ip_vs-objs)
+	$(LD) $(LD_RFLAG) -r -o $@ $(ip_vs-objs)
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_app.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_app.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_app.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_app.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,508 @@
+/*
+ * IPVS         Application module
+ *
+ * Version:     $Id: ip_vs_app.c,v 1.14 2001/11/23 14:34:10 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
+ * is that ip_vs_app module handles the reverse direction (incoming requests
+ * and outgoing responses). The ip_vs_app modules are only used for VS/NAT.
+ *
+ *		IP_MASQ_APP application masquerading module
+ *
+ * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/init.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+#define IP_VS_APP_TAB_SIZE  16          /* must be power of 2 */
+
+#define IP_VS_APP_HASH(proto, port) (((port)^(proto)) & (IP_VS_APP_TAB_SIZE-1))
+#define IP_VS_APP_TYPE(proto, port) ((proto)<<16 | (port))	/* proto above the port */
+#define IP_VS_APP_PORT(type)        ((type) & 0xffff)
+#define IP_VS_APP_PROTO(type)       (((type)>>16) & 0x00ff)
+
+
+EXPORT_SYMBOL(register_ip_vs_app);
+EXPORT_SYMBOL(unregister_ip_vs_app);
+
+
+/*
+ *	will hold ipvs app. hashed list heads
+ */
+static struct list_head ip_vs_app_base[IP_VS_APP_TAB_SIZE];
+
+/* lock for ip_vs_app table */
+static rwlock_t __ip_vs_app_lock = RW_LOCK_UNLOCKED;
+
+
+/*
+ *	Register vapp for (proto, port): stores the packed type in vapp->type
+ *	and links vapp into its hash bucket. port: host byte order.
+ */
+int register_ip_vs_app(struct ip_vs_app *vapp,
+		       unsigned short proto, __u16 port)
+{
+	unsigned hash;
+
+	if (!vapp) {
+		IP_VS_ERR("register_ip_vs_app(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	MOD_INC_USE_COUNT;	/* undone in unregister_ip_vs_app() */
+
+	vapp->type = IP_VS_APP_TYPE(proto, port);
+	hash = IP_VS_APP_HASH(proto, port);
+
+	write_lock_bh(&__ip_vs_app_lock);
+	list_add(&vapp->n_list, &ip_vs_app_base[hash]);
+	write_unlock_bh(&__ip_vs_app_lock);
+
+	return 0;
+}
+
+
+/*
+ *	ip_vs_app unregistration routine: unlinks vapp from the hash table.
+ */
+int unregister_ip_vs_app(struct ip_vs_app *vapp)
+{
+	if (!vapp) {
+		IP_VS_ERR("unregister_ip_vs_app(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_app_lock);
+	list_del(&vapp->n_list);
+	write_unlock_bh(&__ip_vs_app_lock);
+
+	MOD_DEC_USE_COUNT;	/* pairs with register_ip_vs_app() */
+
+	return 0;
+}
+
+
+/*
+ *	get ip_vs_app by proto and port (net byte order); a hit keeps a module ref.
+ */
+static struct ip_vs_app * ip_vs_app_get(unsigned short proto, __u16 port)
+{
+	struct list_head *e;
+	struct ip_vs_app *vapp;
+	unsigned hash;
+	unsigned type;
+
+	port = ntohs(port);	/* the table is keyed on the host-order port */
+	type = IP_VS_APP_TYPE(proto, port);
+	hash = IP_VS_APP_HASH(proto, port);
+
+	read_lock_bh(&__ip_vs_app_lock);
+
+	list_for_each(e, &ip_vs_app_base[hash]) {
+		vapp = list_entry(e, struct ip_vs_app, n_list);
+
+		/*
+		 * Test and MOD_INC_USE_COUNT atomically
+		 */
+		if (vapp->module && !try_inc_mod_count(vapp->module)) {
+			/*
+			 * This application module is just deleted
+			 */
+			continue;
+		}
+		if (type == vapp->type) {
+			read_unlock_bh(&__ip_vs_app_lock);
+			return vapp;	/* use count taken above stays with the caller */
+		}
+
+		if (vapp->module)	/* not a match: give the use count back */
+			__MOD_DEC_USE_COUNT(vapp->module);
+	}
+
+	read_unlock_bh(&__ip_vs_app_lock);
+	return NULL;
+}
+
+
+/*
+ *	Bind ip_vs_conn to its ip_vs_app based on proto and dport,
+ *	and call the ip_vs_app constructor.
+ */
+struct ip_vs_app * ip_vs_bind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *helper;
+
+	/* application helpers are only bound for the NAT forwarding method */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return NULL;
+
+	/* ... and only for TCP or UDP connections */
+	if (!(cp->protocol == IPPROTO_TCP || cp->protocol == IPPROTO_UDP))
+		return NULL;
+
+	/* refuse to rebind: hand back the helper that is already attached */
+	if (cp->app) {
+		IP_VS_ERR("ip_vs_bind_app(): "
+			  "called for already bound object.\n");
+		return cp->app;
+	}
+
+	/* look the helper up by protocol and virtual port */
+	helper = ip_vs_app_get(cp->protocol, cp->vport);
+	if (!helper)
+		return NULL;
+
+	cp->app = helper;
+	if (helper->init_conn)
+		helper->init_conn(helper, cp);
+
+	return helper;
+}
+
+
+/*
+ *	Unbind cp from type object and call cp destructor (does not kfree()).
+ */
+int ip_vs_unbind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *helper = cp->app;
+
+	if (cp->protocol != IPPROTO_TCP && cp->protocol != IPPROTO_UDP)
+		return 0;
+	if (!helper)
+		return 0;	/* nothing was bound */
+
+	if (helper->done_conn)
+		helper->done_conn(helper, cp);
+	cp->app = NULL;
+	if (helper->module)	/* give back the use count taken at lookup */
+		__MOD_DEC_USE_COUNT(helper->module);
+	return 1;
+}
+
+
+/*
+ *	Fixes th->seq based on ip_vs_seq info.
+ */
+static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 host_seq = ntohl(th->seq);
+
+	/* neither offset set: the header is left untouched */
+	if (!vseq->delta && !vseq->previous_delta)
+		return;
+	/*
+	 *	Packets after the most recent resized pkt seq get delta added;
+	 *	all others get previous_delta added.
+	 */
+	if (after(host_seq, vseq->init_seq)) {
+		th->seq = htonl(host_seq + vseq->delta);
+		IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
+			  vseq->delta);
+	} else {
+		th->seq = htonl(host_seq + vseq->previous_delta);
+		IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
+			  "(%d) to seq\n", vseq->previous_delta);
+	}
+}
+
+
+/*
+ *	Fixes th->ack_seq based on ip_vs_seq info.
+ */
+static inline void
+vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 host_ack = ntohl(th->ack_seq);
+
+	/* neither offset set: the header is left untouched */
+	if (!vseq->delta && !vseq->previous_delta)
+		return;
+
+	/*
+	 * ack_seq is the number of the octet expected next, so it is
+	 * compared with init_seq+delta rather than init_seq.
+	 * Beyond that point delta is subtracted; otherwise
+	 * previous_delta is subtracted.
+	 */
+	if (after(host_ack, vseq->init_seq + vseq->delta)) {
+		th->ack_seq = htonl(host_ack - vseq->delta);
+		IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
+			  "(%d) from ack_seq\n", vseq->delta);
+	} else {
+		th->ack_seq = htonl(host_ack - vseq->previous_delta);
+		IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
+			  "previous_delta (%d) from ack_seq\n",
+			  vseq->previous_delta);
+	}
+}
+
+
+/*
+ *	Updates ip_vs_seq if pkt has been resized
+ *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
+ */
+static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
+				 unsigned flag, __u32 seq, int diff)
+{
+	/* spinlock is to keep updating cp->flags atomic */
+	spin_lock(&cp->lock);
+	if ( !(cp->flags & flag) || after(seq, vseq->init_seq)) {
+		vseq->previous_delta = vseq->delta;	/* keep the old offset */
+		vseq->delta += diff;			/* offsets accumulate */
+		vseq->init_seq = seq;	/* seq of the latest resized pkt */
+		cp->flags |= flag;
+	}
+	spin_unlock(&cp->lock);
+}
+
+
+/*
+ *	Output pkt hook. Will call bound ip_vs_app specific function
+ *	called by ip_vs_out(), assumes previously checked cp!=NULL
+ *	returns (new - old) skb->len diff.
+ */
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *vapp;
+	int diff;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	__u32 seq;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((vapp = cp->app) == NULL)
+		return 0;
+
+	iph = skb->nh.iph;
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);	/* just past the IP header */
+
+	/*
+	 *	Remember the pre-fixup seq (only consumed on the TCP path below)
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->protocol == IPPROTO_TCP) {
+		if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+			vs_fix_seq(&cp->out_seq, th);
+		if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+			vs_fix_ack_seq(&cp->in_seq, th);
+	}
+
+	/*
+	 *	Call private output hook function
+	 */
+	if (vapp->pkt_out == NULL)
+		return 0;
+
+	diff = vapp->pkt_out(vapp, cp, skb);
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0 && cp->protocol == IPPROTO_TCP)
+		vs_seq_update(cp, &cp->out_seq,
+			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
+
+	return diff;
+}
+
+
+/*
+ *	Input pkt hook: fix TCP seq/ack, then call the bound app's pkt_in hook.
+ *	Assumes cp!=NULL. Returns the hook's len diff, 0 if no app or hook.
+ *	NOTE(review): caller was named as ip_fw_demasquerade() -- confirm.
+ */
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *vapp;
+	int diff;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	__u32 seq;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((vapp = cp->app) == NULL)
+		return 0;
+
+	iph = skb->nh.iph;
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);	/* just past the IP header */
+
+	/*
+	 *	Remember the pre-fixup seq (only consumed on the TCP path below)
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->protocol == IPPROTO_TCP) {
+		if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+			vs_fix_seq(&cp->in_seq, th);
+		if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+			vs_fix_ack_seq(&cp->out_seq, th);
+	}
+
+	/*
+	 *	Call private input hook function
+	 */
+	if (vapp->pkt_in == NULL)
+		return 0;
+
+	diff = vapp->pkt_in(vapp, cp, skb);
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0 && cp->protocol == IPPROTO_TCP)
+		vs_seq_update(cp, &cp->in_seq,
+			      IP_VS_CONN_F_IN_SEQ, seq, diff);
+
+	return diff;
+}
+
+
+/*
+ *	/proc/net/ip_vs_app entry function
+ */
+static int ip_vs_app_getinfo(char *buffer, char **start, off_t offset,
+			     int length)
+{
+	off_t pos=0;
+	int len=0;
+	char temp[64];	/* one record, without its newline */
+	int idx;
+	struct ip_vs_app *vapp;
+	struct list_head *e;
+
+	pos = 64;	/* every record, the header included, is 64 bytes */
+	if (pos > offset) {
+		len += sprintf(buffer+len, "%-63s\n",
+			       "prot port    usecnt name");
+	}
+
+	read_lock_bh(&__ip_vs_app_lock);
+	for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) {
+		list_for_each (e, &ip_vs_app_base[idx]) {
+			vapp = list_entry(e, struct ip_vs_app, n_list);
+
+			pos += 64;
+			if (pos <= offset)
+				continue;	/* record lies before the requested window */
+			snprintf(temp, sizeof(temp), "%-3s  %-7u %-6d %-17s",
+				ip_vs_proto_name(IP_VS_APP_PROTO(vapp->type)),
+				IP_VS_APP_PORT(vapp->type),
+				vapp->module?GET_USE_COUNT(vapp->module):0,
+				vapp->name);	/* bounded: a long name is cut, not overrun */
+			len += sprintf(buffer+len, "%-63s\n", temp);
+			if (pos >= offset+length)
+				goto done;
+		}
+	}
+  done:
+	read_unlock_bh(&__ip_vs_app_lock);
+
+	*start = buffer+len-(pos-offset);       /* Start of wanted data */
+	len = pos-offset;
+	if (len > length)
+		len = length;
+	if (len < 0)
+		len = 0;
+	return len;
+}
+
+
+/*
+ *	Replace a segment of data with a new segment
+ */
+int ip_vs_skb_replace(struct sk_buff *skb, int pri,
+		      char *o_buf, int o_len, char *n_buf, int n_len)
+{
+	struct iphdr *iph;
+	int diff;
+	int o_offset;
+	int o_left;
+
+	EnterFunction(9);
+
+	diff = n_len - o_len;	/* growth (>0) or shrinkage (<0) in bytes */
+	o_offset = o_buf - (char *)skb->data;
+	/* The length of left data after o_buf+o_len in the skb data */
+	o_left = skb->len - (o_offset + o_len);
+
+	if (diff <= 0) {	/* same size or smaller: edit in place, then trim */
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+		skb_trim(skb, skb->len + diff);
+	} else if (diff <= skb_tailroom(skb)) {	/* growth fits in the tailroom */
+		skb_put(skb, diff);
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+	} else {	/* expand first; data is then addressed via skb->data + o_offset */
+		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
+			return -ENOMEM;
+		skb_put(skb, diff);
+		memmove(skb->data + o_offset + n_len,
+			skb->data + o_offset + o_len, o_left);
+		memcpy(skb->data + o_offset, n_buf, n_len);
+	}
+
+	/* must update the iph total length here */
+	iph = skb->nh.iph;
+	iph->tot_len = htons(skb->len);
+
+	LeaveFunction(9);
+	return 0;
+}
+
+
+int ip_vs_app_init(void)
+{
+	int idx;
+
+	for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) {
+		INIT_LIST_HEAD(&ip_vs_app_base[idx]);	/* start with empty buckets */
+	}
+
+	/* we will replace it with proc_net_ipvs_create() soon */
+	proc_net_create("ip_vs_app", 0, ip_vs_app_getinfo);
+	return 0;	/* proc_net_create() result is not checked */
+}
+
+void ip_vs_app_cleanup(void)
+{
+	proc_net_remove("ip_vs_app");	/* undo the entry made in ip_vs_app_init() */
+}
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_conn.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_conn.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_conn.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,1565 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_conn.c,v 1.28.2.2 2003/04/11 14:02:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others. Much of this code is taken from the IP MASQ code of kernel 2.2.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/vmalloc.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>                  /* for tcphdr */
+#include <linux/in.h>
+#include <linux/proc_fs.h>              /* for proc_net_* */
+#include <asm/softirq.h>                /* for local_bh_* */
+#include <net/ip.h>
+#include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>                  /* for ip_route_output */
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *  Connection hash table: for input and output packets lookups of IPVS
+ */
+static struct list_head *ip_vs_conn_tab;
+
+/*  SLAB cache for IPVS connections */
+static kmem_cache_t *ip_vs_conn_cachep;
+
+/*  counter for current IPVS connections */
+static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
+
+/*
+ *  No client port connection counter
+ */
+static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
+
+
+/*
+ *  Fine locking granularity: 2^CT_LOCKARRAY_BITS rwlocks for the conn table
+ */
+#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
+#define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
+
+struct ip_vs_aligned_lock
+{
+	rwlock_t	l;	/* struct is aligned to SMP_CACHE_BYTES */
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+/* lock array for conn table; a key selects entry [key & CT_LOCKARRAY_MASK] */
+struct ip_vs_aligned_lock
+__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
+
+static inline void ct_read_lock(unsigned key)
+{
+	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock(unsigned key)
+{
+	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock(unsigned key)
+{
+	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock(unsigned key)
+{
+	write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+/* _bh variants: the same locks, taken through the *_lock_bh primitives */
+static inline void ct_read_lock_bh(unsigned key)
+{
+	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock_bh(unsigned key)
+{
+	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock_bh(unsigned key)
+{
+	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock_bh(unsigned key)
+{
+	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+
+/*
+ *	Returns hash value for IPVS connection entry
+ */
+static inline unsigned
+ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port)
+{
+	unsigned haddr = ntohl(addr);
+	unsigned mix = proto ^ haddr ^ (haddr >> IP_VS_CONN_TAB_BITS) ^ ntohs(port);
+
+	return mix & IP_VS_CONN_TAB_MASK;	/* masked down to a table index */
+}
+
+
+/*
+ *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *	returns bool success.
+ */
+static int ip_vs_conn_hash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+
+	if (cp->flags & IP_VS_CONN_F_HASHED) {
+		IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;	/* refused: already flagged as hashed */
+	}
+
+	/* Hash by protocol, client address and port */
+	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+	cp->flags |= IP_VS_CONN_F_HASHED;
+	atomic_inc(&cp->refcnt);	/* counted while linked in the table */
+
+	ct_write_unlock(hash);
+
+	return 1;
+}
+
+
+/*
+ *	UNhashes ip_vs_conn from ip_vs_conn_tab.
+ *	returns bool success.
+ */
+static int ip_vs_conn_unhash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+
+	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
+		IP_VS_ERR("ip_vs_conn_unhash(): request for unhash flagged, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;	/* refused: not flagged as hashed */
+	}
+
+	/* unhash it and decrease its reference counter */
+	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+	ct_write_lock(hash);
+
+	list_del(&cp->c_list);
+	cp->flags &= ~IP_VS_CONN_F_HASHED;
+	atomic_dec(&cp->refcnt);	/* undoes the increment in ip_vs_conn_hash() */
+
+	ct_write_unlock(hash);
+
+	return 1;
+}
+
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from OUTside-to-INside.
+ *	s_addr, s_port: pkt source address (foreign host)
+ *	d_addr, d_port: pkt dest address (load balancer)
+ */
+static inline struct ip_vs_conn *__ip_vs_conn_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+	struct ip_vs_conn *found = NULL;
+	struct ip_vs_conn *cand;
+	struct list_head *bucket, *pos;
+	unsigned key;
+
+	/* the bucket is chosen from protocol and the pkt source addr/port */
+	key = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+	bucket = &ip_vs_conn_tab[key];
+
+	ct_read_lock(key);
+	for (pos = bucket->next; pos != bucket; pos = pos->next) {
+		cand = list_entry(pos, struct ip_vs_conn, c_list);
+		if (s_addr != cand->caddr || s_port != cand->cport ||
+		    d_port != cand->vport || d_addr != cand->vaddr ||
+		    protocol != cand->protocol)
+			continue;
+		/* HIT: take the reference before the bucket lock is dropped */
+		atomic_inc(&cand->refcnt);
+		found = cand;
+		break;
+	}
+	ct_read_unlock(key);
+
+	return found;
+}
+
+struct ip_vs_conn *ip_vs_conn_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+	struct ip_vs_conn *cp;
+	/* exact lookup first; retry with source port 0 if the no-cport count is set */
+	cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
+	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
+		cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
+
+	IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+		  ip_vs_proto_name(protocol),
+		  NIPQUAD(s_addr), ntohs(s_port),
+		  NIPQUAD(d_addr), ntohs(d_port),
+		  cp?"hit":"not hit");
+
+	return cp;
+}
+
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from inside-to-OUTside.
+ *	s_addr, s_port: pkt source address (inside host)
+ *	d_addr, d_port: pkt dest address (foreign host)
+ */
+struct ip_vs_conn *ip_vs_conn_out_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp, *ret=NULL;
+	struct list_head *l,*e;
+
+	/*
+	 *	Check for "full" addressed entries (hashed on the pkt dest here)
+	 */
+	hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
+	l = &ip_vs_conn_tab[hash];
+
+	ct_read_lock(hash);
+
+	for (e=l->next; e!=l; e=e->next) {
+		cp = list_entry(e, struct ip_vs_conn, c_list);
+		if (d_addr == cp->caddr && d_port == cp->cport &&
+		    s_port == cp->dport && s_addr == cp->daddr &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);	/* taken under the bucket lock */
+			ret = cp;
+			break;
+		}
+	}
+
+	ct_read_unlock(hash);
+
+	IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+		  ip_vs_proto_name(protocol),
+		  NIPQUAD(s_addr), ntohs(s_port),
+		  NIPQUAD(d_addr), ntohs(d_port),
+		  ret?"hit":"not hit");
+
+	return ret;
+}
+
+
+/*
+ *      Put back the conn and restart its timer with its timeout
+ */
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+	/* re-arm the timer to expire cp->timeout jiffies from now */
+	mod_sltimer(&cp->timer, jiffies+cp->timeout);
+
+	__ip_vs_conn_put(cp);	/* defined outside this chunk; presumably drops the ref */
+}
+
+
+/*
+ *	Timeout table[state]: the default per-state timeouts, in jiffies
+ */
+struct ip_vs_timeout_table vs_timeout_table = {
+	ATOMIC_INIT(0),	/* refcnt */
+	0,		/* scale  */
+	{
+		[IP_VS_S_NONE]          =	30*60*HZ,
+		[IP_VS_S_ESTABLISHED]	=	15*60*HZ,
+		[IP_VS_S_SYN_SENT]	=	2*60*HZ,
+		[IP_VS_S_SYN_RECV]	=	1*60*HZ,
+		[IP_VS_S_FIN_WAIT]	=	2*60*HZ,
+		[IP_VS_S_TIME_WAIT]	=	2*60*HZ,
+		[IP_VS_S_CLOSE]         =	10*HZ,
+		[IP_VS_S_CLOSE_WAIT]	=	60*HZ,
+		[IP_VS_S_LAST_ACK]	=	30*HZ,
+		[IP_VS_S_LISTEN]	=	2*60*HZ,
+		[IP_VS_S_SYNACK]	=	120*HZ,
+		[IP_VS_S_UDP]		=	5*60*HZ,
+		[IP_VS_S_ICMP]          =	1*60*HZ,
+		[IP_VS_S_LAST]          =	2*HZ,
+	},	/* timeout */
+};
+
+
+struct ip_vs_timeout_table vs_timeout_table_dos = {	/* per-state timeouts, in jiffies */
+	ATOMIC_INIT(0),	/* refcnt */
+	0,		/* scale  */
+	{
+		[IP_VS_S_NONE]          =	15*60*HZ,
+		[IP_VS_S_ESTABLISHED]	=	8*60*HZ,
+		[IP_VS_S_SYN_SENT]	=	60*HZ,
+		[IP_VS_S_SYN_RECV]	=	10*HZ,
+		[IP_VS_S_FIN_WAIT]	=	60*HZ,
+		[IP_VS_S_TIME_WAIT]	=	60*HZ,
+		[IP_VS_S_CLOSE]         =	10*HZ,
+		[IP_VS_S_CLOSE_WAIT]	=	60*HZ,
+		[IP_VS_S_LAST_ACK]	=	30*HZ,
+		[IP_VS_S_LISTEN]	=	2*60*HZ,
+		[IP_VS_S_SYNACK]	=	100*HZ,
+		[IP_VS_S_UDP]		=	3*60*HZ,
+		[IP_VS_S_ICMP]          =	1*60*HZ,
+		[IP_VS_S_LAST]          =	2*HZ,
+	},	/* timeout */
+};
+
+
+/*
+ *	Timeout table that ip_vs_conn_new attaches to new VS entries.
+ *	An entry without a table uses the default one (vs_timeout_table).
+ *	Under flood attack we switch to vs_timeout_table_dos
+ */
+
+static struct ip_vs_timeout_table *ip_vs_timeout_table = &vs_timeout_table;
+
+static const char * state_name_table[IP_VS_S_LAST+1] = { /* by state */
+	[IP_VS_S_NONE]          =	"NONE",
+	[IP_VS_S_ESTABLISHED]	=	"ESTABLISHED",
+	[IP_VS_S_SYN_SENT]	=	"SYN_SENT",
+	[IP_VS_S_SYN_RECV]	=	"SYN_RECV",
+	[IP_VS_S_FIN_WAIT]	=	"FIN_WAIT",
+	[IP_VS_S_TIME_WAIT]	=	"TIME_WAIT",
+	[IP_VS_S_CLOSE]         =	"CLOSE",
+	[IP_VS_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
+	[IP_VS_S_LAST_ACK]	=	"LAST_ACK",
+	[IP_VS_S_LISTEN]	=	"LISTEN",
+	[IP_VS_S_SYNACK]	=	"SYNACK",
+	[IP_VS_S_UDP]		=	"UDP",
+	[IP_VS_S_ICMP]          =	"ICMP",
+	[IP_VS_S_LAST]          =	"BUG!",	/* ip_vs_state_name never returns it */
+};
+
+#define sNO IP_VS_S_NONE		/* short state aliases that keep   */
+#define sES IP_VS_S_ESTABLISHED		/* the transition tables readable */
+#define sSS IP_VS_S_SYN_SENT
+#define sSR IP_VS_S_SYN_RECV
+#define sFW IP_VS_S_FIN_WAIT
+#define sTW IP_VS_S_TIME_WAIT
+#define sCL IP_VS_S_CLOSE
+#define sCW IP_VS_S_CLOSE_WAIT
+#define sLA IP_VS_S_LAST_ACK
+#define sLI IP_VS_S_LISTEN
+#define sSA IP_VS_S_SYNACK
+
+struct vs_tcp_states_t {	/* one row: next state by current state */
+	int next_state[IP_VS_S_LAST];	/* should be _LAST_TCP */
+};
+
+const char * ip_vs_state_name(int state)
+{
+	if (state < 0 || state >= IP_VS_S_LAST)	/* outside the name table */
+		return "ERR!";
+	return state_name_table[state] ? state_name_table[state] : "?";
+}
+
+static struct vs_tcp_states_t vs_tcp_states [] = {
+/*	INPUT: rows [0-3]; column = current state, value = next state */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
+
+/*	OUTPUT: rows [4-7] */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY: rows [8-11], used while IP_VS_CONN_F_NOOUTPUT is set */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct vs_tcp_states_t vs_tcp_states_dos [] = {
+/*	INPUT: rows [0-3]; table selected by ip_vs_secure_tcp_set(1) */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
+/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+
+/*	OUTPUT: rows [4-7] */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY: rows [8-11], used while IP_VS_CONN_F_NOOUTPUT is set */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+/* TCP transition table in force, swapped by ip_vs_secure_tcp_set() */
+static struct vs_tcp_states_t *ip_vs_state_table = vs_tcp_states;
+
+void ip_vs_secure_tcp_set(int on)
+{
+	/*
+	 *	The state table and the timeout table always change as a
+	 *	pair: the *_dos variants when on is set, the normal ones if not
+	 */
+	ip_vs_state_table = on ? vs_tcp_states_dos : vs_tcp_states;
+	ip_vs_timeout_table = on ? &vs_timeout_table_dos : &vs_timeout_table;
+}
+
+
+static inline int vs_tcp_state_idx(struct tcphdr *th, int state_off)
+{
+	/*
+	 *	Rows per section: syn=0, fin=1, ack=2, rst=3; the sections are
+	 *	[0-3] input, [4-7] output, [8-11] input only.  When several
+	 *	flags are set, RST wins over SYN, SYN over FIN, FIN over ACK.
+	 *	A segment carrying none of them has no row (-1).
+	 */
+	int row = th->rst ? 3 :
+		  th->syn ? 0 :
+		  th->fin ? 1 :
+		  th->ack ? 2 : -1;
+
+	return (row < 0) ? -1 : state_off + row;
+}
+
+/* Put cp into state and load that state's timeout; returns state */
+static inline int vs_set_state_timeout(struct ip_vs_conn *cp, int state)
+{
+	struct ip_vs_timeout_table *tbl;
+	int shift;
+
+	/* entries without a table of their own use the default one */
+	tbl = cp->timeout_table ? cp->timeout_table : &vs_timeout_table;
+
+	cp->state = state;
+	cp->timeout = tbl->timeout[cp->state];
+
+	/*
+	 *	scale is a shift count: a negative one shortens the
+	 *	timeout, a positive one lengthens it, zero leaves it alone
+	 */
+	shift = tbl->scale;
+	if (shift < 0)
+		cp->timeout >>= -shift;
+	else if (shift > 0)
+		cp->timeout <<= shift;
+
+	return state;
+}
+
+/* Advance the TCP state of cp for one segment and reload its timeout */
+static inline int
+vs_tcp_state(struct ip_vs_conn *cp, int state_off, struct tcphdr *th)
+{
+	int state_idx;
+	int new_state = IP_VS_S_CLOSE;	/* kept when no flag selects a row */
+
+	/*
+	 *    Update state offset to INPUT_ONLY if necessary
+	 *    or delete NO_OUTPUT flag if output packet detected
+	 */
+	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+		if (state_off == VS_STATE_OUTPUT)
+			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+		else
+			state_off = VS_STATE_INPUT_ONLY;
+	}
+
+	if ((state_idx = vs_tcp_state_idx(th, state_off)) < 0) {
+		IP_VS_DBG(8, "vs_tcp_state_idx(%d)=%d!!!\n",
+			  state_off, state_idx);
+		goto tcp_state_out;
+	}
+
+	new_state = ip_vs_state_table[state_idx].next_state[cp->state];
+
+  tcp_state_out:
+	if (new_state != cp->state) {
+		struct ip_vs_dest *dest = cp->dest;
+
+		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
+			  "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
+			  ip_vs_proto_name(cp->protocol),
+			  (state_off==VS_STATE_OUTPUT)?"output ":"input ",
+			  th->syn? 'S' : '.',
+			  th->fin? 'F' : '.',
+			  th->ack? 'A' : '.',
+			  th->rst? 'R' : '.',
+			  NIPQUAD(cp->daddr), ntohs(cp->dport),
+			  NIPQUAD(cp->caddr), ntohs(cp->cport),
+			  ip_vs_state_name(cp->state),
+			  ip_vs_state_name(new_state),
+			  atomic_read(&cp->refcnt));
+		if (dest) {	/* only ESTABLISHED counts as active */
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			    (new_state != IP_VS_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				   (new_state == IP_VS_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+	}
+
+	return vs_set_state_timeout(cp, new_state);
+}
+
+
+/*
+ *	Update the state and the timeout of cp for one of its packets.
+ *	state_off selects the input or output section of the TCP table.
+ *	Returns the resulting state, or -1 for an unsupported protocol.
+ */
+int ip_vs_set_state(struct ip_vs_conn *cp,
+		    int state_off, struct iphdr *iph, void *tp)
+{
+	int new_state = -1;
+
+	/* the whole update runs under cp->lock */
+	spin_lock(&cp->lock);
+
+	if (iph->protocol == IPPROTO_TCP) {
+		/* the only protocol with a transition table; tp is its header */
+		new_state = vs_tcp_state(cp, state_off, tp);
+	} else if (iph->protocol == IPPROTO_UDP) {
+		new_state = vs_set_state_timeout(cp, IP_VS_S_UDP);
+	} else if (iph->protocol == IPPROTO_ICMP) {
+		new_state = vs_set_state_timeout(cp, IP_VS_S_ICMP);
+	}
+
+	spin_unlock(&cp->lock);
+
+	return new_state;
+}
+
+
+/*
+ *	Set LISTEN state, return its timeout. (ip_vs_conn_put will setup timer)
+ */
+int ip_vs_conn_listen(struct ip_vs_conn *cp)
+{
+	vs_set_state_timeout(cp, IP_VS_S_LISTEN);	/* cp->lock not taken here */
+	return cp->timeout;
+}
+
+
+/*
+ *      Bypass transmitter: when the destination server is not available,
+ *      route the packet to its own iph->daddr instead (may only be used in
+ *      transparent cache cluster).  The skb is consumed on every path.
+ */
+static int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = skb->nh.iph;
+	u8     tos = iph->tos;
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(tos), 0)) {
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
+			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
+		goto tx_error_icmp;
+	}
+
+	/* MTU checking: a DF packet that does not fit is reported and freed */
+	mtu = rt->u.dst.pmtu;
+	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+#if 0
+	if (skb_is_nonlinear(skb) && skb->len <= mtu)
+#endif
+		ip_send_check(iph);	/* unconditional: the test is compiled out */
+
+	if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) {
+		if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
+			ip_rt_put(rt);
+			IP_VS_ERR_RL("ip_vs_bypass_xmit(): no memory\n");
+			goto tx_error;
+		}
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+#endif /* CONFIG_NETFILTER_DEBUG */
+	skb->nfcache |= NFC_IPVS_PROPERTY;
+	ip_send(skb);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);		/* freed here, hence NF_STOLEN below */
+	return NF_STOLEN;
+}
+
+
+/*
+ *      NULL transmitter (do nothing except return NF_ACCEPT), local node only
+ */
+static int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
+{
+	return NF_ACCEPT;
+}
+
+
+/*
+ *      NAT transmitter (outside-to-inside only): forward to cp->daddr:dport
+ */
+static int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
+{
+	struct rtable *rt;		/* Route to the other host */
+	struct iphdr  *iph;
+	union ip_vs_tphdr h;
+	int ihl;
+	unsigned short size;
+	int mtu;
+
+	EnterFunction(10);
+
+	/*
+	 * If it has ip_vs_app helper, the helper may change the payload,
+	 * so it needs full checksum checking and checksum calculation.
+	 * If not, only the header (such as IP address and port number)
+	 * will be changed, so it is fast to do incremental checksum update,
+	 * and let the destination host  do final checksum checking.
+	 */
+
+	if (cp->app && skb_is_nonlinear(skb)
+	    && skb_linearize(skb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
+	iph = skb->nh.iph;
+	ihl = iph->ihl << 2;
+	h.raw = (char*) iph + ihl;
+	size = ntohs(iph->tot_len) - ihl;
+
+	/* do TCP/UDP checksum checking if it has application helper */
+	if (cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
+		switch (skb->ip_summed) {
+		case CHECKSUM_NONE:
+			skb->csum = csum_partial(h.raw, size, 0);
+			/* fall through to verify the sum just computed */
+		case CHECKSUM_HW:
+			if (csum_tcpudp_magic(iph->saddr, iph->daddr, size,
+					      iph->protocol, skb->csum)) {
+				IP_VS_DBG_RL("Incoming failed %s checksum "
+					     "from %d.%d.%d.%d (size=%d)!\n",
+					     ip_vs_proto_name(iph->protocol),
+					     NIPQUAD(iph->saddr),
+					     size);
+				goto tx_error;
+			}
+			break;
+		default:
+			/* CHECKSUM_UNNECESSARY */
+			break;
+		}
+	}
+
+	/*
+	 *  Check if it is no_cport connection ...
+	 */
+	if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
+		atomic_dec(&ip_vs_conn_no_cport_cnt);
+		ip_vs_conn_unhash(cp);
+		cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
+		cp->cport = h.portp[0];
+		/* hash on new cport */
+		ip_vs_conn_hash(cp);
+
+		IP_VS_DBG(10, "filled cport=%d\n", ntohs(cp->cport));
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = rt->u.dst.pmtu;
+	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_nat_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* copy-on-write the packet before mangling it */
+	if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len, &iph, &h.raw))
+		return NF_DROP;
+
+	/* mangle the packet */
+	iph->daddr = cp->daddr;
+	h.portp[1] = cp->dport;
+
+	/*
+	 *	Attempt ip_vs_app call.
+	 *	will fix ip_vs_conn and iph ack_seq stuff
+	 */
+	if (ip_vs_app_pkt_in(cp, skb) != 0) {
+		/* skb data has probably changed, update pointers */
+		iph = skb->nh.iph;
+		h.raw = (char*) iph + ihl;
+		size = skb->len - ihl;
+	}
+
+	/*
+	 *	Adjust TCP/UDP checksums
+	 */
+	if (!cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		ip_vs_fast_check_update(&h, cp->vaddr, cp->daddr,
+					cp->vport, cp->dport, iph->protocol);
+		if (skb->ip_summed == CHECKSUM_HW)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		switch (iph->protocol) {
+		case IPPROTO_TCP:
+			h.th->check = 0;
+			h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+							size, iph->protocol,
+							csum_partial(h.raw, size, 0));
+			break;
+		case IPPROTO_UDP:
+			h.uh->check = 0;
+			h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+							size, iph->protocol,
+							csum_partial(h.raw, size, 0));
+			if (h.uh->check == 0)
+				h.uh->check = 0xFFFF;
+			break;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	ip_send_check(iph);
+
+	IP_VS_DBG(10, "NAT to %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr), ntohs(h.portp[1]));
+
+	/* FIXME: when application helper enlarges the packet and the length
+	   is larger than the MTU of outgoing device, there will be still
+	   MTU problem. */
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+#endif /* CONFIG_NETFILTER_DEBUG */
+	skb->nfcache |= NFC_IPVS_PROPERTY;
+	ip_send(skb);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);		/* freed here, hence NF_STOLEN below */
+	return NF_STOLEN;
+}
+
+
+/*
+ *   IP Tunneling transmitter
+ *
+ *   This function encapsulates the packet in a new IP packet, its
+ *   destination will be set to cp->daddr. Most code of this function
+ *   is taken from ipip.c.
+ *
+ *   It is used in VS/TUN cluster. The load balancer selects a real
+ *   server from a cluster based on a scheduling algorithm,
+ *   encapsulates the request packet and forwards it to the selected
+ *   server. For example, all real servers are configured with
+ *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
+ *   the encapsulated packet, it will decapsulate the packet, process
+ *   the request and return the response packets directly to the client
+ *   without passing the load balancer. This can greatly increase the
+ *   scalability of virtual server.
+ */
+static int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct net_device *tdev;		/* Device to other host */
+	struct iphdr  *old_iph = skb->nh.iph;
+	u8     tos = old_iph->tos;
+	u16    df = old_iph->frag_off;
+	struct iphdr  *iph;			/* Our new IP header */
+	int    max_headroom;			/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != __constant_htons(ETH_P_IP)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
+			     "ETH_P_IP: %d, skb protocol: %d\n",
+			     __constant_htons(ETH_P_IP), skb->protocol);
+		goto tx_error;
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+	if (mtu < 68) {
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
+		goto tx_error;
+	}
+	if (skb->dst && mtu < skb->dst->pmtu)
+		skb->dst->pmtu = mtu;
+
+	df |= (old_iph->frag_off&__constant_htons(IP_DF));
+
+	if ((old_iph->frag_off&__constant_htons(IP_DF))
+	    && mtu < ntohs(old_iph->tot_len)) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+#if 0
+	if (skb_is_nonlinear(skb))
+#endif
+		ip_send_check(old_iph);
+
+	skb->h.raw = skb->nh.raw;
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
+			return NF_STOLEN;	/* skb is gone, as in tx_error */
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+	}
+
+	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	skb->nh.iph;
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr)>>2;
+	iph->frag_off		=	df;
+	iph->protocol		=	IPPROTO_IPIP;
+	iph->tos		=	tos;
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+	iph->ttl		=	old_iph->ttl;
+	iph->tot_len		=	htons(skb->len);
+	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_send_check(iph);
+
+	skb->ip_summed = CHECKSUM_NONE;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+#endif /* CONFIG_NETFILTER_DEBUG */
+	skb->nfcache |= NFC_IPVS_PROPERTY;
+	ip_send(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);		/* freed here, hence NF_STOLEN below */
+	return NF_STOLEN;
+}
+
+
+/*
+ *      Direct Routing transmitter (addresses untouched, only rerouted)
+ */
+static int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = skb->nh.iph;
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking: a DF packet that does not fit is reported and freed */
+	mtu = rt->u.dst.pmtu;
+	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+#if 0
+	if (skb_is_nonlinear(skb) && skb->len <= mtu)
+#endif
+		ip_send_check(iph);	/* unconditional: the test is compiled out */
+
+	if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) {
+		if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
+			ip_rt_put(rt);
+			IP_VS_ERR_RL("ip_vs_dr_xmit(): no memory\n");
+			goto tx_error;
+		}
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+#endif /* CONFIG_NETFILTER_DEBUG */
+	skb->nfcache |= NFC_IPVS_PROPERTY;
+	ip_send(skb);
+
+#if 0000
+	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		do_ip_send);
+#endif
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);		/* freed here, hence NF_STOLEN below */
+	return NF_STOLEN;
+}
+
+
+/*
+ *  Pick the packet transmitter that goes with the forwarding method
+ *  of the connection entry.  Called by ip_vs_conn_new.
+ */
+static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
+{
+	int method = IP_VS_FWD_METHOD(cp);
+
+	/*
+	 *  A method not listed here leaves cp->packet_xmit untouched
+	 */
+	if (method == IP_VS_CONN_F_MASQ) {
+		/* NAT */
+		cp->packet_xmit = ip_vs_nat_xmit;
+	} else if (method == IP_VS_CONN_F_TUNNEL) {
+		/* IP tunneling */
+		cp->packet_xmit = ip_vs_tunnel_xmit;
+	} else if (method == IP_VS_CONN_F_DROUTE) {
+		/* direct routing */
+		cp->packet_xmit = ip_vs_dr_xmit;
+	} else if (method == IP_VS_CONN_F_LOCALNODE) {
+		/* local node: nothing to transmit */
+		cp->packet_xmit = ip_vs_null_xmit;
+	} else if (method == IP_VS_CONN_F_BYPASS) {
+		/* bypass the destination */
+		cp->packet_xmit = ip_vs_bypass_xmit;
+	}
+}
+
+
+/*
+ *  Attach a freshly created connection entry to its destination server.
+ *  A NULL dest is allowed and leaves the entry unbound.
+ */
+static inline void
+ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+	if (dest == NULL)
+		return;
+
+	/* the entry counts as a user of the dest until ip_vs_unbind_dest */
+	atomic_inc(&dest->refcnt);
+	cp->dest = dest;
+
+	/*
+	 *    The connection flags configured on the dest are added to
+	 *    the flags the entry was created with.
+	 */
+	cp->flags |= atomic_read(&dest->conn_flags);
+
+	IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+		  "d:%u.%u.%u.%u:%d fwd:%c s:%s flg:%X cnt:%d destcnt:%d\n",
+		  ip_vs_proto_name(cp->protocol),
+		  NIPQUAD(cp->caddr), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
+		  cp->flags, atomic_read(&cp->refcnt),
+		  atomic_read(&dest->refcnt));
+}
+
+
+/*
+ *  Unbind a connection entry with its VS destination
+ *  Called by the ip_vs_conn_expire function.
+ */
+static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (dest) {
+		IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d "
+			  "v:%u.%u.%u.%u:%d d:%u.%u.%u.%u:%d fwd:%c "
+			  "s:%s flg:%X cnt:%d destcnt:%d\n",
+			  ip_vs_proto_name(cp->protocol),
+			  NIPQUAD(cp->caddr), ntohs(cp->cport),
+			  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+			  NIPQUAD(cp->daddr), ntohs(cp->dport),
+			  ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
+			  cp->flags, atomic_read(&cp->refcnt),
+			  atomic_read(&dest->refcnt));
+
+		/*
+		 * Decrease the inactconns or activeconns counter
+		 * if it is not a connection template ((cp->cport!=0)
+		 *   || (cp->flags & IP_VS_CONN_F_NO_CPORT)).
+		 */
+		if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+			if (cp->flags & IP_VS_CONN_F_INACTIVE) {
+				atomic_dec(&dest->inactconns);
+			} else {
+				atomic_dec(&dest->activeconns);
+			}
+		}
+
+		/*
+		 * Simply decrease the refcnt of the dest, because the
+		 * dest will be either in service's destination list
+		 * or in the trash.
+		 */
+		atomic_dec(&dest->refcnt);	/* taken in ip_vs_bind_dest */
+	}
+}
+
+
+/*
+ *  Checking if the destination of a connection template is available.
+ *  If available, return 1, otherwise invalidate this connection
+ *  template, drop the caller's reference on it and return 0.
+ */
+int ip_vs_check_template(struct ip_vs_conn *ct)
+{
+	struct ip_vs_dest *dest = ct->dest;
+
+	/*
+	 * Checking the dest server status.
+	 */
+	if ((dest == NULL) ||
+	    !(dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		IP_VS_DBG(9, "check_template: dest not available for "
+			  "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+			  "-> d:%u.%u.%u.%u:%d\n",
+			  ip_vs_proto_name(ct->protocol),
+			  NIPQUAD(ct->caddr), ntohs(ct->cport),
+			  NIPQUAD(ct->vaddr), ntohs(ct->vport),
+			  NIPQUAD(ct->daddr), ntohs(ct->dport));
+
+		/*
+		 * Invalidate the template: rehash it with dummy ports
+		 */
+		ip_vs_conn_unhash(ct);
+		ct->dport = 65535;	/* 0xFFFF: same in either byte order */
+		ct->vport = 65535;
+		ct->cport = 0;
+		ip_vs_conn_hash(ct);
+
+		/*
+		 * Simply decrease the refcnt of the template,
+		 * don't restart its timer.
+		 */
+		atomic_dec(&ct->refcnt);
+		return 0;
+	}
+	return 1;
+}
+
+/* Make vstim the timeout table of cp and count cp as one of its users */
+static inline void
+ip_vs_timeout_attach(struct ip_vs_conn *cp, struct ip_vs_timeout_table *vstim)
+{
+	cp->timeout_table = vstim;
+	atomic_inc(&vstim->refcnt);
+}
+
+static inline void ip_vs_timeout_detach(struct ip_vs_conn *cp)
+{
+	struct ip_vs_timeout_table *old = cp->timeout_table;
+
+	if (old != NULL) {	/* else no table attached, nothing to undo */
+		cp->timeout_table = NULL;
+		atomic_dec(&old->refcnt);
+	}
+}
+
+/* Timer handler: free cp if nobody else uses it, else re-arm its timer */
+static void ip_vs_conn_expire(unsigned long data)
+{
+	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+
+	/* timeout used if the entry has to be kept: the TIME_WAIT one */
+	if (cp->timeout_table)
+		cp->timeout = cp->timeout_table->timeout[IP_VS_S_TIME_WAIT];
+	else
+		cp->timeout = vs_timeout_table.timeout[IP_VS_S_TIME_WAIT];
+
+	/*
+	 *	hey, I'm using it
+	 */
+	atomic_inc(&cp->refcnt);
+
+	/*
+	 *	do I control anybody?
+	 */
+	if (atomic_read(&cp->n_control))
+		goto expire_later;
+
+	/*
+	 *	unhash it if it is hashed in the conn table
+	 */
+	ip_vs_conn_unhash(cp);
+
+	/*
+	 *	refcnt==1 implies I'm the only one referrer
+	 */
+	if (likely(atomic_read(&cp->refcnt) == 1)) {
+		/* make sure that there is no timer on it now */
+		if (timer_pending(&cp->timer))
+			del_sltimer(&cp->timer);
+
+		/* does anybody control me? */
+		if (cp->control)
+			ip_vs_control_del(cp);
+
+		ip_vs_unbind_dest(cp);
+		ip_vs_unbind_app(cp);
+		ip_vs_timeout_detach(cp);
+		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
+			atomic_dec(&ip_vs_conn_no_cport_cnt);
+		atomic_dec(&ip_vs_conn_count);
+
+		kmem_cache_free(ip_vs_conn_cachep, cp);
+		return;
+	}
+
+	/* hash it back to the table */
+	ip_vs_conn_hash(cp);
+
+  expire_later:
+	IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n",
+		  atomic_read(&cp->refcnt)-1,
+		  atomic_read(&cp->n_control));
+
+	ip_vs_conn_put(cp);	/* restarts the timer, drops my reference */
+}
+
+/* Like ip_vs_conn_put(), but with the timer set to fire at once */
+void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
+{
+	cp->timeout = 0;
+	mod_sltimer(&cp->timer, jiffies);
+	__ip_vs_conn_put(cp);
+}
+
+/*
+ *  Create a new connection entry and hash it into the ip_vs_conn_tab.
+ */
+struct ip_vs_conn *
+ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport,
+	       __u32 daddr, __u16 dport, unsigned flags,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_conn *cp;
+
+	cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	if (cp == NULL) {
+		IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
+		return NULL;
+	}
+
+	memset(cp, 0, sizeof(*cp));
+	INIT_LIST_HEAD(&cp->c_list);
+	init_timer(&cp->timer);
+	cp->timer.data     = (unsigned long)cp;
+	cp->timer.function = ip_vs_conn_expire;
+	ip_vs_timeout_attach(cp, ip_vs_timeout_table);
+	cp->protocol	   = proto;
+	cp->caddr	   = caddr;
+	cp->cport	   = cport;
+	cp->vaddr	   = vaddr;
+	cp->vport	   = vport;
+	cp->daddr          = daddr;
+	cp->dport          = dport;
+	cp->flags	   = flags;
+	cp->app_data	   = NULL;
+	cp->control	   = NULL;
+	cp->lock           = SPIN_LOCK_UNLOCKED;
+
+	atomic_set(&cp->n_control, 0);
+	atomic_set(&cp->in_pkts, 0);
+
+	atomic_inc(&ip_vs_conn_count);
+	if (flags & IP_VS_CONN_F_NO_CPORT)
+		atomic_inc(&ip_vs_conn_no_cport_cnt);
+
+	/* Bind its application helper (only for VS/NAT) if any */
+	ip_vs_bind_app(cp);
+
+	/* Bind the connection with a destination server */
+	ip_vs_bind_dest(cp, dest);
+
+	/* Set its state and timeout */
+	vs_set_state_timeout(cp, IP_VS_S_NONE);
+
+	/* Bind its packet transmitter (needs the flags merged in from dest) */
+	ip_vs_bind_xmit(cp);
+
+	/*
+	 * Set the entry is referenced by the current thread before hashing
+	 * it in the table, so that other thread run ip_vs_random_dropentry
+	 * but cannot drop this entry.
+	 */
+	atomic_set(&cp->refcnt, 1);
+
+	/* Hash it in the ip_vs_conn_tab finally */
+	ip_vs_conn_hash(cp);
+
+	return cp;	/* refcnt is 1: the caller's reference */
+}
+
+
+/*
+ *	/proc/net/ip_vs_conn: a header and one 128-byte record per entry
+ */
+static int
+ip_vs_conn_getinfo(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos=0;
+	int idx, len=0;
+	char temp[128];	/* a wrapped expiry can print 18 digits, 70 was short */
+	struct ip_vs_conn *cp;
+	struct list_head *l, *e;
+
+	pos = 128;
+	if (pos > offset) {
+		len += sprintf(buffer+len, "%-127s\n",
+			       "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires");
+	}
+
+	for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		/*
+		 *	Lock is actually only needed in next loop
+		 *	we are called from uspace: must stop bh.
+		 */
+		ct_read_lock_bh(idx);
+
+		l = &ip_vs_conn_tab[idx];
+		for (e=l->next; e!=l; e=e->next) {
+			cp = list_entry(e, struct ip_vs_conn, c_list);
+			pos += 128;
+			if (pos <= offset)
+				continue;
+			sprintf(temp,
+				"%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu",
+				ip_vs_proto_name(cp->protocol),
+				ntohl(cp->caddr), ntohs(cp->cport),
+				ntohl(cp->vaddr), ntohs(cp->vport),
+				ntohl(cp->daddr), ntohs(cp->dport),
+				ip_vs_state_name(cp->state),
+				(cp->timer.expires-jiffies)/HZ);
+			len += sprintf(buffer+len, "%-127s\n", temp);
+			if (pos >= offset+length) {
+				ct_read_unlock_bh(idx);
+				goto done;
+			}
+		}
+		ct_read_unlock_bh(idx);
+	}
+
+  done:
+	*start = buffer+len-(pos-offset);       /* Start of wanted data */
+	len = pos-offset;
+	if (len > length)
+		len = length;
+	if (len < 0)
+		len = 0;
+	return len;
+}
+
+
+/*
+ *      Decide whether an entry is to be dropped (1) or kept (0)
+ */
+static inline int todrop_entry(struct ip_vs_conn *cp)
+{
+	/*
+	 * The drop rate array needs tuning for real environments.
+	 * Called from timer bh only => no locking
+	 */
+	static char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+	static char todrop_counter[9] = {0};
+	int pkts;
+
+	/* Keep the entry while less than 60 seconds have passed since its
+	   timer was set, so that normal connections can get through. */
+	if (cp->timeout+jiffies-cp->timer.expires < 60*HZ)
+		return 0;
+
+	pkts = atomic_read(&cp->in_pkts);
+	if (pkts < 0 || pkts > 8)
+		return 0;
+	if (todrop_rate[pkts] == 0)
+		return 0;
+
+	/* one candidate out of every todrop_rate[pkts] gets dropped */
+	if (--todrop_counter[pkts] > 0)
+		return 0;
+	todrop_counter[pkts] = todrop_rate[pkts];
+	return 1;
+}
+
+/* Expire droppable entries found in randomly chosen hash buckets */
+void ip_vs_random_dropentry(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+	struct list_head *l,*e;
+	struct ip_vs_conn *ct;
+
+	/*
+	 * Randomly scan 1/32 of the whole table every second
+	 */
+	for (idx=0; idx<(IP_VS_CONN_TAB_SIZE>>5); idx++) {
+		unsigned hash = net_random()&IP_VS_CONN_TAB_MASK;
+		/*
+		 *  Lock is actually needed in this loop.  NOTE(review): it is
+		 *  dropped below; confirm e stays valid across the relock.
+		 */
+		ct_write_lock(hash);
+
+		l = &ip_vs_conn_tab[hash];
+		for (e=l->next; e!=l; e=e->next) {
+			cp = list_entry(e, struct ip_vs_conn, c_list);
+			if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT))
+				/* connection template */
+				continue;
+			switch(cp->state) {
+			case IP_VS_S_SYN_RECV:
+			case IP_VS_S_SYNACK:
+				break;	/* always dropped */
+
+			case IP_VS_S_ESTABLISHED:
+			case IP_VS_S_UDP:
+				if (todrop_entry(cp))
+					break;
+				continue;
+
+			default:
+				continue;
+			}
+
+			/*
+			 * Drop the entry, and drop its ct if not referenced
+			 */
+			atomic_inc(&cp->refcnt);
+			ct_write_unlock(hash);
+
+			if ((ct = cp->control))
+				atomic_inc(&ct->refcnt);
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (ct) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(ct);
+			}
+			ct_write_lock(hash);
+		}
+		ct_write_unlock(hash);
+	}
+}
+
+
+/*
+ *      Flush all the connection entries in the ip_vs_conn_tab
+ */
+static void ip_vs_conn_flush(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+	struct list_head *l,*e;
+	struct ip_vs_conn *ct;
+
+  flush_again:
+	for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
+		/*
+		 *  Lock is needed here, but dropped around expire_now below
+		 */
+		ct_write_lock_bh(idx);
+
+		l = &ip_vs_conn_tab[idx];
+		for (e=l->next; e!=l; e=e->next) {
+			cp = list_entry(e, struct ip_vs_conn, c_list);
+			atomic_inc(&cp->refcnt);
+			ct_write_unlock(idx);	/* plain variant, not _bh */
+
+			if ((ct = cp->control))
+				atomic_inc(&ct->refcnt);
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (ct) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(ct);
+			}
+			ct_write_lock(idx);
+		}
+		ct_write_unlock_bh(idx);
+	}
+
+	/* the counter may be not zero, because maybe some conn entries
+	   are run by slow timer handler or unhashed but still referred */
+	if (atomic_read(&ip_vs_conn_count) != 0) {
+		schedule();
+		goto flush_again;
+	}
+}
+
+/* Set up the hash table, slab cache, locks and /proc entry; 0 or -ENOMEM */
+int ip_vs_conn_init(void)
+{
+	int idx;
+
+	/*
+	 * Allocate the connection hash table and initialize its list heads
+	 */
+	ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
+	if (!ip_vs_conn_tab)
+		return -ENOMEM;
+
+	/* Allocate ip_vs_conn slab cache */
+	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+					      sizeof(struct ip_vs_conn), 0,
+					      SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!ip_vs_conn_cachep) {
+		vfree(ip_vs_conn_tab);
+		return -ENOMEM;
+	}
+
+	IP_VS_INFO("Connection hash table configured "
+		   "(size=%d, memory=%ldKbytes)\n",
+		   IP_VS_CONN_TAB_SIZE,
+		   (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
+	IP_VS_DBG(0, "Each connection entry needs %ld bytes at least\n",
+		  (long)sizeof(struct ip_vs_conn));	/* sizeof is not an int */
+
+	for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
+	}
+
+	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
+		__ip_vs_conntbl_lock_array[idx].l = RW_LOCK_UNLOCKED;
+	}
+
+	proc_net_create("ip_vs_conn", 0, ip_vs_conn_getinfo);
+
+	return 0;
+}
+
+void ip_vs_conn_cleanup(void)
+{
+	/* flush first: it only returns once ip_vs_conn_count is zero */
+	ip_vs_conn_flush();
+
+	/* Release the empty cache, then the /proc entry and the table */
+	kmem_cache_destroy(ip_vs_conn_cachep);
+	proc_net_remove("ip_vs_conn");
+	vfree(ip_vs_conn_tab);
+}
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_core.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_core.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_core.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_core.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,1290 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_core.c,v 1.31.2.2 2003/04/11 14:02:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+EXPORT_SYMBOL(register_ip_vs_scheduler);
+EXPORT_SYMBOL(unregister_ip_vs_scheduler);
+EXPORT_SYMBOL(ip_vs_skb_replace);
+EXPORT_SYMBOL(ip_vs_proto_name);
+EXPORT_SYMBOL(ip_vs_conn_new);
+EXPORT_SYMBOL(ip_vs_conn_in_get);
+EXPORT_SYMBOL(ip_vs_conn_out_get);
+EXPORT_SYMBOL(ip_vs_conn_listen);
+EXPORT_SYMBOL(ip_vs_conn_put);
+#ifdef CONFIG_IP_VS_DEBUG
+EXPORT_SYMBOL(ip_vs_get_debug_level);
+#endif
+EXPORT_SYMBOL(check_for_ip_vs_out);
+
+
+/* ID used in ICMP lookups: echo id of the ICMP header icmph points to */
+#define icmp_id(icmph)          (((icmph)->un).echo.id)
+
+const char *ip_vs_proto_name(unsigned proto)
+{
+	static char buf[20];	/* shared by all callers: result not reentrant */
+	/* known protocols map to literals, others are formatted as "IP_<n>" */
+	switch (proto) {
+	case IPPROTO_IP:
+		return "IP";
+	case IPPROTO_UDP:
+		return "UDP";
+	case IPPROTO_TCP:
+		return "TCP";
+	case IPPROTO_ICMP:
+		return "ICMP";
+	default:
+		snprintf(buf, sizeof(buf), "IP_%u", proto); /* proto is unsigned */
+		return buf;
+	}
+}
+
+
+static inline void
+ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *rs = cp->dest;
+
+	/* count an incoming packet only if cp has an available dest */
+	if (!rs || !(rs->flags & IP_VS_DEST_F_AVAILABLE))
+		return;
+	spin_lock(&rs->stats.lock);		/* per-destination */
+	rs->stats.inpkts++;
+	rs->stats.inbytes += skb->len;
+	spin_unlock(&rs->stats.lock);
+	spin_lock(&rs->svc->stats.lock);	/* per-service */
+	rs->svc->stats.inpkts++;
+	rs->svc->stats.inbytes += skb->len;
+	spin_unlock(&rs->svc->stats.lock);
+	spin_lock(&ip_vs_stats.lock);		/* global */
+	ip_vs_stats.inpkts++;
+	ip_vs_stats.inbytes += skb->len;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+static inline void
+ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *rs = cp->dest;
+
+	/* count an outgoing packet only if cp has an available dest */
+	if (!rs || !(rs->flags & IP_VS_DEST_F_AVAILABLE))
+		return;
+	spin_lock(&rs->stats.lock);		/* per-destination */
+	rs->stats.outpkts++;
+	rs->stats.outbytes += skb->len;
+	spin_unlock(&rs->stats.lock);
+	spin_lock(&rs->svc->stats.lock);	/* per-service */
+	rs->svc->stats.outpkts++;
+	rs->svc->stats.outbytes += skb->len;
+	spin_unlock(&rs->svc->stats.lock);
+	spin_lock(&ip_vs_stats.lock);		/* global */
+	ip_vs_stats.outpkts++;
+	ip_vs_stats.outbytes += skb->len;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+static inline void
+ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *rs = cp->dest;	/* used without a NULL check */
+	/* one more connection at each level: destination, service, global */
+	spin_lock(&rs->stats.lock);
+	rs->stats.conns++;
+	spin_unlock(&rs->stats.lock);
+	spin_lock(&svc->stats.lock);
+	svc->stats.conns++;
+	spin_unlock(&svc->stats.lock);
+	spin_lock(&ip_vs_stats.lock);
+	ip_vs_stats.conns++;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+/*
+ *  IPVS persistent scheduling function
+ *  Creates a connection entry from an existing template, or schedules a
+ *  server and creates both a template and an entry. Returns NULL on failure.
+ *  Locking: we are svc user (svc->refcnt), so we hold all dests too
+ */
+static struct ip_vs_conn *
+ip_vs_sched_persist(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_dest *dest;
+	const __u16 *portp;	/* portp[0]: source port, portp[1]: dest port */
+	struct ip_vs_conn *ct;	/* connection template */
+	__u16  dport;	 /* destination port to forward */
+	__u32  snet;	 /* source network of the client, after masking */
+
+	portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
+
+	/* Mask saddr with the netmask to adjust template granularity */
+	snet = iph->saddr & svc->netmask;
+
+	IP_VS_DBG(6, "P-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
+		  "mnet %u.%u.%u.%u\n",
+		  NIPQUAD(iph->saddr), ntohs(portp[0]),
+		  NIPQUAD(iph->daddr), ntohs(portp[1]),
+		  NIPQUAD(snet));
+
+	/*
+	 * FTP is a complicated protocol that uses one control connection
+	 * and separate data connections. For active FTP, the FTP server
+	 * initiates the data connection to the client, usually from source
+	 * port 20. For passive FTP, the server tells the client which port
+	 * it listens on, and the client opens the data connection. In
+	 * tunneling or direct routing mode the load balancer only sees the
+	 * client-to-server half of the connection, so that port number is
+	 * unknown to it. Therefore a conn template like
+	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
+	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
+	 * is created for other persistent services.
+	 */
+	if (portp[1] == svc->port) {
+		/* Check if a template already exists */
+		if (svc->port != FTPPORT)
+			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+					       iph->daddr, portp[1]);
+		else
+			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+					       iph->daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * No template found or the dest of the connection
+			 * template is not available.
+			 */
+			dest = svc->scheduler->schedule(svc, iph);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "P-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template like <protocol,caddr,0,
+			 * vaddr,vport,daddr,dport> for non-ftp service,
+			 * and <protocol,caddr,0,vaddr,0,daddr,0>
+			 * for ftp service.
+			 */
+			if (svc->port != FTPPORT)
+				ct = ip_vs_conn_new(iph->protocol,
+						    snet, 0,
+						    iph->daddr, portp[1],
+						    dest->addr, dest->port,
+						    0,
+						    dest);
+			else
+				ct = ip_vs_conn_new(iph->protocol,
+						    snet, 0,
+						    iph->daddr, 0,
+						    dest->addr, 0,
+						    0,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = dest->port;	/* port matched: use the server's port */
+	} else {
+		/*
+		 * Note: persistent fwmark-based services and persistent
+		 * port zero service are handled here.
+		 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
+		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
+		 */
+		if (svc->fwmark)
+			ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+					       htonl(svc->fwmark), 0);
+		else
+			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+					       iph->daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * If it is not persistent port zero, return NULL,
+			 * otherwise create a connection template.
+			 */
+			if (svc->port)
+				return NULL;
+
+			dest = svc->scheduler->schedule(svc, iph);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "P-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template according to the service
+			 */
+			if (svc->fwmark)
+				ct = ip_vs_conn_new(IPPROTO_IP,
+						    snet, 0,
+						    htonl(svc->fwmark), 0,
+						    dest->addr, 0,
+						    0,
+						    dest);
+			else
+				ct = ip_vs_conn_new(iph->protocol,
+						    snet, 0,
+						    iph->daddr, 0,
+						    dest->addr, 0,
+						    0,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = portp[1];	/* keep the packet's destination port */
+	}
+
+	/*
+	 *    Create a new connection according to the template
+	 */
+	cp = ip_vs_conn_new(iph->protocol,
+			    iph->saddr, portp[0],
+			    iph->daddr, portp[1],
+			    dest->addr, dport,
+			    0,
+			    dest);
+	if (cp == NULL) {
+		ip_vs_conn_put(ct);
+		return NULL;
+	}
+
+	/*
+	 *    Increase the inactive connection counter
+	 *    because it is in Syn-Received
+	 *    state (inactive) when the connection is created.
+	 */
+	atomic_inc(&dest->inactconns);
+
+	/*
+	 *    Make the template the control connection of the new entry
+	 */
+	ip_vs_control_add(cp, ct);
+
+	ip_vs_conn_put(ct);
+	return cp;
+}
+
+
+/*
+ *  IPVS main scheduling function
+ *  It selects a server according to the virtual service, and
+ *  creates a connection entry. Returns the entry, or NULL on failure.
+ */
+static struct ip_vs_conn *
+ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_dest *dest;
+	const __u16 *portp;	/* portp[0]: source port, portp[1]: dest port */
+
+	/*
+	 *    Persistent service: handled entirely by ip_vs_sched_persist()
+	 */
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, iph);
+
+	/*
+	 *    Non-persistent service: without fwmark the ports must match
+	 */
+	portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
+	if (!svc->fwmark && portp[1] != svc->port) {
+		if (!svc->port)
+			IP_VS_ERR("Schedule: port zero only supported "
+				  "in persistent services, "
+				  "check your ipvs configuration\n");
+		return NULL;
+	}
+
+	dest = svc->scheduler->schedule(svc, iph);
+	if (dest == NULL) {
+		IP_VS_DBG(1, "Schedule: no dest found.\n");
+		return NULL;
+	}
+
+	/*
+	 *    Create a connection entry; a zero dest->port keeps portp[1].
+	 */
+	cp = ip_vs_conn_new(iph->protocol,
+			    iph->saddr, portp[0],
+			    iph->daddr, portp[1],
+			    dest->addr, dest->port?dest->port:portp[1],
+			    0,
+			    dest);
+	if (cp == NULL)
+		return NULL;
+
+	/*
+	 *    Increase the inactive connection counter because it is in
+	 *    Syn-Received state (inactive) when the connection is created.
+	 */
+	atomic_inc(&dest->inactconns);
+
+	IP_VS_DBG(6, "Schedule fwd:%c s:%s c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
+		  "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+		  ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
+		  NIPQUAD(cp->caddr), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  cp->flags, atomic_read(&cp->refcnt));
+
+	return cp;
+}
+
+
+/*
+ *  Pass or drop the packet. Called by ip_vs_in, when the virtual service
+ *  is available but no destination is available for a new connection.
+ *  Calls ip_vs_service_put(svc) on every path; returns an NF_* verdict
+ *  (NF_STOLEN whenever the skb has been freed here).
+ */
+static int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+	__u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
+
+	/* if it is fwmark-based service, the cache_bypass sysctl is up
+	   and the destination is RTN_UNICAST (and not local), then create
+	   a cache_bypass connection entry */
+	if (sysctl_ip_vs_cache_bypass && svc->fwmark
+	    && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
+		int ret;
+		struct ip_vs_conn *cp;
+
+		ip_vs_service_put(svc);
+
+		/* create a new connection entry */
+		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
+		cp = ip_vs_conn_new(iph->protocol,
+				    iph->saddr, portp[0],
+				    iph->daddr, portp[1],
+				    0, 0,
+				    IP_VS_CONN_F_BYPASS,
+				    NULL);
+		if (cp == NULL) {
+			kfree_skb(skb);
+			return NF_STOLEN;
+		}
+
+		/* statistics */
+		ip_vs_in_stats(cp, skb);
+
+		/* set state */
+		ip_vs_set_state(cp, VS_STATE_INPUT, iph, portp);
+
+		/* transmit the first SYN packet */
+		ret = cp->packet_xmit(skb, cp);
+
+		atomic_inc(&cp->in_pkts);
+		ip_vs_conn_put(cp);
+		return ret;
+	}
+
+	/*
+	 * When the virtual ftp service is presented, packets destined
+	 * for other services on the VIP may get here (except services
+	 * listed in the ipvs table), pass the packets, because it is
+	 * not the job of ipvs to decide whether to drop them.
+	 */
+	if ((svc->port == FTPPORT) && (portp[1] != FTPPORT)) {
+		ip_vs_service_put(svc);
+		return NF_ACCEPT;
+	}
+
+	ip_vs_service_put(svc);
+
+	/*
+	 * Notify the client that the destination is unreachable, and
+	 * release the socket buffer.
+	 * Since it is in IP layer, the TCP socket is not actually
+	 * created, the TCP RST packet cannot be sent, instead that
+	 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
+	 */
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+	kfree_skb(skb);
+	return NF_STOLEN;
+}
+
+
+/*
+ *      Hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING chain
+ *      and used for VS/NAT.
+ *      A packet carrying NFC_IPVS_PROPERTY is passed to okfn at once
+ *      and reported as stolen, so that iptable_nat does not mangle
+ *      packets of VS/NAT connections. Other packets are accepted.
+ */
+static unsigned int ip_vs_post_routing(unsigned int hooknum,
+				       struct sk_buff **skb_p,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *pkt = *skb_p;
+
+	/* not marked by IPVS: let it continue along this chain */
+	if ((pkt->nfcache & NFC_IPVS_PROPERTY) == 0)
+		return NF_ACCEPT;
+
+	/* The packet was sent from IPVS, exit this chain */
+	okfn(pkt);
+	return NF_STOLEN;
+}
+
+
+/*
+ *	Handle ICMP messages in the inside-to-outside direction (outgoing).
+ *	Find any that might be relevant, check against existing connections,
+ *	rewrite them to the virtual address/port if relevant.
+ *	Currently handles error types - unreachable, quench, ttl exceeded.
+ *      (Only used in VS/NAT). Returns an NF_* verdict.
+ */
+static int ip_vs_out_icmp(struct sk_buff **skb_p)
+{
+	struct sk_buff	*skb   = *skb_p;
+	struct iphdr	*iph;
+	struct icmphdr	*icmph;
+	struct iphdr	*ciph;	/* The ip header contained within the ICMP */
+	__u16		*pptr;	/* port numbers from TCP/UDP contained header */
+	unsigned short	ihl;
+	unsigned short	len;
+	unsigned short	clen, csize;
+	struct ip_vs_conn *cp;
+
+	/* reassemble IP fragments, but will it happen in ICMP packets?? */
+	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+		skb = ip_defrag(skb);
+		if (!skb)
+			return NF_STOLEN;
+		*skb_p = skb;
+	}
+
+	if (skb_is_nonlinear(skb)) {
+		if (skb_linearize(skb, GFP_ATOMIC) != 0)
+			return NF_DROP;
+		ip_send_check(skb->nh.iph);
+	}
+
+	iph = skb->nh.iph;
+	ihl = iph->ihl << 2;
+	icmph = (struct icmphdr *)((char *)iph + ihl);
+	len   = ntohs(iph->tot_len) - ihl;	/* ICMP header plus payload */
+	if (len < sizeof(struct icmphdr))
+		return NF_DROP;
+
+	IP_VS_DBG(12, "outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+		  icmph->type, ntohs(icmp_id(icmph)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((icmph->type != ICMP_DEST_UNREACH) &&
+	    (icmph->type != ICMP_SOURCE_QUENCH) &&
+	    (icmph->type != ICMP_TIME_EXCEEDED))
+		return NF_ACCEPT;
+
+	/* Now find the contained IP header */
+	clen = len - sizeof(struct icmphdr);
+	if (clen < sizeof(struct iphdr))
+		return NF_DROP;
+	ciph = (struct iphdr *) (icmph + 1);
+	csize = ciph->ihl << 2;
+	if (clen < csize)
+		return NF_DROP;
+
+	/* We are only interested ICMPs generated from TCP or UDP packets */
+	if (ciph->protocol != IPPROTO_UDP && ciph->protocol != IPPROTO_TCP)
+		return NF_ACCEPT;
+
+	/* Skip non-first embedded TCP/UDP fragments */
+	if (ciph->frag_off & __constant_htons(IP_OFFSET))
+		return NF_ACCEPT;
+
+	/* We need at least TCP/UDP ports here */
+	if (clen < csize + sizeof(struct udphdr))
+		return NF_DROP;
+
+	/*
+	 * Find the ports involved - this packet was
+	 * incoming so the ports are right way round
+	 * (but reversed relative to outer IP header!)
+	 */
+	pptr = (__u16 *)&(((char *)ciph)[csize]);
+
+	/* Ensure the checksum is correct */
+	if (ip_compute_csum((unsigned char *) icmph, len)) {
+		/* Failed checksum! */
+		IP_VS_DBG(1, "forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+			  NIPQUAD(iph->saddr));
+		return NF_DROP;
+	}
+
+	IP_VS_DBG(11, "Handling outgoing ICMP for "
+		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
+		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
+		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));
+
+	/* ciph content is actually <protocol, caddr, cport, daddr, dport> */
+	cp = ip_vs_conn_out_get(ciph->protocol, ciph->daddr, pptr[1],
+				ciph->saddr, pptr[0]);
+	if (!cp)
+		return NF_ACCEPT;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Now we do real damage to this packet...! */
+	/* First change the source IP address, and recalc checksum */
+	iph->saddr = cp->vaddr;
+	ip_send_check(iph);
+
+	/* Now change the *dest* address in the contained IP */
+	ciph->daddr = cp->vaddr;
+	ip_send_check(ciph);
+
+	/* the TCP/UDP dest port - cannot redo check */
+	pptr[1] = cp->vport;
+
+	/* And finally the ICMP checksum */
+	icmph->checksum = 0;
+	icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+	ip_vs_conn_put(cp);
+
+	IP_VS_DBG(11, "Forwarding correct outgoing ICMP to "
+		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
+		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
+		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));
+
+	skb->nfcache |= NFC_IPVS_PROPERTY;	/* mark as handled by IPVS */
+
+	return NF_ACCEPT;
+}
+
+
+/*
+ *	It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
+ *	Check if outgoing packet belongs to the established ip_vs_conn,
+ *      rewrite its source address/port and mark it as handled by IPVS.
+ */
+static unsigned int ip_vs_out(unsigned int hooknum,
+			      struct sk_buff **skb_p,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff  *skb = *skb_p;
+	struct iphdr	*iph;
+	union ip_vs_tphdr h;
+	struct ip_vs_conn *cp;
+	int size;
+	int ihl;
+
+	EnterFunction(11);
+
+	if (skb->nfcache & NFC_IPVS_PROPERTY)
+		return NF_ACCEPT;
+
+	iph = skb->nh.iph;
+	if (iph->protocol == IPPROTO_ICMP)
+		return ip_vs_out_icmp(skb_p);
+
+	/* let it go if other IP protocols */
+	if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
+		return NF_ACCEPT;
+
+	/* reassemble IP fragments */
+	if (iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+		skb = ip_defrag(skb);
+		if (!skb)
+			return NF_STOLEN;
+		iph = skb->nh.iph;
+		*skb_p = skb;
+	}
+
+	/* make sure that protocol header available in skb data area,
+	   note that skb data area may be reallocated. */
+	ihl = iph->ihl << 2;
+	if (ip_vs_header_check(skb, iph->protocol, ihl) == -1)
+		return NF_DROP;
+
+	iph = skb->nh.iph;
+	h.raw = (char*) iph + ihl;
+
+	/*
+	 *	Check if the packet belongs to an old entry
+	 */
+	cp = ip_vs_conn_out_get(iph->protocol, iph->saddr, h.portp[0],
+				iph->daddr, h.portp[1]);
+	if (!cp) {
+		if (sysctl_ip_vs_nat_icmp_send &&
+		    ip_vs_lookup_real_service(iph->protocol,
+					      iph->saddr, h.portp[0])) {
+			/*
+			 * Notify the real server: there is no existing
+			 * entry if it is not RST packet or not TCP packet.
+			 * Test the protocol first: UDP has no TCP flags.
+			 */
+			if (iph->protocol != IPPROTO_TCP || !h.th->rst) {
+				icmp_send(skb, ICMP_DEST_UNREACH,
+					  ICMP_PORT_UNREACH, 0);
+				kfree_skb(skb);
+				return NF_STOLEN;
+			}
+		}
+		IP_VS_DBG(12, "packet for %s %d.%d.%d.%d:%d "
+			  "continue traversal as normal.\n",
+			  ip_vs_proto_name(iph->protocol),
+			  NIPQUAD(iph->daddr),
+			  ntohs(h.portp[1]));
+		if (skb_is_nonlinear(skb))
+			ip_send_check(iph);
+		return NF_ACCEPT;
+	}
+	/*
+	 * If it has ip_vs_app helper, the helper may change the payload,
+	 * so it needs full checksum checking and checksum calculation.
+	 * If not, only the header (addr/port) is changed, so it is fast
+	 * to do incremental checksum update, and let the destination host
+	 * do final checksum checking.
+	 */
+
+	if (cp->app && skb_is_nonlinear(skb)) {
+		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
+			ip_vs_conn_put(cp);
+			return NF_DROP;
+		}
+		iph = skb->nh.iph;
+		h.raw = (char*) iph + ihl;
+	}
+
+	size = skb->len - ihl;
+	IP_VS_DBG(11, "O-pkt: %s size=%d\n",
+		  ip_vs_proto_name(iph->protocol), size);
+
+	/* do TCP/UDP checksum checking if it has application helper */
+	if (cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
+		switch (skb->ip_summed) {
+		case CHECKSUM_NONE:
+			skb->csum = csum_partial(h.raw, size, 0); /* fall through */
+		case CHECKSUM_HW:
+			if (csum_tcpudp_magic(iph->saddr, iph->daddr, size,
+					      iph->protocol, skb->csum)) {
+				ip_vs_conn_put(cp);
+				IP_VS_DBG_RL("Outgoing failed %s checksum "
+					     "from %d.%d.%d.%d (size=%d)!\n",
+					     ip_vs_proto_name(iph->protocol),
+					     NIPQUAD(iph->saddr),
+					     size);
+				return NF_DROP;
+			}
+			break;
+		default:
+			/* CHECKSUM_UNNECESSARY */
+			break;
+		}
+	}
+
+	IP_VS_DBG(11, "Outgoing %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
+		  ip_vs_proto_name(iph->protocol),
+		  NIPQUAD(iph->saddr), ntohs(h.portp[0]),
+		  NIPQUAD(iph->daddr), ntohs(h.portp[1]));
+
+	/* mangle the packet */
+	iph->saddr = cp->vaddr;
+	h.portp[0] = cp->vport;
+
+	/*
+	 *	Call application helper if needed
+	 */
+	if (ip_vs_app_pkt_out(cp, skb) != 0) {
+		/* skb data has probably changed, update pointers */
+		iph = skb->nh.iph;
+		h.raw = (char*)iph + ihl;
+		size = skb->len - ihl;
+	}
+
+	/*
+	 *	Adjust TCP/UDP checksums
+	 */
+	if (!cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		ip_vs_fast_check_update(&h, cp->daddr, cp->vaddr,
+					cp->dport, cp->vport, iph->protocol);
+		if (skb->ip_summed == CHECKSUM_HW)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		switch (iph->protocol) {
+		case IPPROTO_TCP:
+			h.th->check = 0;
+			skb->csum = csum_partial(h.raw, size, 0);
+			h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+							size, iph->protocol,
+							skb->csum);
+			IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
+				  ip_vs_proto_name(iph->protocol), h.th->check,
+				  (int)((char*)&(h.th->check) - (char*)h.raw));
+			break;
+		case IPPROTO_UDP:
+			h.uh->check = 0;
+			skb->csum = csum_partial(h.raw, size, 0);
+			h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+							size, iph->protocol,
+							skb->csum);
+			if (h.uh->check == 0)
+				h.uh->check = 0xFFFF;
+			IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
+				  ip_vs_proto_name(iph->protocol), h.uh->check,
+				  (int)((char*)&(h.uh->check) - (char*)h.raw));
+			break;
+		}
+	}
+	ip_send_check(iph);
+
+	ip_vs_out_stats(cp, skb);
+	ip_vs_set_state(cp, VS_STATE_OUTPUT, iph, h.portp);
+	ip_vs_conn_put(cp);
+
+	skb->nfcache |= NFC_IPVS_PROPERTY;
+
+	LeaveFunction(11);
+	return NF_ACCEPT;
+}
+
+
+/*
+ *      Run ip_vs_out on the packet and, if ip_vs_out accepted and
+ *      mangled it (NFC_IPVS_PROPERTY set), send it immediately.
+ *      Called by ip_fw_compact to detect packets for VS/NAT before
+ *      they are changed by ipchains masquerading code.
+ */
+unsigned int check_for_ip_vs_out(struct sk_buff **skb_p,
+				 int (*okfn)(struct sk_buff *))
+{
+	unsigned int verdict;
+
+	verdict = ip_vs_out(NF_IP_FORWARD, skb_p, NULL, NULL, NULL);
+
+	/* anything other than NF_ACCEPT is returned to the caller as is */
+	if (verdict != NF_ACCEPT)
+		return verdict;
+
+	/* accepted but not mangled by ip_vs_out: nothing more to do here */
+	if (!((*skb_p)->nfcache & NFC_IPVS_PROPERTY))
+		return NF_ACCEPT;
+
+	(*okfn)(*skb_p);	/* mangled by ip_vs_out: send it right away */
+	return NF_STOLEN;
+}
+
+
+/*
+ *	Handle ICMP messages in the outside-to-inside direction (incoming).
+ *	Find any that might be relevant, check against existing connections,
+ *	forward to the right destination host if relevant.
+ *	Handles error types unreachable, quench, ttl exceeded; returns NF_*.
+ */
+static int ip_vs_in_icmp(struct sk_buff **skb_p)
+{
+	struct sk_buff	*skb   = *skb_p;
+	struct iphdr    *iph;
+	struct icmphdr  *icmph;
+	struct iphdr    *ciph;	/* The ip header contained within the ICMP */
+	__u16	        *pptr;	/* port numbers from TCP/UDP contained header */
+	unsigned short   len;
+	unsigned short	clen, csize;
+	struct ip_vs_conn *cp;
+	struct rtable *rt;			/* Route to the other host */
+	int    mtu;
+
+	if (skb_is_nonlinear(skb)) {
+		if (skb_linearize(skb, GFP_ATOMIC) != 0)
+			return NF_DROP;
+#if 0
+		ip_send_check(skb->nh.iph);
+#endif
+	}
+
+	iph = skb->nh.iph;
+	ip_send_check(iph);	/* NOTE(review): done for every packet - confirm */
+	icmph = (struct icmphdr *)((char *)iph+(iph->ihl<<2));
+	len = ntohs(iph->tot_len) - (iph->ihl<<2);	/* ICMP hdr + payload */
+	if (len < sizeof(struct icmphdr))
+		return NF_DROP;
+
+	IP_VS_DBG(12, "icmp in (%d,%d) %u.%u.%u.%u -> %u.%u.%u.%u\n",
+		  icmph->type, ntohs(icmp_id(icmph)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	if ((icmph->type != ICMP_DEST_UNREACH) &&
+	    (icmph->type != ICMP_SOURCE_QUENCH) &&
+	    (icmph->type != ICMP_TIME_EXCEEDED))
+		return NF_ACCEPT;
+
+	/*
+	 * If we get here we have an ICMP error of one of the above 3 types
+	 * Now find the contained IP header
+	 */
+	clen = len - sizeof(struct icmphdr);
+	if (clen < sizeof(struct iphdr))
+		return NF_DROP;
+	ciph = (struct iphdr *) (icmph + 1);
+	csize = ciph->ihl << 2;
+	if (clen < csize)
+		return NF_DROP;
+
+	/* We are only interested ICMPs generated from TCP or UDP packets */
+	if (ciph->protocol != IPPROTO_UDP && ciph->protocol != IPPROTO_TCP)
+		return NF_ACCEPT;
+
+	/* Skip non-first embedded TCP/UDP fragments */
+	if (ciph->frag_off & __constant_htons(IP_OFFSET))
+		return NF_ACCEPT;
+
+	/* We need at least TCP/UDP ports here */
+	if (clen < csize + sizeof(struct udphdr))
+		return NF_DROP;
+
+	/* Ensure the checksum is correct */
+	if (ip_compute_csum((unsigned char *) icmph, len)) {
+		/* Failed checksum! */
+		IP_VS_ERR_RL("incoming ICMP: failed checksum from "
+			     "%d.%d.%d.%d!\n", NIPQUAD(iph->saddr));
+		return NF_DROP;
+	}
+
+	pptr = (__u16 *)&(((char *)ciph)[csize]);
+
+	IP_VS_DBG(11, "Handling incoming ICMP for "
+		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
+		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
+		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));
+
+	/* This is pretty much what ip_vs_conn_in_get() does,
+	   except parameters are in the reverse order */
+	cp = ip_vs_conn_in_get(ciph->protocol,
+			       ciph->daddr, pptr[1],
+			       ciph->saddr, pptr[0]);
+	if (cp == NULL)
+		return NF_ACCEPT;
+
+	ip_vs_in_stats(cp, skb);
+
+	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+	   forwarded directly here, because there is no need to
+	   translate address/port back */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+		int ret;
+		if (cp->packet_xmit)
+			ret = cp->packet_xmit(skb, cp);
+		else
+			ret = NF_ACCEPT;
+		atomic_inc(&cp->in_pkts);
+		ip_vs_conn_put(cp);
+		return ret;
+	}
+
+	/*
+	 * mangle and send the packet here
+	 */
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = rt->u.dst.pmtu;
+	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* copy-on-write the packet before mangling it */
+	if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len,
+			  &iph, (unsigned char**)&icmph)) {
+		ip_vs_conn_put(cp);
+		return NF_DROP;
+	}
+	ciph = (struct iphdr *) (icmph + 1);	/* recompute from new icmph */
+	pptr = (__u16 *)&(((char *)ciph)[csize]);
+
+	/* The ICMP packet for VS/NAT must be written to correct addresses
+	   before being forwarded to the right server */
+
+	/* First change the dest IP address, and recalc checksum */
+	iph->daddr = cp->daddr;
+	ip_send_check(iph);
+
+	/* Now change the *source* address in the contained IP */
+	ciph->saddr = cp->daddr;
+	ip_send_check(ciph);
+
+	/* the TCP/UDP source port - cannot redo check */
+	pptr[0] = cp->dport;
+
+	/* And finally the ICMP checksum */
+	icmph->checksum = 0;
+	icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	IP_VS_DBG(11, "Forwarding incoming ICMP to "
+		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
+		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
+		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+#endif /* CONFIG_NETFILTER_DEBUG */
+	ip_send(skb);
+	ip_vs_conn_put(cp);
+	return NF_STOLEN;
+
+  tx_error_icmp:	/* no route to the destination */
+	dst_link_failure(skb);
+  tx_error:		/* free the packet and release the connection */
+	dev_kfree_skb(skb);
+	ip_vs_conn_put(cp);
+	return NF_STOLEN;
+}
+
+
+/*
+ *	Check if it's for virtual services, look it up,
+ *	and send it on its way... Returns an NF_* verdict.
+ */
+static unsigned int ip_vs_in(unsigned int hooknum,
+			     struct sk_buff **skb_p,
+			     const struct net_device *in,
+			     const struct net_device *out,
+			     int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff	*skb = *skb_p;
+	struct iphdr	*iph = skb->nh.iph;
+	union ip_vs_tphdr h;
+	struct ip_vs_conn *cp;
+	struct ip_vs_service *svc;
+	int ihl;
+	int ret;
+
+	/*
+	 *	Big tappo: only PACKET_HOST (nor loopback neither mcasts)
+	 *	... don't know why 1st test DOES NOT include 2nd (?)
+	 */
+	if (skb->pkt_type != PACKET_HOST || skb->dev == &loopback_dev) {
+		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
+			  skb->pkt_type,
+			  iph->protocol,
+			  NIPQUAD(iph->daddr));
+		return NF_ACCEPT;
+	}
+
+	if (iph->protocol == IPPROTO_ICMP)
+		return ip_vs_in_icmp(skb_p);
+
+	/* let it go if other IP protocols */
+	if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
+		return NF_ACCEPT;
+
+	/* make sure that protocol header available in skb data area,
+	   note that skb data area may be reallocated. */
+	ihl = iph->ihl << 2;
+	if (ip_vs_header_check(skb, iph->protocol, ihl) == -1)
+		return NF_DROP;
+	iph = skb->nh.iph;
+	h.raw = (char*) iph + ihl;
+
+	/*
+	 * Check if the packet belongs to an existing connection entry
+	 */
+	cp = ip_vs_conn_in_get(iph->protocol, iph->saddr, h.portp[0],
+			       iph->daddr, h.portp[1]);
+	/* test the protocol before the SYN flag: UDP has no TCP flags */
+	if (!cp &&
+	    ((iph->protocol != IPPROTO_TCP) || h.th->syn) &&
+	    (svc = ip_vs_service_get(skb->nfmark, iph->protocol,
+				     iph->daddr, h.portp[1]))) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			return NF_DROP;
+		}
+
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		cp = ip_vs_schedule(svc, iph);
+		if (!cp)
+			return ip_vs_leave(svc, skb);
+		ip_vs_conn_stats(cp, svc);
+		ip_vs_service_put(svc);
+	}
+
+	if (!cp) {
+		/* sorry, all this trouble for a no-hit :) */
+		IP_VS_DBG(12, "packet for %s %d.%d.%d.%d:%d continue "
+			  "traversal as normal.\n",
+			  ip_vs_proto_name(iph->protocol),
+			  NIPQUAD(iph->daddr),
+			  ntohs(h.portp[1]));
+		return NF_ACCEPT;
+	}
+
+	IP_VS_DBG(11, "Incoming %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
+		  ip_vs_proto_name(iph->protocol),
+		  NIPQUAD(iph->saddr), ntohs(h.portp[0]),
+		  NIPQUAD(iph->daddr), ntohs(h.portp[1]));
+
+	/* Check the server status */
+	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		/* the destination server is not available */
+
+		if (sysctl_ip_vs_expire_nodest_conn) {
+			/* try to expire the connection immediately */
+			ip_vs_conn_expire_now(cp);
+		} else {
+			/* don't restart its timer, and silently
+			   drop the packet. */
+			__ip_vs_conn_put(cp);
+		}
+		return NF_DROP;
+	}
+
+	ip_vs_in_stats(cp, skb);
+	ip_vs_set_state(cp, VS_STATE_INPUT, iph, h.portp);
+	if (cp->packet_xmit)
+		ret = cp->packet_xmit(skb, cp);
+	else {
+		IP_VS_DBG_RL("warning: packet_xmit is null\n");
+		ret = NF_ACCEPT;
+	}
+
+	/* increase its packet counter and check if it is needed
+	   to be synchronized */
+	atomic_inc(&cp->in_pkts);
+	if (ip_vs_sync_state == IP_VS_STATE_MASTER &&
+	    (cp->protocol != IPPROTO_TCP ||
+	     cp->state == IP_VS_S_ESTABLISHED) &&
+	    (atomic_read(&cp->in_pkts) % 50 == sysctl_ip_vs_sync_threshold))
+		ip_vs_sync_conn(cp);
+
+	ip_vs_conn_put(cp);
+	return ret;
+}
+
+
+/*
+ *	It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP
+ *      packets destined for 0.0.0.0/0.
+ *      When fwmark-based virtual service is used, such as transparent
+ *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
+ *      but ICMP destined for 0.0.0.0/0 cannot be easily marked and
+ *      sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain
+ *      and send them to ip_vs_in_icmp.
+ */
+static unsigned int ip_vs_forward_icmp(unsigned int hooknum,
+				       struct sk_buff **skb_p,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *pkt = *skb_p;
+	struct iphdr *ip = pkt->nh.iph;
+
+	/* anything that is not ICMP passes through untouched */
+	if (ip->protocol != IPPROTO_ICMP)
+		return NF_ACCEPT;
+
+	/* a fragment is reassembled first; no skb back means NF_STOLEN */
+	if (ip->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+		if ((pkt = ip_defrag(pkt)) == NULL)
+			return NF_STOLEN;
+		*skb_p = pkt;
+	}
+	return ip_vs_in_icmp(skb_p);
+}
+
+
+/* Runs ip_vs_in on NF_IP_LOCAL_IN with priority 100, i.e. after packet
+   filtering, so that filtering rules can be applied before IPVS
+   forwards through VS/DR, VS/TUN or VS/NAT (change destination). */
+static struct nf_hook_ops ip_vs_in_ops = {
+	{ NULL, NULL },
+	ip_vs_in, PF_INET, NF_IP_LOCAL_IN, 100
+};
+
+/* After packet filtering, change source only for VS/NAT (ip_vs_out) */
+static struct nf_hook_ops ip_vs_out_ops = {
+	{ NULL, NULL },
+	ip_vs_out, PF_INET, NF_IP_FORWARD, 100
+};
+
+/* Same NF_IP_FORWARD hook as ip_vs_out_ops but 99 instead of 100: catch
+   icmp destined for 0.0.0.0/0, which is for incoming IPVS connections */
+static struct nf_hook_ops ip_vs_forward_icmp_ops = {
+	{ NULL, NULL },
+	ip_vs_forward_icmp, PF_INET, NF_IP_FORWARD, 99
+};
+
+/* Before the netfilter connection tracking, exit from POST_ROUTING */
+static struct nf_hook_ops ip_vs_post_routing_ops = {
+	{ NULL, NULL },
+	ip_vs_post_routing, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC-1
+};
+
+
+/*
+ *	Initialize IP Virtual Server: control, sltimer, conn, app, hooks
+ */
+static int __init ip_vs_init(void)
+{
+	int ret;
+
+	ret = ip_vs_control_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup control.\n");
+		goto cleanup_nothing;
+	}
+
+	ip_vs_sltimer_init();
+
+	ret = ip_vs_conn_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup connection table.\n");
+		goto cleanup_sltimer;
+	}
+
+	ret = ip_vs_app_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup application helper.\n");
+		goto cleanup_conn;
+	}
+	/* the four netfilter hooks are registered last, one by one */
+	ret = nf_register_hook(&ip_vs_in_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register in hook.\n");
+		goto cleanup_app;
+	}
+	ret = nf_register_hook(&ip_vs_out_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register out hook.\n");
+		goto cleanup_inops;
+	}
+	ret = nf_register_hook(&ip_vs_post_routing_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register post_routing hook.\n");
+		goto cleanup_outops;
+	}
+	ret = nf_register_hook(&ip_vs_forward_icmp_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register forward_icmp hook.\n");
+		goto cleanup_postroutingops;
+	}
+
+	IP_VS_INFO("ipvs loaded.\n");
+	return ret;
+	/* error unwinding: each label undoes the steps completed before it */
+  cleanup_postroutingops:
+	nf_unregister_hook(&ip_vs_post_routing_ops);
+  cleanup_outops:
+	nf_unregister_hook(&ip_vs_out_ops);
+  cleanup_inops:
+	nf_unregister_hook(&ip_vs_in_ops);
+  cleanup_app:
+	ip_vs_app_cleanup();
+  cleanup_conn:
+	ip_vs_conn_cleanup();
+  cleanup_sltimer:
+	ip_vs_sltimer_cleanup();
+	ip_vs_control_cleanup();	/* shares the cleanup_sltimer label */
+  cleanup_nothing:
+	return ret;	/* the negative code of the step that failed */
+}
+
+static void __exit ip_vs_cleanup(void)	/* mirror image of ip_vs_init */
+{
+	nf_unregister_hook(&ip_vs_forward_icmp_ops);	/* registered last */
+	nf_unregister_hook(&ip_vs_post_routing_ops);
+	nf_unregister_hook(&ip_vs_out_ops);
+	nf_unregister_hook(&ip_vs_in_ops);	/* registered first */
+	ip_vs_app_cleanup();
+	ip_vs_conn_cleanup();
+	ip_vs_sltimer_cleanup();
+	ip_vs_control_cleanup();	/* was the first thing set up */
+	IP_VS_INFO("ipvs unloaded.\n");
+}
+
+module_init(ip_vs_init);
+module_exit(ip_vs_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_ctl.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_ctl.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_ctl.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_ctl.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,2125 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_ctl.c,v 1.30.2.1 2002/11/14 10:05:23 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/timer.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <asm/uaccess.h>
+
+#include <net/ip_vs.h>
+
+/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+static DECLARE_MUTEX(__ip_vs_mutex);
+
+/* lock for service table */
+rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;
+
+/* lock for table with the real services */
+static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;
+
+/* lock for state and timeout tables */
+static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;
+
+/* lock for drop entry handling */
+static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;
+
+/* lock for drop packet handling */
+static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;
+
+/* 1/rate drop and drop-entry variables */
+int ip_vs_drop_rate = 0;
+int ip_vs_drop_counter = 0;
+atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
+
+/* number of virtual services */
+static int ip_vs_num_services = 0;
+
+/* sysctl variables */
+static int sysctl_ip_vs_drop_entry = 0;
+static int sysctl_ip_vs_drop_packet = 0;
+static int sysctl_ip_vs_secure_tcp = 0;
+static int sysctl_ip_vs_amemthresh = 1024;
+static int sysctl_ip_vs_am_droprate = 10;
+int sysctl_ip_vs_cache_bypass = 0;
+int sysctl_ip_vs_expire_nodest_conn = 0;
+int sysctl_ip_vs_sync_threshold = 3;
+int sysctl_ip_vs_nat_icmp_send = 0;
+
+#ifdef CONFIG_IP_VS_DEBUG
+static int sysctl_ip_vs_debug_level = 0;
+/* accessor: the level variable itself is static to this file */
+int ip_vs_get_debug_level(void)
+{
+	return sysctl_ip_vs_debug_level;
+}
+#endif /* CONFIG_IP_VS_DEBUG */
+
+/*
+ *  update_defense_level is called from timer bh and from sysctl.
+ */
+void update_defense_level(void)
+{
+	/* Snapshot the threshold once: a sysctl write between the nomem
+	   test and the division could otherwise make the divisor <= 0. */
+	int amemthresh = sysctl_ip_vs_amemthresh;
+	int ip_vs_amem = nr_free_pages() + atomic_read(&page_cache_size) +
+		atomic_read(&buffermem_pages);
+	int nomem = (ip_vs_amem < amemthresh);
+	int am_rate = nomem ? amemthresh / (amemthresh - ip_vs_amem) : 0;
+
+	/* drop_entry: 0 off, 1/2 automatic (on while nomem), 3 always on */
+	spin_lock(&__ip_vs_dropentry_lock);
+	switch (sysctl_ip_vs_drop_entry) {
+	case 0:
+		atomic_set(&ip_vs_dropentry, 0);
+		break;
+	case 1:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+			sysctl_ip_vs_drop_entry = 2;
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+		}
+		break;
+	case 2:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+			sysctl_ip_vs_drop_entry = 1;
+		}
+		break;
+	case 3:
+		atomic_set(&ip_vs_dropentry, 1);
+		break;
+	}
+	spin_unlock(&__ip_vs_dropentry_lock);
+
+	/* drop_packet: am_rate is only used where nomem holds (divisor > 0) */
+	spin_lock(&__ip_vs_droppacket_lock);
+	switch (sysctl_ip_vs_drop_packet) {
+	case 0:
+		ip_vs_drop_rate = 0;
+		break;
+	case 1:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter = am_rate;
+			sysctl_ip_vs_drop_packet = 2;
+		} else {
+			ip_vs_drop_rate = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter = am_rate;
+		} else {
+			ip_vs_drop_rate = 0;
+			sysctl_ip_vs_drop_packet = 1;
+		}
+		break;
+	case 3:
+		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		break;
+	}
+	spin_unlock(&__ip_vs_droppacket_lock);
+
+	/* secure_tcp: same four modes as drop_entry */
+	write_lock(&__ip_vs_securetcp_lock);
+	switch (sysctl_ip_vs_secure_tcp) {
+	case 0:
+		ip_vs_secure_tcp_set(0);
+		break;
+	case 1:
+		if (nomem) {
+			ip_vs_secure_tcp_set(1);
+			sysctl_ip_vs_secure_tcp = 2;
+		} else {
+			ip_vs_secure_tcp_set(0);
+		}
+		break;
+	case 2:
+		if (nomem) {
+			ip_vs_secure_tcp_set(1);
+		} else {
+			ip_vs_secure_tcp_set(0);
+			sysctl_ip_vs_secure_tcp = 1;
+		}
+		break;
+	case 3:
+		ip_vs_secure_tcp_set(1);
+		break;
+	}
+	write_unlock(&__ip_vs_securetcp_lock);
+}
+
+
+/*
+ *  Hash table: for virtual service lookups
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+
+/* the service table hashed by <protocol, addr, port> */
+static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+/* the service table hashed by fwmark */
+static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+
+/*
+ *  Hash table: for real service lookups
+ */
+#define IP_VS_RTAB_BITS 4
+#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
+
+/*
+ * Trash for destinations
+ */
+static LIST_HEAD(ip_vs_dest_trash);
+
+/*
+ * FTP & NULL virtual service counters
+ */
+static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
+static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
+
+
+/*
+ *  Returns hash value for virtual service
+ */
+static __inline__ unsigned
+ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
+{
+	unsigned hport = ntohs(port);
+	unsigned key = proto ^ ntohl(addr) ^ (hport >> IP_VS_SVC_TAB_BITS);
+
+	return (key ^ hport) & IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *  Returns the bucket of a fwmark: the mark masked by IP_VS_SVC_TAB_MASK
+ */
+static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+{
+	return fwmark & IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *  Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
+ *  or in the ip_vs_svc_fwm_table by fwmark.
+ *  Should be called with locked tables.
+ *  Returns bool success.
+ */
+static int ip_vs_svc_hash(struct ip_vs_service *svc)
+{
+	struct list_head *node, *bucket;
+
+	/* refuse to link a service that is already flagged as hashed */
+	if (svc->flags & IP_VS_SVC_F_HASHED) {
+		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark != 0) {
+		/* fwmark services: ip_vs_svc_fwm_table, linked via f_list */
+		node = &svc->f_list;
+		bucket = ip_vs_svc_fwm_table +
+			ip_vs_svc_fwm_hashkey(svc->fwmark);
+	} else {
+		/* all others: ip_vs_svc_table, linked via s_list */
+		node = &svc->s_list;
+		bucket = ip_vs_svc_table +
+			ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
+	}
+	list_add(node, bucket);
+
+	svc->flags |= IP_VS_SVC_F_HASHED;
+	/* the table itself now counts as one reference to the service */
+	atomic_inc(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *  Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *  Should be called with locked tables.
+ *  Returns bool success.
+ */
+static int ip_vs_svc_unhash(struct ip_vs_service *svc)
+{
+	struct list_head *node;
+
+	/* nothing to unlink unless the service is flagged as hashed */
+	if ((svc->flags & IP_VS_SVC_F_HASHED) == 0) {
+		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 * A fwmark service is linked into ip_vs_svc_fwm_table through
+	 * f_list, any other service into ip_vs_svc_table through s_list.
+	 */
+	node = svc->fwmark ? &svc->f_list : &svc->s_list;
+	list_del(node);
+
+	/*
+	 * Clear the flag and drop one reference on the service.
+	 */
+	svc->flags &= ~IP_VS_SVC_F_HASHED;
+	atomic_dec(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *  Get service by {proto,addr,port} in the service table.
+ */
+static __inline__ struct ip_vs_service *
+__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
+{
+	struct list_head *head, *pos;
+	struct ip_vs_service *svc;
+
+	/* walk the one bucket that <protocol,vaddr,vport> hashes to */
+	head = &ip_vs_svc_table[ip_vs_svc_hashkey(protocol, vaddr, vport)];
+
+	for (pos = head->next; pos != head; pos = pos->next) {
+		svc = list_entry(pos, struct ip_vs_service, s_list);
+
+		/* all three fields have to match exactly */
+		if (svc->protocol != protocol)
+			continue;
+		if (svc->addr != vaddr || svc->port != vport)
+			continue;
+
+		/* HIT: a use count is taken on behalf of the caller */
+		atomic_inc(&svc->usecnt);
+		return svc;
+	}
+
+	/* no such service */
+	return NULL;
+}
+
+
+/*
+ *  Get service by {fwmark} in the service table.
+ */
+static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
+{
+	struct list_head *head, *pos;
+	struct ip_vs_service *svc;
+
+	/*
+	 *	Only the bucket this fwmark hashes to has to be walked
+	 */
+	head = &ip_vs_svc_fwm_table[ip_vs_svc_fwm_hashkey(fwmark)];
+
+	pos = head->next;
+	while (pos != head) {
+		svc = list_entry(pos, struct ip_vs_service, f_list);
+		if (svc->fwmark == fwmark) {
+			/* HIT: a use count is taken for the caller */
+			atomic_inc(&svc->usecnt);
+			return svc;
+		}
+		pos = pos->next;
+	}
+
+	return NULL;
+}
+
+struct ip_vs_service *
+ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
+{
+	struct ip_vs_service *svc = NULL;
+
+	read_lock(&__ip_vs_svc_lock);
+
+	/*
+	 *	1. A marked packet is first looked up by its fwmark.
+	 */
+	if (fwmark)
+		svc = __ip_vs_svc_fwm_get(fwmark);
+
+	/*
+	 *	2. No fwmark service: try the exact <protocol,addr,port>
+	 *	   entry.
+	 */
+	if (!svc)
+		svc = __ip_vs_service_get(protocol, vaddr, vport);
+
+	/*
+	 *	3. Still nothing: a TCP packet to FTPDATA or to a port
+	 *	   >= PROT_SOCK might belong to an FTP data connection,
+	 *	   so retry with FTPPORT.  Skipped while
+	 *	   ip_vs_ftpsvc_counter is zero.
+	 */
+	if (!svc && protocol == IPPROTO_TCP &&
+	    atomic_read(&ip_vs_ftpsvc_counter) &&
+	    (vport == FTPDATA || ntohs(vport) >= PROT_SOCK))
+		svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
+
+	/*
+	 *	4. Last resort: the catch-all service on port zero,
+	 *	   skipped while ip_vs_nullsvc_counter is zero.
+	 */
+	if (!svc && atomic_read(&ip_vs_nullsvc_counter))
+		svc = __ip_vs_service_get(protocol, vaddr, 0);
+
+	read_unlock(&__ip_vs_svc_lock);
+
+	IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
+		  fwmark, ip_vs_proto_name(protocol),
+		  NIPQUAD(vaddr), ntohs(vport),
+		  svc?"hit":"not hit");
+
+	/* a hit carries the use count taken by the lookup helper */
+	return svc;
+}
+
+
+static inline void
+__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	atomic_inc(&svc->refcnt);	/* the dest counts as one reference */
+	dest->svc = svc;
+}
+
+static inline void
+__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+{
+	struct ip_vs_service *svc = dest->svc;	/* must be bound (not NULL) */
+
+	dest->svc = NULL;
+	if (atomic_dec_and_test(&svc->refcnt))	/* that was the last reference */
+		kfree(svc);
+}
+
+/*
+ *  Returns hash value for real service
+ */
+static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
+{
+	unsigned hport = ntohs(port);
+	unsigned key = ntohl(addr) ^ (hport >> IP_VS_RTAB_BITS);
+
+	return (key ^ hport) & IP_VS_RTAB_MASK;
+}
+
+/*
+ *  Hashes ip_vs_dest in ip_vs_rtable by <addr,port>.
+ *  should be called with locked tables.
+ *  returns bool success (0 if the dest is already hashed).
+ */
+static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+{
+	struct list_head *bucket;
+
+	/* a non-empty d_list means the dest is already in the table */
+	if (!list_empty(&dest->d_list))
+		return 0;
+
+	/*
+	 *	The bucket is chosen from the address and port of the
+	 *	real service only; the protocol is not part of the key.
+	 */
+	bucket = &ip_vs_rtable[ip_vs_rs_hashkey(dest->addr, dest->port)];
+	list_add(&dest->d_list, bucket);
+
+	return 1;
+}
+
+/*
+ *  UNhashes ip_vs_dest from ip_vs_rtable.
+ *  should be called with locked tables.
+ *  returns bool success.
+ */
+static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+{
+	struct list_head *node = &dest->d_list;
+
+	/* an empty d_list means the dest is not in ip_vs_rtable */
+	if (list_empty(node))
+		return 1;
+
+	list_del(node);
+	INIT_LIST_HEAD(node);	/* so that it tests as empty again */
+	return 1;
+}
+
+/*
+ *  Lookup real service by {proto,addr,port} in the real service table.
+ */
+struct ip_vs_dest *
+ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
+{
+	struct list_head *head, *pos;
+	struct ip_vs_dest *dest, *found = NULL;
+
+	/* the bucket depends on <daddr,dport> only */
+	head = &ip_vs_rtable[ip_vs_rs_hashkey(daddr, dport)];
+
+	read_lock(&__ip_vs_rs_lock);
+	for (pos = head->next; pos != head; pos = pos->next) {
+		dest = list_entry(pos, struct ip_vs_dest, d_list);
+
+		if (dest->addr != daddr || dest->port != dport)
+			continue;
+		/*
+		 * The protocol has to match as well, unless the dest
+		 * has a non-zero vfwmark, in which case any will do.
+		 */
+		if (dest->protocol != protocol && !dest->vfwmark)
+			continue;
+
+		/* HIT: the first matching entry wins */
+		found = dest;
+		break;
+	}
+	read_unlock(&__ip_vs_rs_lock);
+
+	/* NULL when no real service matched */
+	return found;
+}
+
+/*
+ *  Lookup destination by {addr,port} in the given service
+ */
+static struct ip_vs_dest *
+ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
+{
+	struct list_head *head = &svc->destinations;
+	struct list_head *pos = head->next;
+	struct ip_vs_dest *dest;
+
+	/*
+	 * Linear scan of the service's own destination list (n_list)
+	 */
+	while (pos != head) {
+		dest = list_entry(pos, struct ip_vs_dest, n_list);
+		if (dest->addr == daddr && dest->port == dport)
+			return dest;	/* HIT */
+		pos = pos->next;
+	}
+
+	/* no destination of this service has that address and port */
+	return NULL;
+}
+
+
+/*
+ *  Lookup dest by {svc,addr,port} in the destination trash.
+ *  The destination trash is used to hold the destinations that are removed
+ *  from the service table but are still referenced by some conn entries.
+ *  The reason to add the destination trash is when the dest is temporary
+ *  down (either by administrator or by monitor program), the dest can be
+ *  picked back from the trash, the remaining connections to the dest can
+ *  continue, and the counting information of the dest is also useful for
+ *  scheduling.
+ */
+static struct ip_vs_dest *
+ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
+{
+	struct ip_vs_dest *dest;
+	struct list_head *l, *e;
+
+	/*
+	 * Walk the whole trash; entries are linked through n_list
+	 */
+	l = &ip_vs_dest_trash;
+
+	for (e=l->next; e!=l; e=e->next) {
+		dest = list_entry(e, struct ip_vs_dest, n_list);
+		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
+			  "refcnt=%d\n",
+			  dest->vfwmark,
+			  NIPQUAD(dest->addr), ntohs(dest->port),
+			  atomic_read(&dest->refcnt));
+		if (dest->addr == daddr &&
+		    dest->port == dport &&
+		    dest->vfwmark == svc->fwmark &&
+		    dest->protocol == svc->protocol &&
+		    (svc->fwmark ||
+		     (dest->vaddr == svc->addr &&
+		      dest->vport == svc->port))) {
+			/* HIT: returned while still linked in the trash */
+			return dest;
+		}
+
+		/*
+		 * No match: purge the entry if its refcnt is down to 1
+		 */
+		if (atomic_read(&dest->refcnt) == 1) {
+			IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
+				  "from trash\n",
+				  dest->vfwmark,
+				  NIPQUAD(dest->addr), ntohs(dest->port));
+			e = e->prev;	/* so e=e->next resumes from a live node */
+			list_del(&dest->n_list);
+			__ip_vs_dst_reset(dest);
+			__ip_vs_unbind_svc(dest);
+			kfree(dest);
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *  Clean up all the destinations in the trash
+ *  Called by the ip_vs_control_cleanup()
+ *
+ *  When the ip_vs_control_cleanup is activated by ipvs module exit,
+ *  the service tables must have been flushed and all the connections
+ *  are expired, and the refcnt of each destination in the trash must
+ *  be 1, so we simply release them here.
+ */
+static void ip_vs_trash_cleanup(void)
+{
+	struct list_head *head = &ip_vs_dest_trash;
+	struct ip_vs_dest *dest;
+
+	/* keep taking the first entry until the trash is empty */
+	while (!list_empty(head)) {
+		dest = list_entry(head->next, struct ip_vs_dest, n_list);
+		list_del(&dest->n_list);
+		/* reset its dst, unbind its service, then free the dest */
+		__ip_vs_dst_reset(dest);
+		__ip_vs_unbind_svc(dest);
+		kfree(dest);
+	}
+}
+
+
+/*
+ *  Update a destination in the given service
+ */
+static void __ip_vs_update_dest(struct ip_vs_service *svc,
+				struct ip_vs_dest *dest,
+				struct ip_vs_rule_user *ur)
+{
+	int cflags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;
+
+	/*
+	 *    Store the requested weight
+	 */
+	atomic_set(&dest->weight, ur->weight);
+
+	/*
+	 *    A real server whose address is local replaces whatever
+	 *    forwarding method was requested by IP_VS_CONN_F_LOCALNODE.
+	 */
+	if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
+		cflags &= ~IP_VS_CONN_F_FWD_MASK;
+		cflags |= IP_VS_CONN_F_LOCALNODE;
+	}
+
+	if ((cflags & IP_VS_CONN_F_FWD_MASK) == 0) {
+		/*
+		 *    Forwarding bits all zero (masquerading/NAT): put
+		 *    the real service in ip_vs_rtable if not present.
+		 */
+		write_lock_bh(&__ip_vs_rs_lock);
+		ip_vs_rs_hash(dest);
+		write_unlock_bh(&__ip_vs_rs_lock);
+	} else {
+		/* every other method gets IP_VS_CONN_F_NOOUTPUT */
+		cflags |= IP_VS_CONN_F_NOOUTPUT;
+	}
+	atomic_set(&dest->conn_flags, cflags);
+
+	/*
+	 *    (Re)bind the dest to svc: an unbound dest is simply
+	 *    bound; a dest bound to a different service is unbound
+	 *    from it first; a dest already bound to svc is left alone.
+	 */
+	if (dest->svc == NULL) {
+		__ip_vs_bind_svc(dest, svc);
+	} else if (dest->svc != svc) {
+		__ip_vs_unbind_svc(dest);
+		__ip_vs_bind_svc(dest, svc);
+	}
+
+	/* set the dest status flags: it is available from now on */
+	dest->flags |= IP_VS_DEST_F_AVAILABLE;
+}
+
+
+
+/*
+ *  Create a destination for the given service
+ */
+static int
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
+	       struct ip_vs_dest **destp)
+{
+	struct ip_vs_dest *d;
+	unsigned kind;
+
+	EnterFunction(2);
+
+	/* the real server must be a local or a unicast address */
+	kind = inet_addr_type(ur->daddr);
+	if (!(kind == RTN_LOCAL || kind == RTN_UNICAST))
+		return -EINVAL;
+
+	d = kmalloc(sizeof(*d), GFP_ATOMIC);
+	*destp = d;		/* NULL is stored on failure as well */
+	if (!d) {
+		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
+		return -ENOMEM;
+	}
+	memset(d, 0, sizeof(*d));
+
+	/* the real server, then the virtual service it is created for */
+	d->addr = ur->daddr;
+	d->port = ur->dport;
+	d->protocol = svc->protocol;
+	d->vaddr = svc->addr;
+	d->vport = svc->port;
+	d->vfwmark = svc->fwmark;
+	atomic_set(&d->refcnt, 0);
+	atomic_set(&d->activeconns, 0);
+	atomic_set(&d->inactconns, 0);
+	INIT_LIST_HEAD(&d->d_list);
+	d->dst_lock = SPIN_LOCK_UNLOCKED;
+	d->stats.lock = SPIN_LOCK_UNLOCKED;
+
+	__ip_vs_update_dest(svc, d, ur);
+	ip_vs_new_estimator(&d->stats);
+	LeaveFunction(2);
+	return 0;
+}
+
+
+/*
+ *  Add a destination into an existing service
+ */
+static int ip_vs_add_dest(struct ip_vs_service *svc,
+			  struct ip_vs_rule_user *ur)
+{
+	struct ip_vs_dest *dest;
+	__u32 daddr = ur->daddr;
+	__u16 dport = ur->dport;
+	int ret;
+
+	EnterFunction(2);
+
+	if (ur->weight < 0) {
+		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	/*
+	 * Check if the dest already exists in the list
+	 */
+	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (dest != NULL) {
+		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
+		return -EEXIST;
+	}
+
+	/*
+	 * Check if the dest already exists in the trash and
+	 * is from the same service; if so it is revived, not reallocated
+	 */
+	dest = ip_vs_trash_get_dest(svc, daddr, dport);
+	if (dest != NULL) {
+		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
+			  "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
+			  NIPQUAD(daddr), ntohs(dport),
+			  atomic_read(&dest->refcnt),
+			  dest->vfwmark,
+			  NIPQUAD(dest->vaddr),
+			  ntohs(dest->vport));
+		__ip_vs_update_dest(svc, dest, ur);
+
+		/*
+		 * Get the destination from the trash (unlink its n_list)
+		 */
+		list_del(&dest->n_list);
+
+		ip_vs_new_estimator(&dest->stats);
+
+		write_lock_bh(&__ip_vs_svc_lock);
+
+		/*
+		 * Wait until all other svc users go away (busy-wait).
+		 */
+		while (atomic_read(&svc->usecnt) > 1) {};
+
+		list_add(&dest->n_list, &svc->destinations);
+		svc->num_dests++;
+
+		/* call the update_service function of its scheduler */
+		svc->scheduler->update_service(svc);
+
+		write_unlock_bh(&__ip_vs_svc_lock);
+		return 0;
+	}
+
+	/*
+	 * Not in the trash: allocate and initialize the dest structure
+	 */
+	ret = ip_vs_new_dest(svc, ur, &dest);
+	if (ret) {
+		return ret;
+	}
+
+	/*
+	 * Add the dest entry into the list (refcnt goes from 0 to 1)
+	 */
+	atomic_inc(&dest->refcnt);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away (busy-wait).
+	 */
+	while (atomic_read(&svc->usecnt) > 1) {};
+
+	list_add(&dest->n_list, &svc->destinations);
+	svc->num_dests++;
+
+	/* call the update_service function of its scheduler */
+	svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *  Edit a destination in the given service (0, -ERANGE or -ENOENT)
+ */
+static int ip_vs_edit_dest(struct ip_vs_service *svc,
+			   struct ip_vs_rule_user *ur)
+{
+	struct ip_vs_dest *dest;
+	__u32 daddr = ur->daddr;
+	__u16 dport = ur->dport;
+
+	EnterFunction(2);
+
+	if (ur->weight < 0) {
+		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	/* the destination must already be linked to this service */
+	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (dest == NULL) {
+		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
+		return -ENOENT;
+	}
+
+	__ip_vs_update_dest(svc, dest, ur);
+
+	/* Weight may have changed: notify the scheduler under the svc lock
+	   with other users gone, as ip_vs_add_dest/ip_vs_del_dest do. */
+	write_lock_bh(&__ip_vs_svc_lock);
+	while (atomic_read(&svc->usecnt) > 1) {};
+	svc->scheduler->update_service(svc);
+	write_unlock_bh(&__ip_vs_svc_lock);
+	LeaveFunction(2);
+	return 0;
+}
+
+
+/*
+ *  Delete a destination (must be already unlinked from the service)
+ */
+static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+{
+	ip_vs_kill_estimator(&dest->stats);
+
+	/*
+	 *  Remove it from ip_vs_rtable (no-op if it was never hashed).
+	 */
+	write_lock_bh(&__ip_vs_rs_lock);
+	ip_vs_rs_unhash(dest);
+	write_unlock_bh(&__ip_vs_rs_lock);
+
+	/*
+	 *  Decrease the refcnt of the dest, and free the dest
+	 *  if nobody refers to it (refcnt=0). Otherwise, throw
+	 *  the destination into the trash.
+	 */
+	if (atomic_dec_and_test(&dest->refcnt)) {
+		__ip_vs_dst_reset(dest);
+		/* simply decrease svc->refcnt here, let the caller check
+		   and release the service if nobody refers to it.
+		   Only user context can release destination and service,
+		   and only one user context can update virtual service at a
+		   time, so the operation here is OK */
+		atomic_dec(&dest->svc->refcnt);
+		kfree(dest);
+	} else {
+		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
+			  NIPQUAD(dest->addr), ntohs(dest->port),
+			  atomic_read(&dest->refcnt));
+		list_add(&dest->n_list, &ip_vs_dest_trash);
+		atomic_inc(&dest->refcnt);	/* the trash's own reference */
+	}
+}
+
+
+/*
+ *  Unlink a destination from the given service
+ */
+static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
+				struct ip_vs_dest *dest,
+				int svcupd)
+{
+	/* the dest is no longer flagged as available */
+	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
+
+	/* take it off the service's destination list */
+	list_del(&dest->n_list);
+	svc->num_dests--;
+
+	/*
+	 *  The scheduler is told about the change only on request
+	 */
+	if (!svcupd)
+		return;
+	svc->scheduler->update_service(svc);
+}
+
+
+/*
+ *  Delete a destination server in the given service
+ */
+static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
+{
+	struct ip_vs_dest *victim;
+
+	EnterFunction(2);
+
+	/*
+	 *	Only a destination currently linked to svc can be deleted
+	 */
+	victim = ip_vs_lookup_dest(svc, ur->daddr, ur->dport);
+	if (!victim) {
+		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
+		return -ENOENT;
+	}
+
+	/*
+	 *	Step 1, under the service lock: once all other svc users
+	 *	have gone away, take the dest off the service's list and
+	 *	have the scheduler updated (svcupd = 1).
+	 */
+	write_lock_bh(&__ip_vs_svc_lock);
+	while (atomic_read(&svc->usecnt) > 1)
+		;
+	__ip_vs_unlink_dest(svc, victim, 1);
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 *	Step 2, lock released: the dest is either freed or,
+	 *	while something still refers to it, moved into the
+	 *	destination trash.
+	 */
+	__ip_vs_del_dest(victim);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *  Add a service into the service hash table
+ */
+static int
+ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
+{
+	int ret = 0;
+	struct ip_vs_scheduler *sched;
+	struct ip_vs_service *svc = NULL;
+
+	MOD_INC_USE_COUNT;	/* dropped again at out_mod_dec on failure */
+
+	/*
+	 * Lookup the scheduler, by 'ur->sched_name'
+	 */
+	sched = ip_vs_scheduler_get(ur->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
+			   ur->sched_name);
+		ret = -ENOENT;
+		goto out_mod_dec;
+	}
+
+	svc = (struct ip_vs_service*)
+		kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+	if (svc == NULL) {
+		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
+		ret = -ENOMEM;
+		goto out_err;
+	}
+	memset(svc, 0, sizeof(struct ip_vs_service));
+
+	svc->protocol = ur->protocol;
+	svc->addr = ur->vaddr;
+	svc->port = ur->vport;
+	svc->fwmark = ur->vfwmark;
+	svc->flags = ur->vs_flags;
+	svc->timeout = ur->timeout * HZ;
+	svc->netmask = ur->netmask;
+
+	INIT_LIST_HEAD(&svc->destinations);
+	svc->sched_lock = RW_LOCK_UNLOCKED;
+	svc->stats.lock = SPIN_LOCK_UNLOCKED;
+
+	/*
+	 *    Bind the scheduler (a failure frees svc again at out_err)
+	 */
+	ret = ip_vs_bind_scheduler(svc, sched);
+	if (ret) {
+		goto out_err;
+	}
+
+	/*
+	 *    Update the virtual service counters (FTP port / port zero)
+	 */
+	if (svc->port == FTPPORT)
+		atomic_inc(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_inc(&ip_vs_nullsvc_counter);
+
+	/*
+	 *    I'm the first user of the service
+	 */
+	atomic_set(&svc->usecnt, 1);
+	atomic_set(&svc->refcnt, 0);	/* ip_vs_svc_hash raises it below */
+
+	ip_vs_new_estimator(&svc->stats);
+	ip_vs_num_services++;
+
+	/*
+	 *    Hash the service into the service table
+	 */
+	write_lock_bh(&__ip_vs_svc_lock);
+	ip_vs_svc_hash(svc);
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	*svc_p = svc;	/* written on success only */
+	return 0;
+
+  out_err:
+	if (svc)
+		kfree(svc);
+	ip_vs_scheduler_put(sched);
+  out_mod_dec:
+	MOD_DEC_USE_COUNT;
+	return ret;
+}
+
+
+/*
+ *	Edit a service and bind it with a new scheduler
+ */
+static int ip_vs_edit_service(struct ip_vs_service *svc,
+			      struct ip_vs_rule_user *ur)
+{
+	struct ip_vs_scheduler *sched, *old_sched;
+	int ret = 0;
+
+	/*
+	 * Lookup the scheduler, by 'ur->sched_name'
+	 */
+	sched = ip_vs_scheduler_get(ur->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
+			   ur->sched_name);
+		return -ENOENT;
+	}
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away (busy-wait).
+	 */
+	while (atomic_read(&svc->usecnt) > 1) {};
+
+	/*
+	 * Set the flags and timeout value (IP_VS_SVC_F_HASHED is kept set)
+	 */
+	svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
+	svc->timeout = ur->timeout * HZ;
+	svc->netmask = ur->netmask;
+
+	old_sched = svc->scheduler;
+	if (sched != old_sched) {
+		/*
+		 * Unbind the old scheduler
+		 */
+		if ((ret = ip_vs_unbind_scheduler(svc))) {
+			old_sched = sched;	/* so that "out" puts the new one */
+			goto out;
+		}
+
+		/*
+		 * Bind the new scheduler
+		 */
+		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
+			/*
+			 * If ip_vs_bind_scheduler fails, restore the old
+			 * scheduler.
+			 * The main reason of failure is out of memory.
+			 *
+			 * The question is if the old scheduler can be
+			 * restored all the time. TODO: if it cannot be
+			 * restored some time, we must delete the service,
+			 * otherwise the system may crash.
+			 */
+			ip_vs_bind_scheduler(svc, old_sched);
+			old_sched = sched;	/* so that "out" puts the new one */
+		}
+	}
+
+  out:
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	if (old_sched)	/* by now: whichever scheduler is no longer needed */
+		ip_vs_scheduler_put(old_sched);
+
+	return ret;
+}
+
+
+/*
+ *  Delete a service from the service list
+ *  The service must be unlinked, unlocked and not referenced!
+ */
+static void __ip_vs_del_service(struct ip_vs_service *svc)
+{
+	struct list_head *l;
+	struct ip_vs_dest *dest;
+	struct ip_vs_scheduler *old_sched;
+
+	ip_vs_num_services--;
+	ip_vs_kill_estimator(&svc->stats);
+
+	/*
+	 *    Unbind scheduler and drop the use count of its module
+	 */
+	old_sched = svc->scheduler;
+	ip_vs_unbind_scheduler(svc);
+	if (old_sched && old_sched->module)
+		__MOD_DEC_USE_COUNT(old_sched->module);
+
+	/*
+	 *    Unlink the whole destination list (no scheduler update)
+	 */
+	l = &svc->destinations;
+	while (l->next != l) {
+		dest = list_entry(l->next, struct ip_vs_dest, n_list);
+		__ip_vs_unlink_dest(svc, dest, 0);
+		__ip_vs_del_dest(dest);
+	}
+
+	/*
+	 *    Update the virtual service counters
+	 */
+	if (svc->port == FTPPORT)
+		atomic_dec(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_dec(&ip_vs_nullsvc_counter);
+
+	/*
+	 *    Free the service if nobody refers to it; otherwise it is
+	 */	/* left for the last __ip_vs_unbind_svc() to free */
+	if (atomic_read(&svc->refcnt) == 0)
+		kfree(svc);
+	MOD_DEC_USE_COUNT;
+}
+
+/*
+ *  Unhash a service and delete it; returns -EEXIST when svc is NULL
+ */
+static int ip_vs_del_service(struct ip_vs_service *svc)
+{
+	if (svc == NULL)
+		return -EEXIST;
+
+	/*
+	 * Unhash it from the service table
+	 */
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	ip_vs_svc_unhash(svc);
+
+	/*
+	 * Spin until usecnt is at most 1 (presumably the caller's own hold).
+	 */
+	while (atomic_read(&svc->usecnt) > 1) {};
+
+	__ip_vs_del_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	return 0;
+}
+
+
+/*
+ *  Flush all the virtual services from both hash tables; returns 0
+ */
+static int ip_vs_flush(void)
+{
+	int idx;
+	struct ip_vs_service *svc;
+	struct list_head *l;
+
+	/*
+	 * Flush the service table hashed by <protocol,addr,port>
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		l = &ip_vs_svc_table[idx];
+		while (l->next != l) {
+			svc = list_entry(l->next,struct ip_vs_service,s_list);
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Spin until no user holds the service (usecnt 0).
+			 */
+			while (atomic_read(&svc->usecnt) > 0) {};
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	/*
+	 * Flush the service table hashed by fwmark
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		l = &ip_vs_svc_fwm_table[idx];
+		while (l->next != l) {
+			svc = list_entry(l->next,struct ip_vs_service,f_list);
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Spin until no user holds the service (usecnt 0).
+			 */
+			while (atomic_read(&svc->usecnt) > 0) {};
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *  Zero every field of *stats that lies before `lock`, then its estimator
+ */
+static inline void
+__ip_vs_zero_stats(struct ip_vs_stats *stats)
+{
+	spin_lock_bh(&stats->lock);
+	memset(stats, 0, (char *)&stats->lock - (char *)stats);
+	spin_unlock_bh(&stats->lock);
+	ip_vs_zero_estimator(stats);	/* called after the lock is released */
+}
+
+static int ip_vs_zero_service(struct ip_vs_service *svc)
+{
+	struct list_head *l;
+	struct ip_vs_dest *dest;
+	/* Zero the stats of each destination, then of svc; returns 0. */
+	write_lock_bh(&__ip_vs_svc_lock);
+	list_for_each (l, &svc->destinations) {
+		dest = list_entry(l, struct ip_vs_dest, n_list);
+		__ip_vs_zero_stats(&dest->stats);
+	}
+	__ip_vs_zero_stats(&svc->stats);
+	write_unlock_bh(&__ip_vs_svc_lock);
+	return 0;
+}
+
+static int ip_vs_zero_all(void)
+{
+	int idx;
+	struct list_head *l;
+	struct ip_vs_service *svc;
+	/* Zero every service of both hash tables, then the global stats. */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each (l, &ip_vs_svc_table[idx]) {
+			svc = list_entry(l, struct ip_vs_service, s_list);
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
+			svc = list_entry(l, struct ip_vs_service, f_list);
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	__ip_vs_zero_stats(&ip_vs_stats);
+	return 0;	/* always succeeds */
+}
+
+/* sysctl handler: a written value outside 0..3 is undone, others applied. */
+static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
+	struct file * filp, void *buffer, size_t *lenp)
+{
+	int *valp = ctl->data;
+	int val = *valp;	/* value before proc_dointvec() runs */
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+	if (write && (*valp != val)) {
+		if ((*valp < 0) || (*valp > 3)) {
+			/* Out of range: restore the previous value */
+			*valp = val;
+		} else {
+			local_bh_disable();
+			update_defense_level();
+			local_bh_enable();
+		}
+	}
+	return ret;	/* proc_dointvec() result, even if the value was undone */
+}
+
+
+/*
+ *      IPVS sysctl table: the entries and the directories that hold them
+ */
+struct ip_vs_sysctl_table {
+	struct ctl_table_header *sysctl_header;	/* from register_sysctl_table */
+	ctl_table vs_vars[NET_IPV4_VS_LAST];	/* the tunables themselves */
+	ctl_table vs_dir[2];	/* "vs" directory, parent of vs_vars */
+	ctl_table ipv4_dir[2];	/* "ipv4" directory, parent of vs_dir */
+	ctl_table root_dir[2];	/* "net" directory, parent of ipv4_dir */
+};
+
+/* The sysctl tree net/ipv4/vs; "timeout_*" entries edit vs_timeout_table_dos */
+static struct ip_vs_sysctl_table ipv4_vs_table = {
+	NULL,	/* sysctl_header */
+	{{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
+	  &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
+	  &proc_dointvec},
+#ifdef CONFIG_IP_VS_DEBUG
+	 {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
+	  &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
+	  &proc_dointvec},
+#endif
+	 {NET_IPV4_VS_AMDROPRATE, "am_droprate",
+	  &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
+	  &proc_dointvec},
+	 {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
+	  &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
+	  &ip_vs_sysctl_defense_mode},
+	 {NET_IPV4_VS_DROP_PACKET, "drop_packet",
+	  &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
+	  &ip_vs_sysctl_defense_mode},
+	 {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
+	  &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
+	  &ip_vs_sysctl_defense_mode},
+	 {NET_IPV4_VS_TO_ES, "timeout_established",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_SS, "timeout_synsent",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_SR, "timeout_synrecv",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_FW, "timeout_finwait",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_TW, "timeout_timewait",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_CL, "timeout_close",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_CW, "timeout_closewait",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_LA, "timeout_lastack",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_LI, "timeout_listen",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_SA, "timeout_synack",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_UDP, "timeout_udp",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
+	  &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
+	  &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
+	  &proc_dointvec},
+	 {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
+	  &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
+	  &proc_dointvec},
+	 {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
+	  &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
+	  &proc_dointvec},
+	 {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
+	  &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
+	  &proc_dointvec},
+	 {0}},	/* end of vs_vars */
+	{{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
+	 {0}},	/* end of vs_dir */
+	{{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
+	 {0}},	/* end of ipv4_dir */
+	{{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
+	 {0}}	/* end of root_dir */
+};
+
+
+/*
+ *	Write the contents of the VS rule table to a PROCfs file.
+ *	(It is kept just for backward compatibility)
+ */
+static inline char *ip_vs_fwd_name(unsigned flags)
+{
+	unsigned method = flags & IP_VS_CONN_F_FWD_MASK;
+
+	/* Map the forwarding-method bits to the label printed per server. */
+	if (method == IP_VS_CONN_F_LOCALNODE)
+		return "Local";
+
+	if (method == IP_VS_CONN_F_TUNNEL)
+		return "Tunnel";
+
+	if (method == IP_VS_CONN_F_DROUTE)
+		return "Route";
+
+	/*
+	 * Every other value (presumably NAT/masquerading) prints as "Masq".
+	 */
+	return "Masq";
+}
+
+static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
+{
+	int len=0;
+	off_t pos=0;
+	char temp[64], temp2[32];
+	int idx;
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct list_head *l, *e, *p, *q;
+
+	/*
+	 * Dump services and destinations as fixed 64-byte records (63 chars
+	 * + '\n'); buffers are usually a multiple of 512 bytes long, so no
+	 * record is split. snprintf() keeps long fields from overrunning temp.
+	 */
+	pos = 192;	/* the three header records below */
+	if (pos > offset) {
+		snprintf(temp, sizeof(temp),
+			 "IP Virtual Server version %d.%d.%d (size=%d)",
+			 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		len += sprintf(buf+len, "%-63s\n", temp);
+		len += sprintf(buf+len, "%-63s\n",
+			       "Prot LocalAddress:Port Scheduler Flags");
+		len += sprintf(buf+len, "%-63s\n",
+			       "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
+	}
+
+	read_lock_bh(&__ip_vs_svc_lock);
+
+	/* print the service table hashed by <protocol,addr,port> */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		l = &ip_vs_svc_table[idx];
+		for (e=l->next; e!=l; e=e->next) {
+			svc = list_entry(e, struct ip_vs_service, s_list);
+			pos += 64;
+			if (pos > offset) {
+				if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+					snprintf(temp2, sizeof(temp2),
+					    "persistent %d %08X",
+					    svc->timeout, ntohl(svc->netmask));
+				else
+					temp2[0] = '\0';
+
+				snprintf(temp, sizeof(temp),
+					 "%s  %08X:%04X %s %s",
+					 ip_vs_proto_name(svc->protocol),
+					 ntohl(svc->addr), ntohs(svc->port),
+					 svc->scheduler->name, temp2);
+				len += sprintf(buf+len, "%-63s\n", temp);
+				if (len >= length)
+					goto done;
+			}
+
+			p = &svc->destinations;
+			for (q=p->next; q!=p; q=q->next) {
+				dest = list_entry(q, struct ip_vs_dest, n_list);
+				pos += 64;
+				if (pos <= offset)
+					continue;
+				snprintf(temp, sizeof(temp),
+					"  -> %08X:%04X      %-7s %-6d %-10d %-10d",
+					ntohl(dest->addr),
+					ntohs(dest->port),
+					ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					atomic_read(&dest->weight),
+					atomic_read(&dest->activeconns),
+					atomic_read(&dest->inactconns));
+				len += sprintf(buf+len, "%-63s\n", temp);
+				if (len >= length)
+					goto done;
+			}
+		}
+	}
+
+	/* print the service table hashed by fwmark */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		l = &ip_vs_svc_fwm_table[idx];
+		for (e=l->next; e!=l; e=e->next) {
+			svc = list_entry(e, struct ip_vs_service, f_list);
+			pos += 64;
+			if (pos > offset) {
+				if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+					snprintf(temp2, sizeof(temp2),
+					    "persistent %d %08X",
+					    svc->timeout, ntohl(svc->netmask));
+				else
+					temp2[0] = '\0';
+
+				snprintf(temp, sizeof(temp), "FWM  %08X %s %s",
+					 svc->fwmark,
+					 svc->scheduler->name, temp2);
+				len += sprintf(buf+len, "%-63s\n", temp);
+				if (len >= length)
+					goto done;
+			}
+
+			p = &svc->destinations;
+			for (q=p->next; q!=p; q=q->next) {
+				dest = list_entry(q, struct ip_vs_dest, n_list);
+				pos += 64;
+				if (pos <= offset)
+					continue;
+				snprintf(temp, sizeof(temp),
+					"  -> %08X:%04X      %-7s %-6d %-10d %-10d",
+					ntohl(dest->addr),
+					ntohs(dest->port),
+					ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					atomic_read(&dest->weight),
+					atomic_read(&dest->activeconns),
+					atomic_read(&dest->inactconns));
+				len += sprintf(buf+len, "%-63s\n", temp);
+				if (len >= length)
+					goto done;
+			}
+		}
+	}
+
+  done:
+	read_unlock_bh(&__ip_vs_svc_lock);
+
+	*start = buf+len-(pos-offset);          /* Start of wanted data */
+	len = pos-offset;	/* bytes from offset to the last record seen */
+	if (len > length)
+		len = length;
+	if (len < 0)
+		len = 0;
+	return len;
+}
+
+
+struct ip_vs_stats ip_vs_stats;	/* global stats, printed by ip_vs_stats_get_info */
+
+static int
+ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
+{
+	int len=0;
+	off_t pos=0;
+	char temp[64];
+	/* Output is five 64-byte records (320 bytes); counters are in hex. */
+	pos += 320;
+	if (pos > offset) {
+		len += sprintf(buf+len, "%-63s\n%-63s\n",
+/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
+			       "   Total Incoming Outgoing         Incoming         Outgoing",
+			       "   Conns  Packets  Packets            Bytes            Bytes");
+
+		spin_lock_bh(&ip_vs_stats.lock);
+		sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
+			ip_vs_stats.conns,
+			ip_vs_stats.inpkts,
+			ip_vs_stats.outpkts,
+			(__u32)(ip_vs_stats.inbytes>>32),
+			(__u32)ip_vs_stats.inbytes,
+			(__u32)(ip_vs_stats.outbytes>>32),
+			(__u32)ip_vs_stats.outbytes);
+		len += sprintf(buf+len, "%-62s\n\n", temp);	/* 62 + 2 = 64 */
+
+		len += sprintf(buf+len, "%-63s\n",
+/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
+			       " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s");
+		sprintf(temp, "%8X %8X %8X %16X %16X",
+			ip_vs_stats.cps,
+			ip_vs_stats.inpps,
+			ip_vs_stats.outpps,
+			ip_vs_stats.inbps,
+			ip_vs_stats.outbps);
+		len += sprintf(buf+len, "%-63s\n", temp);
+
+		spin_unlock_bh(&ip_vs_stats.lock);
+	}
+
+	*start = buf+len-(pos-offset);          /* Start of wanted data */
+	len = pos-offset;
+	if (len > length)
+		len = length;
+	if (len < 0)
+		len = 0;
+	return len;
+}
+
+
+/*
+ * Set the tcp, tcpfin and udp timeouts from seconds; 0 keeps the old value.
+ */
+static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
+{
+	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
+		  u->tcp_timeout,
+		  u->tcp_fin_timeout,
+		  u->udp_timeout);
+
+	if (u->tcp_timeout) {
+		vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
+			= u->tcp_timeout * HZ;
+	}
+
+	if (u->tcp_fin_timeout) {
+		vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
+			= u->tcp_fin_timeout * HZ;
+	}
+
+	if (u->udp_timeout) {
+		vs_timeout_table.timeout[IP_VS_S_UDP]
+			= u->udp_timeout * HZ;
+	}
+	return 0;	/* always succeeds */
+}
+
+/* setsockopt() handler: copy a rule from user space and run command cmd. */
+static int
+do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
+{
+	int ret;
+	struct ip_vs_rule_user *urule;
+	struct ip_vs_service *svc = NULL;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	/*
+	 * The argument must hold at least one struct ip_vs_rule_user;
+	 * len > 128000 is a sanity check.
+	 */
+	if (len < sizeof(struct ip_vs_rule_user)) {
+		IP_VS_ERR("set_ctl: len %u < %u\n",
+			  len, sizeof(struct ip_vs_rule_user));
+		return -EINVAL;
+	} else if (len > 128000) {
+		IP_VS_ERR("set_ctl: len %u > 128000\n", len);
+		return -EINVAL;
+	} else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
+		IP_VS_ERR("set_ctl: no mem for len %u\n", len);
+		return -ENOMEM;
+	} else if (copy_from_user(urule, user, len) != 0) {
+		ret = -EFAULT;
+		goto out_free;
+	}
+
+	MOD_INC_USE_COUNT;
+	if (down_interruptible(&__ip_vs_mutex)) {
+		ret = -ERESTARTSYS;
+		goto out_dec;
+	}
+
+	if (cmd == IP_VS_SO_SET_FLUSH) {
+		/* Flush the virtual service */
+		ret = ip_vs_flush();
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
+		/* Set timeout values for (tcp tcpfin udp) */
+		ret = ip_vs_set_timeouts(urule);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+		ret = start_sync_thread(urule->state, urule->mcast_ifn);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
+		ret = stop_sync_thread();
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_ZERO) {
+		/* if no service address is set, zero counters in all */
+		if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
+			ret = ip_vs_zero_all();
+			goto out_unlock;
+		}
+	}
+
+	/*
+	 * Check for valid protocol: TCP or UDP. Even for fwmark!=0
+	 */
+	if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
+		IP_VS_INFO("vs_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
+			   urule->protocol, NIPQUAD(urule->vaddr),
+			   ntohs(urule->vport), urule->sched_name);
+		ret = -EFAULT;
+		goto out_unlock;
+	}
+
+	/*
+	 * Lookup the exact service by <protocol, vaddr, vport> or fwmark
+	 */
+	if (urule->vfwmark == 0)
+		svc = __ip_vs_service_get(urule->protocol,
+					  urule->vaddr, urule->vport);
+	else
+		svc = __ip_vs_svc_fwm_get(urule->vfwmark);
+
+	if (cmd != IP_VS_SO_SET_ADD
+	    && (svc == NULL || svc->protocol != urule->protocol)) {
+		ret = -ESRCH;
+		goto out_unlock;
+	}
+
+	switch (cmd) {
+	case IP_VS_SO_SET_ADD:
+		if (svc != NULL)
+			ret = -EEXIST;
+		else
+			ret = ip_vs_add_service(urule, &svc);
+		break;
+	case IP_VS_SO_SET_EDIT:
+		ret = ip_vs_edit_service(svc, urule);
+		break;
+	case IP_VS_SO_SET_DEL:
+		ret = ip_vs_del_service(svc);
+		if (!ret)
+			goto out_unlock;	/* svc was deleted: no put */
+		break;
+	case IP_VS_SO_SET_ADDDEST:
+		ret = ip_vs_add_dest(svc, urule);
+		break;
+	case IP_VS_SO_SET_EDITDEST:
+		ret = ip_vs_edit_dest(svc, urule);
+		break;
+	case IP_VS_SO_SET_DELDEST:
+		ret = ip_vs_del_dest(svc, urule);
+		break;
+	case IP_VS_SO_SET_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (svc)
+		ip_vs_service_put(svc);
+
+  out_unlock:
+	up(&__ip_vs_mutex);
+  out_dec:
+	MOD_DEC_USE_COUNT;
+  out_free:
+	kfree(urule);
+	return ret;
+}
+
+/* Copy the fields of *src that precede `lock` into *dst, under src->lock. */
+static inline void
+__ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+	spin_lock_bh(&src->lock);
+	memcpy(dst, src, (char*)&src->lock - (char*)src); /* assumes same layout */
+	spin_unlock_bh(&src->lock);
+}
+
+static inline int
+__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+			    struct ip_vs_get_services *uptr)
+{
+	int idx, count = 0, ret = 0;
+	struct ip_vs_service *svc;
+	struct list_head *l;
+	struct ip_vs_service_user entry;
+
+	/* Copy at most get->num_services entries to uptr->entrytable[]. */
+	memset(&entry, 0, sizeof(entry));  /* unset bytes must not leak stack */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each (l, &ip_vs_svc_table[idx]) {
+			if (count >= get->num_services)
+				goto out;
+			svc = list_entry(l, struct ip_vs_service, s_list);
+			entry.protocol = svc->protocol;
+			entry.addr = svc->addr;
+			entry.port = svc->port;
+			entry.fwmark = svc->fwmark;
+			strcpy(entry.sched_name, svc->scheduler->name);
+			entry.flags = svc->flags;
+			entry.timeout = svc->timeout / HZ;
+			entry.netmask = svc->netmask;
+			entry.num_dests = svc->num_dests;
+			__ip_vs_copy_stats(&entry.stats, &svc->stats);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {	/* fwmark table */
+		list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
+			if (count >= get->num_services)
+				goto out;
+			svc = list_entry(l, struct ip_vs_service, f_list);
+			entry.protocol = svc->protocol;
+			entry.addr = svc->addr;
+			entry.port = svc->port;
+			entry.fwmark = svc->fwmark;
+			strcpy(entry.sched_name, svc->scheduler->name);
+			entry.flags = svc->flags;
+			entry.timeout = svc->timeout / HZ;
+			entry.netmask = svc->netmask;
+			entry.num_dests = svc->num_dests;
+			__ip_vs_copy_stats(&entry.stats, &svc->stats);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+ out:
+	return ret;
+}
+
+static inline int
+__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+			 struct ip_vs_get_dests *uptr)
+{
+	struct ip_vs_service *svc;
+	int ret = 0;
+	/* Copy up to get->num_dests destinations of one service to uptr. */
+	if (get->fwmark)
+		svc = __ip_vs_svc_fwm_get(get->fwmark);
+	else
+		svc = __ip_vs_service_get(get->protocol,
+					  get->addr, get->port);
+	if (svc) {
+		int count = 0;
+		struct ip_vs_dest *dest;
+		struct list_head *l, *e;
+		struct ip_vs_dest_user entry;
+		/* NOTE(review): entry is not zeroed; confirm it has no padding */
+		l = &svc->destinations;
+		for (e=l->next; e!=l; e=e->next) {
+			if (count >= get->num_dests)
+				break;
+			dest = list_entry(e, struct ip_vs_dest, n_list);
+			entry.addr = dest->addr;
+			entry.port = dest->port;
+			entry.flags = atomic_read(&dest->conn_flags);
+			entry.weight = atomic_read(&dest->weight);
+			entry.activeconns = atomic_read(&dest->activeconns);
+			entry.inactconns = atomic_read(&dest->inactconns);
+			__ip_vs_copy_stats(&entry.stats, &dest->stats);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				break;
+			}
+			count++;
+		}
+		ip_vs_service_put(svc);	/* drop the hold taken by the lookup */
+	} else
+		ret = -ESRCH;	/* no such service */
+	return ret;
+}
+
+static inline void
+__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+{	/* Report the tcp, tcpfin and udp timeouts of vs_timeout_table / HZ. */
+	u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
+	u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
+	u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
+}
+
+static int
+do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
+{
+	int ret = 0;
+	/* getsockopt() handler: needs CAP_NET_ADMIN, runs under __ip_vs_mutex. */
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (down_interruptible(&__ip_vs_mutex))
+		return -ERESTARTSYS;
+
+	switch (cmd) {
+	case IP_VS_SO_GET_VERSION:
+	{
+		char buf[64];
+
+		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
+			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		if (*len < strlen(buf)+1) {
+			ret = -EINVAL;
+			goto out;
+		}
+		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+		*len = strlen(buf)+1;
+	}
+	break;
+
+	case IP_VS_SO_GET_INFO:
+	{
+		struct ip_vs_getinfo info;
+		info.version = IP_VS_VERSION_CODE;
+		info.size = IP_VS_CONN_TAB_SIZE;
+		info.num_services = ip_vs_num_services;
+		if (copy_to_user(user, &info, sizeof(info)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICES:
+	{
+		struct ip_vs_get_services get;
+
+		if (*len < sizeof(get)) {
+			IP_VS_ERR("length: %u < %u\n", *len, sizeof(get));
+			ret = -EINVAL;
+			goto out;
+		}
+		if (copy_from_user(&get, user, sizeof(get))) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
+			IP_VS_ERR("length: %u != %u\n", *len,
+				  sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_service_entries(&get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICE:
+	{
+		struct ip_vs_service_user get;
+		struct ip_vs_service *svc;
+
+		if (*len != sizeof(get)) {
+			IP_VS_ERR("length: %u != %u\n", *len, sizeof(get));
+			ret = -EINVAL;
+			goto out;
+		}
+		if (copy_from_user(&get, user, sizeof(get))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (get.fwmark)
+			svc = __ip_vs_svc_fwm_get(get.fwmark);
+		else
+			svc = __ip_vs_service_get(get.protocol,
+						  get.addr, get.port);
+		if (svc) {
+			strcpy(get.sched_name, svc->scheduler->name);
+			get.flags = svc->flags;
+			get.timeout = svc->timeout / HZ;
+			get.netmask = svc->netmask;
+			get.num_dests = svc->num_dests;
+			__ip_vs_copy_stats(&get.stats, &svc->stats);
+			if (copy_to_user(user, &get, *len) != 0)
+				ret = -EFAULT;
+			ip_vs_service_put(svc);
+		} else
+			ret = -ESRCH;
+	}
+	break;
+
+	case IP_VS_SO_GET_DESTS:
+	{
+		struct ip_vs_get_dests get;
+
+		if (*len < sizeof(get)) {
+			IP_VS_ERR("length: %u < %u\n", *len, sizeof(get));
+			ret = -EINVAL;
+			goto out;
+		}
+		if (copy_from_user(&get, user, sizeof(get))) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (*len != (sizeof(get) +
+			     sizeof(struct ip_vs_dest_user)*get.num_dests)) {
+			IP_VS_ERR("length: %u != %u\n", *len,
+				  sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_dest_entries(&get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_TIMEOUTS:
+	{
+		struct ip_vs_timeout_user u;
+
+		if (*len < sizeof(u)) {
+			IP_VS_ERR("length: %u < %u\n", *len, sizeof(u));
+			ret = -EINVAL;
+			goto out;
+		}
+		__ip_vs_get_timeouts(&u);
+		if (copy_to_user(user, &u, sizeof(u)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_DAEMON:
+	{
+		struct ip_vs_daemon_user u;
+		memset(&u, 0, sizeof(u));	/* no stack bytes after the name */
+		if (*len < sizeof(u)) {
+			IP_VS_ERR("length: %u < %u\n", *len, sizeof(u));
+			ret = -EINVAL;
+			goto out;
+		}
+		u.state = ip_vs_sync_state;
+		strcpy(u.mcast_ifn, ip_vs_mcast_ifn);
+		if (copy_to_user(user, &u, sizeof(u)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+  out:
+	up(&__ip_vs_mutex);
+	return ret;
+}
+
+/* Socket option ranges served by the set and get handlers above. */
+static struct nf_sockopt_ops ip_vs_sockopts = {
+	{ NULL, NULL }, PF_INET,
+	IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,	/* set side */
+	IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl	/* get side */
+};
+
+
+int ip_vs_control_init(void)
+{
+	int ret;
+	int idx;
+
+	EnterFunction(2);
+
+	ret = nf_register_sockopt(&ip_vs_sockopts);
+	if (ret) {
+		IP_VS_ERR("cannot register sockopt.\n");
+		return ret;
+	}
+
+	proc_net_create("ip_vs", 0, ip_vs_get_info);
+	proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);
+
+	ipv4_vs_table.sysctl_header =
+		register_sysctl_table(ipv4_vs_table.root_dir, 0);
+	/*
+	 * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
+	 * ip_vs_schedulers.
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
+		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+	}
+	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+	}
+
+	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
+	ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
+	ip_vs_new_estimator(&ip_vs_stats);
+
+	LeaveFunction(2);
+	return 0;
+}
+
+void ip_vs_control_cleanup(void)
+{
+	EnterFunction(2);
+	ip_vs_trash_cleanup();
+	ip_vs_kill_estimator(&ip_vs_stats);	/* the global stats estimator */
+	unregister_sysctl_table(ipv4_vs_table.sysctl_header);
+	proc_net_remove("ip_vs_stats");
+	proc_net_remove("ip_vs");
+	nf_unregister_sockopt(&ip_vs_sockopts);	/* registered in control_init */
+	LeaveFunction(2);
+}
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_dh.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_dh.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_dh.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_dh.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,265 @@
+/*
+ * IPVS:        Destination Hashing scheduling module
+ *
+ * Version:     $Id: ip_vs_dh.c,v 1.4 2001/10/19 15:05:17 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              Inspired by the consistent hashing scheduler patch from
+ *              Thomas Proell <proellt@gmx.de>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The dh algorithm selects a server by the hash key of the destination IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[dest_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded, such as n.conns>2*n.weight) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Note that servernode is a 256-bucket hash table that maps the hash
+ * index derived from the packet destination IP address to the current
+ * server array. If the dh scheduler is used in a cache cluster, it is good
+ * to combine it with the cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS DH bucket: one slot of the per-service hash table
+ */
+struct ip_vs_dh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+};
+
+/*
+ *     IPVS DH hash table size: 2^bits buckets, 8 bits unless configured
+ */
+#ifndef CONFIG_IP_VS_DH_TAB_BITS
+#define CONFIG_IP_VS_DH_TAB_BITS        8
+#endif
+#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
+#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
+#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
+
+
+/*
+ *	Returns the bucket index of an address: multiply, keep the low bits
+ */
+static inline unsigned ip_vs_dh_hashkey(__u32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK;
+}
+
+
+/*
+ *      Get the ip_vs_dest in the bucket that addr hashes to (may be NULL).
+ */
+static inline struct ip_vs_dest *
+ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __u32 addr)
+{
+	return (tbl[ip_vs_dh_hashkey(addr)]).dest;
+}
+
+
+/*
+ *      Cycle through the service's destinations to fill every bucket.
+ */
+static int
+ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+	struct list_head *p;
+	struct ip_vs_dest *dest;
+
+	b = tbl;
+	p = &svc->destinations;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (list_empty(p)) {
+			b->dest = NULL;	/* the service has no destination */
+		} else {
+			if (p == &svc->destinations)	/* skip the list head */
+				p = p->next;
+
+			dest = list_entry(p, struct ip_vs_dest, n_list);
+			atomic_inc(&dest->refcnt);	/* one count per bucket */
+			b->dest = dest;
+
+			p = p->next;
+		}
+		b++;
+	}
+	return 0;	/* always succeeds */
+}
+
+
+/*
+ *      Empty every bucket, dropping the count it held on its destination.
+ */
+static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+
+	b = tbl;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (b->dest) {
+			atomic_dec(&b->dest->refcnt);
+			b->dest = NULL;
+		}
+		b++;
+	}
+}
+
+/* Allocate and fill the DH table of a service; -ENOMEM if kmalloc fails. */
+static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl;
+
+	/* allocate the DH table for this service */
+	tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "DH hash table (memory=%dbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+/* Empty the buckets of a service's DH table and free the table. */
+static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "DH hash table (memory=%dbytes) released\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	return 0;
+}
+
+/* Rebuild the DH table of a service from its current destination list. */
+static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      A server counts as overloaded once its active connections exceed
+ *      twice its weight.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	int active = atomic_read(&dest->activeconns);
+	int limit = atomic_read(&dest->weight) * 2;
+
+	return active > limit;
+}
+
+
+/*
+ *      Destination hashing scheduling: pick the server for iph->daddr
+ */
+static struct ip_vs_dest *
+ip_vs_dh_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_dh_bucket *tbl;
+
+	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
+	dest = ip_vs_dh_get(tbl, iph->daddr);
+	if (!dest
+	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
+	    || atomic_read(&dest->weight) <= 0
+	    || is_overloaded(dest)) {
+		return NULL;	/* no usable server in this bucket */
+	}
+
+	IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS DH scheduler descriptor, registered under the name "dh"
+ */
+static struct ip_vs_scheduler ip_vs_dh_scheduler =
+{
+	{0},                    /* n_list */
+	"dh",                   /* name */
+	ATOMIC_INIT(0),         /* refcnt */
+	THIS_MODULE,            /* this module */
+	ip_vs_dh_init_svc,      /* service initializer */
+	ip_vs_dh_done_svc,      /* service done */
+	ip_vs_dh_update_svc,    /* service updater */
+	ip_vs_dh_schedule,      /* select a server from the destination list */
+};
+
+/* Module init: register the dh scheduler and return the result. */
+static int __init ip_vs_dh_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+/* Module exit: unregister the dh scheduler. */
+static void __exit ip_vs_dh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+module_init(ip_vs_dh_init);
+module_exit(ip_vs_dh_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_est.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_est.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_est.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_est.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,197 @@
+/*
+ * ip_vs_est.c  Simple rate estimator for IPVS
+ *
+ * Version:     $Id: ip_vs_est.c,v 1.3 2002/07/11 14:26:41 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include <net/ip_vs.h>
+
+/*
+  This code is to estimate rate in a shorter interval (such as 8
+  seconds) for virtual services and real servers. To measure rates over
+  a long interval, it is easy to implement a user level daemon which
+  periodically reads those statistical counters and computes the rate.
+
+  Currently, the measurement is activated by slow timer handler. Hope
+  this measurement will not introduce too much load.
+
+  We measure rate during the last 8 seconds every 2 seconds:
+
+    avgrate = avgrate*(1-W) + rate*W
+
+    where W = 2^(-2)
+
+  NOTES.
+
+  * The stored value for average bps is scaled by 2^5, so that maximal
+    rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+
+  * A lot of code is taken from net/sched/estimator.c
+ */
+
+
+struct ip_vs_estimator
+{
+	struct ip_vs_estimator	*next;		/* next entry in est_list */
+	struct ip_vs_stats	*stats;		/* counters being measured */
+
+	u32			last_conns;	/* counter values sampled */
+	u32			last_inpkts;	/* at the previous timer run */
+	u32			last_outpkts;
+	u64			last_inbytes;
+	u64			last_outbytes;
+
+	u32			cps;		/* averages, scaled by 2^10 */
+	u32			inpps;
+	u32			outpps;
+	u32			inbps;		/* averages, scaled by 2^5 */
+	u32			outbps;
+};
+
+
+static struct ip_vs_estimator *est_list = NULL;
+static rwlock_t est_lock = RW_LOCK_UNLOCKED;
+static struct timer_list est_timer;
+
+static void estimation_timer(unsigned long arg)	/* arg is not used */
+{
+	struct ip_vs_estimator *e;
+	struct ip_vs_stats *s;
+	u32 n_conns;
+	u32 n_inpkts, n_outpkts;
+	u64 n_inbytes, n_outbytes;
+	u32 rate;
+
+	read_lock(&est_lock);
+	for (e = est_list; e; e = e->next) {
+		s = e->stats;
+		n_conns = s->conns;
+		n_inpkts = s->inpkts;
+		n_outpkts = s->outpkts;
+		n_inbytes = s->inbytes;
+		n_outbytes = s->outbytes;
+
+		/* scaled by 2^10, but divided by the 2 second interval */
+		rate = (n_conns - e->last_conns)<<9;
+		e->last_conns = n_conns;
+		e->cps += ((long)rate - (long)e->cps)>>2;	/* W = 1/4 */
+		s->cps = (e->cps+0x1FF)>>10;	/* publish unscaled value */
+
+		rate = (n_inpkts - e->last_inpkts)<<9;
+		e->last_inpkts = n_inpkts;
+		e->inpps += ((long)rate - (long)e->inpps)>>2;
+		s->inpps = (e->inpps+0x1FF)>>10;
+
+		rate = (n_outpkts - e->last_outpkts)<<9;
+		e->last_outpkts = n_outpkts;
+		e->outpps += ((long)rate - (long)e->outpps)>>2;
+		s->outpps = (e->outpps+0x1FF)>>10;
+
+		rate = (n_inbytes - e->last_inbytes)<<4;	/* 2^5 scale, /2 */
+		e->last_inbytes = n_inbytes;
+		e->inbps += ((long)rate - (long)e->inbps)>>2;
+		s->inbps = (e->inbps+0xF)>>5;
+
+		rate = (n_outbytes - e->last_outbytes)<<4;
+		e->last_outbytes = n_outbytes;
+		e->outbps += ((long)rate - (long)e->outbps)>>2;
+		s->outbps = (e->outbps+0xF)>>5;
+	}
+	read_unlock(&est_lock);
+	mod_timer(&est_timer, jiffies + 2*HZ);	/* run again in 2 seconds */
+}
+
+/* Create an estimator for "stats" and add it to est_list; 0 or -ENOMEM. */
+int ip_vs_new_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est;
+
+	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	if (est == NULL)
+		return -ENOMEM;
+	memset(est, 0, sizeof(*est));
+	est->stats = stats;
+	/* start from the current counters and the already published rates */
+	est->last_conns = stats->conns;
+	est->cps = stats->cps<<10;
+	est->last_inpkts = stats->inpkts;
+	est->inpps = stats->inpps<<10;
+	est->last_outpkts = stats->outpkts;
+	est->outpps = stats->outpps<<10;
+	est->last_inbytes = stats->inbytes;
+	est->inbps = stats->inbps<<5;
+	est->last_outbytes = stats->outbytes;
+	est->outbps = stats->outbps<<5;
+
+	/* Sample and replace the list head under the write lock: reading
+	 * est_list before locking could link the new entry to a stale head
+	 * when estimators are added or killed concurrently. */
+	write_lock_bh(&est_lock);
+	est->next = est_list;
+	if (est->next == NULL) {
+		init_timer(&est_timer);
+		est_timer.expires = jiffies + 2*HZ;
+		est_timer.function = estimation_timer;
+		add_timer(&est_timer);
+	}
+	est_list = est;
+	write_unlock_bh(&est_lock);
+	return 0;
+}
+
+void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est, **pest;
+	int killed = 0;
+
+	write_lock_bh(&est_lock);
+	for (pest = &est_list; (est = *pest) != NULL; ) {
+		if (est->stats != stats) {
+			pest = &est->next;
+			continue;
+		}
+		*pest = est->next;	/* unlink and free a match */
+		kfree(est);
+		killed++;
+	}
+	killed = killed && est_list == NULL;	/* did we empty the list? */
+	write_unlock_bh(&est_lock);
+	if (killed)	/* handler takes est_lock: only sync once unlocked */
+		del_timer_sync(&est_timer);
+}
+
+/*
+ * Restart the rate estimation of "stats" from scratch.
+ */
+void ip_vs_zero_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est;
+
+	write_lock_bh(&est_lock);
+	for (est = est_list; est != NULL; est = est->next) {
+		if (est->stats == stats) {
+			/* forget the counter snapshots */
+			est->last_conns = 0;
+			est->last_inpkts = est->last_outpkts = 0;
+			est->last_inbytes = est->last_outbytes = 0;
+
+			/* and the scaled running averages */
+			est->cps = 0;
+			est->inpps = est->outpps = 0;
+			est->inbps = est->outbps = 0;
+		}
+	}
+	write_unlock_bh(&est_lock);
+}
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_ftp.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_ftp.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_ftp.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_ftp.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,407 @@
+/*
+ * IP_VS        ftp application module
+ *
+ * Version:	$Id: ip_vs_ftp.c,v 1.12 2002/08/10 04:32:35 wensong Exp $
+ *
+ * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ *
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
+ * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
+ *
+ *		IP_MASQ_FTP ftp masquerading module
+ *
+ * Version:	@(#)ip_masq_ftp.c 0.04   02/05/96
+ *
+ * Author:	Wouter Gadeyne
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/init.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+
+#include <net/ip_vs.h>
+
+
+#define SERVER_STRING "227 Entering Passive Mode ("
+#define CLIENT_STRING "PORT "
+
+
+/*
+ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
+ * First port is set to the default port.
+ */
+static int ports[IP_VS_APP_MAX_PORTS] = {21, 0};
+struct ip_vs_app *incarnations[IP_VS_APP_MAX_PORTS];
+
+/*
+ *	Debug level
+ */
+#ifdef CONFIG_IP_VS_DEBUG
+static int debug=0;
+MODULE_PARM(debug, "i");
+#endif
+
+MODULE_PARM(ports, "1-" __MODULE_STRING(IP_VS_APP_MAX_PORTS) "i");
+
+/*	Dummy variable */
+static int ip_vs_ftp_pasv;
+
+
+static int
+ip_vs_ftp_init_conn(struct ip_vs_app *vapp, struct ip_vs_conn *cp)
+{
+	return 0;	/* nothing is set up per connection */
+}
+
+
+static int
+ip_vs_ftp_done_conn(struct ip_vs_app *vapp, struct ip_vs_conn *cp)
+{
+	return 0;	/* nothing is torn down per connection */
+}
+
+
+/*
+ * Scan from "data" up to "data_limit" for "pattern" followed by the six
+ * numbers "xxx,xxx,xxx,xxx,ppp,ppp" and the "term" character.  Returns 1
+ * with <addr,port> in network order and the list in [*start,*end), else 0.
+ */
+static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
+				  const char *pattern, size_t plen, char term,
+				  __u32 *addr, __u16 *port,
+				  char **start, char **end)
+{
+	unsigned char p1,p2,p3,p4,p5,p6;
+
+	while (data < data_limit) {
+		if (strnicmp(data, pattern, plen) != 0) {
+			data++;
+			continue;
+		}
+		*start = data+plen;
+		p1 = simple_strtoul(data+plen, &data, 10);
+		if (*data != ',')
+			continue;
+		p2 = simple_strtoul(data+1, &data, 10);
+		if (*data != ',')
+			continue;
+		p3 = simple_strtoul(data+1, &data, 10);
+		if (*data != ',')
+			continue;
+		p4 = simple_strtoul(data+1, &data, 10);
+		if (*data != ',')
+			continue;
+		p5 = simple_strtoul(data+1, &data, 10);
+		if (*data != ',')
+			continue;
+		p6 = simple_strtoul(data+1, &data, 10);
+		if (*data != term)
+			continue;
+		*end = data;
+		/* build host-order values, then convert: works on any endian */
+		*addr = htonl(((__u32)p1<<24) | (p2<<16) | (p3<<8) | p4);
+		*port = htons((p5<<8) | p6);
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * Look at outgoing ftp packets to catch the response to a PASV command
+ * from the server (inside-to-outside).
+ * When we see one, we build a connection entry with the client address,
+ * client port 0 (unknown at the moment), the server address and the
+ * server port.  Mark the current connection entry as a control channel
+ * of the new entry. All this work is just so that the data connection
+ * can be scheduled to the right server later.
+ *
+ * The outgoing packet should be something like
+ *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
+ * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
+ */
+static int ip_vs_ftp_out(struct ip_vs_app *vapp,
+			 struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_limit;
+	char *start, *end;
+	__u32 from;
+	__u16 port;
+	struct ip_vs_conn *n_cp;
+	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
+	unsigned buf_len;
+	int diff;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_S_ESTABLISHED)
+		return 0;
+
+	if (cp->app_data == &ip_vs_ftp_pasv) {
+		iph = skb->nh.iph;
+		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+		data = (char *)th + (th->doff << 2);
+		data_limit = skb->tail;
+
+		if (ip_vs_ftp_get_addrport(data, data_limit,
+					   SERVER_STRING,
+					   sizeof(SERVER_STRING)-1, ')',
+					   &from, &port,
+					   &start, &end) == 0)
+			return 0;
+
+		IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> "
+			  "%u.%u.%u.%u:%d detected\n",
+			  NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
+
+		/*
+		 * Now update or create an connection entry for it
+		 */
+		n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
+					  cp->caddr, 0);
+		if (!n_cp) {
+			n_cp = ip_vs_conn_new(IPPROTO_TCP,
+					      cp->caddr, 0,
+					      cp->vaddr, port,
+					      from, port,
+					      IP_VS_CONN_F_NO_CPORT,
+					      cp->dest);
+			if (!n_cp)
+				return 0;
+
+			/* add its controller */
+			ip_vs_control_add(n_cp, cp);
+
+			/* increase dest's inactive connection counter */
+			if (cp->dest)
+				atomic_inc(&cp->dest->inactconns);
+		}
+
+		/* Replace the old passive address with the new one; vport is
+		 * in network order, so convert it before splitting into bytes
+		 */
+		from = n_cp->vaddr;
+		port = n_cp->vport;
+		sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
+			ntohs(port)>>8, ntohs(port)&255);
+		buf_len = strlen(buf);
+
+		/*
+		 * Calculate required delta-offset to keep TCP happy
+		 */
+		diff = buf_len - (end-start);
+
+		if (diff == 0) {
+			/* simply replace it with new passive address */
+			memcpy(start, buf, buf_len);
+		} else {
+			/* fixme: return value isn't checked here */
+			ip_vs_skb_replace(skb, GFP_ATOMIC, start,
+					  end-start, buf, buf_len);
+		}
+
+		cp->app_data = NULL;
+		ip_vs_conn_listen(n_cp);
+		ip_vs_conn_put(n_cp);
+		return diff;
+	}
+	return 0;
+}
+
+
+/*
+ * Look at incoming ftp packets to catch the PASV/PORT command
+ * (outside-to-inside).
+ *
+ * The incoming packet having the PORT command should be something like
+ *      "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
+ * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
+ * In this case, we create a connection entry using the client address and
+ * port, so that the active ftp data connection from the server can reach
+ * the client.
+ */
+static int ip_vs_ftp_in(struct ip_vs_app *vapp,
+			struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_start, *data_limit;
+	char *start, *end;
+	__u32 to;
+	__u16 port;
+	struct ip_vs_conn *n_cp;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_S_ESTABLISHED)
+		return 0;
+
+	/*
+	 * Detecting whether it is passive
+	 */
+	iph = skb->nh.iph;
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+
+	/* Since there may be OPTIONS in the TCP packet and the HLEN is
+	   the length of the header in 32-bit multiples, it is accurate
+	   to calculate data address by th+HLEN*4 */
+	data = data_start = (char *)th + (th->doff << 2);
+	data_limit = skb->tail;
+
+	while (data < data_limit) {
+		if (strnicmp(data, "PASV\r\n", 6) == 0) {
+			IP_VS_DBG(1-debug, "got PASV at %d of %d\n",
+				  data - data_start,
+				  data_limit - data_start);
+			cp->app_data = &ip_vs_ftp_pasv;
+			return 0;
+		}
+		data++;
+	}
+
+	/*
+	 * To support virtual FTP server, the scenario is as follows:
+	 *       FTP client ----> Load Balancer ----> FTP server
+	 * First detect the port number in the application data,
+	 * then create a new connection entry for the coming data
+	 * connection.
+	 */
+	data = data_start;
+	data_limit = skb->h.raw + skb->len - 18;	/* why 18? confirm */
+
+	if (ip_vs_ftp_get_addrport(data, data_limit,
+				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
+				   '\r', &to, &port,
+				   &start, &end) == 0)
+		return 0;
+
+	IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n",
+		  NIPQUAD(to), ntohs(port));
+
+	/*
+	 * Now update or create a connection entry for it
+	 */
+	IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
+		  ip_vs_proto_name(iph->protocol),
+		  NIPQUAD(to), ntohs(port), NIPQUAD(iph->daddr), 0);
+
+	n_cp = ip_vs_conn_in_get(iph->protocol,
+				 to, port,
+				 iph->daddr, htons(ntohs(cp->vport)-1));
+	if (!n_cp) {
+		n_cp = ip_vs_conn_new(IPPROTO_TCP,
+				      to, port,
+				      cp->vaddr, htons(ntohs(cp->vport)-1),
+				      cp->daddr, htons(ntohs(cp->dport)-1),
+				      0,
+				      cp->dest);
+		if (!n_cp)
+			return 0;
+
+		/* add its controller */
+		ip_vs_control_add(n_cp, cp);
+
+		/* increase dest's inactive connection counter */
+		if (cp->dest)
+			atomic_inc(&cp->dest->inactconns);
+	}
+
+	/*
+	 *	Move the new data connection entry to listen state
+	 */
+	ip_vs_conn_listen(n_cp);
+	ip_vs_conn_put(n_cp);
+
+	/* no diff required for incoming packets */
+	return 0;
+}
+
+
+static struct ip_vs_app ip_vs_ftp = {
+	{0},			/* n_list */
+	"ftp",			/* name */
+	0,                      /* type */
+	THIS_MODULE,            /* this module */
+	ip_vs_ftp_init_conn,    /* ip_vs_init_conn */
+	ip_vs_ftp_done_conn,    /* ip_vs_done_conn */
+	ip_vs_ftp_out,          /* pkt_out */
+	ip_vs_ftp_in,           /* pkt_in */
+};
+
+
+/*
+ *	ip_vs_ftp initialization; a failure unregisters what was loaded.
+ */
+static int __init ip_vs_ftp_init(void)
+{
+	struct ip_vs_app *app;
+	int i, ret = 0;
+
+	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
+		incarnations[i] = NULL;
+		if (!ports[i])
+			continue;
+		ret = -ENOMEM;
+		app = kmalloc(sizeof(*app), GFP_KERNEL);
+		if (app) {
+			memcpy(app, &ip_vs_ftp, sizeof(*app));
+			ret = register_ip_vs_app(app, IPPROTO_TCP, ports[i]);
+		}
+		if (ret) {
+			kfree(app);	/* kfree(NULL) is a no-op */
+			break;
+		}
+		incarnations[i] = app;
+		IP_VS_DBG(1-debug, "Ftp: loaded support on port[%d] = %d\n",
+			  i, ports[i]);
+	}
+	while (ret && --i >= 0)
+		if (incarnations[i] && !unregister_ip_vs_app(incarnations[i]))
+			kfree(incarnations[i]);
+	return ret;
+}
+
+
+/*
+ *	ip_vs_ftp finish: unregister and free every loaded incarnation.
+ */
+static void __exit ip_vs_ftp_exit(void)
+{
+	int i;
+
+	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
+		if (!incarnations[i])
+			continue;
+		/*
+		 * keep it when unregistering fails (presumably still in use)
+		 */
+		if (unregister_ip_vs_app(incarnations[i]))
+			continue;
+		kfree(incarnations[i]);
+		incarnations[i] = NULL;
+		IP_VS_DBG(1-debug, "Ftp: unloaded support on port[%d] = %d\n",
+			  i, ports[i]);
+	}
+}
+
+
+module_init(ip_vs_ftp_init);
+module_exit(ip_vs_ftp_exit);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_lblc.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_lblc.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_lblc.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_lblc.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,624 @@
+/*
+ * IPVS:        Locality-Based Least-Connection scheduling module
+ *
+ * Version:     $Id: ip_vs_lblc.c,v 1.9 2002/03/25 12:44:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Martin Hamilton         :    fixed the terrible locking bugs
+ *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
+ *     Wensong Zhang           :    fixed the uninitialized tbl->lock bug
+ *     Wensong Zhang           :    added doing full expiration check to
+ *                                   collect stale entries of 24+ hours when
+ *                                   no partial expire check in a half hour
+ *     Julian Anastasov        :    replaced del_timer call with del_timer_sync
+ *                                   to avoid the possible race between timer
+ *                                   handler and del_timer thread in SMP
+ *
+ */
+
+/*
+ * The lblc algorithm is as follows (pseudo code):
+ *
+ *       if cachenode[dest_ip] is null then
+ *               n, cachenode[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- cachenode[dest_ip];
+ *               if (n is dead) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                 n, cachenode[dest_ip] <- {weighted least-conn node};
+ *
+ *       return n;
+ *
+ * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing
+ * me to write this module.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblc entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblc entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
+#define CONFIG_IP_VS_LBLC_TAB_BITS      10
+#endif
+#define IP_VS_LBLC_TAB_BITS     CONFIG_IP_VS_LBLC_TAB_BITS
+#define IP_VS_LBLC_TAB_SIZE     (1 << IP_VS_LBLC_TAB_BITS)
+#define IP_VS_LBLC_TAB_MASK     (IP_VS_LBLC_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS lblc entry represents an association between destination
+ *      IP address and its destination server
+ */
+struct ip_vs_lblc_entry {
+	struct list_head        list;
+	__u32                   addr;           /* destination IP address */
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+	unsigned long           lastuse;        /* last used time */
+};
+
+
+/*
+ *      IPVS lblc hash table
+ */
+struct ip_vs_lblc_table {
+	rwlock_t	        lock;           /* lock for this table */
+	struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of entries */
+	int                     max_size;       /* maximum size of entries */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* rover for expire check */
+	int                     counter;        /* counter for no expire */
+};
+
+
+/*
+ *      IPVS LBLC sysctl table
+ */
+struct ip_vs_lblc_sysctl_table {
+	struct ctl_table_header *sysctl_header;
+	ctl_table vs_vars[2];
+	ctl_table vs_dir[2];
+	ctl_table ipv4_dir[2];
+	ctl_table root_dir[2];
+};
+
+
+static struct ip_vs_lblc_sysctl_table lblc_sysctl_table = {
+	NULL,
+	{{NET_IPV4_VS_LBLC_EXPIRE, "lblc_expiration",
+	  &sysctl_ip_vs_lblc_expiration,
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {0}},
+	{{NET_IPV4_VS, "vs", NULL, 0, 0555, lblc_sysctl_table.vs_vars},
+	 {0}},
+	{{NET_IPV4, "ipv4", NULL, 0, 0555, lblc_sysctl_table.vs_dir},
+	 {0}},
+	{{CTL_NET, "net", NULL, 0, 0555, lblc_sysctl_table.ipv4_dir},
+	 {0}}
+};
+
+
+/*
+ *      Allocate an ip_vs_lblc_entry, which maps a destination IP address
+ *      to a server, and take a reference on that server.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblc_entry *en;
+
+	en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
+	if (en == NULL) {
+		IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&en->list);
+	en->addr = daddr;
+	/* stamp it now: the expiry timer sees it as soon as it is hashed */
+	en->lastuse = jiffies;
+	atomic_inc(&dest->refcnt);
+	en->dest = dest;
+	return en;
+}
+
+
+static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+{
+	list_del(&en->list);
+	/*
+	 * We don't kfree dest because it is referred to either by its
+	 * service or by the trash dest list; only drop our reference.
+	 */
+	atomic_dec(&en->dest->refcnt);
+	kfree(en);
+}
+
+
+/* Multiplicative hash of a destination address into a bucket index. */
+static inline unsigned ip_vs_lblc_hashkey(__u32 addr)
+{
+	unsigned long scrambled = ntohl(addr) * 2654435761UL;
+
+	return scrambled & IP_VS_LBLC_TAB_MASK;
+}
+
+
+/*
+ *	Insert an entry into the ip_vs_lblc_table, keyed by its destination
+ *	IP address.  Returns 1 on success, 0 if it is already on a list.
+ */
+static int
+ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
+{
+	struct list_head *bucket;
+
+	if (!list_empty(&en->list)) {
+		IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	bucket = &tbl->bucket[ip_vs_lblc_hashkey(en->addr)];
+
+	/*
+	 *	Link the entry and count it under the table write lock
+	 */
+	write_lock(&tbl->lock);
+	list_add(&en->list, bucket);
+	atomic_inc(&tbl->entries);
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+
+
+#if 0000
+/*
+ *	Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
+ *	returns bool success.
+ */
+static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
+			     struct ip_vs_lblc_entry *en)
+{
+	if (list_empty(&en->list)) {
+		IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 * Remove it from the table
+	 */
+	write_lock(&tbl->lock);
+	list_del(&en->list);
+	INIT_LIST_HEAD(&en->list);
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+#endif
+
+
+/*
+ *  Look up the ip_vs_lblc_entry for a destination address, NULL if none.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr)
+{
+	struct list_head *head, *pos;
+	struct ip_vs_lblc_entry *en, *found = NULL;
+
+	head = &tbl->bucket[ip_vs_lblc_hashkey(addr)];
+
+	read_lock(&tbl->lock);
+	for (pos = head->next; pos != head; pos = pos->next) {
+		en = list_entry(pos, struct ip_vs_lblc_entry, list);
+		if (en->addr == addr) {
+			/* HIT */
+			found = en;
+			break;
+		}
+	}
+	read_unlock(&tbl->lock);
+
+	/*
+	 * NOTE(review): the entry is handed back after the lock is dropped;
+	 * confirm nothing can free it while the caller still uses it.
+	 */
+	return found;
+}
+
+
+/*
+ *      Free every entry of the specified table, one bucket at a time.
+ */
+static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
+{
+	struct list_head *bucket;
+	int i;
+
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		bucket = &tbl->bucket[i];
+		write_lock(&tbl->lock);
+		while (!list_empty(bucket)) {
+			/* ip_vs_lblc_free() unlinks the first entry */
+			ip_vs_lblc_free(list_entry(bucket->next,
+					struct ip_vs_lblc_entry, list));
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+}
+
+
+static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
+{
+	unsigned long now = jiffies;
+	int i, j;
+	struct list_head *l, *e;
+	struct ip_vs_lblc_entry *en;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;	/* next bucket, wrapping */
+		e = l = &tbl->bucket[j];
+		write_lock(&tbl->lock);
+		while (e->next != l) {
+			en = list_entry(e->next,
+					struct ip_vs_lblc_entry, list);
+			if ((now - en->lastuse) <
+			    sysctl_ip_vs_lblc_expiration) {
+				e = e->next;	/* still fresh: keep it */
+				continue;
+			}
+			ip_vs_lblc_free(en);	/* expired: unlink and free */
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodic timer handler for IPVS lblc table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblc_check_expire(unsigned long data)
+{
+	struct ip_vs_lblc_table *tbl;
+	unsigned long now = jiffies;
+	int goal;
+	int i, j;
+	struct list_head *l, *e;
+	struct ip_vs_lblc_entry *en;
+
+	tbl = (struct ip_vs_lblc_table *)data;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblc_full_check(tbl);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;	/* one more run without a partial check */
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+	if (goal > tbl->max_size/2)	/* cap the entries freed per run */
+		goal = tbl->max_size/2;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+		e = l = &tbl->bucket[j];
+		write_lock(&tbl->lock);
+		while (e->next != l) {
+			en = list_entry(e->next,
+					struct ip_vs_lblc_entry, list);
+			if ((now - en->lastuse) < ENTRY_TIMEOUT) {
+				e = e->next;
+				continue;
+			}
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&tbl->lock);
+		if (goal <= 0)	/* checked only after a whole bucket */
+			break;
+	}
+	tbl->rover = j;
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+
+static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblc_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblc_table for this service
+	 */
+	tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLC hash table (memory=%dbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_lblc_table));
+
+	/*
+	 *    Initialize the hash buckets and counters (kmalloc zeroes nothing)
+	 */
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++)
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	tbl->lock = RW_LOCK_UNLOCKED;
+	atomic_set(&tbl->entries, 0);
+	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	init_timer(&tbl->periodic_timer);
+	tbl->periodic_timer.data = (unsigned long)tbl;
+	tbl->periodic_timer.function = ip_vs_lblc_check_expire;
+	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
+	add_timer(&tbl->periodic_timer);
+
+	return 0;
+}
+
+
+static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+
+	/* stop the periodic timer before the table goes away */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* then drop every cached entry ... */
+	ip_vs_lblc_flush(tbl);
+
+	/* ... and finally the table itself */
+	kfree(tbl);
+	IP_VS_DBG(6, "LBLC hash table (memory=%dbytes) released\n",
+		  sizeof(struct ip_vs_lblc_table));
+
+	return 0;
+}
+
+
+static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	register struct list_head *l, *e;
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;		/* overhead of "least" and of "dest" */
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+
+	l = &svc->destinations;
+	for (e=l->next; e!=l; e=e->next) {
+		least = list_entry(e, struct ip_vs_dest, n_list);
+		if (atomic_read(&least->weight) > 0) {
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;		/* no server with a weight above zero */
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	for (e=e->next; e!=l; e=e->next) {
+		dest = list_entry(e, struct ip_vs_dest, n_list);
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   Return true when this destination server carries more active
+ *   connections than its weight while some server of the service is
+ *   below half of its own weight.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	struct list_head *head = &svc->destinations;
+	struct list_head *pos;
+	struct ip_vs_dest *d;
+
+	if (atomic_read(&dest->activeconns) <= atomic_read(&dest->weight))
+		return 0;
+
+	for (pos = head->next; pos != head; pos = pos->next) {
+		d = list_entry(pos, struct ip_vs_dest, n_list);
+		if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight))
+			return 1;
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_lblc_table *tbl;
+	struct ip_vs_lblc_entry *en;
+
+	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_lblc_table *)svc->sched_data;
+	en = ip_vs_lblc_get(tbl, iph->daddr);
+	if (en == NULL) {	/* no cached server for this address yet */
+		dest = __ip_vs_wlc_schedule(svc, iph);
+		if (dest == NULL) {
+			IP_VS_DBG(1, "no destination available\n");
+			return NULL;
+		}
+		en = ip_vs_lblc_new(iph->daddr, dest);
+		if (en == NULL) {
+			return NULL;
+		}
+		ip_vs_lblc_hash(tbl, en);	/* result is not checked */
+	} else {
+		dest = en->dest;
+		if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
+		    || atomic_read(&dest->weight) <= 0
+		    || is_overloaded(dest, svc)) {
+			dest = __ip_vs_wlc_schedule(svc, iph);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "no destination available\n");
+				return NULL;
+			}
+			atomic_dec(&en->dest->refcnt);	/* move our reference */
+			atomic_inc(&dest->refcnt);
+			en->dest = dest;
+		}
+	}
+	en->lastuse = jiffies;	/* consulted by the expiry checks */
+
+	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(en->addr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLC Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblc_scheduler =
+{
+	{0},                    /* n_list */
+	"lblc",                 /* name */
+	ATOMIC_INIT(0),         /* refcnt */
+	THIS_MODULE,		/* this module */
+	ip_vs_lblc_init_svc,    /* service initializer */
+	ip_vs_lblc_done_svc,    /* service done */
+	ip_vs_lblc_update_svc,  /* service updater */
+	ip_vs_lblc_schedule,    /* select a server from the destination list */
+};
+
+static int __init ip_vs_lblc_init(void)
+{
+	int ret;
+	INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
+	lblc_sysctl_table.sysctl_header =
+		register_sysctl_table(lblc_sysctl_table.root_dir, 0);
+	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+	if (ret && lblc_sysctl_table.sysctl_header)	/* undo on failure */
+		unregister_sysctl_table(lblc_sysctl_table.sysctl_header);
+	return ret;
+}
+
+static void __exit ip_vs_lblc_cleanup(void)
+{
+	unregister_sysctl_table(lblc_sysctl_table.sysctl_header);
+	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+}
+module_init(ip_vs_lblc_init);
+module_exit(ip_vs_lblc_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_lblcr.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_lblcr.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_lblcr.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,884 @@
+/*
+ * IPVS:        Locality-Based Least-Connection with Replication scheduler
+ *
+ * Version:     $Id: ip_vs_lblcr.c,v 1.10 2002/03/25 12:44:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Julian Anastasov        :    Added the missing (dest->weight>0)
+ *                                  condition in the ip_vs_dest_set_max.
+ *
+ */
+
+/*
+ * The lblc/r algorithm is as follows (pseudo code):
+ *
+ *       if serverSet[dest_ip] is null then
+ *               n, serverSet[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- {least-conn (alive) node in serverSet[dest_ip]};
+ *               if (n is null) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                   n <- {weighted least-conn node};
+ *                   add n to serverSet[dest_ip];
+ *               if |serverSet[dest_ip]| > 1 AND
+ *                   now - serverSet[dest_ip].lastMod > T then
+ *                   m <- {most conn node in serverSet[dest_ip]};
+ *                   remove m from serverSet[dest_ip];
+ *       if serverSet[dest_ip] changed then
+ *               serverSet[dest_ip].lastMod <- now;
+ *
+ *       return n;
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+/* for proc_net_create/proc_net_remove */
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblcr entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblcr entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
+#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
+#endif
+#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
+#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
+#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS destination set structure and operations
+ */
+struct ip_vs_dest_list {
+	struct ip_vs_dest_list  *next;          /* next node, NULL at the tail */
+	struct ip_vs_dest       *dest;          /* destination server (refcnt taken on insert) */
+};
+
+struct ip_vs_dest_set {
+	atomic_t                size;           /* number of nodes on the list */
+	unsigned long           lastmod;        /* jiffies of the last insert/erase */
+	struct ip_vs_dest_list  *list;          /* head of the singly linked dest list */
+	rwlock_t	        lock;           /* lock for this list */
+};
+
+
+static struct ip_vs_dest_list *
+ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e;	/* new node; NULL if duplicate or no memory */
+
+	/* Add dest to the set. The duplicate scan and the link run under one
+	 * write lock so a concurrent erase cannot free a node mid-walk. */
+	write_lock(&set->lock);
+	for (e=set->list; e!=NULL; e=e->next) {
+		if (e->dest == dest) {
+			e = NULL;	/* already existed */
+			goto out;
+		}
+	}
+	/* GFP_ATOMIC does not sleep, so the lock may stay held here */
+	e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC);
+	if (e == NULL) {
+		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
+		goto out;
+	}
+	atomic_inc(&dest->refcnt);	/* the set holds a reference on dest */
+	e->dest = dest;
+	e->next = set->list;		/* link it at the head of the list */
+	set->list = e;
+	atomic_inc(&set->size);
+	set->lastmod = jiffies;		/* inside the lock, as in erase */
+  out:
+	write_unlock(&set->lock);
+	return e;
+}
+
+static void
+ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e, **ep;	/* ep: the link that points at e */
+
+	write_lock(&set->lock);
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		if (e->dest == dest) {
+			/* HIT: unlink the node and drop its dest reference */
+			*ep = e->next;
+			atomic_dec(&set->size);
+			set->lastmod = jiffies;
+			atomic_dec(&e->dest->refcnt);
+			kfree(e);
+			break;
+		}
+		ep = &e->next;
+	}
+	write_unlock(&set->lock);
+}
+
+static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
+{
+	struct ip_vs_dest_list *e, **ep;
+
+	write_lock(&set->lock);
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		*ep = e->next;	/* always pops the head, so ep never moves */
+		/*
+		 * We don't kfree dest because it is referred to either
+		 * by its service or by the trash dest list.
+		 */
+		atomic_dec(&e->dest->refcnt);
+		kfree(e);
+	}
+	write_unlock(&set->lock);	/* NOTE(review): set->size is not reset here */
+}
+
+/* return the available node with the least overhead/weight in the set, or NULL */
+static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;	/* overhead of 'least' and of the current candidate */
+
+	if (set == NULL)
+		return NULL;
+
+	read_lock(&set->lock);
+	/* seed: the first available destination server whose weight > 0 */
+	for (e=set->list; e!=NULL; e=e->next) {
+		least = e->dest;
+		if ((atomic_read(&least->weight) > 0)
+		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	read_unlock(&set->lock);
+	return NULL;
+
+	/* scan the rest; loh/lw > doh/dw is tested cross-multiplied (no division) */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if ((loh * atomic_read(&dest->weight) >
+		     doh * atomic_read(&least->weight))
+		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+	read_unlock(&set->lock);
+
+	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+	return least;
+}
+
+
+/* return the weight>0 node with the most overhead/weight in the set, or NULL */
+static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *most;
+	int moh, doh;	/* overhead of 'most' and of the current candidate */
+
+	if (set == NULL)
+		return NULL;
+
+	read_lock(&set->lock);
+	/* seed: first server whose weight > 0 (availability is not tested here) */
+	for (e=set->list; e!=NULL; e=e->next) {
+		most = e->dest;
+		if (atomic_read(&most->weight) > 0) {
+			moh = atomic_read(&most->activeconns) * 50
+				+ atomic_read(&most->inactconns);
+			goto nextstage;
+		}
+	}
+	read_unlock(&set->lock);
+	return NULL;
+
+	/* scan the rest for the destination with the weighted most load */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
+		if ((moh * atomic_read(&dest->weight) <
+		     doh * atomic_read(&most->weight))
+		    && (atomic_read(&dest->weight) > 0)) {
+			most = dest;
+			moh = doh;
+		}
+	}
+	read_unlock(&set->lock);
+
+	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(most->addr), ntohs(most->port),
+		  atomic_read(&most->activeconns),
+		  atomic_read(&most->refcnt),
+		  atomic_read(&most->weight), moh);
+	return most;
+}
+
+
+/*
+ *      IPVS lblcr entry represents an association between destination
+ *      IP address and its destination server set
+ */
+struct ip_vs_lblcr_entry {
+	struct list_head        list;           /* linkage in a hash bucket */
+	__u32                   addr;           /* destination IP address */
+	struct ip_vs_dest_set   set;            /* destination server set */
+	unsigned long           lastuse;        /* last used time (jiffies) */
+};
+
+
+/*
+ *      IPVS lblcr hash table
+ */
+struct ip_vs_lblcr_table {
+	rwlock_t	        lock;           /* lock for this table */
+	struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of hashed entries */
+	int                     max_size;       /* entry count above which the timer trims */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* last bucket visited by an expire scan */
+	int                     counter;        /* timer runs since the last full check */
+};
+
+
+/*
+ *      IPVS LBLCR sysctl table
+ */
+struct ip_vs_lblcr_sysctl_table {
+	struct ctl_table_header *sysctl_header;	/* handle from register_sysctl_table() */
+	ctl_table vs_vars[2];	/* one variable plus the {0} terminator */
+	ctl_table vs_dir[2];	/* "vs" directory entry */
+	ctl_table ipv4_dir[2];	/* "ipv4" directory entry */
+	ctl_table root_dir[2];	/* "net" directory entry, the registered root */
+};
+
+
+static struct ip_vs_lblcr_sysctl_table lblcr_sysctl_table = {
+	NULL,	/* sysctl_header: filled in at module init */
+	{{NET_IPV4_VS_LBLCR_EXPIRE, "lblcr_expiration",
+	  &sysctl_ip_vs_lblcr_expiration,
+	  sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+	 {0}},	/* vs_vars */
+	{{NET_IPV4_VS, "vs", NULL, 0, 0555, lblcr_sysctl_table.vs_vars},
+	 {0}},	/* vs_dir */
+	{{NET_IPV4, "ipv4", NULL, 0, 0555, lblcr_sysctl_table.vs_dir},
+	 {0}},	/* ipv4_dir */
+	{{CTL_NET, "net", NULL, 0, 0555, lblcr_sysctl_table.ipv4_dir},
+	 {0}}	/* root_dir */
+};
+
+
+/*
+ *      new/free an ip_vs_lblcr_entry, which is a mapping of a destination
+ *      IP address to a server set.
+ */
+static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr)
+{
+	struct ip_vs_lblcr_entry *en;	/* unhashed entry for daddr, or NULL */
+
+	en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
+	if (en == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&en->list);
+	en->addr = daddr;
+	/* kmalloc() does not zero: stamp both times before the entry is hashed */
+	en->lastuse = en->set.lastmod = jiffies;
+	/* initialize its dest set (empty) */
+	atomic_set(&(en->set.size), 0);
+	en->set.list = NULL;
+	en->set.lock = RW_LOCK_UNLOCKED;
+	return en;
+}
+
+
+static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
+{
+	list_del(&en->list);	/* the callers in this file hold tbl->lock for writing */
+	ip_vs_dest_set_eraseall(&en->set);	/* drops every dest reference held by the set */
+	kfree(en);
+}
+
+
+/*
+ *	Returns hash value for IPVS LBLCR entry
+ */
+static inline unsigned ip_vs_lblcr_hashkey(__u32 addr)
+{
+	return IP_VS_LBLCR_TAB_MASK & (2654435761UL * ntohl(addr));	/* bucket index */
+}
+
+
+/*
+ *	Hash an entry in the ip_vs_lblcr_table.
+ *	returns bool success.
+ */
+static int
+ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
+{
+	unsigned hash;
+
+	if (!list_empty(&en->list)) {	/* a non-empty list node means already hashed */
+		IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 *	Hash by destination IP address
+	 */
+	hash = ip_vs_lblcr_hashkey(en->addr);
+
+	write_lock(&tbl->lock);
+	list_add(&en->list, &tbl->bucket[hash]);
+	atomic_inc(&tbl->entries);	/* counted against tbl->max_size by the timer */
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+
+
+#if 0000
+/*
+ *	Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
+ *	returns bool success.  (Compiled out by the #if 0000 above.)
+ */
+static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
+			     struct ip_vs_lblcr_entry *en)
+{
+	if (list_empty(&en->list)) {
+		IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 * Remove it from the table (NOTE(review): tbl->entries is not decremented)
+	 */
+	write_lock(&tbl->lock);
+	list_del(&en->list);
+	INIT_LIST_HEAD(&en->list);
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+#endif
+
+
+/*
+ *  Get ip_vs_lblcr_entry associated with supplied parameters.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __u32 addr)
+{
+	unsigned hash;
+	struct ip_vs_lblcr_entry *en;
+	struct list_head *l,*e;
+
+	hash = ip_vs_lblcr_hashkey(addr);
+	l = &tbl->bucket[hash];
+
+	read_lock(&tbl->lock);
+
+	for (e=l->next; e!=l; e=e->next) {
+		en = list_entry(e, struct ip_vs_lblcr_entry, list);
+		if (en->addr == addr) {
+			/* HIT; NOTE(review): en is returned after the lock is dropped */
+			read_unlock(&tbl->lock);
+			return en;
+		}
+	}
+
+	read_unlock(&tbl->lock);
+
+	return NULL;	/* no entry for this address */
+}
+
+
+/*
+ *      Flush all the entries of the specified table.
+ */
+static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
+{
+	int i;
+	struct list_head *l;
+	struct ip_vs_lblcr_entry *en;
+
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		write_lock(&tbl->lock);	/* taken and released once per bucket */
+		for (l=&tbl->bucket[i]; l->next!=l; ) {	/* until the bucket is empty */
+			en = list_entry(l->next,
+					struct ip_vs_lblcr_entry, list);
+			ip_vs_lblcr_free(en);	/* unlinks en, so l->next advances */
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+}
+
+
+static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
+{
+	unsigned long now = jiffies;
+	int i, j;	/* i counts buckets visited, j is the bucket index */
+	struct list_head *l, *e;	/* l: bucket head, e: node before the candidate */
+	struct ip_vs_lblcr_entry *en;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;	/* wrap around the table */
+		e = l = &tbl->bucket[j];
+		write_lock(&tbl->lock);
+		while (e->next != l) {
+			en = list_entry(e->next,
+					struct ip_vs_lblcr_entry, list);
+			if ((now - en->lastuse) <
+			    sysctl_ip_vs_lblcr_expiration) {
+				e = e->next;	/* still fresh: keep it and step over */
+				continue;
+			}
+			ip_vs_lblcr_free(en);	/* expired: e stays, e->next changes */
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodical timer handler for IPVS lblcr table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblcr_check_expire(unsigned long data)
+{
+	struct ip_vs_lblcr_table *tbl;
+	unsigned long now = jiffies;
+	int goal;	/* number of entries this run tries to free */
+	int i, j;
+	struct list_head *l, *e;
+	struct ip_vs_lblcr_entry *en;
+
+	tbl = (struct ip_vs_lblcr_table *)data;	/* set as timer data in init_svc */
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblcr_full_check(tbl);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;	/* nothing to trim: count towards a full check */
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;	/* 4/3 of the excess */
+	if (goal > tbl->max_size/2)
+		goal = tbl->max_size/2;	/* capped at half of max_size */
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+		e = l = &tbl->bucket[j];
+		write_lock(&tbl->lock);
+		while (e->next != l) {
+			en = list_entry(e->next,
+					struct ip_vs_lblcr_entry, list);
+			if ((now - en->lastuse) < ENTRY_TIMEOUT) {
+				e = e->next;	/* used recently: keep it */
+				continue;
+			}
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&tbl->lock);
+		if (goal <= 0)	/* tested per bucket, so a run may overshoot */
+			break;
+	}
+	tbl->rover = j;	/* the next run resumes after this bucket */
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+static struct ip_vs_lblcr_table *lblcr_table_list;
+
+/*
+ *	/proc/net/ip_vs_lblcr to display the mappings of
+ *                  destination IP address <==> its serverSet
+ */
+static int
+ip_vs_lblcr_getinfo(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos=0, begin;	/* pos: bytes generated so far */
+	int len=0, size;	/* len: bytes kept in buffer, size: current line */
+	struct ip_vs_lblcr_table *tbl;
+	unsigned long now = jiffies;
+	int i;
+	struct list_head *l, *e;
+	struct ip_vs_lblcr_entry *en;
+
+	tbl = lblcr_table_list;	/* NULL until a service has been initialised */
+
+	size = sprintf(buffer, "LastTime Dest IP address  Server set\n");
+	pos += size;
+	len += size;
+
+	for (i=0; tbl!=NULL && i<IP_VS_LBLCR_TAB_SIZE; i++) {	/* no table: header only */
+		l = &tbl->bucket[i];
+		read_lock_bh(&tbl->lock);
+		for (e=l->next; e!=l; e=e->next) {
+			char tbuf[16];	/* "255.255.255.255" plus the NUL */
+			struct ip_vs_dest_list *d;
+
+			en = list_entry(e, struct ip_vs_lblcr_entry, list);
+			sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(en->addr));
+			size = sprintf(buffer+len, "%8lu %-16s ",
+				       now-en->lastuse, tbuf);
+
+			read_lock(&en->set.lock);
+			for (d=en->set.list; d!=NULL; d=d->next) {
+				size += sprintf(buffer+len+size,
+						"%u.%u.%u.%u ",
+						NIPQUAD(d->dest->addr));
+			}
+			read_unlock(&en->set.lock);
+			size += sprintf(buffer+len+size, "\n");
+			len += size;
+			pos += size;
+			if (pos <= offset)
+				len=0;	/* still before the requested offset: discard */
+			if (pos >= offset+length) {
+				read_unlock_bh(&tbl->lock);
+				goto done;
+			}
+		}
+		read_unlock_bh(&tbl->lock);
+	}
+
+  done:
+	begin = len - (pos - offset);	/* where the requested data starts */
+	*start = buffer + begin;
+	len -= begin;
+	if(len>length)
+		len = length;
+	return len;
+}
+#endif
+
+
+static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblcr_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblcr_table for this service (-ENOMEM on failure)
+	 */
+	tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLCR hash table (memory=%dbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_lblcr_table));
+
+	/*
+	 *    Initialize the hash buckets and the table bookkeeping
+	 */
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++)
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	atomic_set(&tbl->entries, 0);	/* kmalloc() leaves this uninitialised */
+	tbl->lock = RW_LOCK_UNLOCKED;
+	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	init_timer(&tbl->periodic_timer);
+	tbl->periodic_timer.data = (unsigned long)tbl;
+	tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
+	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
+	add_timer(&tbl->periodic_timer);
+
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+	lblcr_table_list = tbl;
+#endif
+	return 0;
+}
+
+
+static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+
+	/* remove periodic timer first, so it cannot run on a freed table */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* got to clean up table entries here */
+	ip_vs_lblcr_flush(tbl);
+
+	/* release the table itself; NOTE(review): lblcr_table_list is not cleared */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "LBLCR hash table (memory=%dbytes) released\n",
+		  sizeof(struct ip_vs_lblcr_table));
+
+	return 0;
+}
+
+
+static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing to refresh on a service update */
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	register struct list_head *l, *e;
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;	/* overhead of 'least' and of the current candidate */
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+
+	l = &svc->destinations;
+	for (e=l->next; e!=l; e=e->next) {	/* seed: first dest with weight > 0 */
+		least = list_entry(e, struct ip_vs_dest, n_list);
+		if (atomic_read(&least->weight) > 0) {
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;	/* empty list, or no destination with weight > 0 */
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	for (e=e->next; e!=l; e=e->next) {
+		dest = list_entry(e, struct ip_vs_dest, n_list);
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   If this destination server is overloaded and there is a less loaded
+ *   server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {	/* over its weight */
+		register struct list_head *l, *e;
+		struct ip_vs_dest *d;
+
+		l = &svc->destinations;
+		for (e=l->next; e!=l; e=e->next) {
+			d = list_entry(e, struct ip_vs_dest, n_list);
+			if (atomic_read(&d->activeconns)*2
+			    < atomic_read(&d->weight)) {
+				return 1;	/* some server is below half of its weight */
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based Least-Connection with Replication scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_lblcr_table *tbl;
+	struct ip_vs_lblcr_entry *en;
+
+	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
+	en = ip_vs_lblcr_get(tbl, iph->daddr);
+	if (en == NULL) {	/* first packet for this destination address */
+		dest = __ip_vs_wlc_schedule(svc, iph);
+		if (dest == NULL) {
+			IP_VS_DBG(1, "no destination available\n");
+			return NULL;
+		}
+		en = ip_vs_lblcr_new(iph->daddr);
+		if (en == NULL) {
+			return NULL;
+		}
+		ip_vs_dest_set_insert(&en->set, dest);	/* result is not checked */
+		ip_vs_lblcr_hash(tbl, en);
+	} else {
+		dest = ip_vs_dest_set_min(&en->set);
+		if (!dest || is_overloaded(dest, svc)) {	/* grow the server set */
+			dest = __ip_vs_wlc_schedule(svc, iph);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "no destination available\n");
+				return NULL;
+			}
+			ip_vs_dest_set_insert(&en->set, dest);
+		}
+		if (atomic_read(&en->set.size) > 1 &&
+		    jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
+			struct ip_vs_dest *m;	/* most loaded member, to be dropped */
+			m = ip_vs_dest_set_max(&en->set);
+			if (m)
+				ip_vs_dest_set_erase(&en->set, m);
+		}
+	}
+	en->lastuse = jiffies;
+
+	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(en->addr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLCR Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
+{
+	.n_list =		{0},	/* set up by INIT_LIST_HEAD() at module init */
+	.name =			"lblcr",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_lblcr_init_svc,
+	.done_service =		ip_vs_lblcr_done_svc,
+	.update_service =	ip_vs_lblcr_update_svc,
+	.schedule =		ip_vs_lblcr_schedule,	/* select a server */
+};
+
+
+static int __init ip_vs_lblcr_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);	/* empty list node */
+	lblcr_sysctl_table.sysctl_header =	/* handle saved for cleanup */
+		register_sysctl_table(lblcr_sysctl_table.root_dir, 0);
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+	proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);	/* /proc/net/ip_vs_lblcr */
+#endif
+	return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); /* NOTE(review): sysctl/proc entries are not undone if this fails */
+}
+
+
+static void __exit ip_vs_lblcr_cleanup(void)
+{
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+	proc_net_remove("ip_vs_lblcr");	/* entry created at module init */
+#endif
+	unregister_sysctl_table(lblcr_sysctl_table.sysctl_header);	/* handle saved by init */
+	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+}
+
+
+module_init(ip_vs_lblcr_init);
+module_exit(ip_vs_lblcr_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_lc.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_lc.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_lc.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_lc.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,142 @@
+/*
+ * IPVS:        Least-Connection Scheduling module
+ *
+ * Version:     $Id: ip_vs_lc.c,v 1.8.2.1 2003/04/11 14:02:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     added the ip_vs_lc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_lc_init_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing to set up for a service */
+}
+
+
+static int ip_vs_lc_done_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing to release */
+}
+
+
+static int ip_vs_lc_update_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing to refresh on a service update */
+}
+
+
+static inline unsigned int
+ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * An active connection is assumed to cost 256 times as much as
+	 * an inactive one on average. (The factor 256 might not be
+	 * accurate and may change later.) The overhead estimate returned
+	 * here is therefore:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	return (atomic_read(&dest->activeconns) << 8) +
+		atomic_read(&dest->inactconns);
+}
+
+
+/*
+ *	Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct list_head *l, *e;
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;	/* overhead of 'least' and of the current candidate */
+
+	IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n");
+
+	/*
+	 * Simply select the server with the least number of
+	 *        (activeconns<<8) + inactconns
+	 * Except whose weight is equal to zero.
+	 * If the weight is equal to zero, it means that the server is
+	 * quiesced, the existing connections to the server still get
+	 * served, but no new connection is assigned to the server.
+	 */
+
+	l = &svc->destinations;
+	for (e=l->next; e!=l; e=e->next) {	/* seed: first dest with weight > 0 */
+		least = list_entry (e, struct ip_vs_dest, n_list);
+		if (atomic_read(&least->weight) > 0) {
+			loh = ip_vs_lc_dest_overhead(least);
+			goto nextstage;
+		}
+	}
+	return NULL;	/* empty list, or no destination with weight > 0 */
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	for (e=e->next; e!=l; e=e->next) {
+		dest = list_entry(e, struct ip_vs_dest, n_list);
+		if (atomic_read(&dest->weight) == 0)
+			continue;	/* quiesced server */
+		doh = ip_vs_lc_dest_overhead(dest);
+		if (doh < loh) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->inactconns));
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_lc_scheduler = {
+	.n_list =		{0},	/* set up by INIT_LIST_HEAD() at module init */
+	.name =			"lc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_lc_init_svc,
+	.done_service =		ip_vs_lc_done_svc,
+	.update_service =	ip_vs_lc_update_svc,
+	.schedule =		ip_vs_lc_schedule,	/* select a server */
+};
+
+
+static int __init ip_vs_lc_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);	/* empty list node */
+	return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;	/* result is the module init status */
+}
+
+static void __exit ip_vs_lc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);	/* undo the registration from init */
+}
+
+module_init(ip_vs_lc_init);
+module_exit(ip_vs_lc_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_nq.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_nq.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_nq.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_nq.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,177 @@
+/*
+ * IPVS:        Never Queue scheduling module
+ *
+ * Version:     $Id: ip_vs_nq.c,v 1.1.2.1 2003/05/20 17:05:02 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The NQ algorithm adopts a two-speed model. When there is an idle server
+ * available, the job will be sent to the idle server, instead of waiting
+ * for a fast one. When there is no idle server available, the job will be
+ * sent to the server that minimize its expected delay (The Shortest
+ * Expected Delay scheduling algorithm).
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
+ *
+ * The difference between NQ and SED is that NQ can improve overall
+ * system utilization.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+
+static int
+ip_vs_nq_init_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing to set up for a service */
+}
+
+
+static int
+ip_vs_nq_done_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing to release */
+}
+
+
+static int
+ip_vs_nq_update_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing to refresh on a service update */
+}
+
+
+static inline unsigned int
+ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * Only the active connection number enters the cost; the +1
+	 * presumably accounts for the connection being scheduled.
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Never Queue scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_nq_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	register struct list_head *l, *e;
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;	/* overhead of 'least' and of the current candidate */
+
+	IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	l = &svc->destinations;
+	for (e=l->next; e!=l; e=e->next) {
+		least = list_entry(e, struct ip_vs_dest, n_list);
+		if (atomic_read(&least->weight) > 0) {
+			loh = ip_vs_nq_dest_overhead(least);
+
+			/* return the server directly if it is idle */
+			if (atomic_read(&least->activeconns) == 0)
+				goto out;
+
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	for (e=e->next; e!=l; e=e->next) {
+		dest = list_entry(e, struct ip_vs_dest, n_list);
+		if (atomic_read(&dest->weight) <= 0)
+			continue;	/* quiesced: not even when it is idle */
+		doh = ip_vs_nq_dest_overhead(dest);
+		/* return the server directly if it is idle */
+		if (atomic_read(&dest->activeconns) == 0) {
+			least = dest;
+			loh = doh;
+			goto out;
+		}
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+  out:
+	IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_nq_scheduler =
+{
+	.name =			"nq",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_nq_init_svc,
+	.done_service =		ip_vs_nq_done_svc,
+	.update_service =	ip_vs_nq_update_svc,
+	.schedule =		ip_vs_nq_schedule,	/* select a server */
+};
+
+
+static int __init ip_vs_nq_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list);	/* n_list is not set by the initializer */
+	return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+static void __exit ip_vs_nq_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);	/* undo the registration from init */
+}
+
+module_init(ip_vs_nq_init);
+module_exit(ip_vs_nq_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_rr.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_rr.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_rr.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_rr.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,120 @@
+/*
+ * IPVS:        Round-Robin Scheduling module
+ *
+ * Version:     $Id: ip_vs_rr.c,v 1.8 2001/10/19 15:05:17 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Fixes/Changes:
+ *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
+ *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
+ *     Wensong Zhang            :     changed some cosmetic things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_rr_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;	/* round-robin cursor starts on the list head, so scheduling begins at head->next */
+	return 0;
+}
+
+
+static int ip_vs_rr_done_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* sched_data only points into svc itself, so there is nothing to free */
+}
+
+
+static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;	/* rewind the cursor to the list head; NOTE(review): presumably because the old cursor may name a removed dest -- confirm */
+	return 0;
+}
+
+
+/*
+ * Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_rr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct list_head *head = &svc->destinations;
+	register struct list_head *start, *pos;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n");
+
+	/* sched_data remembers the node handed out last; resume after it */
+	write_lock(&svc->sched_lock);
+	start = ((struct list_head *)svc->sched_data)->next;
+	pos = start;
+	do {
+		/* the list head is only a sentinel, never a destination */
+		if (pos != head) {
+			dest = list_entry(pos, struct ip_vs_dest, n_list);
+			if (atomic_read(&dest->weight) > 0)
+				goto out;	/* HIT */
+		}
+		pos = pos->next;
+	} while (pos != start);
+	/* one full lap without meeting a dest of positive weight */
+	write_unlock(&svc->sched_lock);
+	return NULL;
+
+  out:
+	svc->sched_data = pos;
+	write_unlock(&svc->sched_lock);
+	IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d\n",
+		  NIPQUAD(dest->addr), ntohs(dest->port),
+		  atomic_read(&dest->activeconns),
+		  atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_rr_scheduler = {
+	.n_list =		{0},			/* made a valid list node by ip_vs_rr_init() */
+	.name =			"rr",			/* key compared by ip_vs_sched_getbyname() */
+	.refcnt =		ATOMIC_INIT(0),		/* refcnt */
+	.module =		THIS_MODULE,		/* this module */
+	.init_service =		ip_vs_rr_init_svc,	/* service initializer */
+	.done_service =		ip_vs_rr_done_svc,	/* service done */
+	.update_service =	ip_vs_rr_update_svc,	/* service updater */
+	.schedule =		ip_vs_rr_schedule,	/* select a server from the destination list */
+};
+
+static int __init ip_vs_rr_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);	/* self-linked node: registration rejects a node that is already linked */
+	return register_ip_vs_scheduler(&ip_vs_rr_scheduler);	/* 0 on success, -EINVAL otherwise */
+}
+
+static void __exit ip_vs_rr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);	/* unlink from the scheduler list; the return value is not checked */
+}
+
+module_init(ip_vs_rr_init);
+module_exit(ip_vs_rr_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_sched.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sched.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_sched.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sched.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,260 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_sched.c,v 1.11 2001/11/04 08:58:43 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <asm/softirq.h>                /* for local_bh_* */
+#include <asm/string.h>
+#include <linux/kmod.h>
+
+#include <net/ip_vs.h>
+
+/*
+ *  IPVS scheduler list
+ */
+static LIST_HEAD(ip_vs_schedulers);
+
+/* lock protecting the scheduler list above */
+static rwlock_t __ip_vs_sched_lock = RW_LOCK_UNLOCKED;
+
+
+/*
+ *  Bind a service with a scheduler
+ */
+int ip_vs_bind_scheduler(struct ip_vs_service *svc,
+			 struct ip_vs_scheduler *scheduler)
+{
+	int err;
+
+	if (!svc) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+	if (!scheduler) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
+		return -EINVAL;
+	}
+
+	/* the binding is recorded before the hook runs and is kept
+	 * even when the hook reports an error */
+	svc->scheduler = scheduler;
+
+	/* schedulers without an init_service hook need no setup */
+	if (!scheduler->init_service)
+		return 0;
+	err = scheduler->init_service(svc);
+	if (err)
+		IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
+	return err;
+}
+
+
+/*
+ *  Unbind a service from its scheduler
+ */
+int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
+{
+	struct ip_vs_scheduler *bound;
+
+	if (!svc) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+
+	bound = svc->scheduler;
+	if (!bound) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
+		return -EINVAL;
+	}
+
+	/* a failing done_service hook leaves the service bound */
+	if (bound->done_service && bound->done_service(svc) != 0) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
+		return -EINVAL;
+	}
+
+	/* hook absent or successful: drop the binding */
+	svc->scheduler = NULL;
+	return 0;
+}
+
+
+/*
+ *  Get scheduler in the scheduler list by name
+ */
+static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
+{
+	struct ip_vs_scheduler *cand, *found = NULL;
+	struct list_head *head = &ip_vs_schedulers, *pos;
+
+	IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n",
+		  sched_name);
+
+	read_lock_bh(&__ip_vs_sched_lock);
+
+	for (pos = head->next; pos != head; pos = pos->next) {
+		cand = list_entry(pos, struct ip_vs_scheduler, n_list);
+
+		/*
+		 * Take the module reference before looking at the name,
+		 * so a match is already pinned when it is returned.
+		 * A failed try_inc_mod_count() means the scheduler is
+		 * just being deleted: skip it.
+		 */
+		if (cand->module && !try_inc_mod_count(cand->module))
+			continue;
+
+		if (strcmp(sched_name, cand->name) == 0) {
+			/* HIT: the caller inherits the module reference */
+			found = cand;
+			break;
+		}
+
+		/* not the one we want: give the reference back */
+		if (cand->module)
+			__MOD_DEC_USE_COUNT(cand->module);
+	}
+
+	read_unlock_bh(&__ip_vs_sched_lock);
+	return found;
+}
+
+
+/*
+ *  Lookup scheduler and try to load it if it doesn't exist
+ */
+struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched;
+
+	/* Search for the scheduler by sched_name; NULL means "not found" */
+	sched = ip_vs_sched_getbyname(sched_name);
+
+	/*
+	 *  If scheduler not found, load module "ip_vs_<name>" and search
+	 *  again.  The name is built with a bounded write so an over-long
+	 *  sched_name is truncated instead of overrunning module_name[].
+	 */
+	if (sched == NULL) {
+		char module_name[IP_VS_SCHEDNAME_MAXLEN+8];
+		snprintf(module_name, sizeof(module_name),
+			 "ip_vs_%s", sched_name);
+		request_module(module_name);
+		sched = ip_vs_sched_getbyname(sched_name);
+	}
+	return sched;
+}
+
+void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
+{
+	if (scheduler->module)	/* the lookup only takes a reference when a module is recorded */
+		__MOD_DEC_USE_COUNT(scheduler->module);	/* undo the try_inc_mod_count() done by ip_vs_sched_getbyname() */
+}
+
+
+/*
+ *  Register a scheduler in the scheduler list
+ */
+int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	struct ip_vs_scheduler *dup;
+
+	if (scheduler == NULL) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	if (scheduler->name == NULL) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
+		return -EINVAL;
+	}
+
+	/* dropped again on every error path and by unregister_ip_vs_scheduler() */
+	MOD_INC_USE_COUNT;
+
+	/*
+	 *  A scheduler of the same name must not be in the list yet.
+	 *  The lookup pins the module of whatever it finds, so a hit
+	 *  has to be released again before bailing out.
+	 */
+	dup = ip_vs_sched_getbyname(scheduler->name);
+	if (dup != NULL) {
+		ip_vs_scheduler_put(dup);
+		MOD_DEC_USE_COUNT;
+		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+			  "already existed in the system\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_sched_lock);
+
+	/* only a self-linked (INIT_LIST_HEAD'ed) node may be added */
+	if (!list_empty(&scheduler->n_list)) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		MOD_DEC_USE_COUNT;
+		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+			  "already linked\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/* Add it into the d-linked scheduler list */
+	list_add(&scheduler->n_list, &ip_vs_schedulers);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name);
+	return 0;
+}
+
+
+/*
+ *  Unregister a scheduler from the scheduler list
+ */
+int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	if (scheduler == NULL) {
+		IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_sched_lock);
+	/* a self-linked node is not on the scheduler list */
+	if (list_empty(&scheduler->n_list)) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
+			  "is not in the list. failed\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/* Remove it from the d-linked scheduler list */
+	list_del(&scheduler->n_list);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	/* balances the MOD_INC_USE_COUNT taken at registration */
+	MOD_DEC_USE_COUNT;
+
+	IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name);
+
+	return 0;
+}
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_sed.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sed.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_sed.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sed.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,167 @@
+/*
+ * IPVS:        Shortest Expected Delay scheduling module
+ *
+ * Version:     $Id: ip_vs_sed.c,v 1.1.2.1 2003/05/20 17:05:02 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The SED algorithm attempts to minimize each job's expected delay until
+ * completion. The expected delay that the job will experience is
+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
+ * jobs on the ith server and Ui is the fixed service rate (weight) of
+ * the ith server. The SED algorithm adopts a greedy policy that each does
+ * what is in its own best interest, i.e. to join the queue which would
+ * minimize its expected delay of completion.
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
+ *
+ * The difference between SED and WLC is that SED includes the incoming
+ * job in the cost function (the increment of 1). SED may outperform
+ * WLC, while scheduling big jobs under larger heterogeneous systems
+ * (the server weight varies a lot).
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+
+static int
+ip_vs_sed_init_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* no per-service state is set up for SED; always succeeds */
+}
+
+
+static int
+ip_vs_sed_done_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing was allocated in ip_vs_sed_init_svc(), so nothing to release */
+}
+
+
+static int
+ip_vs_sed_update_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* no cached state to refresh; the scheduler walks svc->destinations each time */
+}
+
+
+static inline unsigned int
+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * Only active connections count towards the cost; the "+ 1"
+	 * is the incoming job itself (the Ci + 1 of the SED formula).
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Shortest Expected Delay scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sed_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	register struct list_head *head, *pos;
+	struct ip_vs_dest *cand, *best = NULL;
+	unsigned int best_oh = 0, cand_oh;
+
+	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
+
+	/*
+	 * The load of a dest server is
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * There are no floats in kernel mode, so two loads are compared
+	 * by cross-multiplication:  h1*w2 > h2*w1  is equivalent to
+	 * h1/w1 > h2/w2  as long as every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.  NULL is returned when no server with a
+	 * positive weight is found.
+	 */
+	head = &svc->destinations;
+	for (pos = head->next; pos != head; pos = pos->next) {
+		cand = list_entry(pos, struct ip_vs_dest, n_list);
+		if (best == NULL) {
+			/* still looking for the first usable destination */
+			if (atomic_read(&cand->weight) > 0) {
+				best = cand;
+				best_oh = ip_vs_sed_dest_overhead(cand);
+			}
+			continue;
+		}
+
+		/* keep whichever of best/cand has the smaller load */
+		cand_oh = ip_vs_sed_dest_overhead(cand);
+		if (best_oh * atomic_read(&cand->weight) >
+		    cand_oh * atomic_read(&best->weight)) {
+			best = cand;
+			best_oh = cand_oh;
+		}
+	}
+
+	/* empty list, or no destination with a positive weight */
+	if (best == NULL)
+		return NULL;
+
+	IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(best->addr), ntohs(best->port),
+		  atomic_read(&best->activeconns),
+		  atomic_read(&best->refcnt),
+		  atomic_read(&best->weight), best_oh);
+
+	return best;
+}
+
+
+static struct ip_vs_scheduler ip_vs_sed_scheduler =
+{
+	.name =			"sed",	/* key compared by ip_vs_sched_getbyname() */
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,	/* use count is raised while a lookup result is held */
+	.init_service =		ip_vs_sed_init_svc,	/* run by ip_vs_bind_scheduler() */
+	.done_service =		ip_vs_sed_done_svc,	/* run by ip_vs_unbind_scheduler() */
+	.update_service =	ip_vs_sed_update_svc,
+	.schedule =		ip_vs_sed_schedule,	/* select a server from the destination list */
+};	/* n_list is not set here; ip_vs_sed_init() initialises it */
+
+
+static int __init ip_vs_sed_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);	/* self-linked node: registration rejects a node that is already linked */
+	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);	/* 0 on success, -EINVAL otherwise */
+}
+
+static void __exit ip_vs_sed_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);	/* unlink from the scheduler list; the return value is not checked */
+}
+
+module_init(ip_vs_sed_init);
+module_exit(ip_vs_sed_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_sh.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sh.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_sh.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sh.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,262 @@
+/*
+ * IPVS:        Source Hashing scheduling module
+ *
+ * Version:     $Id: ip_vs_sh.c,v 1.4 2001/10/19 15:05:17 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The sh algorithm is to select server by the hash key of source IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[src_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded, such as n.conns>2*n.weight) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Note that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet source IP address to the current server
+ * array. If the sh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS SH bucket
+ */
+struct ip_vs_sh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache): NULL, or a dest on which the bucket holds a refcnt */
+};
+
+/*
+ *     for IPVS SH entry hash table
+ */
+#ifndef CONFIG_IP_VS_SH_TAB_BITS
+#define CONFIG_IP_VS_SH_TAB_BITS        8	/* default when not configured: 2^8 = 256 buckets */
+#endif
+#define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
+#define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)	/* number of buckets per service table */
+#define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)	/* keeps the low TAB_BITS bits of a hash */
+
+
+/*
+ *	Returns hash value for IPVS SH entry
+ */
+static inline unsigned ip_vs_sh_hashkey(__u32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;	/* multiplicative hash of the host-order address, reduced to a bucket index */
+}
+
+
+/*
+ *      Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __u32 addr)
+{
+	return (tbl[ip_vs_sh_hashkey(addr)]).dest;	/* may be NULL: buckets are empty when the service has no destinations */
+}
+
+
+/*
+ *      Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
+{
+	struct list_head *head = &svc->destinations;
+	struct list_head *pos = head;
+	struct ip_vs_dest *dest;
+	int idx;
+
+	/* deal the destinations out to the buckets round-robin */
+	for (idx = 0; idx < IP_VS_SH_TAB_SIZE; idx++) {
+		/* an empty list leaves every bucket without a server */
+		if (list_empty(pos)) {
+			tbl[idx].dest = NULL;
+			continue;
+		}
+		/* on the list head (start or wrap-around): step over it */
+		if (pos == head)
+			pos = pos->next;
+
+		/* every bucket holds its own reference on the dest */
+		dest = list_entry(pos, struct ip_vs_dest, n_list);
+		atomic_inc(&dest->refcnt);
+		tbl[idx].dest = dest;
+		pos = pos->next;
+	}
+	return 0;
+}
+
+
+/*
+ *      Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
+{
+	int idx;
+
+	/* give back the reference each filled bucket took in ip_vs_sh_assign() */
+	for (idx = 0; idx < IP_VS_SH_TAB_SIZE; idx++) {
+		struct ip_vs_dest *dest = tbl[idx].dest;
+
+		if (!dest)
+			continue;
+		atomic_dec(&dest->refcnt);
+		tbl[idx].dest = NULL;
+	}
+}
+
+
+static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl;
+
+	/* allocate the SH table for this service; -ENOMEM if that fails */
+	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	/* sizeof yields a size_t, so it is printed with %Zd rather than %d */
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	/* fill every bucket from the service's destination list */
+	ip_vs_sh_assign(tbl, svc);
+	return 0;
+}
+
+
+static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* drop the dest references held by the buckets */
+	ip_vs_sh_flush(tbl);
+
+	/* release the table itself; its size is a size_t, hence %Zd below */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	return 0;
+}
+
+
+static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* drop the references taken for the previous bucket assignment */
+	ip_vs_sh_flush(tbl);
+
+	/* refill every bucket from the service's current destination list */
+	ip_vs_sh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      If the number of active connections is more than twice the weight,
+ *      the server is considered overloaded.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	int conns = atomic_read(&dest->activeconns);
+	int limit = atomic_read(&dest->weight)*2;
+
+	return conns > limit;	/* 1 when overloaded, 0 otherwise */
+}
+
+
+/*
+ *      Source Hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sh_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
+
+	/* the source address alone decides which bucket is consulted */
+	dest = ip_vs_sh_get(tbl, iph->saddr);
+	if (dest == NULL)
+		return NULL;		/* bucket holds no server */
+	if (!(dest->flags & IP_VS_DEST_F_AVAILABLE))
+		return NULL;
+	if (atomic_read(&dest->weight) <= 0 || is_overloaded(dest))
+		return NULL;		/* quiesced or overloaded */
+
+	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->saddr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS SH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_sh_scheduler =
+{
+	.n_list =		{0},			/* made a valid list node by ip_vs_sh_init() */
+	.name =			"sh",			/* key compared by ip_vs_sched_getbyname() */
+	.refcnt =		ATOMIC_INIT(0),		/* refcnt */
+	.module =		THIS_MODULE,		/* this module */
+	.init_service =		ip_vs_sh_init_svc,	/* service initializer */
+	.done_service =		ip_vs_sh_done_svc,	/* service done */
+	.update_service =	ip_vs_sh_update_svc,	/* service updater */
+	.schedule =		ip_vs_sh_schedule,	/* select a server from the destination list */
+};
+
+
+static int __init ip_vs_sh_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);	/* self-linked node: registration rejects a node that is already linked */
+	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);	/* 0 on success, -EINVAL otherwise */
+}
+
+
+static void __exit ip_vs_sh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);	/* unlink from the scheduler list; the return value is not checked */
+}
+
+
+module_init(ip_vs_sh_init);
+module_exit(ip_vs_sh_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_sync.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sync.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_sync.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_sync.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,793 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_sync.c,v 1.8 2002/08/17 14:06:02 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * ip_vs_sync:  sync connection info from master load balancer to backups
+ *              through multicast
+ */
+
+#define __KERNEL_SYSCALLS__             /*  for waitpid */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/igmp.h>                 /* for ip_mc_join_group */
+
+#include <net/ip.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>                /* for get_fs and set_fs */
+
+#include <net/ip_vs.h>
+
+#define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
+#define IP_VS_SYNC_PORT  8848          /* multicast port */
+
+
+/*
+ *	IPVS sync connection entry
+ */
+struct ip_vs_sync_conn {
+	__u8			reserved;	/* reserved, carries no data */
+
+	/* Protocol, addresses and port numbers (copied as-is from ip_vs_conn) */
+	__u8			protocol;       /* Which protocol (TCP/UDP) */
+	__u16			cport;		/* client port */
+	__u16                   vport;		/* virtual port */
+	__u16                   dport;		/* destination port */
+	__u32                   caddr;          /* client address */
+	__u32                   vaddr;          /* virtual address */
+	__u32                   daddr;          /* destination address */
+
+	/* Flags and state transition (converted with htons()/ntohs()) */
+	__u16                   flags;          /* status flags */
+	__u16                   state;          /* state info */
+
+	/* The sequence options start here */
+};
+
+struct ip_vs_sync_conn_options {
+	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
+	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
+};	/* copied with one memcpy to/from &cp->in_seq; NOTE(review): assumes out_seq directly follows in_seq in ip_vs_conn -- confirm */
+
+#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ)	/* 3 minutes in jiffies; timeout given to synced conn entries */
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))	/* entry without sequence options */
+#define FULL_CONN_SIZE  /* entry followed by its sequence options */ \
+(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+
+
+/*
+  The master multicasts messages to the backup load balancers in the
+  following format.
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |   Reserved    |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (1)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                            .                                  |
+      |                            .                                  |
+      |                            .                                  |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (n)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+#define SYNC_MESG_MAX_SIZE      (24*50+4)	/* NOTE(review): presumably 50 option-less 24-byte entries plus the 4-byte header -- confirm */
+struct ip_vs_sync_mesg {
+	__u8                    nr_conns;	/* number of conn entries that follow */
+	__u8                    reserved;
+	__u16                   size;		/* total message bytes including this header; stored without byte swapping */
+
+	/* ip_vs_sync_conn entries start here */
+};
+
+
+struct ip_vs_sync_buff {
+	struct list_head        list;		/* link in ip_vs_sync_queue */
+	unsigned long           firstuse;	/* jiffies when the buffer was created */
+
+	/* pointers for the message data */
+	struct ip_vs_sync_mesg  *mesg;		/* start of the message (header first) */
+	unsigned char           *head;		/* where the next conn entry is written */
+	unsigned char           *end;		/* mesg + SYNC_MESG_MAX_SIZE */
+};
+
+
+/* the list of filled sync_buffs waiting in the queue, and its lock */
+static LIST_HEAD(ip_vs_sync_queue);
+static spinlock_t ip_vs_sync_lock = SPIN_LOCK_UNLOCKED;
+
+/* current sync_buff for accepting new conn entries; NULL until one is needed */
+static struct ip_vs_sync_buff   *curr_sb = NULL;
+static spinlock_t curr_sb_lock = SPIN_LOCK_UNLOCKED;	/* guards curr_sb and the buffer it points to */
+
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+	spin_lock(&ip_vs_sync_lock);	/* NOTE(review): non-_bh variant, unlike sb_dequeue() -- presumably callers already run with BHs off; confirm */
+	list_add_tail(&sb->list, &ip_vs_sync_queue);	/* append; sb_dequeue() takes from the front */
+	spin_unlock(&ip_vs_sync_lock);
+}
+
+static inline struct ip_vs_sync_buff * sb_dequeue(void)
+{
+	struct ip_vs_sync_buff *first = NULL;
+
+	/*
+	 * Detach the oldest queued buffer (sb_queue_tail() appends, so the
+	 * front of the list is the oldest); NULL when the queue is empty.
+	 */
+	spin_lock_bh(&ip_vs_sync_lock);
+	if (!list_empty(&ip_vs_sync_queue)) {
+		first = list_entry(ip_vs_sync_queue.next,
+				   struct ip_vs_sync_buff, list);
+		list_del(&first->list);
+	}
+	spin_unlock_bh(&ip_vs_sync_lock);
+	return first;
+}
+
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+{
+	struct ip_vs_sync_buff *sb;
+
+	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+		return NULL;	/* NULL tells the caller there was no memory */
+	if (!(sb->mesg=kmalloc(SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) {
+		kfree(sb);
+		return NULL;
+	}
+	sb->mesg->nr_conns = 0;	/* empty message: header only, no entries yet */
+	sb->mesg->reserved = 0;	/* kmalloc'ed memory: never multicast a stale heap byte */
+	sb->mesg->size = 4;
+	sb->head = (unsigned char *)sb->mesg + 4;
+	sb->end = (unsigned char *)sb->mesg + SYNC_MESG_MAX_SIZE;
+	sb->firstuse = jiffies;
+	return sb;
+}
+
+static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
+{
+	kfree(sb->mesg);	/* message area first: it is only reachable through sb */
+	kfree(sb);
+}
+
+/*
+ *	Get the current sync buffer if it has been created for more
+ *	than the specified time or the specified time is zero.
+ */
+static inline struct ip_vs_sync_buff *
+get_curr_sync_buff(unsigned long time)
+{
+	struct ip_vs_sync_buff *taken = NULL;
+
+	spin_lock_bh(&curr_sb_lock);
+	/* hand the buffer over when it is older than "time", or always if time is 0 */
+	if (curr_sb != NULL &&
+	    (jiffies - curr_sb->firstuse > time || time == 0)) {
+		taken = curr_sb;
+		curr_sb = NULL;	/* the caller now owns the buffer */
+	}
+	spin_unlock_bh(&curr_sb_lock);
+	return taken;
+}
+
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ */
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
+{
+	struct ip_vs_sync_mesg *m;
+	struct ip_vs_sync_conn *s;
+	int len;
+
+	spin_lock(&curr_sb_lock);
+	if (!curr_sb) {
+		if (!(curr_sb=ip_vs_sync_buff_create())) {
+			spin_unlock(&curr_sb_lock);
+			IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
+			return;
+		}
+	}
+
+	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+		SIMPLE_CONN_SIZE;
+	m = curr_sb->mesg;
+	s = (struct ip_vs_sync_conn *)curr_sb->head;
+
+	/* copy members; the buffer is kmalloc'ed, so clear reserved too */
+	s->reserved = 0;
+	s->protocol = cp->protocol;
+	s->cport = cp->cport;
+	s->vport = cp->vport;
+	s->dport = cp->dport;
+	s->caddr = cp->caddr;
+	s->vaddr = cp->vaddr;
+	s->daddr = cp->daddr;
+	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
+	s->state = htons(cp->state);
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+		struct ip_vs_sync_conn_options *opt =
+			(struct ip_vs_sync_conn_options *)&s[1];
+		memcpy(opt, &cp->in_seq, sizeof(*opt));
+	}
+	m->nr_conns++;
+	m->size += len;
+	curr_sb->head += len;
+
+	/* queue the buffer once a full-size entry might no longer fit */
+	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
+		sb_queue_tail(curr_sb);
+		curr_sb = NULL;
+	}
+	spin_unlock(&curr_sb_lock);
+
+	/* synchronize its controller too, if it has one */
+	if (cp->control)
+		ip_vs_sync_conn(cp->control);
+}
+
+
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ */
+static void ip_vs_process_message(const char *buffer, const size_t buflen)
+{
+	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
+	struct ip_vs_sync_conn *s;
+	struct ip_vs_conn *cp;
+	const char *end = buffer + buflen;
+	char *p;
+	int i;
+
+	/* the header must be complete before its size field is trusted */
+	if (buflen < sizeof(struct ip_vs_sync_mesg) || buflen != m->size) {
+		IP_VS_ERR("bogus message\n");
+		return;
+	}
+
+	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+	for (i=0; i<m->nr_conns; i++) {
+		/* make sure the whole entry is inside the buffer first */
+		s = (struct ip_vs_sync_conn *)p;
+		if ((size_t)(end - p) < SIMPLE_CONN_SIZE ||
+		    ((ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) &&
+		     (size_t)(end - p) < FULL_CONN_SIZE)) {
+			IP_VS_ERR("bogus message\n");
+			return;
+		}
+		cp = ip_vs_conn_in_get(s->protocol,
+				       s->caddr, s->cport,
+				       s->vaddr, s->vport);
+		if (!cp) {
+			cp = ip_vs_conn_new(s->protocol,
+					    s->caddr, s->cport,
+					    s->vaddr, s->vport,
+					    s->daddr, s->dport,
+					    ntohs(s->flags), NULL);
+			if (!cp) {
+				IP_VS_ERR("ip_vs_conn_new failed\n");
+				return;
+			}
+			cp->state = ntohs(s->state);
+		} else if (!cp->dest) {
+			/* it is an entry created by the synchronization */
+			cp->state = ntohs(s->state);
+			cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED;
+		}	/* a normal entry keeps its own state and flags */
+		if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) {
+			memcpy(&cp->in_seq, &s[1],
+			       sizeof(struct ip_vs_sync_conn_options));
+			p += FULL_CONN_SIZE;
+		} else
+			p += SIMPLE_CONN_SIZE;
+		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold);
+		cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
+		ip_vs_conn_put(cp);
+	}
+}
+
+
+/* ipvs sync daemon state; starts out as IP_VS_STATE_NONE */
+volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
+
+/* multicast interface name; make_send_sock() selects and binds this interface */
+char ip_vs_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+/* multicast addr; the peer address make_send_sock() connects the socket to */
+static struct sockaddr_in mcast_addr;
+
+
+/*
+ *      Setup loopback of outgoing multicasts on a sending socket
+ */
+static void set_mcast_loop(struct sock *sk, u_char loop)
+{
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
+	lock_sock(sk);	/* the field is written with the socket locked */
+	sk->protinfo.af_inet.mc_loop = loop ? 1 : 0;	/* any non-zero argument is normalised to 1 */
+	release_sock(sk);
+}
+
+/*
+ *      Specify TTL for outgoing multicasts on a sending socket
+ */
+static void set_mcast_ttl(struct sock *sk, u_char ttl)
+{
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
+	lock_sock(sk);	/* the field is written with the socket locked */
+	sk->protinfo.af_inet.mc_ttl = ttl;	/* stored as given, no range check */
+	release_sock(sk);
+}
+
+/*
+ *      Specify default interface for outgoing multicasts
+ */
+static int set_mcast_if(struct sock *sk, char *ifname)
+{
+	struct net_device *dev = __dev_get_by_name(ifname);
+
+	if (dev == NULL)
+		return -ENODEV;
+	/* a socket bound to a device may only multicast through that one */
+	if (sk->bound_dev_if && dev->ifindex != sk->bound_dev_if)
+		return -EINVAL;
+
+	lock_sock(sk);
+	sk->protinfo.af_inet.mc_index = dev->ifindex;
+	/*  sk->protinfo.af_inet.mc_addr  = 0; */
+	release_sock(sk);
+
+	return 0;
+}
+
+/*
+ *      Join a multicast group.
+ *      the group is specified by a class D multicast address 224.0.0.0/4
+ *      in the in_addr structure passed in as a parameter.
+ */
+static int
+join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+{
+	struct net_device *dev = __dev_get_by_name(ifname);
+	struct ip_mreqn mreq;
+	int err;
+
+	if (dev == NULL)
+		return -ENODEV;
+	if (sk->bound_dev_if && dev->ifindex != sk->bound_dev_if)
+		return -EINVAL;
+
+	/* group address + interface index; everything else stays zeroed */
+	memset(&mreq, 0, sizeof(mreq));
+	mreq.imr_multiaddr = *addr;
+	mreq.imr_ifindex = dev->ifindex;
+
+	/* ip_mc_join_group() is called with the socket locked */
+	lock_sock(sk);
+	err = ip_mc_join_group(sk, &mreq);
+	release_sock(sk);
+	return err;
+}
+
+
+static int bind_mcastif_addr(struct socket *sock, char *ifname)
+{
+	struct net_device *dev = __dev_get_by_name(ifname);
+	struct sockaddr_in sin;
+	u32 addr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* a missing address is only reported; the bind is still attempted */
+	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+	if (addr == 0)
+		IP_VS_ERR("You probably need to specify IP address on "
+			  "multicast interface.\n");
+
+	IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
+		  ifname, NIPQUAD(addr));
+
+	/* bind to the address selected for the interface, with port 0 */
+	sin.sin_family = AF_INET;
+	sin.sin_port = 0;
+	sin.sin_addr.s_addr = addr;
+	return sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+}
+
+/*
+ *      Set up the UDP socket that multicasts sync messages (NULL on failure)
+ */
+static struct socket * make_send_sock(void)
+{
+	struct socket *sock;
+
+	/* First create a socket */
+	if (sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return NULL;
+	}
+
+	if (set_mcast_if(sock->sk, ip_vs_mcast_ifn) < 0) {
+		IP_VS_ERR("Error setting outbound mcast interface\n");
+		goto error;
+	}
+
+	set_mcast_loop(sock->sk, 0);	/* loopback flag cleared */
+	set_mcast_ttl(sock->sk, 1);	/* multicast TTL of 1 */
+
+	if (bind_mcastif_addr(sock, ip_vs_mcast_ifn) < 0) {
+		IP_VS_ERR("Error binding address of the mcast interface\n");
+		goto error;
+	}
+
+	if (sock->ops->connect(sock,
+			       (struct sockaddr*)&mcast_addr,
+			       sizeof(struct sockaddr), 0) < 0) {
+		IP_VS_ERR("Error connecting to the multicast addr\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);	/* drop the partly configured socket */
+	return NULL;
+}
+
+
+/*
+ *      Set up the UDP socket that receives sync messages (NULL on failure)
+ */
+static struct socket * make_receive_sock(void)
+{
+	struct socket *sock;
+
+	/* First create a socket */
+	if (sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return NULL;
+	}
+
+	/* it is equivalent to the REUSEADDR option in user-space */
+	sock->sk->reuse = 1;
+
+	if (sock->ops->bind(sock,
+			    (struct sockaddr*)&mcast_addr,
+			    sizeof(struct sockaddr)) < 0) {
+		IP_VS_ERR("Error binding to the multicast addr\n");
+		goto error;
+	}
+
+	/* join the multicast group on the configured interface */
+	if (join_mcast_group(sock->sk,
+			     (struct in_addr*)&mcast_addr.sin_addr,
+			     ip_vs_mcast_ifn) < 0) {
+		IP_VS_ERR("Error joining to the multicast group\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);	/* drop the partly configured socket */
+	return NULL;
+}
+
+
+static int
+ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
+{
+	struct iovec iov;
+	struct msghdr msg;
+	mm_segment_t saved_fs;
+	int sent;
+
+	EnterFunction(7);
+	iov.iov_base = (void *)buffer;
+	iov.iov_len = length;
+	/* one segment, no destination address, no ancillary data */
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL;
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+	sent = sock_sendmsg(sock, &msg, length);
+	set_fs(saved_fs);
+	LeaveFunction(7);
+	return sent;
+}
+
+
+static int
+ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
+{
+	struct msghdr		msg;
+	struct iovec		iov;
+	int			len;
+	mm_segment_t		oldfs;
+
+	EnterFunction(7);
+
+	/* Receive a packet */
+	iov.iov_base     = buffer;
+	iov.iov_len      = (size_t)buflen;
+	msg.msg_name     = 0;
+	msg.msg_namelen  = 0;
+	msg.msg_iov	 = &iov;
+	msg.msg_iovlen   = 1;
+	msg.msg_control  = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags    = 0;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	len = sock_recvmsg(sock, &msg, buflen, 0);
+	set_fs(oldfs);
+
+	if (len < 0)
+		return -1;
+
+	LeaveFunction(7);
+	return len;
+}
+
+
+static int errno;	/* NOTE(review): presumably needed by the waitpid() wrapper used in start_sync_thread() - confirm */
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_wait);	/* sync_thread() queues itself here while it runs */
+static pid_t sync_pid = 0;	/* pid of the running sync thread, 0 when none */
+
+static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);	/* stop_sync_thread() waits here for the thread to finish */
+static int stop_sync = 0;	/* set to request a stop; cleared by sync_thread() on exit */
+
+static void sync_master_loop(void)
+{
+	struct socket *sock;
+	struct ip_vs_sync_buff *sb;
+	struct ip_vs_sync_mesg *m;
+
+	/* create the sending multicast socket */
+	sock = make_send_sock();
+	if (!sock)
+		return;
+
+	for (;;) {
+		while ((sb=sb_dequeue())) {	/* send every queued sync buffer */
+			m = sb->mesg;
+			if (ip_vs_send_async(sock, (char *)m,
+					     m->size) != m->size)
+				IP_VS_ERR("ip_vs_send_async error\n");
+			ip_vs_sync_buff_release(sb);	/* released even if the send failed */
+		}
+
+		/* check if entries stay in curr_sb for 2 seconds */
+		if ((sb = get_curr_sync_buff(2*HZ))) {
+			m = sb->mesg;
+			if (ip_vs_send_async(sock, (char *)m,
+					     m->size) != m->size)
+				IP_VS_ERR("ip_vs_send_async error\n");
+			ip_vs_sync_buff_release(sb);
+		}
+
+		if (stop_sync)	/* raised by stop_sync_thread() */
+			break;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);	/* sleep for up to HZ ticks */
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/* clean up the sync_buff queue */
+	while ((sb=sb_dequeue())) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* clean up the current sync_buff */
+	if ((sb = get_curr_sync_buff(0))) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* release the sending multicast socket */
+	sock_release(sock);
+}
+
+
+static void sync_backup_loop(void)
+{
+	struct socket *sock;
+	char *buf;
+	int len;
+
+	if (!(buf=kmalloc(SYNC_MESG_MAX_SIZE, GFP_KERNEL))) {	/* thread context, may sleep */
+		IP_VS_ERR("sync_backup_loop: kmalloc error\n");
+		return;
+	}
+
+	/* create the receiving multicast socket */
+	sock = make_receive_sock();
+	if (!sock)
+		goto out;
+
+	for (;;) {
+		/* drain every message that is already queued on the socket */
+		while (!skb_queue_empty(&(sock->sk->receive_queue))) {
+			if ((len=ip_vs_receive(sock, buf,
+					       SYNC_MESG_MAX_SIZE))<=0) {
+				IP_VS_ERR("receiving message error\n");
+				break;
+			}
+			/* disable bottom half, because it accessed the data
+			   shared by softirq while getting/creating conns */
+			local_bh_disable();
+			ip_vs_process_message(buf, len);
+			local_bh_enable();
+		}
+
+		if (stop_sync)	/* raised by stop_sync_thread() */
+			break;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);	/* sleep for up to HZ ticks */
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/* release the receiving multicast socket */
+	sock_release(sock);
+
+  out:
+	kfree(buf);
+}
+
+
+static int sync_thread(void *startup)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	mm_segment_t oldmm;
+	int state;
+
+	MOD_INC_USE_COUNT;	/* dropped again just before the thread exits */
+	daemonize();
+
+	oldmm = get_fs();
+	set_fs(KERNEL_DS);
+
+	if (ip_vs_sync_state == IP_VS_STATE_MASTER)
+		sprintf(current->comm, "ipvs syncmaster");
+	else if (ip_vs_sync_state == IP_VS_STATE_BACKUP)
+		sprintf(current->comm, "ipvs syncbackup");
+	else IP_VS_BUG();
+
+	spin_lock_irq(&current->sigmask_lock);
+	siginitsetinv(&current->blocked, 0);	/* NOTE(review): looks like this blocks every signal - confirm */
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	/* set up multicast address */
+	mcast_addr.sin_family = AF_INET;
+	mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
+	mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
+
+	add_wait_queue(&sync_wait, &wait);
+
+	state = ip_vs_sync_state;	/* snapshot: stop_sync_thread() resets the global */
+	sync_pid = current->pid;
+	IP_VS_INFO("sync thread started.\n");
+	complete((struct completion *)startup);	/* releases start_sync_thread() */
+
+	/* run the master or backup loop until a stop is requested */
+	if (state == IP_VS_STATE_MASTER)
+		sync_master_loop();
+	else if (state == IP_VS_STATE_BACKUP)
+		sync_backup_loop();
+	else IP_VS_BUG();
+
+	remove_wait_queue(&sync_wait, &wait);
+
+	/* thread exits */
+	sync_pid = 0;
+	IP_VS_INFO("sync thread stopped!\n");
+
+	set_fs(oldmm);
+	MOD_DEC_USE_COUNT;
+
+	stop_sync = 0;	/* acknowledge the stop request ... */
+	wake_up(&stop_sync_wait);	/* ... and wake stop_sync_thread() */
+
+	return 0;
+}
+
+
+static int fork_sync_thread(void *startup)
+{
+	/* fork the sync thread here, then the parent process of the
+	   sync thread is the init process after this thread exits. */
+	if (kernel_thread(sync_thread, startup, 0) < 0)
+		IP_VS_BUG();
+	return 0;	/* exit at once; start_sync_thread() waits for this helper */
+}
+
+
+/* Start the sync thread in the given state; -EEXIST if one is running. */
+int start_sync_thread(int state, char *mcast_ifn)
+{
+	DECLARE_COMPLETION(startup);
+	pid_t pid;
+	int waitpid_result;
+
+	if (sync_pid)
+		return -EEXIST;
+
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %d bytes\n",
+		  (int)sizeof(struct ip_vs_sync_conn));
+
+	/* bounded copy: never write past ip_vs_mcast_ifn, always terminate */
+	ip_vs_sync_state = state;
+	strncpy(ip_vs_mcast_ifn, mcast_ifn, sizeof(ip_vs_mcast_ifn) - 1);
+	ip_vs_mcast_ifn[sizeof(ip_vs_mcast_ifn) - 1] = '\0';
+
+	if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0)
+		IP_VS_BUG();
+	if ((waitpid_result = waitpid(pid, NULL, __WCLONE)) != pid) {
+		IP_VS_ERR("%s: waitpid(%d,...) failed, errno %d\n",
+			  __FUNCTION__, pid, -waitpid_result);
+	}
+	wait_for_completion(&startup);	/* until sync_thread() has started */
+	return 0;
+}
+
+
+int stop_sync_thread(void)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	if (!sync_pid)
+		return -ESRCH;	/* no sync thread is running */
+
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_INFO("stopping sync thread %d ...\n", sync_pid);
+
+	__set_current_state(TASK_UNINTERRUPTIBLE);	/* set before the flag is raised below */
+	add_wait_queue(&stop_sync_wait, &wait);
+	ip_vs_sync_state = IP_VS_STATE_NONE;
+	stop_sync = 1;	/* polled by the master/backup loops */
+	wake_up(&sync_wait);
+	schedule();	/* sleep until sync_thread() wakes stop_sync_wait */
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&stop_sync_wait, &wait);
+
+	/* Note: no need to reap the sync thread, because its parent
+	   process is the init process */
+
+	if (stop_sync)	/* the exiting thread is expected to have cleared it */
+		IP_VS_BUG();
+
+	return 0;
+}
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_timer.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_timer.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_timer.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_timer.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,258 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_timer.c,v 1.8.2.2 2003/05/20 17:05:02 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * The following block implements slow timers for IPVS, most code is stolen
+ * from linux/kernel/timer.c.
+ * Slow timer is used to avoid the overhead of cascading timers, when lots
+ * of connection entries (>50,000) are cluttered in the system.
+ */
+#define SHIFT_BITS 6		/* one slot of sltv1 spans 1 << SHIFT_BITS jiffies */
+#define TVN_BITS 8		/* log2 of the bucket count of sltv2 and sltv3 */
+#define TVR_BITS 10		/* log2 of the bucket count of the root vector sltv1 */
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct sltimer_vec {
+	int index;	/* bucket handled next by cascade_sltimers() */
+	struct list_head vec[TVN_SIZE];
+};
+
+struct sltimer_vec_root {
+	int index;	/* bucket run next by run_sltimer_list() */
+	struct list_head vec[TVR_SIZE];
+};
+
+static struct sltimer_vec sltv3 = { 0 };
+static struct sltimer_vec sltv2 = { 0 };
+static struct sltimer_vec_root sltv1 = { 0 };
+
+static struct sltimer_vec * const sltvecs[] = {
+	(struct sltimer_vec *)&sltv1, &sltv2, &sltv3	/* entry 0 is the root vector; run_sltimer_list() only indexes from 1 */
+};
+
+#define NOOF_SLTVECS (sizeof(sltvecs) / sizeof(sltvecs[0]))
+
+static void init_sltimervecs (void)
+{
+	int slot;
+	/* sltv2 and sltv3 have the same number of buckets */
+	for (slot = 0; slot < TVN_SIZE; slot++) {
+		INIT_LIST_HEAD(&sltv3.vec[slot]);
+		INIT_LIST_HEAD(&sltv2.vec[slot]);
+	}
+	for (slot = 0; slot < TVR_SIZE; slot++)
+		INIT_LIST_HEAD(&sltv1.vec[slot]);
+}
+
+static unsigned long sltimer_jiffies = 0;	/* time base of the slow timers, advanced by run_sltimer_list() */
+
+static inline void internal_add_sltimer(struct timer_list *timer)
+{
+	/*
+	 * must hold the sltimer lock when calling this
+	 */
+	unsigned long expires = timer->expires;
+	unsigned long idx = expires - sltimer_jiffies;
+	struct list_head * vec;
+
+	if (idx < 1 << (SHIFT_BITS + TVR_BITS)) {
+		int i = (expires >> SHIFT_BITS) & TVR_MASK;
+		vec = sltv1.vec + i;
+	} else if (idx < 1 << (SHIFT_BITS + TVR_BITS + TVN_BITS)) {
+		int i = (expires >> (SHIFT_BITS+TVR_BITS)) & TVN_MASK;
+		vec = sltv2.vec + i;
+	} else if ((signed long) idx < 0) {
+		/*
+		 * can happen if you add a timer with expires == jiffies,
+		 * or you set a timer to go off in the past
+		 */
+		vec = sltv1.vec + sltv1.index;
+	} else if (idx <= 0xffffffffUL) {
+		int i = (expires >> (SHIFT_BITS+TVR_BITS+TVN_BITS)) & TVN_MASK;
+		vec = sltv3.vec + i;
+	} else {
+		/* Can only get here on architectures with 64-bit jiffies */
+		/* no bucket was chosen: leave the timer unqueued and stop */
+		INIT_LIST_HEAD(&timer->list);
+		return;
+	}
+	/* Timers are FIFO: link the timer in at the tail of its bucket */
+	list_add(&timer->list, vec->prev);
+}
+
+
+static spinlock_t __ip_vs_sltimerlist_lock = SPIN_LOCK_UNLOCKED;	/* taken around every change to the slow timer lists */
+
+/* Queue a slow timer; one with list.next already set is only reported. */
+void add_sltimer(struct timer_list *timer)
+{
+	spin_lock(&__ip_vs_sltimerlist_lock);
+
+	if (timer->list.next == NULL) {
+		internal_add_sltimer(timer);
+	} else {
+		/* still linked: complain and leave the timer untouched */
+		printk("bug: kernel sltimer added twice at %p.\n",
+		       __builtin_return_address(0));
+	}
+
+	spin_unlock(&__ip_vs_sltimerlist_lock);
+}
+
+static inline int detach_sltimer(struct timer_list *timer)
+{
+	int queued = timer_pending(timer) != 0;	/* 1 when still linked */
+	if (queued)
+		list_del(&timer->list);
+	return queued;
+}
+
+/* Re-arm a slow timer: set the new expiry, unlink the timer if it is
+   queued, and queue it again in the bucket matching that expiry. */
+void mod_sltimer(struct timer_list *timer, unsigned long expires)
+{
+	spin_lock(&__ip_vs_sltimerlist_lock);
+	timer->expires = expires;
+	detach_sltimer(timer);
+	internal_add_sltimer(timer);
+	spin_unlock(&__ip_vs_sltimerlist_lock);
+}
+
+int del_sltimer(struct timer_list * timer)
+{
+	int was_queued;
+
+	spin_lock(&__ip_vs_sltimerlist_lock);
+	was_queued = detach_sltimer(timer);
+	timer->list.prev = timer->list.next = NULL;	/* mark as unlinked */
+	spin_unlock(&__ip_vs_sltimerlist_lock);
+	return was_queued;
+}
+
+
+static inline void cascade_sltimers(struct sltimer_vec *tv)
+{
+	/*
+	 * re-insert every timer of tv's current bucket, then advance tv
+	 */
+	struct list_head *head, *curr, *next;
+
+	head = tv->vec + tv->index;
+	curr = head->next;
+
+	/*
+	 * We are removing _all_ timers from the list, so we don't  have to
+	 * detach them individually, just clear the list afterwards.
+	 */
+	while (curr != head) {
+		struct timer_list *tmp;
+
+		tmp = list_entry(curr, struct timer_list, list);
+		next = curr->next;	/* saved first: the timer is relinked below */
+		list_del(curr); // not needed
+		internal_add_sltimer(tmp);	/* picks the bucket from the timer's expiry */
+		curr = next;
+	}
+	INIT_LIST_HEAD(head);
+	tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_sltimer_list(void)
+{
+	spin_lock(&__ip_vs_sltimerlist_lock);
+	while ((long)(jiffies - sltimer_jiffies) >= 0) {	/* catch up with jiffies, one slot per pass */
+		struct list_head *head, *curr;
+		if (!sltv1.index) {	/* root index is 0: cascade from the upper vectors */
+			int n = 1;
+			do {
+				cascade_sltimers(sltvecs[n]);
+			} while (sltvecs[n]->index == 1 && ++n < NOOF_SLTVECS);
+		}
+	  repeat:
+		head = sltv1.vec + sltv1.index;
+		curr = head->next;
+		if (curr != head) {
+			struct timer_list *timer;
+			void (*fn)(unsigned long);
+			unsigned long data;
+
+			timer = list_entry(curr, struct timer_list, list);
+			fn = timer->function;
+			data= timer->data;
+
+			detach_sltimer(timer);
+			timer->list.next = timer->list.prev = NULL;
+			spin_unlock(&__ip_vs_sltimerlist_lock);	/* the handler runs without the list lock */
+			fn(data);
+			spin_lock(&__ip_vs_sltimerlist_lock);
+			goto repeat;	/* re-read the bucket head after the lock was dropped */
+		}
+		sltimer_jiffies += 1<<SHIFT_BITS;
+		sltv1.index = (sltv1.index + 1) & TVR_MASK;
+	}
+	spin_unlock(&__ip_vs_sltimerlist_lock);
+}
+
+static struct timer_list slow_timer;
+/*
+ *  Slow timer handler is activated every second.  The expansion is
+ *  parenthesised so the macro is safe inside larger expressions.
+ */
+#define SLTIMER_PERIOD	     (1*HZ)
+
+static void sltimer_handler(unsigned long data)
+{
+	run_sltimer_list();	/* run the slow timers that are due */
+
+	update_defense_level();
+	if (atomic_read(&ip_vs_dropentry))
+		ip_vs_random_dropentry();
+
+	mod_timer(&slow_timer, (jiffies + SLTIMER_PERIOD));	/* re-arm for the next period */
+}
+
+
+void ip_vs_sltimer_init(void)
+{
+	/*
+	 * Set up the slow timer buckets, then hook the slow_timer handler
+	 * in the system timer.
+	 */
+	init_sltimervecs();
+
+	init_timer(&slow_timer);
+	slow_timer.function = sltimer_handler;
+	slow_timer.expires = jiffies+SLTIMER_PERIOD;	/* first run one period from now */
+	add_timer(&slow_timer);
+}
+
+
+void ip_vs_sltimer_cleanup(void)
+{
+	del_timer_sync(&slow_timer);	/* stop the periodic slow_timer */
+}
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_wlc.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_wlc.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_wlc.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_wlc.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,157 @@
+/*
+ * IPVS:        Weighted Least-Connection Scheduling module
+ *
+ * Version:     $Id: ip_vs_wlc.c,v 1.10.2.1 2003/04/11 14:02:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wlc_schedule to return dest
+ *     Wensong Zhang            :     changed to use the inactconns in scheduling
+ *     Wensong Zhang            :     changed some cosmetic things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wlc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+
+static int
+ip_vs_wlc_init_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing is set up here for a service */
+}
+
+
+static int
+ip_vs_wlc_done_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing is released here for a service */
+}
+
+
+static int
+ip_vs_wlc_update_svc(struct ip_vs_service *svc)
+{
+	return 0;	/* nothing is refreshed here for a service */
+}
+
+
+static inline unsigned int
+ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * An active connection is assumed to cost 256 times as much as an
+	 * inactive one on average (a rough figure that may be changed
+	 * later), so the overhead estimate is:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	int active = atomic_read(&dest->activeconns);
+	int inactive = atomic_read(&dest->inactconns);
+	return (active << 8) + inactive;
+}
+
+
+/*
+ *    Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct list_head *head = &svc->destinations;
+	struct list_head *pos;
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh, doh;
+
+	IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
+
+	/*
+	 * The load of a server is (dest overhead) / dest->weight.
+	 *
+	 * There are no floats in kernel mode, so two loads are
+	 * compared by cross-multiplying: h1*w2 > h2*w1 is equivalent
+	 * to h1/w1 > h2/w2 as long as both weights are positive.
+	 *
+	 * A server with weight=0 is quiesced and receives no new
+	 * connections.
+	 */
+
+	/*
+	 * The first server with a positive weight is the initial candidate.
+	 */
+	for (pos = head->next; pos != head; pos = pos->next) {
+		dest = list_entry(pos, struct ip_vs_dest, n_list);
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			break;
+		}
+	}
+
+	if (least == NULL)
+		return NULL;
+	loh = ip_vs_wlc_dest_overhead(least);
+
+	/* any later server with a strictly lower load replaces it */
+	for (pos = pos->next; pos != head; pos = pos->next) {
+		dest = list_entry(pos, struct ip_vs_dest, n_list);
+		doh = ip_vs_wlc_dest_overhead(dest);
+
+		if (loh * atomic_read(&dest->weight) <=
+		    doh * atomic_read(&least->weight))
+			continue;
+		least = dest;
+		loh = doh;
+	}
+
+	IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wlc_scheduler =
+{
+	{0},			/* n_list, initialised in ip_vs_wlc_init() */
+	"wlc",			/* name */
+	ATOMIC_INIT(0),         /* refcnt */
+	THIS_MODULE,		/* this module */
+	ip_vs_wlc_init_svc,	/* service initializer */
+	ip_vs_wlc_done_svc,	/* service done */
+	ip_vs_wlc_update_svc,	/* service updater */
+	ip_vs_wlc_schedule,	/* select a server from the destination list */
+};
+
+
+static int __init ip_vs_wlc_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);	/* make n_list an empty list before registering */
+	return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+static void __exit ip_vs_wlc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);	/* undo the registration made in ip_vs_wlc_init() */
+}
+
+module_init(ip_vs_wlc_init);
+module_exit(ip_vs_wlc_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/ipvs/ip_vs_wrr.c linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_wrr.c
--- linux-2.4.20/net/ipv4/ipvs/ip_vs_wrr.c	Thu Jan  1 08:00:00 1970
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/ipvs/ip_vs_wrr.c	Wed May 21 11:36:21 2003
@@ -0,0 +1,240 @@
+/*
+ * IPVS:        Weighted Round-Robin Scheduling module
+ *
+ * Version:     $Id: ip_vs_wrr.c,v 1.11 2002/03/25 12:44:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wrr_schedule to return dest
+ *     Wensong Zhang            :     changed some cosmetic things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wrr_update_svc
+ *     Julian Anastasov         :     fixed the bug of returning destination
+ *                                    with weight 0 when all weights are zero
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * per-service scheduling state for weighted round-robin scheduling
+ */
+struct ip_vs_wrr_mark {
+	struct list_head *cl;	/* current position in the destination list */
+	int cw;			/* current weight */
+	int mw;			/* maximum weight */
+	int di;			/* decreasing interval (gcd of the weights) */
+};
+
+
+/*
+ *    Greatest common divisor of a and b (Euclid); b must not be zero
+ */
+static int gcd(int a, int b)
+{
+	int rem;
+
+	for (rem = a % b; rem != 0; rem = a % b) {
+		a = b;
+		b = rem;
+	}
+	return b;
+}
+
+static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc)
+{
+	struct list_head *head = &svc->destinations;
+	struct list_head *pos;
+	struct ip_vs_dest *dest;
+	int weight;
+	int g = 0;		/* 0 = no positive weight seen yet */
+
+	/*
+	 * Fold every positive weight into the running gcd; servers
+	 * with weight <= 0 are ignored.
+	 */
+	for (pos = head->next; pos != head; pos = pos->next) {
+		dest = list_entry(pos, struct ip_vs_dest, n_list);
+		weight = atomic_read(&dest->weight);
+		if (weight <= 0)
+			continue;
+		if (g == 0)
+			g = weight;	/* first positive weight seeds the gcd */
+		else
+			g = gcd(weight, g);
+	}
+
+	/* without any positive weight the result defaults to 1 */
+	if (g == 0)
+		return 1;
+	return g;
+}
+
+
+/*
+ *    Largest weight among the service destinations (0 if none is positive).
+ */
+static int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
+{
+	struct list_head *head = &svc->destinations;
+	struct list_head *pos;
+	struct ip_vs_dest *dest;
+	int max = 0;
+
+	for (pos = head->next; pos != head; pos = pos->next) {
+		dest = list_entry(pos, struct ip_vs_dest, n_list);
+		if (atomic_read(&dest->weight) <= max)
+			continue;
+		max = atomic_read(&dest->weight);
+	}
+	return max;
+}
+
+
+static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark = kmalloc(sizeof(*mark), GFP_ATOMIC);
+
+	if (!mark) {
+		IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 *    Start at the list head with a zero current weight, and
+	 *    cache the maximum weight and the gcd of the weights.
+	 */
+	mark->cl = &svc->destinations;
+	mark->cw = 0;
+	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->di = ip_vs_wrr_gcd_weight(svc);
+	svc->sched_data = mark;
+	return 0;
+}
+
+
+static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+{
+	/*
+	 *    Release the mark allocated by ip_vs_wrr_init_svc()
+	 */
+	kfree(svc->sched_data);
+
+	return 0;
+}
+
+
+static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+
+	mark->cl = &svc->destinations;	/* restart from the list head */
+	mark->mw = ip_vs_wrr_max_weight(svc);	/* recompute the cached maximum weight ... */
+	mark->di = ip_vs_wrr_gcd_weight(svc);	/* ... and the gcd; mark->cw is left as it is */
+	return 0;
+}
+
+
+/*
+ *    Weighted Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wrr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+
+	IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
+
+	/*
+	 * This loop is expected to terminate: mark->cw is reset to mark->mw
+	 * once it drops to zero, and some server has the maximum weight.
+	 */
+	write_lock(&svc->sched_lock);
+	while (1) {
+		if (mark->cl == &svc->destinations) {
+			/* it is at the head of the destination list */
+
+			if (mark->cl == mark->cl->next) {
+				/* no dest entry */
+				write_unlock(&svc->sched_lock);
+				return NULL;
+			}
+
+			mark->cl = svc->destinations.next;
+			mark->cw -= mark->di;	/* lower the threshold once per pass over the list */
+			if (mark->cw <= 0) {
+				mark->cw = mark->mw;
+				/*
+				 * Still zero, which means no available servers.
+				 */
+				if (mark->cw == 0) {
+					mark->cl = &svc->destinations;
+					write_unlock(&svc->sched_lock);
+					IP_VS_INFO("ip_vs_wrr_schedule(): "
+						   "no available servers\n");
+					return NULL;
+				}
+			}
+		}
+		else mark->cl = mark->cl->next;
+
+		if (mark->cl != &svc->destinations) {
+			/* not at the head of the list */
+			dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+			if (atomic_read(&dest->weight) >= mark->cw) {	/* weight reaches the current threshold */
+				write_unlock(&svc->sched_lock);
+				break;
+			}
+		}
+	}
+
+	IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d\n",
+		  NIPQUAD(dest->addr), ntohs(dest->port),
+		  atomic_read(&dest->activeconns),
+		  atomic_read(&dest->refcnt),
+		  atomic_read(&dest->weight));
+
+	return	dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
+	{0},			/* n_list, initialised in ip_vs_wrr_init() */
+	"wrr",			/* name */
+	ATOMIC_INIT(0),		/* refcnt */
+	THIS_MODULE,		/* this module */
+	ip_vs_wrr_init_svc,	/* service initializer */
+	ip_vs_wrr_done_svc,	/* service done */
+	ip_vs_wrr_update_svc,	/* service updater */
+	ip_vs_wrr_schedule,	/* select a server from the destination list */
+};
+
+static int __init ip_vs_wrr_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);	/* make n_list an empty list before registering */
+	return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
+}
+
+static void __exit ip_vs_wrr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);	/* undo the registration made in ip_vs_wrr_init() */
+}
+
+module_init(ip_vs_wrr_init);
+module_exit(ip_vs_wrr_cleanup);
+MODULE_LICENSE("GPL");
diff -urN linux-2.4.20/net/ipv4/netfilter/ip_fw_compat.c linux-2.4.20-ipvs-1.0.9/net/ipv4/netfilter/ip_fw_compat.c
--- linux-2.4.20/net/ipv4/netfilter/ip_fw_compat.c	Wed May 21 11:09:34 2003
+++ linux-2.4.20-ipvs-1.0.9/net/ipv4/netfilter/ip_fw_compat.c	Wed May 21 11:36:16 2003
@@ -47,6 +47,10 @@
 extern int __init masq_init(void);
 extern void masq_cleanup(void);
 
+/* From ip_vs_core.c */
+extern unsigned int
+check_for_ip_vs_out(struct sk_buff **skb_p, int (*okfn)(struct sk_buff *));
+
 /* They call these; we do what they want. */
 int register_firewall(int pf, struct firewall_ops *fw)
 {
@@ -172,8 +176,14 @@
 		return NF_ACCEPT;
 
 	case FW_MASQUERADE:
-		if (hooknum == NF_IP_FORWARD)
+		if (hooknum == NF_IP_FORWARD) {
+#ifdef CONFIG_IP_VS
+                        /* check if it is for ip_vs */
+                        if (check_for_ip_vs_out(pskb, okfn) == NF_STOLEN)
+                                return NF_STOLEN;
+#endif
 			return do_masquerade(pskb, out);
+                }
 		else return NF_ACCEPT;
 
 	case FW_REDIRECT:
diff -urN linux-2.4.20/net/netsyms.c linux-2.4.20-ipvs-1.0.9/net/netsyms.c
--- linux-2.4.20/net/netsyms.c	Wed May 21 11:09:34 2003
+++ linux-2.4.20-ipvs-1.0.9/net/netsyms.c	Wed May 21 11:36:16 2003
@@ -260,6 +260,7 @@
 EXPORT_SYMBOL(in_aton);
 EXPORT_SYMBOL(ip_mc_inc_group);
 EXPORT_SYMBOL(ip_mc_dec_group);
+EXPORT_SYMBOL(ip_mc_join_group);
 EXPORT_SYMBOL(ip_finish_output);
 EXPORT_SYMBOL(inet_stream_ops);
 EXPORT_SYMBOL(inet_dgram_ops);
