patch-2.0.30 linux/net/ipv4/af_inet.c

Next file: linux/net/ipv4/arp.c
Previous file: linux/net/ipv4/Makefile
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.0.29/linux/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c
@@ -50,6 +50,7 @@
  *		Alan Cox	:	Loosened bind a little.
  *		Mike McLagan	:	ADD/DEL DLCI Ioctls
  *	Willy Konynenberg	:	Transparent proxying support.
+ *		David S. Miller	:	New socket lookup architecture for ISS.
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -104,6 +105,9 @@
 #ifdef CONFIG_KERNELD
 #include <linux/kerneld.h>
 #endif
+#ifdef CONFIG_NET_RADIO
+#include <linux/wireless.h>
+#endif	/* CONFIG_NET_RADIO */
 
 #define min(a,b)	((a)<(b)?(a):(b))
 
@@ -125,236 +129,27 @@
 int (*rarp_ioctl_hook)(unsigned int,void*) = NULL;
 
 /*
- *	See if a socket number is in use.
- */
- 
-static int sk_inuse(struct proto *prot, int num)
-{
-	struct sock *sk;
-
-	for(sk = prot->sock_array[num & (SOCK_ARRAY_SIZE -1 )];
-		sk != NULL;  sk=sk->next) 
-	{
-		if (sk->num == num) 
-			return(1);
-	}
-	return(0);
-}
-
-
-/*
- *	Pick a new socket number
- */
-
-unsigned short get_new_socknum(struct proto *prot, unsigned short base)
-{
-	static int start=0;
-
-	/*
-	 * Used to cycle through the port numbers so the
-	 * chances of a confused connection drop.
-	 */
-	 
-	int i, j;
-	int best = 0;
-	int size = 32767; /* a big num. */
-	struct sock *sk;
-
-	if (base == 0) 
-		base = PROT_SOCK+1+(start & 1023);
-	if (base <= PROT_SOCK) 
-	{
-		base += PROT_SOCK+(start & 1023);
-	}
-
-	/*
-	 *	Now look through the entire array and try to find an empty ptr. 
-	 */
-	 
-	for(i=0; i < SOCK_ARRAY_SIZE; i++) 
-	{
-		j = 0;
-		sk = prot->sock_array[(i+base+1) &(SOCK_ARRAY_SIZE -1)];
-		while(sk != NULL) 
-		{
-			sk = sk->next;
-			j++;
-		}
-		if (j == 0) 
-		{
-			start =(i+1+start )&1023;
-			return(i+base+1);
-		}
-		if (j < size) 
-		{
-			best = i;
-			size = j;
-		}
-	}
-
-	/* Now make sure the one we want is not in use. */
-
-	while(sk_inuse(prot, base +best+1)) 
-	{
-		best += SOCK_ARRAY_SIZE;
-	}
-	return(best+base+1);
-}
-
-/*
- *	Add a socket into the socket tables by number.
- */
-
-void put_sock(unsigned short num, struct sock *sk)
-{
-	struct sock **skp, *tmp;
-	int mask;
-	unsigned long flags;
-	
-	if(sk->type==SOCK_PACKET)
-		return;
-
-	sk->num = num;
-	sk->next = NULL;
-	num = num &(SOCK_ARRAY_SIZE -1);
-
-	/* 
-	 *	We can't have an interrupt re-enter here. 
-	 */
-	 
-	save_flags(flags);
-	cli();
-
-	sk->prot->inuse += 1;
-	if (sk->prot->highestinuse < sk->prot->inuse)
-		sk->prot->highestinuse = sk->prot->inuse;
-
-	if (sk->prot->sock_array[num] == NULL) 
-	{
-		sk->prot->sock_array[num] = sk;
-		restore_flags(flags);
-		return;
-	}
-	
-	restore_flags(flags);
-	for(mask = 0xff000000; mask != 0xffffffff; mask = (mask >> 8) | mask) 
-	{
-		if ((mask & sk->rcv_saddr) &&
-		    (mask & sk->rcv_saddr) != (mask & 0xffffffff)) 
-		{
-			mask = mask << 8;
-			break;
-		}
-	}
-
-	/*
-	 * add the socket to the sock_array[]..
-	 */
-	skp = sk->prot->sock_array + num;
-	cli();
-	while ((tmp = *skp) != NULL) {
-		if (!(tmp->rcv_saddr & mask))
-			break;
-		skp = &tmp->next;
-	}
-	sk->next = tmp;
-	*skp = sk;
-	sti();
-}
-
-/*
- *	Remove a socket from the socket tables.
- */
-
-static void remove_sock(struct sock *sk1)
-{
-	struct sock **p;
-	unsigned long flags;
-
-	if (sk1->type==SOCK_PACKET)
-		return;
-		
-	if (!sk1->prot) 
-	{
-		NETDEBUG(printk("sock.c: remove_sock: sk1->prot == NULL\n"));
-		return;
-	}
-
-	/* We can't have this changing out from under us. */
-	save_flags(flags);
-	cli();
-	
-	p=&(sk1->prot->sock_array[sk1->num & (SOCK_ARRAY_SIZE -1)]);
-	
-	while(*p!=NULL)
-	{
-		if(*p==sk1)
-		{
-			sk1->prot->inuse--;
-			*p=sk1->next;
-			break;
-		}
-		p=&((*p)->next);
-	}
-	restore_flags(flags);
-}
-
-/*
  *	Destroy an AF_INET socket
  */
  
-void destroy_sock(struct sock *sk)
+static __inline__ void kill_sk_queues(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	lock_sock(sk);			/* just to be safe. */
-
-  	remove_sock(sk);
-  
-  	/*
-  	 *	Now we can no longer get new packets or once the
-  	 *	timers are killed, send them.
-  	 */
-  	 
-  	delete_timer(sk);
-	del_timer(&sk->delack_timer);
-	del_timer(&sk->retransmit_timer);
-	
-	/*
-	 *	Drain any partial frames
-	 */
-	 
-	while ((skb = tcp_dequeue_partial(sk)) != NULL) 
-	{
-		IS_SKB(skb);
+	while((skb = tcp_dequeue_partial(sk)) != NULL)
 		kfree_skb(skb, FREE_WRITE);
-	}
 
-	/*
-	 *	Cleanup up the write buffer. 
-	 */
-	 
-  	while((skb = skb_dequeue(&sk->write_queue)) != NULL) {
-		IS_SKB(skb);
+	/* Next, the write queue. */
+	while((skb = skb_dequeue(&sk->write_queue)) != NULL)
 		kfree_skb(skb, FREE_WRITE);
-  	}
-  	
-  	/*
-  	 *	Clean up the read buffer.
-  	 */
 
-	while((skb=skb_dequeue(&sk->receive_queue))!=NULL) 
-	{
-		/*
-		 * This will take care of closing sockets that were
+	/* Then, the receive queue. */
+	while((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
+		/* This will take care of closing sockets that were
 		 * listening and didn't accept everything.
 		 */
-		if (skb->sk != NULL && skb->sk != sk) 
-		{
-			IS_SKB(skb);
+		if (skb->sk != NULL && skb->sk != sk)
 			skb->sk->prot->close(skb->sk, 0);
-		}
-		IS_SKB(skb);
 		kfree_skb(skb, FREE_READ);
 	}
 
@@ -386,16 +181,61 @@
 	sk->send_next = NULL;
 	sti();
 
+  	/* Finally, the backlog. */
+  	while((skb=skb_dequeue(&sk->back_log)) != NULL) {
+		/* skb->sk = NULL; */
+		kfree_skb(skb, FREE_READ);
+	}
+}
+
+static __inline__ void kill_sk_now(struct sock *sk)
+{
+	/* Immediate teardown: off the prot sklist first, then unhash. */
+	del_from_prot_sklist(sk);
+
+	/* unhash may be NULL. Gross, but needed for SOCK_PACKET -DaveM */
+	if(sk->prot->unhash)
+		sk->prot->unhash(sk);
+
+	if(sk->opt)
+		kfree(sk->opt);
+	ip_rt_put(sk->ip_route_cache);
+	sk_free(sk);
+}
+
+static __inline__ void kill_sk_later(struct sock *sk)
+{
+	/*
+	 * Deferred destruction: destroy_sock() takes this path when
+	 * rmem_alloc or wmem_alloc is still non-zero.  This is a
+	 * normal occurrence, e.g. an ack has just been sent or a skb
+	 * is still in the device queues [PR].  Mark the socket for
+	 * destruction, release it and arm the TIME_DESTROY timer.
+	 */
+		  
+	NETDEBUG(printk("Socket destroy delayed (r=%d w=%d)\n",
+			sk->rmem_alloc, sk->wmem_alloc));
+
+	sk->destroy = 1;
+	sk->ack_backlog = 0;
+	release_sock(sk);
+	reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
+}
+
+void destroy_sock(struct sock *sk)
+{
+	lock_sock(sk);			/* just to be safe. */
+
   	/*
-  	 *	Now the backlog. 
+  	 *	Now we can no longer get new packets or once the
+  	 *	timers are killed, send them.
   	 */
   	 
-  	while((skb=skb_dequeue(&sk->back_log))!=NULL) 
-  	{
-		/* this should [almost] never happen. */
-		skb->sk = NULL;
-		kfree_skb(skb, FREE_READ);
-	}
+  	delete_timer(sk);
+	del_timer(&sk->delack_timer);
+	del_timer(&sk->retransmit_timer);
+	
+	kill_sk_queues(sk);
 
 	/*
 	 *	Now if it has a half accepted/ closed socket. 
@@ -414,28 +254,9 @@
 	 */
 
 	if (sk->rmem_alloc == 0 && sk->wmem_alloc == 0) 
-	{
-		if(sk->opt)
-			kfree(sk->opt);
-		ip_rt_put(sk->ip_route_cache);
-		/*
-		 *	This one is pure paranoia. I'll take it out
-		 *	later once I know the bug is buried.
-		 */
-		tcp_cache_zap();
-		sk_free(sk);
-	} 
-	else 
-	{
-		/* this should never happen. */
-		/* actually it can if an ack has just been sent. */
-		NETDEBUG(printk("Socket destroy delayed (r=%d w=%d)\n",
-			sk->rmem_alloc, sk->wmem_alloc));
-		sk->destroy = 1;
-		sk->ack_backlog = 0;
-		release_sock(sk);
-		reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
-  	}
+		kill_sk_now(sk);
+	else
+		kill_sk_later(sk);
 }
 
 /*
@@ -508,15 +329,13 @@
 static int inet_autobind(struct sock *sk)
 {
 	/* We may need to bind the socket. */
-	if (sk->num == 0) 
-	{
-		sk->num = get_new_socknum(sk->prot, 0);
+	if (sk->num == 0) {
+		sk->num = sk->prot->good_socknum();
 		if (sk->num == 0) 
 			return(-EAGAIN);
-		udp_cache_zap();
-		tcp_cache_zap();
-		put_sock(sk->num, sk);
 		sk->dummy_th.source = ntohs(sk->num);
+		sk->prot->rehash(sk);
+		add_to_prot_sklist(sk);
 	}
 	return 0;
 }
@@ -529,7 +348,7 @@
 {
 	struct sock *sk = (struct sock *) sock->data;
 
-	if(inet_autobind(sk)!=0)
+	if(inet_autobind(sk) != 0)
 		return -EAGAIN;
 
 	/* We might as well re use these. */ 
@@ -538,16 +357,17 @@
 	 * somewhere. We might as well truncate it to what everybody
 	 * else does..
 	 * Now truncate to 128 not 5. 
+	 *
+	 * This was wrong, truncate both cases to SOMAXCONN. -DaveM
 	 */
-	if ((unsigned) backlog == 0)	/* BSDism */
-		backlog = 1;
-	if ((unsigned) backlog > SOMAXCONN)
+	if (((unsigned) backlog == 0) || ((unsigned) backlog > SOMAXCONN))
 		backlog = SOMAXCONN;
 	sk->max_ack_backlog = backlog;
-	if (sk->state != TCP_LISTEN)
-	{
+	if (sk->state != TCP_LISTEN) {
 		sk->ack_backlog = 0;
 		sk->state = TCP_LISTEN;
+		sk->prot->rehash(sk);
+		add_to_prot_sklist(sk);
 	}
 	return(0);
 }
@@ -592,76 +412,41 @@
 {
 	struct sock *sk;
 	struct proto *prot;
-	int err;
 
 	sk = sk_alloc(GFP_KERNEL);
 	if (sk == NULL) 
-		return(-ENOBUFS);
+		goto do_oom;
+#if 0 /* sk_alloc() does this for us. -DaveM */
 	memset(sk,0,sizeof(*sk));	/* Efficient way to set most fields to zero */
+#endif
 	/*
 	 *	Note for tcp that also wiped the dummy_th block for us.
 	 */
-	switch(sock->type) 
-	{
-		case SOCK_STREAM:
-		case SOCK_SEQPACKET:
-			if (protocol && protocol != IPPROTO_TCP) 
-			{
-				sk_free(sk);
-				return(-EPROTONOSUPPORT);
-			}
-			protocol = IPPROTO_TCP;
-			sk->no_check = TCP_NO_CHECK;
-			prot = &tcp_prot;
-			break;
-
-		case SOCK_DGRAM:
-			if (protocol && protocol != IPPROTO_UDP) 
-			{
-				sk_free(sk);
-				return(-EPROTONOSUPPORT);
-			}
-			protocol = IPPROTO_UDP;
-			sk->no_check = UDP_NO_CHECK;
-			prot=&udp_prot;
-			break;
-      
-		case SOCK_RAW:
-			if (!suser()) 
-			{
-				sk_free(sk);
-				return(-EPERM);
-			}
-			if (!protocol) 
-			{
-				sk_free(sk);
-				return(-EPROTONOSUPPORT);
-			}
-			prot = &raw_prot;
-			sk->reuse = 1;
-			sk->num = protocol;
-			break;
-
-		case SOCK_PACKET:
-			if (!suser()) 
-			{
-				sk_free(sk);
-				return(-EPERM);
-			}
-			if (!protocol) 
-			{
-				sk_free(sk);
-				return(-EPROTONOSUPPORT);
-			}
-			prot = &packet_prot;
-			sk->reuse = 1;
-			sk->num = protocol;
-			break;
-
-		default:
-			sk_free(sk);
-			return(-ESOCKTNOSUPPORT);
+	if(sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) {
+		if (protocol && protocol != IPPROTO_TCP) 
+			goto free_and_noproto;
+		protocol = IPPROTO_TCP;
+		sk->no_check = TCP_NO_CHECK;
+		prot = &tcp_prot;
+	} else if(sock->type == SOCK_DGRAM) {
+		if (protocol && protocol != IPPROTO_UDP) 
+			goto free_and_noproto;
+		protocol = IPPROTO_UDP;
+		sk->no_check = UDP_NO_CHECK;
+		prot=&udp_prot;
+	} else if(sock->type == SOCK_RAW || sock->type == SOCK_PACKET) {
+		/* Superuser only, and the caller must name a protocol. */
+		if (!suser()) 
+			goto free_and_badperm;
+		if (!protocol) 
+			goto free_and_noproto;
+		prot = (sock->type == SOCK_RAW) ? &raw_prot : &packet_prot;
+		sk->reuse = 1;
+		sk->num = protocol;
+	} else {
+		goto free_and_badtype;
 	}
+
 	sk->socket = sock;
 #ifdef CONFIG_TCP_NAGLE_OFF
 	sk->nonagle = 1;
@@ -717,28 +502,43 @@
 	sk->write_space = def_callback3;
 	sk->error_report = def_callback1;
 
-	if (sk->num) 
-	{
-	/*
-	 * It assumes that any protocol which allows
-	 * the user to assign a number at socket
-	 * creation time automatically
-	 * shares.
-	 */
-		put_sock(sk->num, sk);
+	if (sk->num) {
+		/* It assumes that any protocol which allows
+		 * the user to assign a number at socket
+		 * creation time automatically
+		 * shares.
+		 */
 		sk->dummy_th.source = ntohs(sk->num);
+
+		/* This is gross, but needed for SOCK_PACKET -DaveM */
+		if(sk->prot->hash)
+			sk->prot->hash(sk);
+		add_to_prot_sklist(sk);
 	}
 
-	if (sk->prot->init) 
-	{
-		err = sk->prot->init(sk);
-		if (err != 0) 
-		{
+	if (sk->prot->init) {
+		int err = sk->prot->init(sk);
+		if (err != 0) {
 			destroy_sock(sk);
 			return(err);
 		}
 	}
 	return(0);
+
+free_and_badtype:
+	sk_free(sk);
+	return -ESOCKTNOSUPPORT;
+
+free_and_badperm:
+	sk_free(sk);
+	return -EPERM;
+
+free_and_noproto:
+	sk_free(sk);
+	return -EPROTONOSUPPORT;
+
+do_oom:
+	return -ENOBUFS;
 }
 
 
@@ -759,191 +559,103 @@
  
 static int inet_release(struct socket *sock, struct socket *peer)
 {
-	unsigned long timeout;
 	struct sock *sk = (struct sock *) sock->data;
 
-	if (sk == NULL) 
-		return(0);
+	if (sk) {
+		unsigned long timeout;
 
-	sk->state_change(sk);
+		sk->state_change(sk);
 
-	/* Start closing the connection.  This may take a while. */
+		/* Start closing the connection.  This may take a while. */
 
 #ifdef CONFIG_IP_MULTICAST
-	/* Applications forget to leave groups before exiting */
-	ip_mc_drop_socket(sk);
+		/* Applications forget to leave groups before exiting */
+		ip_mc_drop_socket(sk);
 #endif
-	/*
-	 * If linger is set, we don't return until the close
-	 * is complete.  Otherwise we return immediately. The
-	 * actually closing is done the same either way.
-	 *
-	 * If the close is due to the process exiting, we never
-	 * linger..
-	 */
-	timeout = 0;
-	if (sk->linger) {
-		timeout = ~0UL;
-		if (!sk->lingertime)
-			timeout = jiffies + HZ*sk->lingertime;
-	}
-	if (current->flags & PF_EXITING)
+		/*
+		 * If linger is set, we don't return until the close
+		 * is complete.  Otherwise we return immediately. The
+		 * actual closing is done the same either way.
+		 *
+		 * If the close is due to the process exiting, we never
+		 * linger..
+		 */
 		timeout = 0;
+		if (sk->linger && !(current->flags & PF_EXITING)) {
+			if (sk->lingertime)
+				timeout = jiffies + HZ*sk->lingertime;
+		}
 
-	sock->data = NULL;
-	sk->socket = NULL;
+		sock->data = NULL;
+		sk->socket = NULL;
 
-	sk->prot->close(sk, timeout);
+		sk->prot->close(sk, timeout);
+	}
 	return(0);
 }
 
 
-static int inet_bind(struct socket *sock, struct sockaddr *uaddr,
-	       int addr_len)
+static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
-	struct sock *sk=(struct sock *)sock->data, *sk2;
-	unsigned short snum = 0 /* Stoopid compiler.. this IS ok */;
+	struct sock *sk=(struct sock *)sock->data;
+	unsigned short snum;
 	int chk_addr_ret;
 
-	/*
-	 *	If the socket has its own bind function then use it.
-	 */
-	 
+	/* If the socket has its own bind function then use it. (RAW AND PACKET) */
 	if(sk->prot->bind)
-		return sk->prot->bind(sk,uaddr, addr_len);
+		return sk->prot->bind(sk, uaddr, addr_len);
 		
-	/* check this error. */
-	if (sk->state != TCP_CLOSE)
-		return(-EINVAL);
-	if(addr_len<sizeof(struct sockaddr_in))
+	/* Check these errors (active socket, bad address length, double bind). */
+	if ((sk->state != TCP_CLOSE)			||
+	    (addr_len < sizeof(struct sockaddr_in))	||
+	    (sk->num != 0))
 		return -EINVAL;
-		
-	if(sock->type != SOCK_RAW)
-	{
-		if (sk->num != 0) 
-			return(-EINVAL);
 
-		snum = ntohs(addr->sin_port);
-		
+	snum = ntohs(addr->sin_port);
 #ifdef CONFIG_IP_MASQUERADE
-		/*
-		 *	The kernel masquerader needs some ports
-		 */		
-		if(snum>=PORT_MASQ_BEGIN && snum<=PORT_MASQ_END)
-			return -EADDRINUSE;
+	/* The kernel masquerader needs some ports. */		
+	if(snum>=PORT_MASQ_BEGIN && snum<=PORT_MASQ_END)
+		return -EADDRINUSE;
 #endif		 
-
-		if (snum == 0) 
-			snum = get_new_socknum(sk->prot, 0);
-		if (snum < PROT_SOCK && !suser()) 
-			return(-EACCES);
-	}
+	if (snum == 0) 
+		snum = sk->prot->good_socknum();
+	if (snum < PROT_SOCK && !suser()) 
+		return(-EACCES);
 	
 	chk_addr_ret = ip_chk_addr(addr->sin_addr.s_addr);
+	if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR &&
+	    chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) {
 #ifdef CONFIG_IP_TRANSPARENT_PROXY
-	/*
-	 * Superuser may bind to any address to allow transparent proxying.
-	 */
-	if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST && !suser())
-#else
-	if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST)
+		/* Superuser may bind to any address to allow transparent proxying. */
+		if(!suser())
 #endif
-		return(-EADDRNOTAVAIL);	/* Source address MUST be ours! */
-
-#ifndef CONFIG_IP_TRANSPARENT_PROXY
-	/*
-	 * Am I just thick or is this test really always true after the one
-	 * above?  Just taking the test out appears to be the easiest way to
-	 * make binds to remote addresses for transparent proxying work.
-	 */
-	if (chk_addr_ret || addr->sin_addr.s_addr == 0)
-	{
-#endif
-		/*
-		 *      We keep a pair of addresses. rcv_saddr is the one
-		 *      used by get_sock_*(), and saddr is used for transmit.
-		 *
-		 *      In the BSD API these are the same except where it
-		 *      would be illegal to use them (multicast/broadcast) in
-		 *      which case the sending device address is used.
-		 */
-		sk->rcv_saddr = addr->sin_addr.s_addr;
-		if(chk_addr_ret==IS_MULTICAST||chk_addr_ret==IS_BROADCAST)
-			sk->saddr = 0;  /* Use device */
-		else
-			sk->saddr = addr->sin_addr.s_addr;
-#ifndef CONFIG_IP_TRANSPARENT_PROXY
+			return(-EADDRNOTAVAIL);	/* Source address MUST be ours! */
 	}
-#endif
-	if(sock->type != SOCK_RAW)
-	{
-		/* Make sure we are allowed to bind here. */
-		cli();
-		for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)];
-					sk2 != NULL; sk2 = sk2->next) 
-		{
-			/*
-			 *	Hash collision or real match ?
-			 */
-			 
-			if (sk2->num != snum) 
-				continue;
-				
-			/*
-			 *	Either bind on the port is wildcard means
-			 *	they will overlap and thus be in error
-			 */			
-			 
-			if (!sk2->rcv_saddr || !sk->rcv_saddr)
-			{
-				/*
-				 *	Allow only if both are setting reuse.
-				 */
-				if(sk2->reuse && sk->reuse && sk2->state!=TCP_LISTEN)
-					continue;
-				sti();
-				return(-EADDRINUSE);
-			}
 
-			/*
-			 *	Two binds match ?
-			 */
-
-			if (sk2->rcv_saddr != sk->rcv_saddr) 
-				continue;
-			/*
-			 *	Reusable port ?
-			 */
-
-			if (!sk->reuse)
-			{
-				sti();
-				return(-EADDRINUSE);
-			}
-			
-			/*
-			 *	Reuse ?
-			 */
-			 
-			if (!sk2->reuse || sk2->state==TCP_LISTEN)
-			{
-				sti();
-				return(-EADDRINUSE);
-			}
-		}
-		sti();
+	/*
+	 *      We keep a pair of addresses. rcv_saddr is the one
+	 *      used by hash lookups, and saddr is used for transmit.
+	 *
+	 *      In the BSD API these are the same except where it
+	 *      would be illegal to use them (multicast/broadcast) in
+	 *      which case the sending device address is used.
+	 */
+	sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
+	if(chk_addr_ret == IS_MULTICAST || chk_addr_ret == IS_BROADCAST)
+		sk->saddr = 0;  /* Use device */
+
+	/* Make sure we are allowed to bind here. */
+	if(sk->prot->verify_bind(sk, snum))
+		return -EADDRINUSE;
+
+	sk->num = snum;
+	sk->dummy_th.source = ntohs(sk->num);
+	sk->daddr = 0;
+	sk->dummy_th.dest = 0;
+	sk->prot->rehash(sk);
+	add_to_prot_sklist(sk);
 
-		remove_sock(sk);
-		if(sock->type==SOCK_DGRAM)
-			udp_cache_zap();
-		if(sock->type==SOCK_STREAM)
-			tcp_cache_zap();
-		put_sock(snum, sk);
-		sk->dummy_th.source = ntohs(sk->num);
-		sk->daddr = 0;
-		sk->dummy_th.dest = 0;
-	}
 	ip_rt_put(sk->ip_route_cache);
 	sk->ip_route_cache=NULL;
 	return(0);
@@ -961,23 +673,21 @@
 	int err;
 	sock->conn = NULL;
 
-	if (sock->state == SS_CONNECTING && tcp_connected(sk->state))
-	{
+	if (sock->state == SS_CONNECTING && tcp_connected(sk->state)) {
 		sock->state = SS_CONNECTED;
 		/* Connection completing after a connect/EINPROGRESS/select/connect */
 		return 0;	/* Rock and roll */
 	}
 
-	if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK))
-	{
+	if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK)) {
 		if(sk->err!=0)
 			return sock_error(sk);
 		return -EALREADY;	/* Connecting is currently in progress */
   	}
-	if (sock->state != SS_CONNECTING) 
-	{
+
+	if (sock->state != SS_CONNECTING) {
 		/* We may need to bind the socket. */
-		if(inet_autobind(sk)!=0)
+		if(inet_autobind(sk) != 0)
 			return(-EAGAIN);
 		if (sk->prot->connect == NULL) 
 			return(-EOPNOTSUPP);
@@ -987,8 +697,7 @@
   		sock->state = SS_CONNECTING;
 	}
 	
-	if (sk->state > TCP_FIN_WAIT2 && sock->state==SS_CONNECTING)
-	{
+	if (sk->state > TCP_FIN_WAIT2 && sock->state==SS_CONNECTING) {
 		sock->state=SS_UNCONNECTED;
 		return sock_error(sk);
 	}
@@ -997,18 +706,15 @@
 	  	return(-EINPROGRESS);
 
 	cli(); /* avoid the race condition */
-	while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) 
-	{
+	while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
 		interruptible_sleep_on(sk->sleep);
-		if (current->signal & ~current->blocked) 
-		{
+		if (current->signal & ~current->blocked) {
 			sti();
 			return(-ERESTARTSYS);
 		}
 		/* This fixes a nasty in the tcp/ip code. There is a hideous hassle with
 		   icmp error packets wanting to close a tcp or udp socket. */
-		if(sk->err && sk->protocol == IPPROTO_TCP)
-		{
+		if(sk->err && sk->protocol == IPPROTO_TCP) {
 			sock->state = SS_UNCONNECTED;
 			sti();
 			return sock_error(sk); /* set by tcp_err() */
@@ -1017,8 +723,7 @@
 	sti();
 	sock->state = SS_CONNECTED;
 
-	if (sk->state != TCP_ESTABLISHED && sk->err) 
-	{
+	if (sk->state != TCP_ESTABLISHED && sk->err) {
 		sock->state = SS_UNCONNECTED;
 		return sock_error(sk);
 	}
@@ -1049,8 +754,7 @@
 	 *	its own when it accepts one.
 	 */
 	 
-	if (newsock->data)
-	{
+	if (newsock->data) {
 	  	struct sock *sk=(struct sock *)newsock->data;
 	  	newsock->data=NULL;
 	  	destroy_sock(sk);
@@ -1063,18 +767,13 @@
 	 *	Restore the state if we have been interrupted, and then returned. 
 	 */
 	 
-	if (sk1->pair != NULL ) 
-	{
+	if (sk1->pair != NULL) {
 		sk2 = sk1->pair;
 		sk1->pair = NULL;
-	} 
-	else
-	{
+	} else {
 		sk2 = sk1->prot->accept(sk1,flags);
-		if (sk2 == NULL) 
-		{
+		if (sk2 == NULL)
 			return sock_error(sk1);
-		}
 	}
 	newsock->data = (void *)sk2;
 	sk2->sleep = newsock->wait;
@@ -1084,11 +783,9 @@
 		return(0);
 
 	cli(); /* avoid the race. */
-	while(sk2->state == TCP_SYN_RECV) 
-	{
+	while(sk2->state == TCP_SYN_RECV) {
 		interruptible_sleep_on(sk2->sleep);
-		if (current->signal & ~current->blocked) 
-		{
+		if (current->signal & ~current->blocked) {
 			sti();
 			sk1->pair = sk2;
 			sk2->sleep = NULL;
@@ -1099,15 +796,14 @@
 	}
 	sti();
 
-	if (sk2->state != TCP_ESTABLISHED && sk2->err > 0) 
-	{
+	if (sk2->state != TCP_ESTABLISHED && sk2->err > 0) {
 		err = sock_error(sk2);
 		destroy_sock(sk2);
 		newsock->data = NULL;
 		return err;
 	}
-	if (sk2->state == TCP_CLOSE)
-	{
+
+	if (sk2->state == TCP_CLOSE) {
 		destroy_sock(sk2);
 		newsock->data=NULL;
 		return -ECONNABORTED;
@@ -1129,15 +825,12 @@
   
 	sin->sin_family = AF_INET;
 	sk = (struct sock *) sock->data;
-	if (peer) 
-	{
+	if (peer) {
 		if (!tcp_connected(sk->state)) 
 			return(-ENOTCONN);
 		sin->sin_port = sk->dummy_th.dest;
 		sin->sin_addr.s_addr = sk->daddr;
-	} 
-	else 
-	{
+	} else {
 		__u32 addr = sk->rcv_saddr;
 		if (!addr) {
 			addr = sk->saddr;
@@ -1162,9 +855,11 @@
 		return(-EOPNOTSUPP);
 	if(sk->err)
 		return sock_error(sk);
+
 	/* We may need to bind the socket. */
-	if(inet_autobind(sk)!=0)
+	if(inet_autobind(sk) != 0)
 		return(-EAGAIN);
+
 	return(sk->prot->recvmsg(sk, ubuf, size, noblock, flags,addr_len));
 }
 
@@ -1173,8 +868,7 @@
 	   int flags)
 {
 	struct sock *sk = (struct sock *) sock->data;
-	if (sk->shutdown & SEND_SHUTDOWN) 
-	{
+	if (sk->shutdown & SEND_SHUTDOWN) {
 		send_sig(SIGPIPE, current, 1);
 		return(-EPIPE);
 	}
@@ -1182,9 +876,11 @@
 		return(-EOPNOTSUPP);
 	if(sk->err)
 		return sock_error(sk);
+
 	/* We may need to bind the socket. */
-	if(inet_autobind(sk)!=0)
+	if(inet_autobind(sk) != 0)
 		return -EAGAIN;
+
 	return(sk->prot->sendmsg(sk, msg, size, noblock, flags));
 			   
 }
@@ -1218,9 +914,8 @@
 {
 	struct sock *sk=(struct sock *) sock->data;
 	if (sk->prot->select == NULL) 
-	{
 		return(0);
-	}
+
 	return(sk->prot->select(sk, sel_type, wait));
 }
 
@@ -1346,6 +1041,12 @@
 			   (cmd <= (SIOCDEVPRIVATE + 15)))
 				return(dev_ioctl(cmd,(void *) arg));
 
+#ifdef CONFIG_NET_RADIO
+			if((cmd >= SIOCIWFIRST) &&
+			   (cmd <= SIOCIWLAST))
+				return(dev_ioctl(cmd,(void *) arg));
+#endif	/* CONFIG_NET_RADIO */
+
 			if (sk->prot->ioctl==NULL) 
 				return(-EINVAL);
 			return(sk->prot->ioctl(sk, cmd, arg));
@@ -1354,222 +1055,6 @@
 	return(0);
 }
 
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-/*
- * Some routines for the for loop in get_sock which sometimes needs to walk
- * two linked lists in sequence.  Could use macros as well.
- * Does anyone know a nicer way to code this?
- */
-static __inline__ struct sock *secondlist(unsigned short hpnum, struct sock *s,
-				int *pfirstpass, struct proto *prot)
-{
-	if (hpnum && s == NULL && (*pfirstpass)-- )
-		return prot->sock_array[hpnum & (SOCK_ARRAY_SIZE - 1)];
-	else
-		return s;
-}
-static __inline__ struct sock *get_sock_loop_init(unsigned short hnum,
-			unsigned short hpnum, struct sock *s,
-			int *pfirstpass, struct proto *prot)
-{
-	s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)];
-	return secondlist(hpnum, s, pfirstpass, prot);
-}
-static __inline__ struct sock *get_sock_loop_next(unsigned short hnum,
-			unsigned short hpnum, struct sock *s,
-			int *pfirstpass, struct proto *prot)
-{
-	s = s->next;
-	return secondlist(hpnum, s, pfirstpass, prot);
-}
-#endif
-
-/*
- * This routine must find a socket given a TCP or UDP header.
- * Everything is assumed to be in net order.
- *
- * We give priority to more closely bound ports: if some socket
- * is bound to a particular foreign address, it will get the packet
- * rather than somebody listening to any address..
- */
-
-struct sock *get_sock(struct proto *prot, unsigned short num,
-				unsigned long raddr,
-				unsigned short rnum, unsigned long laddr,
-				unsigned long paddr, unsigned short pnum)
-{
-	struct sock *s = 0;
-	struct sock *result = NULL;
-	int badness = -1;
-	unsigned short hnum;
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-	unsigned short hpnum;
-	int firstpass = 1;
-#endif 
-
-	hnum = ntohs(num);
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-	hpnum = ntohs(pnum);
-#endif 
-
-	/*
-	 * SOCK_ARRAY_SIZE must be a power of two.  This will work better
-	 * than a prime unless 3 or more sockets end up using the same
-	 * array entry.  This should not be a problem because most
-	 * well known sockets don't overlap that much, and for
-	 * the other ones, we can just be careful about picking our
-	 * socket number when we choose an arbitrary one.
-	 */
-
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-	for(s = get_sock_loop_init(hnum, hpnum, s, &firstpass, prot);
-		s != NULL;
-		s = get_sock_loop_next(hnum, hpnum, s, &firstpass, prot))
-#else
-	for(s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)];
-			s != NULL; s = s->next) 
-#endif
-	{
-		int score = 0;
-
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-		/* accept the addressed port or the redirect (proxy) port */
-		if (s->num != hnum && (hpnum == 0 || s->num != hpnum))
-#else
-		if (s->num != hnum) 
-#endif
-			continue;
-
-		if(s->dead && (s->state == TCP_CLOSE))
-			continue;
-		/* local address matches? */
-		if (s->rcv_saddr) {
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-			/*
-			 * If this is redirected traffic, it must either
-			 * match on the redirected port/ip-address or on
-			 * the actual destination, not on a mixture.
-			 * There must be a simpler way to express this...
-			 */
-			if (hpnum
-			    ? ((s->num != hpnum || s->rcv_saddr != paddr)
-			      && (s->num != hnum || s->rcv_saddr != laddr))
-			    : (s->rcv_saddr != laddr))
-#else
-			if (s->rcv_saddr != laddr)
-#endif
-				continue;
-			score++;
-		}
-		/* remote address matches? */
-		if (s->daddr) {
-			if (s->daddr != raddr)
-				continue;
-			score++;
-		}
-		/* remote port matches? */
-		if (s->dummy_th.dest) {
-			if (s->dummy_th.dest != rnum)
-				continue;
-			score++;
-		}
-		/* perfect match? */
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-		if (score == 3 && s->num == hnum)
-#else
-		if (score == 3)
-#endif
-			return s;
-		/* no, check if this is the best so far.. */
-		if (score <= badness)
-			continue;
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-		/* don't accept near matches on the actual destination
-		 * port with IN_ADDR_ANY for redirected traffic, but do
-		 * allow explicit remote address listens.  (disputable)
-		 */
-		if (hpnum && s->num != hpnum && !s->rcv_saddr)
-			continue;
-#endif
-		result = s;
-		badness = score;
-  	}
-  	return result;
-}
-
-/*
- *	Deliver a datagram to raw sockets.
- */
- 
-struct sock *get_sock_raw(struct sock *sk, 
-				unsigned short num,
-				unsigned long raddr,
-				unsigned long laddr)
-{
-	struct sock *s;
-
-	s=sk;
-
-	for(; s != NULL; s = s->next) 
-	{
-		if (s->num != num) 
-			continue;
-		if(s->dead && (s->state == TCP_CLOSE))
-			continue;
-		if(s->daddr && s->daddr!=raddr)
-			continue;
- 		if(s->rcv_saddr && s->rcv_saddr != laddr)
-			continue;
-		return(s);
-  	}
-  	return(NULL);
-}
-
-#ifdef CONFIG_IP_MULTICAST
-/*
- *	Deliver a datagram to broadcast/multicast sockets.
- */
- 
-struct sock *get_sock_mcast(struct sock *sk, 
-				unsigned short num,
-				unsigned long raddr,
-				unsigned short rnum, unsigned long laddr)
-{
-	struct sock *s;
-	unsigned short hnum;
-
-	hnum = ntohs(num);
-
-	/*
-	 * SOCK_ARRAY_SIZE must be a power of two.  This will work better
-	 * than a prime unless 3 or more sockets end up using the same
-	 * array entry.  This should not be a problem because most
-	 * well known sockets don't overlap that much, and for
-	 * the other ones, we can just be careful about picking our
-	 * socket number when we choose an arbitrary one.
-	 */
-	
-	s=sk;
-
-	for(; s != NULL; s = s->next) 
-	{
-		if (s->num != hnum) 
-			continue;
-		if(s->dead && (s->state == TCP_CLOSE))
-			continue;
-		if(s->daddr && s->daddr!=raddr)
-			continue;
-		if (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) 
-			continue;
- 		if(s->rcv_saddr  && s->rcv_saddr != laddr)
-			continue;
-		return(s);
-  	}
-  	return(NULL);
-}
-
-#endif
-
 static struct proto_ops inet_proto_ops = {
 	AF_INET,
 
@@ -1601,8 +1086,6 @@
 void inet_proto_init(struct net_proto *pro)
 {
 	struct inet_protocol *p;
-	int i;
-
 
 	printk("Swansea University Computer Society TCP/IP for NET3.034\n");
 
@@ -1618,19 +1101,6 @@
 	 *	Add all the protocols. 
 	 */
 	 
-	for(i = 0; i < SOCK_ARRAY_SIZE; i++) 
-	{
-		tcp_prot.sock_array[i] = NULL;
-		udp_prot.sock_array[i] = NULL;
-		raw_prot.sock_array[i] = NULL;
-  	}
-	tcp_prot.inuse = 0;
-	tcp_prot.highestinuse = 0;
-	udp_prot.inuse = 0;
-	udp_prot.highestinuse = 0;
-	raw_prot.inuse = 0;
-	raw_prot.highestinuse = 0;
-
 	printk("IP Protocols: ");
 	for(p = inet_protocol_base; p != NULL;) 
 	{

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov