Index: sys/sys/callout.h =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/sys/callout.h,v retrieving revision 1.17 diff -u -r1.17 callout.h --- sys/sys/callout.h 4 Feb 2003 01:21:06 -0000 1.17 +++ sys/sys/callout.h 17 Jul 2003 07:42:27 -0000 @@ -83,6 +83,7 @@ #define CALLOUT_PENDING 0x0002 /* callout is on the queue */ #define CALLOUT_FIRED 0x0004 /* callout has fired */ +#define CALLOUT_INVOKING 0x0008 /* callout function is being invoked */ #define CALLOUT_INITIALIZER_SETFUNC(func, arg) \ { { NULL, NULL }, func, arg, 0, 0 } @@ -100,6 +101,8 @@ #define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) #define callout_expired(c) ((c)->c_flags & CALLOUT_FIRED) +#define callout_invoking(c) ((c)->c_flags & CALLOUT_INVOKING) +#define callout_ack(c) ((c)->c_flags &= ~CALLOUT_INVOKING) #endif /* _KERNEL */ #endif /* !_SYS_CALLOUT_H_ */ Index: sys/kern/kern_timeout.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/kern/kern_timeout.c,v retrieving revision 1.7 diff -u -r1.7 kern_timeout.c --- sys/kern/kern_timeout.c 14 Jul 2003 14:59:01 -0000 1.7 +++ sys/kern/kern_timeout.c 17 Jul 2003 07:43:20 -0000 @@ -258,7 +258,7 @@ /* Initialize the time here, it won't change. */ old_time = c->c_time; c->c_time = to_ticks + hardclock_ticks; - c->c_flags &= ~CALLOUT_FIRED; + c->c_flags &= ~(CALLOUT_FIRED|CALLOUT_INVOKING); c->c_func = func; c->c_arg = arg; @@ -299,7 +299,7 @@ /* Initialize the time here, it won't change. 
*/ old_time = c->c_time; c->c_time = to_ticks + hardclock_ticks; - c->c_flags &= ~CALLOUT_FIRED; + c->c_flags &= ~(CALLOUT_FIRED|CALLOUT_INVOKING); /* * If this timeout is already scheduled and now is moved @@ -334,7 +334,7 @@ if (callout_pending(c)) CIRCQ_REMOVE(&c->c_list); - c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED); + c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED|CALLOUT_INVOKING); CALLOUT_UNLOCK(s); } @@ -393,7 +393,7 @@ callout_ev_late.ev_count++; #endif c->c_flags = (c->c_flags & ~CALLOUT_PENDING) | - CALLOUT_FIRED; + (CALLOUT_FIRED|CALLOUT_INVOKING); func = c->c_func; arg = c->c_arg; Index: sys/netinet/tcp_input.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_input.c,v retrieving revision 1.172 diff -u -r1.172 tcp_input.c --- sys/netinet/tcp_input.c 2 Jul 2003 19:33:20 -0000 1.172 +++ sys/netinet/tcp_input.c 17 Jul 2003 07:45:37 -0000 @@ -2799,7 +2799,10 @@ (void) m_free((sc)->sc_ipopts); \ if ((sc)->sc_route4.ro_rt != NULL) \ RTFREE((sc)->sc_route4.ro_rt); \ - pool_put(&syn_cache_pool, (sc)); \ + if (callout_invoking(&(sc)->sc_timer)) \ + (sc)->sc_flags |= SCF_DEAD; \ + else \ + pool_put(&syn_cache_pool, (sc)); \ } while (/*CONSTCOND*/0) struct pool syn_cache_pool; @@ -2946,6 +2949,14 @@ int s; s = splsoftnet(); + callout_ack(&sc->sc_timer); + + if (__predict_false(sc->sc_flags & SCF_DEAD)) { + tcpstat.tcps_sc_delayed_free++; + pool_put(&syn_cache_pool, sc); + splx(s); + return; + } if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) { /* Drop it -- too many retransmissions. 
*/ Index: sys/netinet/tcp_subr.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_subr.c,v retrieving revision 1.143 diff -u -r1.143 tcp_subr.c --- sys/netinet/tcp_subr.c 3 Jul 2003 08:28:16 -0000 1.143 +++ sys/netinet/tcp_subr.c 17 Jul 2003 07:48:14 -0000 @@ -1031,6 +1031,32 @@ } /* + * Return whether this tcpcb is marked as dead, indicating + * to the calling timer function that no further action should + * be taken, as we are about to release this tcpcb. The release + * of the storage will be done if this is the last timer running. + * + * This must be called from the callout handler function after + * callout_ack() is done, so that the handler's own timer is no longer + * counted and the number of invoking timers is 0 for the last one. + */ +int +tcp_isdead(tp) + struct tcpcb *tp; +{ + int dead = (tp->t_flags & TF_DEAD); + + if (__predict_false(dead)) { + if (tcp_timers_invoking(tp) > 0) + /* not quite there yet -- count separately? */ + return dead; + tcpstat.tcps_delayed_free++; + pool_put(&tcpcb_pool, tp); + } + return dead; +} + +/* * Close a TCP control block: * discard all space held by the tcp * discard internet protocol block @@ -1148,7 +1174,11 @@ m_free(tp->t_template); tp->t_template = NULL; } - pool_put(&tcpcb_pool, tp); + if (tcp_timers_invoking(tp)) + tp->t_flags |= TF_DEAD; + else + pool_put(&tcpcb_pool, tp); + if (inp) { inp->inp_ppcb = 0; soisdisconnected(so); Index: sys/netinet/tcp_timer.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_timer.c,v retrieving revision 1.62 diff -u -r1.62 tcp_timer.c --- sys/netinet/tcp_timer.c 3 Feb 2003 23:51:04 -0000 1.62 +++ sys/netinet/tcp_timer.c 17 Jul 2003 08:22:41 -0000 @@ -196,6 +196,24 @@ } /* + * Return how many timers are currently being invoked.
+ */ +int +tcp_timers_invoking(struct tcpcb *tp) +{ + int i; + int count = 0; + + for (i = 0; i < TCPT_NTIMERS; i++) + if (callout_invoking(&tp->t_timer[i])) + count++; + if (callout_invoking(&tp->t_delack_ch)) + count++; + + return count; +} + +/* * Callout to process delayed ACKs for a TCPCB. */ void @@ -211,6 +229,12 @@ */ s = splsoftnet(); + callout_ack(&tp->t_delack_ch); + if (tcp_isdead(tp)) { + splx(s); + return; + } + tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); splx(s); @@ -267,6 +291,11 @@ #endif s = splsoftnet(); + callout_ack(&tp->t_timer[TCPT_REXMT]); + if (tcp_isdead(tp)) { + splx(s); + return; + } #ifdef TCP_DEBUG #ifdef INET @@ -414,6 +443,11 @@ #endif s = splsoftnet(); + callout_ack(&tp->t_timer[TCPT_PERSIST]); + if (tcp_isdead(tp)) { + splx(s); + return; + } #ifdef TCP_DEBUG #ifdef INET @@ -476,6 +510,11 @@ #endif s = splsoftnet(); + callout_ack(&tp->t_timer[TCPT_KEEP]); + if (tcp_isdead(tp)) { + splx(s); + return; + } #ifdef TCP_DEBUG ostate = tp->t_state; @@ -558,6 +597,11 @@ #endif s = splsoftnet(); + callout_ack(&tp->t_timer[TCPT_2MSL]); + if (tcp_isdead(tp)) { + splx(s); + return; + } #ifdef TCP_DEBUG #ifdef INET Index: sys/netinet/tcp_var.h =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/sys/netinet/tcp_var.h,v retrieving revision 1.102 diff -u -r1.102 tcp_var.h --- sys/netinet/tcp_var.h 29 Jun 2003 22:32:00 -0000 1.102 +++ sys/netinet/tcp_var.h 17 Jul 2003 07:51:57 -0000 @@ -185,6 +185,7 @@ #define TF_CANT_TXSACK 0x1000 /* other side said I could not SACK */ #define TF_IGNR_RXSACK 0x2000 /* ignore received SACK blocks */ #define TF_REASSEMBLING 0x4000 /* we're busy reassembling */ +#define TF_DEAD 0x8000 /* dead and to-be-released */ struct mbuf *t_template; /* skeletal packet for transmit */ @@ -417,6 +418,7 @@ #define SCF_UNREACH 0x0001 /* we've had an unreach error */ #define SCF_TIMESTAMP 0x0002 /* peer will do timestamps */ +#define SCF_DEAD 0x0004 /* this entry to be
released */ struct mbuf *sc_ipopts; /* IP options */ u_int16_t sc_peermaxseg; @@ -547,6 +549,7 @@ u_quad_t tcps_noport; /* no socket on port */ u_quad_t tcps_badsyn; /* received ack for which we have no SYN in compressed state */ + u_quad_t tcps_delayed_free; /* delayed pool_put() of tcpcb */ /* These statistics deal with the SYN cache. */ u_quad_t tcps_sc_added; /* # of entries added */ @@ -561,6 +564,7 @@ u_quad_t tcps_sc_dropped; /* # of SYNs dropped (no route/mem) */ u_quad_t tcps_sc_collisions; /* # of hash collisions */ u_quad_t tcps_sc_retransmitted; /* # of retransmissions */ + u_quad_t tcps_sc_delayed_free; /* # of delayed pool_put()s */ u_quad_t tcps_selfquench; /* # of ENOBUFS we get on output */ }; @@ -706,8 +710,10 @@ int tcp_attach __P((struct socket *)); void tcp_canceltimers __P((struct tcpcb *)); +int tcp_timers_invoking __P((struct tcpcb*)); struct tcpcb * tcp_close __P((struct tcpcb *)); +int tcp_isdead __P((struct tcpcb *)); #ifdef INET6 void tcp6_ctlinput __P((int, struct sockaddr *, void *)); #endif Index: usr.bin/netstat/inet.c =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/usr.bin/netstat/inet.c,v retrieving revision 1.56 diff -u -r1.56 inet.c --- usr.bin/netstat/inet.c 12 Jul 2003 13:39:23 -0000 1.56 +++ usr.bin/netstat/inet.c 13 Jul 2003 15:15:24 -0000 @@ -263,6 +263,7 @@ p2(tcps_closed, tcps_drops, "\t%llu connection%s closed (including %llu drop%s)\n"); p(tcps_conndrops, "\t%llu embryonic connection%s dropped\n"); + p(tcps_delayed_free, "\t%llu delayed free%s of tcpcb\n"); p2(tcps_rttupdated, tcps_segstimed, "\t%llu segment%s updated rtt (of %llu attempt%s)\n"); p(tcps_rexmttimeo, "\t%llu retransmit timeout%s\n"); @@ -292,6 +293,8 @@ ps(tcps_sc_bucketoverflow, "\t\t%llu dropped due to bucket overflow\n"); ps(tcps_sc_reset, "\t\t%llu dropped due to RST\n"); ps(tcps_sc_unreach, "\t\t%llu dropped due to ICMP unreachable\n"); + ps(tcps_sc_delayed_free, "\t\t%llu delayed free of 
SYN cache " + "entries\n"); p(tcps_sc_retransmitted, "\t%llu SYN,ACK%s retransmitted\n"); p(tcps_sc_dupesyn, "\t%llu duplicate SYN%s received for entries " "already in the cache\n"); Index: share/man/man9/callout.9 =================================================================== RCS file: /usr/users/he/nbcvs/netbsd/src/share/man/man9/callout.9,v retrieving revision 1.9 diff -u -r1.9 callout.9 --- share/man/man9/callout.9 16 Apr 2003 13:35:25 -0000 1.9 +++ share/man/man9/callout.9 17 Jul 2003 07:54:41 -0000 @@ -43,6 +43,9 @@ .Nm callout_schedule , .Nm callout_setfunc , .Nm callout_stop , +.Nm callout_expired , +.Nm callout_invoking , +.Nm callout_ack , .Nm CALLOUT_INITIALIZER , .Nm CALLOUT_INITIALIZER_SETFUNC .Nd execute a function after a specified length of time @@ -63,6 +66,10 @@ .Fn "callout_pending" "struct callout *c" .Ft int .Fn "callout_expired" "struct callout *c" +.Ft int +.Fn "callout_invoking" "struct callout *c" +.Ft void +.Fn "callout_ack" "struct callout *c" .Fd CALLOUT_INITIALIZER .Pp .Fd CALLOUT_INITIALIZER_SETFUNC(func, arg) @@ -117,8 +124,10 @@ Once the timer is started, the callout handle is marked as .Em PENDING . Once the timer expires, -the handle is marked at +the handle is marked as .Em EXPIRED +and +.Em INVOKING and the .Em PENDING status is cleared. @@ -153,11 +162,11 @@ function stops the timer associated the callout handle .Fa c . The -.Em PENDING +.Em PENDING , +.Em EXPIRED , +and +.Em INVOKING status for the callout handle is cleared. -The -.Em EXPIRED -status is not affected. It is safe to call .Fn callout_stop on a callout handle that is not pending, so long as it is initialized. @@ -183,6 +192,41 @@ .Fn callout_expired function tests to see if the callout's timer has expired and its function called. +.Pp +The +.Fn callout_invoking +function tests the +.Em INVOKING +status of the callout handle +.Fa c . +This flag is set just before a callout's function is called.
+Since the priority level is lowered prior to invocation of the +callout function, other pending higher-priority code may run before +the callout function is allowed to run. +This may create a race condition if this higher-priority code +deallocates storage containing one or more callout structures whose +callout functions are about to be run. +In such cases, one technique to prevent references to deallocated +storage would be to test whether any callout functions are in the +.Em INVOKING +state using +.Fn callout_invoking , +and if so, to mark the data structure and defer storage +deallocation until the callout function is allowed to run. +For this handshake protocol to work, the callout function will +have to use the +.Fn callout_ack +function to clear this flag. +.Pp +The +.Fn callout_ack +function clears the +.Em INVOKING +state in the callout handle +.Fa c . +This is used in situations where it is necessary to protect against +the race condition described under +.Fn callout_invoking . .Sh SEE ALSO .Xr hz 9 .Sh HISTORY