diff --git a/include/net/gen/arp_io.h b/include/net/gen/arp_io.h new file mode 100644 index 000000000..583cd6f31 --- /dev/null +++ b/include/net/gen/arp_io.h @@ -0,0 +1,23 @@ +/* +net/gen/arp_io.h + +Created: Jan 2001 by Philip Homburg +*/ + +typedef struct nwio_arp +{ + int nwa_entno; + u32_t nwa_flags; + ipaddr_t nwa_ipaddr; + ether_addr_t nwa_ethaddr; +} nwio_arp_t; + +#define NWAF_EMPTY 0 +#define NWAF_INCOMPLETE 1 +#define NWAF_DEAD 2 +#define NWAF_PERM 4 +#define NWAF_PUB 8 + +/* + * $PchId: arp_io.h,v 1.2 2004/08/03 11:01:59 philip Exp $ + */ diff --git a/include/net/gen/ether.h b/include/net/gen/ether.h index d2ccc3566..690e76570 100755 --- a/include/net/gen/ether.h +++ b/include/net/gen/ether.h @@ -19,7 +19,17 @@ typedef struct ether_addr typedef u16_t ether_type_t; typedef U16_t Ether_type_t; -#define ETH_ARP_PROTO 0x806 -#define ETH_IP_PROTO 0x800 +#define ETH_ARP_PROTO 0x806 +#define ETH_IP_PROTO 0x800 +#define ETH_VLAN_PROTO 0x8100 + +/* Tag Control Information field for VLAN and Priority tagging */ +#define ETH_TCI_PRIO_MASK 0xe000 +#define ETH_TCI_CFI 0x1000 /* Canonical Format Indicator */ +#define ETH_TCI_VLAN_MASK 0x0fff /* 12-bit vlan number */ #endif /* __SERVER__IP__GEN__ETHER_H__ */ + +/* + * $PchId: ether.h,v 1.6 2005/01/27 17:33:35 philip Exp $ + */ diff --git a/include/net/gen/icmp.h b/include/net/gen/icmp.h index cb3ae37b4..efa081999 100755 --- a/include/net/gen/icmp.h +++ b/include/net/gen/icmp.h @@ -5,7 +5,7 @@ server/ip/gen/icmp.h #ifndef __SERVER__IP__GEN__ICMP_H__ #define __SERVER__IP__GEN__ICMP_H__ -#define ICMP_MIN_HDR_LEN 4 +#define ICMP_MIN_HDR_SIZE 4 #define ICMP_TYPE_ECHO_REPL 0 #define ICMP_TYPE_DST_UNRCH 3 @@ -33,8 +33,17 @@ server/ip/gen/icmp.h #define ICMP_TYPE_INFO_REQ 15 #define ICMP_TYPE_INFO_REPL 16 +/* Preferences for router advertisements. A router daemon installs itself + * as the default router in the router's interfaces by sending router + * advertisements to localhost with preference ICMP_RA_LOCAL_PREF. 
+ */ +#define ICMP_RA_DEFAULT_PREF 0x00000000 +#define ICMP_RA_INVAL_PREF 0x80000000 +#define ICMP_RA_MAX_PREF 0x7fffffff +#define ICMP_RA_LOCAL_PREF 0x10000000 + #endif /* __SERVER__IP__GEN__ICMP_H__ */ /* - * $PchId: icmp.h,v 1.5 1995/11/17 22:38:46 philip Exp $ + * $PchId: icmp.h,v 1.6 2002/06/10 07:10:26 philip Exp $ */ diff --git a/include/net/gen/icmp_hdr.h b/include/net/gen/icmp_hdr.h index dbd5dd0f0..8e6571dce 100755 --- a/include/net/gen/icmp_hdr.h +++ b/include/net/gen/icmp_hdr.h @@ -29,6 +29,12 @@ typedef struct icmp_pp u8_t ipp_unused[3]; } icmp_pp_t; +typedef struct icmp_mtu /* RFC 1191 */ +{ + u16_t im_unused; + u16_t im_mtu; +} icmp_mtu_t; + typedef struct icmp_hdr { u8_t ih_type, ih_code; @@ -40,6 +46,7 @@ typedef struct icmp_hdr ipaddr_t ihh_gateway; icmp_ram_t ihh_ram; icmp_pp_t ihh_pp; + icmp_mtu_t ihh_mtu; } ih_hun; union { @@ -51,5 +58,5 @@ typedef struct icmp_hdr #endif /* __SERVER__IP__GEN__ICMP_HDR_H__ */ /* - * $PchId: icmp_hdr.h,v 1.4 1995/11/17 22:28:58 philip Exp $ + * $PchId: icmp_hdr.h,v 1.5 2002/06/10 07:10:48 philip Exp $ */ diff --git a/include/net/gen/in.h b/include/net/gen/in.h index d86ee4e60..2c5b269a4 100755 --- a/include/net/gen/in.h +++ b/include/net/gen/in.h @@ -8,9 +8,11 @@ server/ip/gen/in.h #define IP_MIN_HDR_SIZE 20 #define IP_MAX_HDR_SIZE 60 /* 15 * 4 */ #define IP_VERSION 4 +#define IP_DEF_TTL 64 #define IP_MAX_TTL 255 -#define IP_DEF_MSS 576 -#define IP_MAX_PACKSIZE 40000 /* 8192 */ +#define IP_DEF_MTU 576 +#define IP_MIN_MTU (IP_MAX_HDR_SIZE+8) +#define IP_MAX_PACKSIZE 40000 /* Note: this restriction is not part of the IP-protocol but introduced by this implementation. 
*/ @@ -18,6 +20,8 @@ server/ip/gen/in.h #define IPPROTO_TCP 6 #define IPPROTO_UDP 17 +#define IP_MC_ALL_SYSTEMS 0xE0000001 /* 224.0.0.1 */ + typedef u32_t ipaddr_t; typedef u8_t ipproto_t; typedef struct ip_hdropt @@ -29,5 +33,5 @@ typedef struct ip_hdropt #endif /* __SERVER__IP__GEN__IN_H__ */ /* - * $PchId: in.h,v 1.3 1995/11/17 22:27:50 philip Exp $ + * $PchId: in.h,v 1.6 2002/06/10 07:11:15 philip Exp $ */ diff --git a/include/net/gen/ip_hdr.h b/include/net/gen/ip_hdr.h index ba1c5336c..f56205281 100755 --- a/include/net/gen/ip_hdr.h +++ b/include/net/gen/ip_hdr.h @@ -29,14 +29,19 @@ typedef struct ip_hdr #define IP_OPT_COPIED 0x80 #define IP_OPT_NUMBER 0x1f -#define IP_OPT_EOL 0x00 -#define IP_OPT_NOP 0x01 -#define IP_OPT_LSRR 0x83 -#define IP_OPT_RR 0x07 +#define IP_OPT_EOL 0x00 /* End of Options List, RFC-791 */ +#define IP_OPT_NOP 0x01 /* No Operation, RFC-791 */ +#define IP_OPT_RR 0x07 /* Record Route, RFC-791 */ +#define IP_OPT_TS 0x44 /* Timestamp, RFC-791 */ +#define IP_OPT_SEC 0x82 /* Security, RFC-1108 */ +#define IP_OPT_LSRR 0x83 /* Loose Source Route, RFC-791 */ +#define IP_OPT_SSRR 0x89 /* Strict Source Route, RFC-791 */ +#define IP_OPT_RTRALT 0x94 /* Router Alert, RFC-2113 */ + #define IP_OPT_RR_MIN 4 #endif /* __SERVER__IP__GEN__HDR_H__ */ /* - * $PchId: ip_hdr.h,v 1.4 1995/11/17 22:26:00 philip Exp $ + * $PchId: ip_hdr.h,v 1.5 2002/06/10 07:11:46 philip Exp $ */ diff --git a/include/net/gen/ip_io.h b/include/net/gen/ip_io.h index 97fac609a..07e87f088 100755 --- a/include/net/gen/ip_io.h +++ b/include/net/gen/ip_io.h @@ -5,17 +5,26 @@ server/ip/gen/ip_io.h #ifndef __SERVER__IP__GEN__IP_IO_H__ #define __SERVER__IP__GEN__IP_IO_H__ +typedef struct nwio_ipconf2 +{ + u32_t nwic_flags; + ipaddr_t nwic_ipaddr; + ipaddr_t nwic_netmask; +} nwio_ipconf2_t; + typedef struct nwio_ipconf { u32_t nwic_flags; ipaddr_t nwic_ipaddr; ipaddr_t nwic_netmask; + u16_t nwic_mtu; } nwio_ipconf_t; #define NWIC_NOFLAGS 0x0 -#define NWIC_FLAGS 0x3 +#define NWIC_FLAGS 0x7 # 
define NWIC_IPADDR_SET 0x1 # define NWIC_NETMASK_SET 0x2 +# define NWIC_MTU_SET 0x4 typedef struct nwio_ipopt { @@ -53,3 +62,7 @@ typedef struct nwio_ipopt # define NWIO_RWDATALL 0x10000000l #endif /* __SERVER__IP__GEN__IP_IO_H__ */ + +/* + * $PchId: ip_io.h,v 1.5 2001/03/12 22:17:25 philip Exp $ + */ diff --git a/include/net/gen/psip_hdr.h b/include/net/gen/psip_hdr.h index 5f85406a2..5dc8bc4e4 100755 --- a/include/net/gen/psip_hdr.h +++ b/include/net/gen/psip_hdr.h @@ -9,6 +9,7 @@ typedef struct psip_io_hdr { u8_t pih_flags; u8_t pih_dummy[3]; + u32_t pih_nexthop; } psip_io_hdr_t; #define PF_LOC_REM_MASK 1 @@ -18,5 +19,5 @@ typedef struct psip_io_hdr #endif /* __SERVER__IP__GEN__PSIP_HDR_H__ */ /* - * $PchId: psip_hdr.h,v 1.2 1995/11/17 22:22:35 philip Exp $ + * $PchId: psip_hdr.h,v 1.3 2001/02/19 07:35:38 philip Exp $ */ diff --git a/include/net/gen/psip_io.h b/include/net/gen/psip_io.h index 335798342..ba71a4eea 100755 --- a/include/net/gen/psip_io.h +++ b/include/net/gen/psip_io.h @@ -13,9 +13,12 @@ typedef struct nwio_psipopt #define NWPO_PROMISC_MASK 0x0001L #define NWPO_EN_PROMISC 0x00000001L #define NWUO_DI_PROMISC 0x00010000L +#define NWPO_NEXTHOP_MASK 0x0002L +#define NWPO_EN_NEXTHOP 0x00000002L +#define NWUO_DI_NEXTHOP 0x00020000L #endif /* __SERVER__IP__GEN__PSIP_IO_H__ */ /* - * $PchId: psip_io.h,v 1.2 1995/11/17 22:22:16 philip Exp $ + * $PchId: psip_io.h,v 1.3 2001/02/19 07:35:58 philip Exp $ */ diff --git a/include/net/gen/tcp_hdr.h b/include/net/gen/tcp_hdr.h index b0a42b2a8..f2d93780c 100755 --- a/include/net/gen/tcp_hdr.h +++ b/include/net/gen/tcp_hdr.h @@ -34,12 +34,16 @@ typedef struct tcp_hdropt u8_t tho_data[TCP_MAX_HDR_SIZE-TCP_MIN_HDR_SIZE]; } tcp_hdropt_t; -#define TCP_OPT_EOL 0 -#define TCP_OPT_NOP 1 -#define TCP_OPT_MSS 2 +#define TCP_OPT_EOL 0 +#define TCP_OPT_NOP 1 +#define TCP_OPT_MSS 2 +#define TCP_OPT_WSOPT 3 /* RFC-1323, window scale option */ +#define TCP_OPT_SACKOK 4 /* RFC-2018, SACK permitted */ +#define TCP_OPT_TS 8 /* 
RFC-1323, Timestamps option */ +#define TCP_OPT_CCNEW 12 /* RFC-1644, new connection count */ #endif /* __SERVER__IP__GEN__TCP_HDR_H__ */ /* - * $PchId: tcp_hdr.h,v 1.3 1995/11/17 22:18:13 philip Exp $ + * $PchId: tcp_hdr.h,v 1.4 2002/06/10 07:12:22 philip Exp $ */ diff --git a/include/net/gen/tcp_io.h b/include/net/gen/tcp_io.h index b41eb64ac..62339d684 100755 --- a/include/net/gen/tcp_io.h +++ b/include/net/gen/tcp_io.h @@ -58,9 +58,12 @@ typedef struct nwio_tcpopt # define NWTO_NOTBSD_URG 0x00040000L #define NWTO_DEL_RST_MASK 0x0008L # define NWTO_DEL_RST 0x00000008L +#define NWTO_BULK_MASK 0x0010L +# define NWTO_BULK 0x00000010L +# define NWTO_NOBULK 0x00100000L #endif /* __SERVER__IP__GEN__TCP_IO_H__ */ /* - * $PchId: tcp_io.h,v 1.4 1995/11/17 22:17:47 philip Exp $ + * $PchId: tcp_io.h,v 1.5 2001/02/19 07:36:55 philip Exp $ */ diff --git a/include/net/ioctl.h b/include/net/ioctl.h index e412809bc..33bb22de9 100755 --- a/include/net/ioctl.h +++ b/include/net/ioctl.h @@ -13,7 +13,14 @@ #define NWIOGETHOPT _IOR('n', 17, struct nwio_ethopt) #define NWIOGETHSTAT _IOR('n', 18, struct nwio_ethstat) +#define NWIOARPGIP _IORW('n',20, struct nwio_arp) +#define NWIOARPGNEXT _IORW('n',21, struct nwio_arp) +#define NWIOARPSIP _IOW ('n',22, struct nwio_arp) +#define NWIOARPDIP _IOW ('n',23, struct nwio_arp) + +#define NWIOSIPCONF2 _IOW('n', 32, struct nwio_ipconf2) #define NWIOSIPCONF _IOW('n', 32, struct nwio_ipconf) +#define NWIOGIPCONF2 _IOR('n', 33, struct nwio_ipconf2) #define NWIOGIPCONF _IOR('n', 33, struct nwio_ipconf) #define NWIOSIPOPT _IOW('n', 34, struct nwio_ipopt) #define NWIOGIPOPT _IOR('n', 35, struct nwio_ipopt) @@ -21,7 +28,6 @@ #define NWIOGIPOROUTE _IORW('n', 40, struct nwio_route) #define NWIOSIPOROUTE _IOW ('n', 41, struct nwio_route) #define NWIODIPOROUTE _IOW ('n', 42, struct nwio_route) - #define NWIOGIPIROUTE _IORW('n', 43, struct nwio_route) #define NWIOSIPIROUTE _IOW ('n', 44, struct nwio_route) #define NWIODIPIROUTE _IOW ('n', 45, struct 
nwio_route) @@ -34,11 +40,19 @@ #define NWIOTCPSHUTDOWN _IO ('n', 53) #define NWIOSTCPOPT _IOW('n', 54, struct nwio_tcpopt) #define NWIOGTCPOPT _IOR('n', 55, struct nwio_tcpopt) +#define NWIOTCPPUSH _IO ('n', 56) #define NWIOSUDPOPT _IOW('n', 64, struct nwio_udpopt) #define NWIOGUDPOPT _IOR('n', 65, struct nwio_udpopt) +#define NWIOUDPPEEK _IOR('n', 66, struct udp_io_hdr) #define NWIOSPSIPOPT _IOW('n', 80, struct nwio_psipopt) #define NWIOGPSIPOPT _IOR('n', 81, struct nwio_psipopt) +#define NWIOQUERYPARAM _IORW('n',96, struct svrqueryparam) + #endif /* _NET__IOCTL_H */ + +/* + * $PchId: ioctl.h,v 1.2 2003/07/25 14:34:03 philip Exp $ + */ diff --git a/servers/inet/Makefile b/servers/inet/Makefile index c7a1ecbe4..6e7307861 100644 --- a/servers/inet/Makefile +++ b/servers/inet/Makefile @@ -1,47 +1,40 @@ -# Makefile for Network Server (INET). -SERVER = inet +# Makefile for inet. -# directories -u = /usr -i = $u/include -s = $i/sys -h = $i/minix -n = $i/net +# Directories g = generic -# programs, flags, and libraries +# Programs, flags, and libraries CC = cc -CPPFLAGS = -I. -I.. -CFLAGS = $(OPT) $(CPPFLAGS) -m -LDFLAGS = -i +CPPFLAGS = -I. 
-D_MINIX +CFLAGS = $(OPT) $(CPPFLAGS) +LDFLAGS = LIBS = -lsys -lutils +.c.o: + $(CC) $(CFLAGS) -o $@ -c $< + OBJ = buf.o clock.o inet.o inet_config.o \ - mnx_eth.o mq.o sr.o stacktrace.o \ + mnx_eth.o mq.o qp.o sr.o stacktrace.o \ $g/udp.o $g/arp.o $g/eth.o $g/event.o \ $g/icmp.o $g/io.o $g/ip.o $g/ip_ioctl.o \ $g/ip_lib.o $g/ip_read.o $g/ip_write.o \ - $g/ipr.o $g/tcp.o $g/tcp_lib.o \ + $g/ipr.o $g/rand256.o $g/tcp.o $g/tcp_lib.o \ $g/tcp_recv.o $g/tcp_send.o $g/ip_eth.o \ - $g/ip_ps.o $g/psip.o + $g/ip_ps.o $g/psip.o \ + minix3/queryparam.o sha2.o -# build local binary -all build: $(SERVER) -$(SERVER): inet.a - $(CC) -o $@ $(LDFLAGS) inet.a version.c $(LIBS) - install -S 2kw $@ +all: inet -inet.a: $(OBJ) - @rm -f $@ - aal cr $@ $(OBJ) +inet: $(OBJ) + $(CC) -o $@ $(LDFLAGS) $(OBJ) version.c $(LIBS) -# install with other servers -install: /usr/sbin/servers/$(SERVER) -/usr/sbin/servers/$(SERVER): $(SERVER) - install -o root -cs $? $@ +install: /usr/sbin/servers/inet + +/usr/sbin/servers/inet: inet + install -c $? $@ clean: - rm -f $(SERVER) *.a *.o */*.o */*.a *.bak + rm -f $(OBJ) inet *.bak depend: /usr/bin/mkdep "$(CC) -E $(CPPFLAGS)" *.c generic/*.c > .depend @@ -49,42 +42,6 @@ depend: # Include generated dependencies. 
include .depend -$g/arp.o: $g/arp.c - cd generic && $(CC) -c $(CFLAGS) arp.c -$g/eth.o: $g/eth.c - cd generic && $(CC) -c $(CFLAGS) eth.c -$g/event.o: $g/event.c - cd generic && $(CC) -c $(CFLAGS) event.c -$g/icmp.o: $g/icmp.c - cd generic && $(CC) -c $(CFLAGS) icmp.c -$g/io.o: $g/io.c - cd generic && $(CC) -c $(CFLAGS) io.c -$g/ip.o: $g/ip.c - cd generic && $(CC) -c $(CFLAGS) ip.c -$g/ip_eth.o: $g/ip_eth.c - cd generic && $(CC) -c $(CFLAGS) ip_eth.c -$g/ip_ioctl.o: $g/ip_ioctl.c - cd generic && $(CC) -c $(CFLAGS) ip_ioctl.c -$g/ip_lib.o: $g/ip_lib.c - cd generic && $(CC) -c $(CFLAGS) ip_lib.c -$g/ip_ps.o: $g/ip_ps.c - cd generic && $(CC) -c $(CFLAGS) ip_ps.c -$g/ip_read.o: $g/ip_read.c - cd generic && $(CC) -c $(CFLAGS) ip_read.c -$g/ip_write.o: $g/ip_write.c - cd generic && $(CC) -c $(CFLAGS) ip_write.c -$g/ipr.o: $g/ipr.c - cd generic && $(CC) -c $(CFLAGS) ipr.c -$g/psip.o: $g/psip.c - cd generic && $(CC) -c $(CFLAGS) psip.c -$g/tcp.o: $g/tcp.c - cd generic && $(CC) -c $(CFLAGS) tcp.c -$g/tcp_lib.o: $g/tcp_lib.c - cd generic && $(CC) -c $(CFLAGS) tcp_lib.c -$g/tcp_recv.o: $g/tcp_recv.c - cd generic && $(CC) -c $(CFLAGS) tcp_recv.c -$g/tcp_send.o: $g/tcp_send.c - cd generic && $(CC) -c $(CFLAGS) tcp_send.c -$g/udp.o: $g/udp.c - cd generic && $(CC) -c $(CFLAGS) udp.c - +# +# $PchId: Makefile.mnx3,v 1.1 2005/06/28 14:28:45 philip Exp $ +# diff --git a/servers/inet/buf.c b/servers/inet/buf.c index 6d9ec94e4..baa431255 100644 --- a/servers/inet/buf.c +++ b/servers/inet/buf.c @@ -22,11 +22,7 @@ THIS_FILE #endif #ifndef BUF512_NR -#if CRAMPED -#define BUF512_NR 32 -#else -#define BUF512_NR 128 -#endif +#define BUF512_NR 512 #endif #ifndef BUF2K_NR #define BUF2K_NR 0 @@ -35,8 +31,8 @@ THIS_FILE #define BUF32K_NR 0 #endif -#define ACC_NR ((BUF512_NR+BUF2K_NR+BUF32K_NR)*3/2) -#define CLIENT_NR 6 +#define ACC_NR ((BUF512_NR+BUF2K_NR+BUF32K_NR)*3) +#define CLIENT_NR 7 #define DECLARE_TYPE(Tag, Type, Size) \ typedef struct Tag \ @@ -92,6 +88,7 @@ PRIVATE size_t bf_buf_gran; 
PUBLIC size_t bf_free_bufsize; PUBLIC acc_t *bf_temporary_acc; +PUBLIC acc_t *bf_linkcheck_acc; #ifdef BUF_CONSISTENCY_CHECK int inet_buf_debug; @@ -115,7 +112,6 @@ FORWARD int report_buffer ARGS(( buf_t *buf, char *label, int i )); PUBLIC void bf_init() { int i; - size_t size; size_t buf_s; acc_t *acc; @@ -216,7 +212,7 @@ bf_checkreq_t checkfunc; return; } - ip_panic(( "buf.c: to many clients" )); + ip_panic(( "buf.c: too many clients" )); } /* @@ -240,6 +236,7 @@ size_t size; assert (size>0); head= NULL; + tail= NULL; while (size) { new_acc= NULL; @@ -271,7 +268,7 @@ size_t size; #endif #undef ALLOC_BUF { - DBLOCK(1, printf("freeing buffers\n")); + DBLOCK(2, printf("freeing buffers\n")); bf_free_bufsize= 0; for (i=0; bf_free_bufsizeacc_next) j++; printf("# of free 512-bytes buffer is now %d\n", j); } #endif } -#if DEBUG +#if DEBUG && 0 { printf("last level was level %d\n", i-1); } #endif if (bf_free_bufsizeacc_length= count; size -= count; } - tail->acc_next= 0; - -#if DEBUG - bf_chkbuf(head); -#endif + tail->acc_next= NULL; return head; } @@ -420,7 +413,6 @@ int clnt_line; register acc_t *acc_ptr; { register acc_t *new_acc; - int i, j; if (!acc_freelist) { @@ -497,8 +489,8 @@ acc_t *old_acc; size_t size, offset_old, offset_new, block_size, block_size_old; /* Check if old acc is good enough. 
*/ - if (!old_acc || !old_acc->acc_next && old_acc->acc_linkC == 1 && - old_acc->acc_buffer->buf_linkC == 1) + if (!old_acc || (!old_acc->acc_next && old_acc->acc_linkC == 1 && + old_acc->acc_buffer->buf_linkC == 1)) { return old_acc; } @@ -556,7 +548,7 @@ register unsigned length; register acc_t *head, *tail; if (!data && !offset && !length) - return 0; + return NULL; #ifdef BUF_TRACK_ALLOC_FREE assert(data || (printf("from %s, %d: %u, %u\n", @@ -566,19 +558,13 @@ register unsigned length; #endif assert(data); -#if DEBUG - bf_chkbuf(data); -#endif if (!length) { head= bf_dupacc(data); bf_afree(head->acc_next); - head->acc_next= 0; + head->acc_next= NULL; head->acc_length= 0; -#if DEBUG - bf_chkbuf(data); -#endif return head; } while (data && offset>=data->acc_length) @@ -591,7 +577,7 @@ register unsigned length; head= bf_dupacc(data); bf_afree(head->acc_next); - head->acc_next= 0; + head->acc_next= NULL; head->acc_offset += offset; head->acc_length -= offset; if (length >= head->acc_length) @@ -608,7 +594,7 @@ register unsigned length; tail->acc_next= bf_dupacc(data); tail= tail->acc_next; bf_afree(tail->acc_next); - tail->acc_next= 0; + tail->acc_next= NULL; data= data->acc_next; length -= tail->acc_length; } @@ -624,12 +610,9 @@ register unsigned length; tail->acc_next= bf_dupacc(data); tail= tail->acc_next; bf_afree(tail->acc_next); - tail->acc_next= 0; + tail->acc_next= NULL; tail->acc_length= length; } -#if DEBUG - bf_chkbuf(data); -#endif return head; } @@ -706,7 +689,8 @@ acc_t *data_second; if (!data_second) return data_first; - head= 0; + head= NULL; + tail= NULL; while (data_first) { if (data_first->acc_linkC == 1) @@ -720,7 +704,7 @@ acc_t *data_second; data_first= curr->acc_next; if (!curr->acc_length) { - curr->acc_next= 0; + curr->acc_next= NULL; bf_afree(curr); continue; } @@ -732,7 +716,7 @@ acc_t *data_second; } if (!head) return data_second; - tail->acc_next= 0; + tail->acc_next= NULL; while (data_second && !data_second->acc_length) { @@ -877,7 
+861,6 @@ acc_t *acc; PUBLIC int bf_consistency_check() { acc_t *acc; - buf_t *buf; int silent; int error; int i; @@ -930,7 +913,7 @@ PUBLIC int bf_consistency_check() if (!silent) { printf( -"acc[%d] (0x%x) has been lost with count %d, last allocated at %s, %d\n", +"acc[%d] (%p) has been lost with count %d, last allocated at %s, %d\n", i, acc, acc->acc_linkC, acc->acc_alloc_file, acc->acc_alloc_line); #if 0 silent= 1; @@ -1041,7 +1024,7 @@ int i; assert(buf->buf_generation == buf_generation-1); buf->buf_generation= buf_generation; printf( -"%s[%d] (0x%x) has been lost with count %d, last allocated at %s, %d\n", +"%s[%d] (%p) has been lost with count %d, last allocated at %s, %d\n", label, i, buf, buf->buf_linkC, buf->buf_alloc_file, buf->buf_alloc_line); @@ -1101,6 +1084,15 @@ acc_t *acc; } } +PUBLIC void _bf_mark_1acc(clnt_file, clnt_line, acc) +char *clnt_file; +int clnt_line; +acc_t *acc; +{ + acc->acc_alloc_file= clnt_file; + acc->acc_alloc_line= clnt_line; +} + PUBLIC void _bf_mark_acc(clnt_file, clnt_line, acc) char *clnt_file; int clnt_line; @@ -1119,12 +1111,68 @@ acc_t *acc; } #endif +PUBLIC int bf_linkcheck(acc) +acc_t *acc; +{ + int i; + + buf_t *buffer; + for (i= 0; iacc_next) + { + if (acc->acc_linkC <= 0) + { + printf("wrong acc_linkC (%d) for acc %p\n", + acc->acc_linkC, acc); + return 0; + } + if (acc->acc_offset < 0) + { + printf("wrong acc_offset (%d) for acc %p\n", + acc->acc_offset, acc); + return 0; + } + if (acc->acc_length < 0) + { + printf("wrong acc_length (%d) for acc %p\n", + acc->acc_length, acc); + return 0; + } + buffer= acc->acc_buffer; + if (buffer == NULL) + { + printf("no buffer for acc %p\n", acc); + return 0; + } + if (buffer->buf_linkC <= 0) + { + printf( + "wrong buf_linkC (%d) for buffer %p, from acc %p\n", + buffer->buf_linkC, buffer, acc); + return 0; + } + if (acc->acc_offset + acc->acc_length > buffer->buf_size) + { + printf("%d + %d > %d for buffer %p, and acc %p\n", + acc->acc_offset, acc->acc_length, + 
buffer->buf_size, buffer, acc); + return 0; + } + } + if (acc != NULL) + { + printf("loop\n"); + return 0; + } + return 1; +} + PRIVATE void free_accs() { int i, j; DBLOCK(1, printf("free_accs\n")); +assert(bf_linkcheck(bf_linkcheck_acc)); for (i=0; !acc_freelist && i THIS_FILE PUBLIC int clck_call_expire; PRIVATE time_t curr_time; +PRIVATE time_t prev_time; PRIVATE timer_t *timer_chain; PRIVATE time_t next_timeout; +#ifdef __minix_vmd +PRIVATE int clck_tasknr= ANY; +#endif FORWARD _PROTOTYPE( void clck_fast_release, (timer_t *timer) ); FORWARD _PROTOTYPE( void set_timer, (void) ); PUBLIC void clck_init() { -#if ZERO + int r; + clck_call_expire= 0; curr_time= 0; + prev_time= 0; next_timeout= 0; timer_chain= 0; + +#ifdef __minix_vmd + r= sys_findproc(CLOCK_NAME, &clck_tasknr, 0); + if (r != OK) + ip_panic(( "unable to find clock task: %d\n", r )); #endif } @@ -37,27 +47,45 @@ PUBLIC time_t get_time() { if (!curr_time) { +#ifdef __minix_vmd + static message mess; + + mess.m_type= GET_UPTIME; + if (sendrec (clck_tasknr, &mess) < 0) + ip_panic(("unable to sendrec")); + if (mess.m_type != OK) + ip_panic(("can't read clock")); + curr_time= mess.NEW_TIME; +#else /* Minix 3 */ int s; if ((s=sys_getuptime(&curr_time)) != OK) ip_panic(("can't read clock")); +#endif + assert(curr_time >= prev_time); } return curr_time; } - + PUBLIC void set_time (tim) time_t tim; { - if (!curr_time) + if (!curr_time && tim >= prev_time) { /* Some code assumes that no time elapses while it is * running. 
*/ curr_time= tim; } + else if (!curr_time) + { + DBLOCK(0x20, printf("set_time: new time %ld < prev_time %ld\n", + tim, prev_time)); + } } PUBLIC void reset_time() { + prev_time= curr_time; curr_time= 0; } @@ -145,12 +173,27 @@ PRIVATE void set_timer() if (next_timeout == 0 || new_time < next_timeout) { +#ifdef __minix_vmd + static message mess; + next_timeout= new_time; + + new_time -= curr_time; + + mess.m_type= SET_SYNC_AL; + mess.CLOCK_PROC_NR= this_proc; + mess.DELTA_TICKS= new_time; + if (sendrec (clck_tasknr, &mess) < 0) + ip_panic(("unable to sendrec")); + if (mess.m_type != OK) + ip_panic(("can't set timer")); +#else /* Minix 3 */ next_timeout= new_time; new_time -= curr_time; if (sys_syncalrm(SELF, new_time, 0) != OK) - ip_panic(("can't set timer")); + ip_panic(("can't set timer")); +#endif } } @@ -184,5 +227,5 @@ PUBLIC void clck_expire_timers() } /* - * $PchId: clock.c,v 1.6 1995/11/21 06:54:39 philip Exp $ + * $PchId: clock.c,v 1.10 2005/06/28 14:23:40 philip Exp $ */ diff --git a/servers/inet/const.h b/servers/inet/const.h index a455af7fe..4c4e7609d 100644 --- a/servers/inet/const.h +++ b/servers/inet/const.h @@ -14,14 +14,12 @@ Copyright 1995 Philip Homburg #endif #ifndef NDEBUG -#define NDEBUG (CRAMPED) +#define NDEBUG 0 #endif #define CLOCK_GRAN 1 /* in HZ */ -#if DEBUG #define where() printf("%s, %d: ", __FILE__, __LINE__) -#endif #define NW_SUSPEND SUSPEND #define NW_WOULDBLOCK EWOULDBLOCK @@ -32,5 +30,5 @@ Copyright 1995 Philip Homburg #endif /* INET__CONST_H */ /* - * $PchId: const.h,v 1.6 1995/11/21 06:54:39 philip Exp $ + * $PchId: const.h,v 1.7 2000/08/12 09:21:44 philip Exp $ */ diff --git a/servers/inet/generic/arp.c b/servers/inet/generic/arp.c index 24e2cea30..84ea087b0 100644 --- a/servers/inet/generic/arp.c +++ b/servers/inet/generic/arp.c @@ -11,20 +11,26 @@ Copyright 1995 Philip Homburg #include "assert.h" #include "buf.h" #include "clock.h" +#include "event.h" #include "eth.h" #include "io.h" #include "sr.h" THIS_FILE -#define 
ARP_CACHE_NR 64 +#define ARP_CACHE_NR 256 +#define AP_REQ_NR 32 + +#define ARP_HASH_NR 256 +#define ARP_HASH_MASK 0xff +#define ARP_HASH_WIDTH 4 #define MAX_ARP_RETRIES 5 #define ARP_TIMEOUT (HZ/2+1) /* .5 seconds */ #ifndef ARP_EXP_TIME #define ARP_EXP_TIME (20L*60L*HZ) /* 20 minutes */ #endif -#define ARP_NOTRCH_EXP_TIME (5*HZ) /* 5 seconds */ +#define ARP_NOTRCH_EXP_TIME (30*HZ) /* 30 seconds */ #define ARP_INUSE_OFFSET (60*HZ) /* an entry in the cache can be deleted if its not used for 1 minute */ @@ -66,37 +72,39 @@ typedef struct arp_port int ap_eth_port; int ap_ip_port; int ap_eth_fd; - ether_addr_t ap_ethaddr; - ipaddr_t ap_ipaddr; - timer_t ap_timer; - ether_addr_t ap_write_ethaddr; - ipaddr_t ap_write_ipaddr; - int ap_write_code; + ether_addr_t ap_ethaddr; /* Ethernet address of this port */ + ipaddr_t ap_ipaddr; /* IP address of this port */ - ipaddr_t ap_req_ipaddr; - int ap_req_count; + struct arp_req + { + timer_t ar_timer; + int ar_entry; + int ar_req_count; + } ap_req[AP_REQ_NR]; arp_func_t ap_arp_func; + + acc_t *ap_sendpkt; + acc_t *ap_sendlist; + acc_t *ap_reclist; + event_t ap_event; } arp_port_t; -#define APF_EMPTY 0 -#define APF_ARP_RD_IP 0x4 -#define APF_ARP_RD_SP 0x8 -#define APF_ARP_WR_IP 0x10 -#define APF_ARP_WR_SP 0x20 -#define APF_INADDR_SET 0x100 -#define APF_MORE2WRITE 0x200 -#define APF_CLIENTREQ 0x400 -#define APF_CLIENTWRITE 0x1000 -#define APF_SUSPEND 0x2000 +#define APF_EMPTY 0x00 +#define APF_ARP_RD_IP 0x01 +#define APF_ARP_RD_SP 0x02 +#define APF_ARP_WR_IP 0x04 +#define APF_ARP_WR_SP 0x08 +#define APF_INADDR_SET 0x10 +#define APF_SUSPEND 0x20 -#define APS_INITIAL 0x00 -#define APS_GETADDR 0x01 -#define APS_ARPSTART 0x10 -#define APS_ARPPROTO 0x20 -#define APS_ARPMAIN 0x40 -#define APS_ERROR 0x80 +#define APS_INITIAL 1 +#define APS_GETADDR 2 +#define APS_ARPSTART 3 +#define APS_ARPPROTO 4 +#define APS_ARPMAIN 5 +#define APS_ERROR 6 typedef struct arp_cache { @@ -110,39 +118,61 @@ typedef struct arp_cache } arp_cache_t; #define 
ACF_EMPTY 0 -#define ACF_GOTREQ 1 +#define ACF_PERM 1 +#define ACF_PUB 2 #define ACS_UNUSED 0 #define ACS_INCOMPLETE 1 #define ACS_VALID 2 #define ACS_UNREACHABLE 3 +PRIVATE struct arp_hash_ent +{ + arp_cache_t *ahe_row[ARP_HASH_WIDTH]; +} arp_hash[ARP_HASH_NR]; + +PRIVATE arp_port_t *arp_port_table; +PRIVATE arp_cache_t *arp_cache; +PRIVATE int arp_cache_nr; + FORWARD acc_t *arp_getdata ARGS(( int fd, size_t offset, size_t count, int for_ioctl )); FORWARD int arp_putdata ARGS(( int fd, size_t offset, acc_t *data, int for_ioctl )); FORWARD void arp_main ARGS(( arp_port_t *arp_port )); -FORWARD void arp_timeout ARGS(( int fd, timer_t *timer )); +FORWARD void arp_timeout ARGS(( int ref, timer_t *timer )); FORWARD void setup_write ARGS(( arp_port_t *arp_port )); FORWARD void setup_read ARGS(( arp_port_t *arp_port )); -FORWARD void process_arp_req ARGS(( arp_port_t *arp_port, acc_t *data )); +FORWARD void do_reclist ARGS(( event_t *ev, ev_arg_t ev_arg )); +FORWARD void process_arp_pkt ARGS(( arp_port_t *arp_port, acc_t *data )); FORWARD void client_reply ARGS(( arp_port_t *arp_port, ipaddr_t ipaddr, ether_addr_t *ethaddr )); FORWARD arp_cache_t *find_cache_ent ARGS(( arp_port_t *arp_port, ipaddr_t ipaddr )); -FORWARD arp_cache_t *alloc_cache_ent ARGS(( void )); - -PRIVATE arp_port_t *arp_port_table; -PRIVATE arp_cache_t arp_cache[ARP_CACHE_NR]; +FORWARD arp_cache_t *alloc_cache_ent ARGS(( int flags )); +FORWARD void arp_buffree ARGS(( int priority )); +#ifdef BUF_CONSISTENCY_CHECK +FORWARD void arp_bufcheck ARGS(( void )); +#endif PUBLIC void arp_prep() { arp_port_table= alloc(eth_conf_nr * sizeof(arp_port_table[0])); + + arp_cache_nr= ARP_CACHE_NR; + if (arp_cache_nr < (eth_conf_nr+1)*AP_REQ_NR) + { + arp_cache_nr= (eth_conf_nr+1)*AP_REQ_NR; + printf("arp: using %d cache entries instead of %d\n", + arp_cache_nr, ARP_CACHE_NR); + } + arp_cache= alloc(arp_cache_nr * sizeof(arp_cache[0])); } PUBLIC void arp_init() { arp_port_t *arp_port; + arp_cache_t *cache; int i; 
assert (BUF_S >= sizeof(struct nwio_ethstat)); @@ -155,6 +185,20 @@ PUBLIC void arp_init() * unavailable */ } + cache= arp_cache; + for (i=0; iac_state= ACS_UNUSED; + cache->ac_flags= ACF_EMPTY; + cache->ac_expire= 0; + cache->ac_lastuse= 0; + } + +#ifndef BUF_CONSISTENCY_CHECK + bf_logon(arp_buffree); +#else + bf_logon(arp_buffree, arp_bufcheck); +#endif } PRIVATE void arp_main(arp_port) @@ -166,11 +210,14 @@ arp_port_t *arp_port; { case APS_INITIAL: arp_port->ap_eth_fd= eth_open(arp_port->ap_eth_port, - arp_port->ap_eth_port, arp_getdata, arp_putdata, 0); + arp_port->ap_eth_port, arp_getdata, arp_putdata, + 0 /* no put_pkt */, 0 /* no select_res */); if (arp_port->ap_eth_fd<0) { - DBLOCK(1, printf("arp.c: unable to open ethernet\n")); + DBLOCK(1, printf("arp[%d]: unable to open eth[%d]\n", + arp_port-arp_port_table, + arp_port->ap_eth_port)); return; } @@ -195,19 +242,6 @@ arp_port_t *arp_port; case APS_ARPSTART: arp_port->ap_state= APS_ARPPROTO; - { - arp_cache_t *cache; - int i; - - cache= arp_cache; - for (i=0; iac_state= ACS_UNUSED; - cache->ac_flags= ACF_EMPTY; - cache->ac_expire= 0; - cache->ac_lastuse= 0; - } - } result= eth_ioctl (arp_port->ap_eth_fd, NWIOSETHOPT); if (result==NW_SUSPEND) @@ -220,17 +254,14 @@ arp_port_t *arp_port; /* fall through */ case APS_ARPPROTO: arp_port->ap_state= APS_ARPMAIN; - if (arp_port->ap_flags & APF_MORE2WRITE) - setup_write(arp_port); + setup_write(arp_port); setup_read(arp_port); return; -#if !CRAMPED default: ip_panic(( "arp_main(&arp_port_table[%d]) called but ap_state=0x%x\n", arp_port->ap_eth_port, arp_port->ap_state )); -#endif } } @@ -241,7 +272,6 @@ size_t count; int for_ioctl; { arp_port_t *arp_port; - arp46_t *arp; acc_t *data; int result; @@ -281,13 +311,18 @@ int for_ioctl; assert (arp_port->ap_flags & APF_ARP_WR_IP); if (!count) { + data= arp_port->ap_sendpkt; + arp_port->ap_sendpkt= NULL; + assert(data); + bf_afree(data); data= NULL; + result= (int)offset; if (result<0) { DIFBLOCK(1, (result != NW_SUSPEND), 
printf( - "arp.c: write error on port %d: error %d\n", - fd, result)); + "arp[%d]: write error on port %d: error %d\n", + fd, arp_port->ap_eth_fd, result)); arp_port->ap_state= APS_ERROR; break; @@ -298,37 +333,14 @@ int for_ioctl; return NW_OK; } assert (offset+count <= sizeof(arp46_t)); - data= bf_memreq(sizeof(arp46_t)); - arp= (arp46_t *)ptr2acc_data(data); - data->acc_offset += offset; - data->acc_length= count; - if (arp_port->ap_write_code == ARP_REPLY) - arp->a46_dstaddr= arp_port->ap_write_ethaddr; - else - { - arp->a46_dstaddr.ea_addr[0]= 0xff; - arp->a46_dstaddr.ea_addr[1]= 0xff; - arp->a46_dstaddr.ea_addr[2]= 0xff; - arp->a46_dstaddr.ea_addr[3]= 0xff; - arp->a46_dstaddr.ea_addr[4]= 0xff; - arp->a46_dstaddr.ea_addr[5]= 0xff; - } - arp->a46_hdr= HTONS(ARP_ETHERNET); - arp->a46_pro= HTONS(ETH_IP_PROTO); - arp->a46_hln= 6; - arp->a46_pln= 4; - arp->a46_op= htons(arp_port->ap_write_code); - arp->a46_sha= arp_port->ap_ethaddr; - memcpy (arp->a46_spa, &arp_port->ap_ipaddr, sizeof(ipaddr_t)); - arp->a46_tha= arp_port->ap_write_ethaddr; - memcpy (arp->a46_tpa, &arp_port->ap_write_ipaddr, - sizeof(ipaddr_t)); + data= arp_port->ap_sendpkt; + assert(data); + data= bf_cut(data, offset, count); + return data; default: -#if !CRAMPED printf("arp_getdata(%d, 0x%d, 0x%d) called but ap_state=0x%x\n", fd, offset, count, arp_port->ap_state); -#endif break; } return 0; @@ -343,6 +355,8 @@ int for_ioctl; arp_port_t *arp_port; int result; struct nwio_ethstat *ethstat; + ev_arg_t ev_arg; + acc_t *tmpacc; arp_port= &arp_port_table[fd]; @@ -354,8 +368,8 @@ int for_ioctl; if (result<0) { DIFBLOCK(1, (result != NW_SUSPEND), printf( - "arp.c: read error on port %d: error %d\n", - fd, result)); + "arp[%d]: read error on port %d: error %d\n", + fd, arp_port->ap_eth_fd, result)); return NW_OK; } @@ -374,11 +388,29 @@ int for_ioctl; /* Warning: the above assertion is illegal; puts and gets of data can be brokenup in any piece the server likes. 
However we assume that the server is eth.c and it transfers only - whole packets. */ + whole packets. + */ data= bf_packIffLess(data, sizeof(arp46_t)); if (data->acc_length >= sizeof(arp46_t)) - process_arp_req(arp_port,data); - bf_afree(data); + { + if (!arp_port->ap_reclist) + { + ev_arg.ev_ptr= arp_port; + ev_enqueue(&arp_port->ap_event, do_reclist, + ev_arg); + } + if (data->acc_linkC != 1) + { + tmpacc= bf_dupacc(data); + bf_afree(data); + data= tmpacc; + tmpacc= NULL; + } + data->acc_ext_link= arp_port->ap_reclist; + arp_port->ap_reclist= data; + } + else + bf_afree(data); return NW_OK; } switch (arp_port->ap_state) @@ -407,10 +439,8 @@ int for_ioctl; bf_afree(data); return NW_OK; default: -#if !CRAMPED printf("arp_putdata(%d, 0x%d, 0x%lx) called but ap_state=0x%x\n", fd, offset, (unsigned long)data, arp_port->ap_state); -#endif break; } return EGENERIC; @@ -431,74 +461,83 @@ arp_port_t *arp_port; return; } DIFBLOCK(1, (result != NW_OK), - printf("arp.c: eth_read(..,%d)=%d\n", - ETH_MAX_PACK_SIZE, result)); + printf("arp[%d]: eth_read(..,%d)=%d\n", + arp_port-arp_port_table, ETH_MAX_PACK_SIZE, result)); } } PRIVATE void setup_write(arp_port) arp_port_t *arp_port; { - int i, result; + int result; + acc_t *data; - while (arp_port->ap_flags & APF_MORE2WRITE) + for(;;) { - if (arp_port->ap_flags & APF_CLIENTWRITE) - { - arp_port->ap_flags &= ~APF_CLIENTWRITE; - arp_port->ap_write_ipaddr= arp_port->ap_req_ipaddr; - arp_port->ap_write_code= ARP_REQUEST; - clck_timer(&arp_port->ap_timer, - get_time() + ARP_TIMEOUT, - arp_timeout, arp_port->ap_eth_port); - } - else - { - arp_cache_t *cache; + data= arp_port->ap_sendlist; + if (!data) + break; + arp_port->ap_sendlist= data->acc_ext_link; - cache= arp_cache; - for (i=0; iac_flags & ACF_GOTREQ) && - cache->ac_port == arp_port) - { - cache->ac_flags &= ~ACF_GOTREQ; - arp_port->ap_write_ethaddr= cache-> - ac_ethaddr; - arp_port->ap_write_ipaddr= cache-> - ac_ipaddr; - arp_port->ap_write_code= ARP_REPLY; - break; - } - } - 
if (i>=ARP_CACHE_NR) - { - arp_port->ap_flags &= ~APF_MORE2WRITE; - break; - } + if (arp_port->ap_ipaddr == HTONL(0x00000000)) + { + /* Interface is down */ + printf( + "arp[%d]: not sending ARP packet, interface is down\n", + arp_port-arp_port_table); + bf_afree(data); data= NULL; + continue; } + + assert(!arp_port->ap_sendpkt); + arp_port->ap_sendpkt= data; data= NULL; + arp_port->ap_flags= (arp_port->ap_flags & ~APF_ARP_WR_SP) | APF_ARP_WR_IP; result= eth_write(arp_port->ap_eth_fd, sizeof(arp46_t)); if (result == NW_SUSPEND) + { arp_port->ap_flags |= APF_ARP_WR_SP; + break; + } if (result<0) { DIFBLOCK(1, (result != NW_SUSPEND), - printf("arp.c: eth_write(..,%d)=%d\n", - sizeof(arp46_t), result)); + printf("arp[%d]: eth_write(..,%d)=%d\n", + arp_port-arp_port_table, sizeof(arp46_t), + result)); return; } } } -PRIVATE void process_arp_req (arp_port, data) +PRIVATE void do_reclist(ev, ev_arg) +event_t *ev; +ev_arg_t ev_arg; +{ + arp_port_t *arp_port; + acc_t *data; + + arp_port= ev_arg.ev_ptr; + assert(ev == &arp_port->ap_event); + + while (data= arp_port->ap_reclist, data != NULL) + { + arp_port->ap_reclist= data->acc_ext_link; + process_arp_pkt(arp_port, data); + bf_afree(data); + } +} + +PRIVATE void process_arp_pkt (arp_port, data) arp_port_t *arp_port; acc_t *data; { + int i, entry, do_reply; arp46_t *arp; - arp_cache_t *ce; - int level; + u16_t *p; + arp_cache_t *ce, *cache; + struct arp_req *reqp; time_t curr_time; ipaddr_t spa, tpa; @@ -513,23 +552,53 @@ acc_t *data; arp->a46_pro != HTONS(ETH_IP_PROTO) || arp->a46_pln != 4) return; - ce= find_cache_ent(arp_port, spa); - if (ce && ce->ac_expire < curr_time) + if (arp_port->ap_ipaddr == HTONL(0x00000000)) { - DBLOCK(0x10, printf("arp: expiring entry for "); - writeIpAddr(ce->ac_ipaddr); printf("\n")); - ce->ac_state= ACS_UNUSED; - ce= NULL; + /* Interface is down */ +#if DEBUG + printf("arp[%d]: dropping ARP packet, interface is down\n", + arp_port-arp_port_table); +#endif + return; } + + ce= 
find_cache_ent(arp_port, spa); + cache= NULL; /* lint */ + + do_reply= 0; + if (arp->a46_op != HTONS(ARP_REQUEST)) + ; /* No need to reply */ + else if (tpa == arp_port->ap_ipaddr) + do_reply= 1; + else + { + /* Look for a published entry */ + cache= find_cache_ent(arp_port, tpa); + if (cache) + { + if (cache->ac_flags & ACF_PUB) + { + /* Published entry */ + do_reply= 1; + } + else + { + /* Nothing to do */ + cache= NULL; + } + } + } + if (ce == NULL) { - if (tpa != arp_port->ap_ipaddr) + if (!do_reply) return; - DBLOCK(0x10, printf("arp: allocating entry for "); + DBLOCK(0x10, printf("arp[%d]: allocating entry for ", + arp_port-arp_port_table); writeIpAddr(spa); printf("\n")); - ce= alloc_cache_ent(); + ce= alloc_cache_ent(ACF_EMPTY); ce->ac_flags= ACF_EMPTY; ce->ac_state= ACS_VALID; ce->ac_ethaddr= arp->a46_sha; @@ -544,6 +613,18 @@ acc_t *data; ce->ac_ethaddr= arp->a46_sha; if (ce->ac_state == ACS_INCOMPLETE) { + /* Find request entry */ + entry= ce-arp_cache; + for (i= 0, reqp= arp_port->ap_req; iar_entry == entry) + break; + } + assert(i < AP_REQ_NR); + clck_untimer(&reqp->ar_timer); + reqp->ar_entry= -1; + ce->ac_state= ACS_VALID; client_reply(arp_port, spa, &arp->a46_sha); } @@ -552,11 +633,11 @@ acc_t *data; } /* Update fields in the arp cache. 
*/ -#if !CRAMPED if (memcmp(&ce->ac_ethaddr, &arp->a46_sha, sizeof(ce->ac_ethaddr)) != 0) { - printf("arp: ethernet address for IP address "); + printf("arp[%d]: ethernet address for IP address ", + arp_port-arp_port_table); writeIpAddr(spa); printf(" changed from "); writeEtherAddr(&ce->ac_ethaddr); @@ -565,15 +646,46 @@ acc_t *data; printf("\n"); ce->ac_ethaddr= arp->a46_sha; } -#else - ce->ac_ethaddr= arp->a46_sha; -#endif ce->ac_expire= curr_time+ARP_EXP_TIME; - if (arp->a46_op == HTONS(ARP_REQUEST) && (tpa == arp_port->ap_ipaddr)) + if (do_reply) { - ce->ac_flags |= ACF_GOTREQ; - arp_port->ap_flags |= APF_MORE2WRITE; + data= bf_memreq(sizeof(arp46_t)); + arp= (arp46_t *)ptr2acc_data(data); + + /* Clear padding */ + assert(sizeof(arp->a46_data.a46_dummy) % sizeof(*p) == 0); + for (i= 0, p= (u16_t *)arp->a46_data.a46_dummy; + i < sizeof(arp->a46_data.a46_dummy)/sizeof(*p); + i++, p++) + { + *p= 0xdead; + } + + arp->a46_dstaddr= ce->ac_ethaddr; + arp->a46_hdr= HTONS(ARP_ETHERNET); + arp->a46_pro= HTONS(ETH_IP_PROTO); + arp->a46_hln= 6; + arp->a46_pln= 4; + + arp->a46_op= htons(ARP_REPLY); + if (tpa == arp_port->ap_ipaddr) + { + arp->a46_sha= arp_port->ap_ethaddr; + } + else + { + assert(cache); + arp->a46_sha= cache->ac_ethaddr; + } + memcpy (arp->a46_spa, &tpa, sizeof(ipaddr_t)); + arp->a46_tha= ce->ac_ethaddr; + memcpy (arp->a46_tpa, &ce->ac_ipaddr, sizeof(ipaddr_t)); + + assert(data->acc_linkC == 1); + data->acc_ext_link= arp_port->ap_sendlist; + arp_port->ap_sendlist= data; data= NULL; + if (!(arp_port->ap_flags & APF_ARP_WR_IP)) setup_write(arp_port); } @@ -584,12 +696,6 @@ arp_port_t *arp_port; ipaddr_t ipaddr; ether_addr_t *ethaddr; { - if ((arp_port->ap_flags & APF_CLIENTREQ) && - ipaddr == arp_port->ap_req_ipaddr) - { - arp_port->ap_flags &= ~(APF_CLIENTREQ|APF_CLIENTWRITE); - clck_untimer(&arp_port->ap_timer); - } (*arp_port->ap_arp_func)(arp_port->ap_ip_port, ipaddr, ethaddr); } @@ -597,37 +703,113 @@ PRIVATE arp_cache_t *find_cache_ent (arp_port, 
ipaddr) arp_port_t *arp_port; ipaddr_t ipaddr; { - arp_cache_t *cache; + arp_cache_t *ce; int i; + unsigned hash; - for (i=0, cache= arp_cache; i> 24) ^ (ipaddr >> 16) ^ (ipaddr >> 8) ^ ipaddr; + hash &= ARP_HASH_MASK; + + ce= arp_hash[hash].ahe_row[0]; + if (ce && ce->ac_ipaddr == ipaddr && ce->ac_port == arp_port && + ce->ac_state != ACS_UNUSED) { - if (cache->ac_state != ACS_UNUSED && - cache->ac_port == arp_port && - cache->ac_ipaddr == ipaddr) + return ce; + } + for (i= 1; iac_ipaddr != ipaddr || ce->ac_port != arp_port + || ce->ac_state == ACS_UNUSED) { - return cache; + continue; + } + arp_hash[hash].ahe_row[i]= arp_hash[hash].ahe_row[0]; + arp_hash[hash].ahe_row[0]= ce; + return ce; + } + + for (i=0, ce= arp_cache; iac_state != ACS_UNUSED && + ce->ac_port == arp_port && + ce->ac_ipaddr == ipaddr) + { + for (i= ARP_HASH_WIDTH-1; i>0; i--) + { + arp_hash[hash].ahe_row[i]= + arp_hash[hash].ahe_row[i-1]; + } + assert(i == 0); + arp_hash[hash].ahe_row[0]= ce; + return ce; } } return NULL; } -PRIVATE arp_cache_t *alloc_cache_ent() +PRIVATE arp_cache_t *alloc_cache_ent(flags) +int flags; { arp_cache_t *cache, *old; int i; old= NULL; - for (i=0, cache= arp_cache; iac_state == ACS_UNUSED) - return cache; + { + old= cache; + break; + } if (cache->ac_state == ACS_INCOMPLETE) continue; + if (cache->ac_flags & ACF_PERM) + continue; if (!old || cache->ac_lastuse < old->ac_lastuse) old= cache; } assert(old); + + if (!flags) + return old; + + /* Get next permanent entry */ + for (i=0, cache= arp_cache; iac_state == ACS_UNUSED) + break; + if (cache->ac_flags & ACF_PERM) + continue; + break; + } + if (i >= arp_cache_nr/2) + return NULL; /* Too many entries */ + if (cache != old) + { + assert(old > cache); + *old= *cache; + old= cache; + } + + if (!(flags & ACF_PUB)) + return old; + + /* Get first nonpublished entry */ + for (i=0, cache= arp_cache; iac_state == ACS_UNUSED) + break; + if (cache->ac_flags & ACF_PUB) + continue; + break; + } + if (cache != old) + { + assert(old 
> cache); + *old= *cache; + old= cache; + } return old; } @@ -636,7 +818,6 @@ int eth_port; ipaddr_t ipaddr; { arp_port_t *arp_port; - int i; if (eth_port < 0 || eth_port >= eth_conf_nr) return; @@ -654,8 +835,8 @@ int eth_port; int ip_port; arp_func_t arp_func; { - arp_port_t *arp_port; int i; + arp_port_t *arp_port; assert(eth_port >= 0); if (eth_port >= eth_conf_nr) @@ -667,6 +848,12 @@ arp_func_t arp_func; arp_port->ap_state= APS_INITIAL; arp_port->ap_flags= APF_EMPTY; arp_port->ap_arp_func= arp_func; + arp_port->ap_sendpkt= NULL; + arp_port->ap_sendlist= NULL; + arp_port->ap_reclist= NULL; + for (i= 0; iap_req[i].ar_entry= -1; + ev_init(&arp_port->ap_event); arp_main(arp_port); @@ -678,23 +865,48 @@ int eth_port; ipaddr_t ipaddr; ether_addr_t *ethaddr; { + int i, ref; arp_port_t *arp_port; - int i; + struct arp_req *reqp; arp_cache_t *ce; time_t curr_time; assert(eth_port >= 0 && eth_port < eth_conf_nr); arp_port= &arp_port_table[eth_port]; assert(arp_port->ap_state == APS_ARPMAIN || - (printf("ap_state= %d\n", arp_port->ap_state), 0)); + (printf("arp[%d]: ap_state= %d\n", arp_port-arp_port_table, + arp_port->ap_state), 0)); curr_time= get_time(); ce= find_cache_ent (arp_port, ipaddr); if (ce && ce->ac_expire < curr_time) { - ce->ac_state= ACS_UNUSED; - ce= NULL; + assert(ce->ac_state != ACS_INCOMPLETE); + + /* Check whether there is enough space for an ARP + * request or not. + */ + for (i= 0, reqp= arp_port->ap_req; iar_entry < 0) + break; + } + if (i < AP_REQ_NR) + { + /* Okay, expire this entry. */ + ce->ac_state= ACS_UNUSED; + ce= NULL; + } + else + { + /* Continue using this entry for a while */ + printf("arp[%d]: Overloaded! 
Keeping entry for ", + arp_port-arp_port_table); + writeIpAddr(ipaddr); + printf("\n"); + ce->ac_expire= curr_time+ARP_NOTRCH_EXP_TIME; + } } if (ce) { @@ -710,67 +922,433 @@ ether_addr_t *ethaddr; if (ce->ac_state == ACS_UNREACHABLE) return EDSTNOTRCH; assert(ce->ac_state == ACS_INCOMPLETE); + return NW_SUSPEND; } - if (arp_port->ap_flags & APF_CLIENTREQ) + /* Find an empty slot for an ARP request */ + for (i= 0, reqp= arp_port->ap_req; iar_entry < 0) + break; + } + if (i >= AP_REQ_NR) + { + /* We should be able to report that this ARP request + * cannot be accepted. At the moment we just return SUSPEND. */ return NW_SUSPEND; } - ce= alloc_cache_ent(); + ref= (eth_port*AP_REQ_NR + i); + + ce= alloc_cache_ent(ACF_EMPTY); ce->ac_flags= 0; ce->ac_state= ACS_INCOMPLETE; ce->ac_ipaddr= ipaddr; ce->ac_port= arp_port; ce->ac_expire= curr_time+ARP_EXP_TIME; ce->ac_lastuse= curr_time; - arp_port->ap_flags |= APF_CLIENTREQ|APF_MORE2WRITE | APF_CLIENTWRITE; - arp_port->ap_req_ipaddr= ipaddr; - arp_port->ap_req_count= 0; - if (!(arp_port->ap_flags & APF_ARP_WR_IP)) - setup_write(arp_port); + + reqp->ar_entry= ce-arp_cache; + reqp->ar_req_count= -1; + + /* Send the first packet by expiring the timer */ + clck_timer(&reqp->ar_timer, 1, arp_timeout, ref); + return NW_SUSPEND; } -PRIVATE void arp_timeout (fd, timer) +PUBLIC int arp_ioctl (eth_port, fd, req, get_userdata, put_userdata) +int eth_port; int fd; -timer_t *timer; +ioreq_t req; +get_userdata_t get_userdata; +put_userdata_t put_userdata; { arp_port_t *arp_port; - arp_cache_t *ce; - int level; + arp_cache_t *ce, *cache; + acc_t *data; + nwio_arp_t *arp_iop; + int entno, result, ac_flags; + u32_t flags; + ipaddr_t ipaddr; time_t curr_time; - arp_port= &arp_port_table[fd]; + assert(eth_port >= 0 && eth_port < eth_conf_nr); + arp_port= &arp_port_table[eth_port]; + assert(arp_port->ap_state == APS_ARPMAIN || + (printf("arp[%d]: ap_state= %d\n", arp_port-arp_port_table, + arp_port->ap_state), 0)); - assert (timer == 
&arp_port->ap_timer); - - if (++arp_port->ap_req_count < MAX_ARP_RETRIES) + switch(req) { - arp_port->ap_flags |= APF_CLIENTWRITE|APF_MORE2WRITE; - if (!(arp_port->ap_flags & APF_ARP_WR_IP)) - setup_write(arp_port); + case NWIOARPGIP: + data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE); + if (data == NULL) + return EFAULT; + data= bf_packIffLess(data, sizeof(*arp_iop)); + arp_iop= (nwio_arp_t *)ptr2acc_data(data); + ipaddr= arp_iop->nwa_ipaddr; + ce= NULL; /* lint */ + for (entno= 0; entno < arp_cache_nr; entno++) + { + ce= &arp_cache[entno]; + if (ce->ac_state == ACS_UNUSED || + ce->ac_port != arp_port) + { + continue; + } + if (ce->ac_ipaddr == ipaddr) + break; + } + if (entno == arp_cache_nr) + { + /* Also report the address of this interface */ + if (ipaddr != arp_port->ap_ipaddr) + { + bf_afree(data); + return ENOENT; + } + arp_iop->nwa_entno= arp_cache_nr; + arp_iop->nwa_ipaddr= ipaddr; + arp_iop->nwa_ethaddr= arp_port->ap_ethaddr; + arp_iop->nwa_flags= NWAF_PERM | NWAF_PUB; + } + else + { + arp_iop->nwa_entno= entno+1; + arp_iop->nwa_ipaddr= ce->ac_ipaddr; + arp_iop->nwa_ethaddr= ce->ac_ethaddr; + arp_iop->nwa_flags= 0; + if (ce->ac_state == ACS_INCOMPLETE) + arp_iop->nwa_flags |= NWAF_INCOMPLETE; + if (ce->ac_state == ACS_UNREACHABLE) + arp_iop->nwa_flags |= NWAF_DEAD; + if (ce->ac_flags & ACF_PERM) + arp_iop->nwa_flags |= NWAF_PERM; + if (ce->ac_flags & ACF_PUB) + arp_iop->nwa_flags |= NWAF_PUB; + } + + result= (*put_userdata)(fd, 0, data, TRUE); + return result; + + case NWIOARPGNEXT: + data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE); + if (data == NULL) + return EFAULT; + data= bf_packIffLess(data, sizeof(*arp_iop)); + arp_iop= (nwio_arp_t *)ptr2acc_data(data); + entno= arp_iop->nwa_entno; + if (entno < 0) + entno= 0; + ce= NULL; /* lint */ + for (; entno < arp_cache_nr; entno++) + { + ce= &arp_cache[entno]; + if (ce->ac_state == ACS_UNUSED || + ce->ac_port != arp_port) + { + continue; + } + break; + } + if (entno == arp_cache_nr) + { + 
bf_afree(data); + return ENOENT; + } + arp_iop->nwa_entno= entno+1; + arp_iop->nwa_ipaddr= ce->ac_ipaddr; + arp_iop->nwa_ethaddr= ce->ac_ethaddr; + arp_iop->nwa_flags= 0; + if (ce->ac_state == ACS_INCOMPLETE) + arp_iop->nwa_flags |= NWAF_INCOMPLETE; + if (ce->ac_state == ACS_UNREACHABLE) + arp_iop->nwa_flags |= NWAF_DEAD; + if (ce->ac_flags & ACF_PERM) + arp_iop->nwa_flags |= NWAF_PERM; + if (ce->ac_flags & ACF_PUB) + arp_iop->nwa_flags |= NWAF_PUB; + + result= (*put_userdata)(fd, 0, data, TRUE); + return result; + + case NWIOARPSIP: + data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE); + if (data == NULL) + return EFAULT; + data= bf_packIffLess(data, sizeof(*arp_iop)); + arp_iop= (nwio_arp_t *)ptr2acc_data(data); + ipaddr= arp_iop->nwa_ipaddr; + if (find_cache_ent(arp_port, ipaddr)) + { + bf_afree(data); + return EEXIST; + } + + flags= arp_iop->nwa_flags; + ac_flags= ACF_EMPTY; + if (flags & NWAF_PERM) + ac_flags |= ACF_PERM; + if (flags & NWAF_PUB) + ac_flags |= ACF_PUB|ACF_PERM; + + /* Allocate a cache entry */ + ce= alloc_cache_ent(ac_flags); + if (ce == NULL) + { + bf_afree(data); + return ENOMEM; + } + + ce->ac_flags= ac_flags; + ce->ac_state= ACS_VALID; + ce->ac_ethaddr= arp_iop->nwa_ethaddr; + ce->ac_ipaddr= arp_iop->nwa_ipaddr; + ce->ac_port= arp_port; + + curr_time= get_time(); + ce->ac_expire= curr_time+ARP_EXP_TIME; + ce->ac_lastuse= curr_time; + + bf_afree(data); + return 0; + + case NWIOARPDIP: + data= (*get_userdata)(fd, 0, sizeof(*arp_iop), TRUE); + if (data == NULL) + return EFAULT; + data= bf_packIffLess(data, sizeof(*arp_iop)); + arp_iop= (nwio_arp_t *)ptr2acc_data(data); + ipaddr= arp_iop->nwa_ipaddr; + bf_afree(data); data= NULL; + ce= find_cache_ent(arp_port, ipaddr); + if (!ce) + return ENOENT; + if (ce->ac_state == ACS_INCOMPLETE) + return EINVAL; + + ac_flags= ce->ac_flags; + if (ac_flags & ACF_PUB) + { + /* Make sure entry is at the end of published + * entries. 
+ */ + for (entno= 0, cache= arp_cache; + entnoac_state == ACS_UNUSED) + break; + if (cache->ac_flags & ACF_PUB) + continue; + break; + } + assert(cache > arp_cache); + cache--; + if (cache != ce) + { + assert(cache > ce); + *ce= *cache; + ce= cache; + } + } + if (ac_flags & ACF_PERM) + { + /* Make sure entry is at the end of permanent + * entries. + */ + for (entno= 0, cache= arp_cache; + entnoac_state == ACS_UNUSED) + break; + if (cache->ac_flags & ACF_PERM) + continue; + break; + } + assert(cache > arp_cache); + cache--; + if (cache != ce) + { + assert(cache > ce); + *ce= *cache; + ce= cache; + } + } + + /* Clear entry */ + ce->ac_state= ACS_UNUSED; + + return 0; + + default: + ip_panic(("arp_ioctl: unknown request 0x%lx", + (unsigned long)req)); } - else - { - ce= find_cache_ent(arp_port, arp_port->ap_req_ipaddr); - if (ce) { - assert(ce->ac_state == ACS_INCOMPLETE || - (printf("ce->ac_state= %d\n", ce->ac_state),0)); - curr_time= get_time(); - ce->ac_state= ACS_UNREACHABLE; - ce->ac_expire= curr_time+ ARP_NOTRCH_EXP_TIME; - ce->ac_lastuse= curr_time; + return 0; +} - client_reply(arp_port, ce->ac_ipaddr, NULL); +PRIVATE void arp_timeout (ref, timer) +int ref; +timer_t *timer; +{ + int i, port, reqind, acind; + arp_port_t *arp_port; + arp_cache_t *ce; + struct arp_req *reqp; + time_t curr_time; + acc_t *data; + arp46_t *arp; + u16_t *p; + + port= ref / AP_REQ_NR; + reqind= ref % AP_REQ_NR; + + assert(port >= 0 && port ap_req[reqind]; + assert (timer == &reqp->ar_timer); + + acind= reqp->ar_entry; + + assert(acind >= 0 && acind < arp_cache_nr); + ce= &arp_cache[acind]; + + assert(ce->ac_port == arp_port); + assert(ce->ac_state == ACS_INCOMPLETE); + + if (++reqp->ar_req_count >= MAX_ARP_RETRIES) + { + curr_time= get_time(); + ce->ac_state= ACS_UNREACHABLE; + ce->ac_expire= curr_time+ ARP_NOTRCH_EXP_TIME; + ce->ac_lastuse= curr_time; + + clck_untimer(&reqp->ar_timer); + reqp->ar_entry= -1; + client_reply(arp_port, ce->ac_ipaddr, NULL); + return; + } + + data= 
bf_memreq(sizeof(arp46_t)); + arp= (arp46_t *)ptr2acc_data(data); + + /* Clear padding */ + assert(sizeof(arp->a46_data.a46_dummy) % sizeof(*p) == 0); + for (i= 0, p= (u16_t *)arp->a46_data.a46_dummy; + i < sizeof(arp->a46_data.a46_dummy)/sizeof(*p); + i++, p++) + { + *p= 0xdead; + } + + arp->a46_dstaddr.ea_addr[0]= 0xff; + arp->a46_dstaddr.ea_addr[1]= 0xff; + arp->a46_dstaddr.ea_addr[2]= 0xff; + arp->a46_dstaddr.ea_addr[3]= 0xff; + arp->a46_dstaddr.ea_addr[4]= 0xff; + arp->a46_dstaddr.ea_addr[5]= 0xff; + arp->a46_hdr= HTONS(ARP_ETHERNET); + arp->a46_pro= HTONS(ETH_IP_PROTO); + arp->a46_hln= 6; + arp->a46_pln= 4; + arp->a46_op= HTONS(ARP_REQUEST); + arp->a46_sha= arp_port->ap_ethaddr; + memcpy (arp->a46_spa, &arp_port->ap_ipaddr, sizeof(ipaddr_t)); + memset(&arp->a46_tha, '\0', sizeof(ether_addr_t)); + memcpy (arp->a46_tpa, &ce->ac_ipaddr, sizeof(ipaddr_t)); + + assert(data->acc_linkC == 1); + data->acc_ext_link= arp_port->ap_sendlist; + arp_port->ap_sendlist= data; data= NULL; + + if (!(arp_port->ap_flags & APF_ARP_WR_IP)) + setup_write(arp_port); + + clck_timer(&reqp->ar_timer, get_time() + ARP_TIMEOUT, + arp_timeout, ref); +} + +PRIVATE void arp_buffree(priority) +int priority; +{ + int i; + acc_t *pack, *next_pack; + arp_port_t *arp_port; + + for (i= 0, arp_port= arp_port_table; iap_reclist; + while(next_pack && next_pack->acc_ext_link) + { + pack= next_pack; + next_pack= pack->acc_ext_link; + bf_afree(pack); + } + if (next_pack) + { + if (ev_in_queue(&arp_port->ap_event)) + { + DBLOCK(1, printf( + "not freeing ap_reclist, ap_event enqueued\n")); + } + else + { + bf_afree(next_pack); + next_pack= NULL; + } + } + arp_port->ap_reclist= next_pack; + } + if (priority == ARP_PRI_SEND) + { + next_pack= arp_port->ap_sendlist; + while(next_pack && next_pack->acc_ext_link) + { + pack= next_pack; + next_pack= pack->acc_ext_link; + bf_afree(pack); + } + if (next_pack) + { + if (ev_in_queue(&arp_port->ap_event)) + { + DBLOCK(1, printf( + "not freeing ap_sendlist, ap_event 
enqueued\n")); + } + else + { + bf_afree(next_pack); + next_pack= NULL; + } + } + arp_port->ap_sendlist= next_pack; } } } +#ifdef BUF_CONSISTENCY_CHECK +PRIVATE void arp_bufcheck() +{ + int i; + arp_port_t *arp_port; + acc_t *pack; + + for (i= 0, arp_port= arp_port_table; iap_reqlist; pack; + pack= pack->acc_ext_link) + { + bf_check_acc(pack); + } + } +} +#endif /* BUF_CONSISTENCY_CHECK */ + /* - * $PchId: arp.c,v 1.6 1995/11/21 06:45:27 philip Exp $ + * $PchId: arp.c,v 1.22 2005/06/28 14:15:06 philip Exp $ */ diff --git a/servers/inet/generic/arp.h b/servers/inet/generic/arp.h index 5dadb1929..1edfe5394 100644 --- a/servers/inet/generic/arp.h +++ b/servers/inet/generic/arp.h @@ -22,8 +22,11 @@ void arp_set_ipaddr ARGS(( int eth_port, ipaddr_t ipaddr )); int arp_set_cb ARGS(( int eth_port, int ip_port, arp_func_t arp_func )); int arp_ip_eth ARGS(( int eth_port, ipaddr_t ipaddr, ether_addr_t *ethaddr )); +int arp_ioctl ARGS(( int eth_port, int fd, ioreq_t req, + get_userdata_t get_userdata, put_userdata_t put_userdata )); + #endif /* ARP_H */ /* - * $PchId: arp.h,v 1.5 1995/11/21 06:45:27 philip Exp $ + * $PchId: arp.h,v 1.7 2001/04/19 18:58:17 philip Exp $ */ diff --git a/servers/inet/generic/assert.h b/servers/inet/generic/assert.h index 929cc1a3b..e6d6492b0 100644 --- a/servers/inet/generic/assert.h +++ b/servers/inet/generic/assert.h @@ -8,11 +8,11 @@ Copyright 1995 Philip Homburg #if !NDEBUG -void bad_assertion(char *file, int line, char *what); -void bad_compare(char *file, int line, int lhs, char *what, int rhs); +void bad_assertion(char *file, int line, char *what) _NORETURN; +void bad_compare(char *file, int line, int lhs, char *what, int rhs) _NORETURN; -#define assert(x) (!(x) ? bad_assertion(this_file, __LINE__, #x) \ - : (void) 0) +#define assert(x) ((void)(!(x) ? bad_assertion(this_file, __LINE__, \ + #x),0 : 0)) #define compare(a,t,b) (!((a) t (b)) ? 
bad_compare(this_file, __LINE__, \ (a), #a " " #t " " #b, (b)) : (void) 0) @@ -27,5 +27,5 @@ void bad_compare(char *file, int line, int lhs, char *what, int rhs); /* - * $PchId: assert.h,v 1.4 1995/11/21 06:45:27 philip Exp $ + * $PchId: assert.h,v 1.8 2002/03/18 21:50:32 philip Exp $ */ diff --git a/servers/inet/generic/buf.h b/servers/inet/generic/buf.h index 6b0223039..95c893510 100644 --- a/servers/inet/generic/buf.h +++ b/servers/inet/generic/buf.h @@ -11,6 +11,9 @@ Copyright 1995 Philip Homburg #define MAX_BUFREQ_PRI 10 +#define ARP_PRI_REC 3 +#define ARP_PRI_SEND 3 + #define ETH_PRI_PORTBUFS 3 #define ETH_PRI_FDBUFS_EXTRA 5 #define ETH_PRI_FDBUFS 6 @@ -79,6 +82,7 @@ typedef struct acc } acc_t; extern acc_t *bf_temporary_acc; +extern acc_t *bf_linkcheck_acc; /* For debugging... */ @@ -95,7 +99,10 @@ extern acc_t *bf_temporary_acc; #define bf_pack(a) _bf_pack(this_file, __LINE__, a) #define bf_append(a,b) _bf_append(this_file, __LINE__, a, b) #define bf_dupacc(a) _bf_dupacc(this_file, __LINE__, a) +#if 0 +#define bf_mark_1acc(a) _bf_mark_1acc(this_file, __LINE__, a) #define bf_mark_acc(a) _bf_mark_acc(this_file, __LINE__, a) +#endif #define bf_align(a,s,al) _bf_align(this_file, __LINE__, a, s, al) #else /* BUF_IMPLEMENTATION */ @@ -112,6 +119,7 @@ extern acc_t *bf_temporary_acc; #else +#define bf_mark_1acc(acc) ((void)0) #define bf_mark_acc(acc) ((void)0) #endif /* BUF_TRACK_ALLOC_FREE */ @@ -214,22 +222,28 @@ acc_t *_bf_align ARGS(( char *clnt_file, int clnt_line, Size must be less than or equal to BUF_S. */ +int bf_linkcheck ARGS(( acc_t *acc )); +/* check if all link count are positive, and offsets and sizes are within + * the underlying buffer. + */ + #define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \ (&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \ acc_offset])) #define bf_chkbuf(buf) ((buf)? 
(compare((buf)->acc_linkC,>,0), \ compare((buf)->acc_buffer, !=, 0), \ - compare((buf)->acc_buffer->buf_linkC,>,0)) : 0) + compare((buf)->acc_buffer->buf_linkC,>,0)) : (void)0) #ifdef BUF_CONSISTENCY_CHECK int bf_consistency_check ARGS(( void )); void bf_check_acc ARGS(( acc_t *acc )); +void _bf_mark_1acc ARGS(( char *clnt_file, int clnt_line, acc_t *acc )); void _bf_mark_acc ARGS(( char *clnt_file, int clnt_line, acc_t *acc )); #endif #endif /* BUF_H */ /* - * $PchId: buf.h,v 1.8 1995/11/21 06:45:27 philip Exp $ + * $PchId: buf.h,v 1.13 2003/09/10 08:52:09 philip Exp $ */ diff --git a/servers/inet/generic/eth.c b/servers/inet/generic/eth.c index 23fe72f07..9705d472f 100644 --- a/servers/inet/generic/eth.c +++ b/servers/inet/generic/eth.c @@ -29,6 +29,7 @@ typedef struct eth_fd nwio_ethopt_t ef_ethopt; eth_port_t *ef_port; struct eth_fd *ef_type_next; + struct eth_fd *ef_send_next; int ef_srfd; acc_t *ef_rdbuf_head; acc_t *ef_rdbuf_tail; @@ -47,6 +48,15 @@ typedef struct eth_fd # define EFF_WRITE_IP 0x4 # define EFF_OPTSET 0x8 +/* Note that the vh_type field is normally considered part of the ethernet + * header. 
+ */ +typedef struct +{ + u16_t vh_type; + u16_t vh_vlan; +} vlan_hdr_t; + FORWARD int eth_checkopt ARGS(( eth_fd_t *eth_fd )); FORWARD void hash_fd ARGS(( eth_fd_t *eth_fd )); FORWARD void unhash_fd ARGS(( eth_fd_t *eth_fd )); @@ -59,9 +69,12 @@ FORWARD void reply_thr_get ARGS(( eth_fd_t *eth_fd, size_t result, int for_ioctl )); FORWARD void reply_thr_put ARGS(( eth_fd_t *eth_fd, size_t result, int for_ioctl )); +FORWARD void do_rec_conf ARGS(( eth_port_t *eth_port )); FORWARD u32_t compute_rec_conf ARGS(( eth_port_t *eth_port )); +FORWARD acc_t *insert_vlan_hdr ARGS(( eth_port_t *eth_port, acc_t *pack )); PUBLIC eth_port_t *eth_port_table; +PUBLIC int no_ethWritePort= 0; PRIVATE eth_fd_t eth_fd_table[ETH_FD_NR]; PRIVATE ether_addr_t broadcast= { { 255, 255, 255, 255, 255, 255 } }; @@ -80,18 +93,21 @@ PUBLIC void eth_init() thus a good compiler doesn't generate any code for this */ -#if ZERO + for (i=0; ief_port; @@ -163,7 +181,6 @@ ioreq_t req; int result; u32_t new_en_flags, new_di_flags, old_en_flags, old_di_flags; - u32_t flags; data= (*eth_fd->ef_get_userdata)(eth_fd-> ef_srfd, 0, sizeof(nwio_ethopt_t), TRUE); @@ -270,8 +287,7 @@ ioreq_t req; if (changes & (NWEO_BROAD_MASK | NWEO_MULTI_MASK | NWEO_PROMISC_MASK)) { - flags= compute_rec_conf(eth_port); - eth_set_rec_conf(eth_port, flags); + do_rec_conf(eth_port); } } @@ -307,7 +323,7 @@ ioreq_t req; acc_t *acc; int result; -assert (sizeof(nwio_ethstat_t) <= BUF_S); + assert (sizeof(nwio_ethstat_t) <= BUF_S); eth_port= eth_fd->ef_port; if (!(eth_port->etp_flags & EPF_ENABLED)) @@ -317,15 +333,24 @@ assert (sizeof(nwio_ethstat_t) <= BUF_S); } acc= bf_memreq(sizeof(nwio_ethstat_t)); -compare (bf_bufsize(acc), ==, sizeof(*ethstat)); + compare (bf_bufsize(acc), ==, sizeof(*ethstat)); ethstat= (nwio_ethstat_t *)ptr2acc_data(acc); - ethstat->nwes_addr= eth_port->etp_ethaddr; - result= eth_get_stat(eth_port, ðstat->nwes_stat); -assert (result == 0); -compare (bf_bufsize(acc), ==, sizeof(*ethstat)); + if 
(!eth_port->etp_vlan) + { + result= eth_get_stat(eth_port, + ðstat->nwes_stat); + assert (result == 0); + } + else + { + /* No statistics */ + memset(ðstat->nwes_stat, '\0', + sizeof(ethstat->nwes_stat)); + } + result= (*eth_fd->ef_put_userdata)(eth_fd-> ef_srfd, 0, acc, TRUE); if (result >= 0) @@ -344,7 +369,7 @@ int fd; size_t count; { eth_fd_t *eth_fd; - eth_port_t *eth_port; + eth_port_t *eth_port, *rep; acc_t *user_data; int r; @@ -370,9 +395,19 @@ size_t count; return NW_OK; } eth_fd->ef_flags |= EFF_WRITE_IP; - if (eth_port->etp_wr_pack) + + /* Enqueue at the real ethernet port */ + rep= eth_port->etp_vlan_port; + if (!rep) + rep= eth_port; + if (rep->etp_wr_pack) { - eth_port->etp_flags |= EPF_MORE2WRITE; + eth_fd->ef_send_next= NULL; + if (rep->etp_sendq_head) + rep->etp_sendq_tail->ef_send_next= eth_fd; + else + rep->etp_sendq_head= eth_fd; + rep->etp_sendq_tail= eth_fd; return NW_SUSPEND; } @@ -398,7 +433,7 @@ acc_t *data; size_t data_len; { eth_fd_t *eth_fd; - eth_port_t *eth_port; + eth_port_t *eth_port, *rep; eth_hdr_t *eth_hdr; acc_t *eth_pack; unsigned long nweo_flags; @@ -420,11 +455,14 @@ size_t data_len; DBLOCK(1, printf("illegal packetsize (%d)\n",count)); return EPACKSIZE; } - if (eth_port->etp_wr_pack) + rep= eth_port->etp_vlan_port; + if (!rep) + rep= eth_port; + + if (rep->etp_wr_pack) return NW_WOULDBLOCK; nweo_flags= eth_fd->ef_ethopt.nweo_flags; - if (nweo_flags & NWEO_RWDATONLY) { eth_pack= bf_memreq(ETH_HDR_SIZE); @@ -450,9 +488,20 @@ size_t data_len; eth_port->etp_wr_pack= eth_pack; ev_arg.ev_ptr= eth_port; ev_enqueue(ð_port->etp_sendev, eth_loop_ev, ev_arg); + return NW_OK; } - else - eth_write_port(eth_port, eth_pack); + + if (rep != eth_port) + { + eth_pack= insert_vlan_hdr(eth_port, eth_pack); + if (!eth_pack) + { + /* Packet is silently discarded */ + return NW_OK; + } + } + + eth_write_port(rep, eth_pack); return NW_OK; } @@ -506,29 +555,34 @@ int which_operation; switch (which_operation) { case SR_CANCEL_READ: -assert 
(eth_fd->ef_flags & EFF_READ_IP); + assert (eth_fd->ef_flags & EFF_READ_IP); eth_fd->ef_flags &= ~EFF_READ_IP; reply_thr_put(eth_fd, EINTR, FALSE); break; case SR_CANCEL_WRITE: -assert (eth_fd->ef_flags & EFF_WRITE_IP); + assert (eth_fd->ef_flags & EFF_WRITE_IP); eth_fd->ef_flags &= ~EFF_WRITE_IP; reply_thr_get(eth_fd, EINTR, FALSE); break; -#if !CRAMPED default: ip_panic(( "got unknown cancel request" )); -#endif } return NW_OK; } +PUBLIC int eth_select(fd, operations) +int fd; +unsigned operations; +{ + printf("eth_select: not implemented\n"); + return 0; +} + PUBLIC void eth_close(fd) int fd; { eth_fd_t *eth_fd; eth_port_t *eth_port; - u32_t flags; acc_t *pack; eth_fd= ð_fd_table[fd]; @@ -547,8 +601,7 @@ int fd; eth_fd->ef_flags= EFF_EMPTY; eth_port= eth_fd->ef_port; - flags= compute_rec_conf(eth_port); - eth_set_rec_conf(eth_port, flags); + do_rec_conf(eth_port); } PUBLIC void eth_loop_ev(ev, ev_arg) @@ -562,7 +615,13 @@ ev_arg_t ev_arg; assert(ev == ð_port->etp_sendev); pack= eth_port->etp_wr_pack; + + assert(!no_ethWritePort); + no_ethWritePort= 1; eth_arrive(eth_port, pack, bf_bufsize(pack)); + assert(no_ethWritePort); + no_ethWritePort= 0; + eth_port->etp_wr_pack= NULL; eth_restart_write(eth_port); } @@ -665,30 +724,14 @@ PUBLIC void eth_restart_write(eth_port) eth_port_t *eth_port; { eth_fd_t *eth_fd; - int i, r; + int r; - if (eth_port->etp_wr_pack) - return; - - if (!(eth_port->etp_flags & EPF_MORE2WRITE)) - return; - eth_port->etp_flags &= ~EPF_MORE2WRITE; - - for (i=0, eth_fd= eth_fd_table; ietp_wr_pack == NULL); + while (eth_fd= eth_port->etp_sendq_head, eth_fd != NULL) { - if ((eth_fd->ef_flags & (EFF_INUSE|EFF_WRITE_IP)) != - (EFF_INUSE|EFF_WRITE_IP)) - { - continue; - } - if (eth_fd->ef_port != eth_port) - continue; - if (eth_port->etp_wr_pack) - { - eth_port->etp_flags |= EPF_MORE2WRITE; return; - } + eth_port->etp_sendq_head= eth_fd->ef_send_next; eth_fd->ef_flags &= ~EFF_WRITE_IP; r= eth_write(eth_fd-eth_fd_table, eth_fd->ef_write_count); @@ 
-708,7 +751,12 @@ size_t pack_size; ether_type_t type; eth_fd_t *eth_fd, *first_fd, *share_fd; int hash, i; + u16_t vlan, temp; time_t exp_time; + acc_t *vlan_pack, *hdr_acc, *tmp_acc; + eth_port_t *vp; + vlan_hdr_t vh; + u32_t *p; exp_time= get_time() + EXPIRE_TIME; @@ -741,6 +789,46 @@ size_t pack_size; hash ^= (hash >> 8); hash &= (ETH_TYPE_HASH_NR-1); + if (type == HTONS(ETH_VLAN_PROTO)) + { + /* VLAN packet. Extract original ethernet packet */ + + vlan_pack= pack; + vlan_pack->acc_linkC++; + hdr_acc= bf_cut(vlan_pack, 0, 2*sizeof(ether_addr_t)); + vlan_pack= bf_delhead(vlan_pack, 2*sizeof(ether_addr_t)); + vlan_pack= bf_packIffLess(vlan_pack, sizeof(vh)); + vh= *(vlan_hdr_t *)ptr2acc_data(vlan_pack); + vlan_pack= bf_delhead(vlan_pack, sizeof(vh)); + hdr_acc= bf_append(hdr_acc, vlan_pack); + vlan_pack= hdr_acc; hdr_acc= NULL; + if (bf_bufsize(vlan_pack) < ETH_MIN_PACK_SIZE) + { + tmp_acc= bf_memreq(sizeof(vh)); + + /* Clear padding */ + assert(sizeof(vh) <= sizeof(*p)); + p= (u32_t *)ptr2acc_data(tmp_acc); + *p= 0xdeadbeef; + + vlan_pack= bf_append(vlan_pack, tmp_acc); + tmp_acc= NULL; + } + vlan= ntohs(vh.vh_vlan); + if (vlan & ETH_TCI_CFI) + { + /* No support for extended address formats */ + bf_afree(vlan_pack); vlan_pack= NULL; + } + vlan &= ETH_TCI_VLAN_MASK; + } + else + { + /* No VLAN processing */ + vlan_pack= NULL; + vlan= 0; /* lint */ + } + first_fd= NULL; for (i= 0; i<2; i++) { @@ -813,6 +901,40 @@ size_t pack_size; } bf_afree(pack); } + if (vlan_pack) + { + hash= ETH_HASH_VLAN(vlan, temp); + for (vp= eth_port->etp_vlan_tab[hash]; vp; + vp= vp->etp_vlan_next) + { + if (vp->etp_vlan == vlan) + break; + } + if (vp) + { + eth_arrive(vp, vlan_pack, pack_size-sizeof(vh)); + vlan_pack= NULL; + } + else + { + /* No device for VLAN */ + bf_afree(vlan_pack); + vlan_pack= NULL; + } + } +} + +PUBLIC void eth_reg_vlan(eth_port, vlan_port) +eth_port_t *eth_port; +eth_port_t *vlan_port; +{ + u16_t t, vlan; + int h; + + vlan= vlan_port->etp_vlan; + h= 
ETH_HASH_VLAN(vlan, t); + vlan_port->etp_vlan_next= eth_port->etp_vlan_tab[h]; + eth_port->etp_vlan_tab[h]= vlan_port; } PRIVATE void packet2user (eth_fd, pack, exp_time) @@ -923,6 +1045,27 @@ PRIVATE void eth_bufcheck() } #endif +PRIVATE void do_rec_conf(eth_port) +eth_port_t *eth_port; +{ + int i; + u32_t flags; + eth_port_t *vp; + + if (eth_port->etp_vlan) + { + /* Configure underlying device */ + eth_port= eth_port->etp_vlan_port; + } + flags= compute_rec_conf(eth_port); + for (i= 0; ietp_vlan_tab[i]; vp; vp= vp->etp_vlan_next) + flags |= compute_rec_conf(vp); + } + eth_set_rec_conf(eth_port, flags); +} + PRIVATE u32_t compute_rec_conf(eth_port) eth_port_t *eth_port; { @@ -968,6 +1111,41 @@ int for_ioctl; assert(error == NW_OK); } +PRIVATE acc_t *insert_vlan_hdr(eth_port, pack) +eth_port_t *eth_port; +acc_t *pack; +{ + acc_t *head_acc, *vh_acc; + u16_t type, vlan; + vlan_hdr_t *vp; + + head_acc= bf_cut(pack, 0, 2*sizeof(ether_addr_t)); + pack= bf_delhead(pack, 2*sizeof(ether_addr_t)); + pack= bf_packIffLess(pack, sizeof(type)); + type= *(u16_t *)ptr2acc_data(pack); + if (type == HTONS(ETH_VLAN_PROTO)) + { + /* Packeted is already tagged. Should update vlan number. + * For now, just discard packet. 
+ */ + printf("insert_vlan_hdr: discarding vlan packet\n"); + bf_afree(head_acc); head_acc= NULL; + bf_afree(pack); pack= NULL; + return NULL; + } + vlan= eth_port->etp_vlan; /* priority and CFI are zero */ + + vh_acc= bf_memreq(sizeof(vlan_hdr_t)); + vp= (vlan_hdr_t *)ptr2acc_data(vh_acc); + vp->vh_type= HTONS(ETH_VLAN_PROTO); + vp->vh_vlan= htons(vlan); + + head_acc= bf_append(head_acc, vh_acc); vh_acc= NULL; + head_acc= bf_append(head_acc, pack); pack= NULL; + pack= head_acc; head_acc= NULL; + return pack; +} + /* - * $PchId: eth.c,v 1.11 1996/08/02 07:04:58 philip Exp $ + * $PchId: eth.c,v 1.23 2005/06/28 14:15:58 philip Exp $ */ diff --git a/servers/inet/generic/eth.h b/servers/inet/generic/eth.h index b084c3f98..be712c730 100644 --- a/servers/inet/generic/eth.h +++ b/servers/inet/generic/eth.h @@ -23,16 +23,17 @@ void eth_prep ARGS(( void )); void eth_init ARGS(( void )); int eth_open ARGS(( int port, int srfd, get_userdata_t get_userdata, put_userdata_t put_userdata, - put_pkt_t put_pkt )); + put_pkt_t put_pkt, select_res_t sel_res )); int eth_ioctl ARGS(( int fd, ioreq_t req)); int eth_read ARGS(( int port, size_t count )); int eth_write ARGS(( int port, size_t count )); int eth_cancel ARGS(( int fd, int which_operation )); +int eth_select ARGS(( int fd, unsigned operations )); void eth_close ARGS(( int fd )); int eth_send ARGS(( int port, struct acc *data, size_t data_len )); #endif /* ETH_H */ /* - * $PchId: eth.h,v 1.6 1996/05/07 20:49:07 philip Exp $ + * $PchId: eth.h,v 1.8 2005/06/28 14:16:10 philip Exp $ */ diff --git a/servers/inet/generic/eth_int.h b/servers/inet/generic/eth_int.h index 5b49c9255..d9fc74d90 100644 --- a/servers/inet/generic/eth_int.h +++ b/servers/inet/generic/eth_int.h @@ -8,35 +8,43 @@ Copyright 1995 Philip Homburg #define ETH_INT_H #define ETH_TYPE_HASH_NR 16 +#define ETH_VLAN_HASH_NR 16 +/* Assume that the arguments are a local variable */ +#define ETH_HASH_VLAN(v,t) \ + ((t)= (((v) >> 8) ^ (v)), \ + (t)= (((t) >> 4) ^ (t)), \ + 
(t) & (ETH_VLAN_HASH_NR-1)) + typedef struct eth_port { int etp_flags; ether_addr_t etp_ethaddr; acc_t *etp_wr_pack, *etp_rd_pack; + struct eth_fd *etp_sendq_head; + struct eth_fd *etp_sendq_tail; struct eth_fd *etp_type_any; struct eth_fd *etp_type[ETH_TYPE_HASH_NR]; event_t etp_sendev; + /* VLAN support */ + u16_t etp_vlan; + struct eth_port *etp_vlan_port; + struct eth_port *etp_vlan_tab[ETH_VLAN_HASH_NR]; + struct eth_port *etp_vlan_next; + osdep_eth_port_t etp_osdep; } eth_port_t; #define EPF_EMPTY 0x0 #define EPF_ENABLED 0x1 -#define EPF_MORE2WRITE 0x10 #define EPF_READ_IP 0x20 #define EPF_READ_SP 0x40 -#if 0 -#define EPS_EMPTY 0x0 -#define EPS_LOC 0x1 -#define EPS_BROAD 0x2 -#define EPS_MULTI 0x4 -#define EPS_PROMISC 0x8 -#endif - extern eth_port_t *eth_port_table; +extern int no_ethWritePort; /* debug, consistency check */ + void osdep_eth_init ARGS(( void )); int eth_get_stat ARGS(( eth_port_t *eth_port, eth_stat_t *eth_stat )); void eth_write_port ARGS(( eth_port_t *eth_port, acc_t *pack )); @@ -44,9 +52,10 @@ void eth_arrive ARGS(( eth_port_t *port, acc_t *pack, size_t pack_size )); void eth_set_rec_conf ARGS(( eth_port_t *eth_port, u32_t flags )); void eth_restart_write ARGS(( eth_port_t *eth_port )); void eth_loop_ev ARGS(( event_t *ev, ev_arg_t ev_arg )); +void eth_reg_vlan ARGS(( eth_port_t *eth_port, eth_port_t *vlan_port )); #endif /* ETH_INT_H */ /* - * $PchId: eth_int.h,v 1.6 1995/11/21 06:45:27 philip Exp $ + * $PchId: eth_int.h,v 1.9 2001/04/23 08:04:06 philip Exp $ */ diff --git a/servers/inet/generic/event.c b/servers/inet/generic/event.c index ae0b2faf4..e59d97706 100644 --- a/servers/inet/generic/event.c +++ b/servers/inet/generic/event.c @@ -1,7 +1,7 @@ /* inet/generic/event.c -Created: April 1995 by Philip Homburg +Created: April 1995 by Philip Homburg Implementation of an event queue. 
@@ -65,5 +65,5 @@ event_t *ev; /* - * $PchId: event.c,v 1.4 1995/11/21 06:45:27 philip Exp $ + * $PchId: event.c,v 1.6 2004/08/03 16:23:32 philip Exp $ */ diff --git a/servers/inet/generic/event.h b/servers/inet/generic/event.h index 1c50c962f..568371268 100644 --- a/servers/inet/generic/event.h +++ b/servers/inet/generic/event.h @@ -1,7 +1,7 @@ /* inet/generic/event.h -Created: April 1995 by Philip Homburg +Created: April 1995 by Philip Homburg Header file for an event mechanism. @@ -38,5 +38,5 @@ int ev_in_queue ARGS(( event_t *ev )); #endif /* INET__GENERIC__EVENT_H */ /* - * $PchId: event.h,v 1.4 1995/11/21 06:45:27 philip Exp $ + * $PchId: event.h,v 1.5 2004/08/03 16:23:49 philip Exp $ */ diff --git a/servers/inet/generic/icmp.c b/servers/inet/generic/icmp.c index b05b1aa95..a24507b16 100644 --- a/servers/inet/generic/icmp.c +++ b/servers/inet/generic/icmp.c @@ -10,6 +10,7 @@ Copyright 1995 Philip Homburg #include "type.h" #include "assert.h" +#include "clock.h" #include "icmp.h" #include "icmp_lib.h" #include "io.h" @@ -25,9 +26,13 @@ typedef struct icmp_port int icp_state; int icp_ipport; int icp_ipfd; + unsigned icp_rate_count; + unsigned icp_rate_report; + time_t icp_rate_lasttime; acc_t *icp_head_queue; acc_t *icp_tail_queue; acc_t *icp_write_pack; + event_t icp_event; } icmp_port_t; #define ICPF_EMPTY 0x0 @@ -71,9 +76,11 @@ FORWARD acc_t *make_repl_ip ARGS(( ip_hdr_t *ip_hdr, int ip_len )); FORWARD void enqueue_pack ARGS(( icmp_port_t *icmp_port, acc_t *reply_ip_hdr )); -FORWARD void icmp_write ARGS(( icmp_port_t *icmp_port )); +FORWARD int icmp_rate_limit ARGS(( icmp_port_t *icmp_port, + acc_t *reply_ip_hdr )); +FORWARD void icmp_write ARGS(( event_t *ev, ev_arg_t ev_arg )); FORWARD void icmp_buffree ARGS(( int priority )); -FORWARD acc_t *icmp_err_pack ARGS(( acc_t *pack, icmp_hdr_t **icmp_hdr )); +FORWARD acc_t *icmp_err_pack ARGS(( acc_t *pack, icmp_hdr_t **icmp_hdr_pp )); #ifdef BUF_CONSISTENCY_CHECK FORWARD void icmp_bufcheck ARGS(( void )); #endif 
@@ -92,11 +99,13 @@ PUBLIC void icmp_init() for (i= 0, icmp_port= icmp_port_table; iicp_flags= ICPF_EMPTY; icmp_port->icp_state= ICPS_BEGIN; -#endif icmp_port->icp_ipport= i; + icmp_port->icp_rate_count= 0; + icmp_port->icp_rate_report= ICMP_MAX_RATE; + icmp_port->icp_rate_lasttime= 0; + ev_init(&icmp_port->icp_event); } #ifndef BUF_CONSISTENCY_CHECK @@ -119,8 +128,9 @@ icmp_port_t *icmp_port; { case ICPS_BEGIN: icmp_port->icp_head_queue= 0; - icmp_port->icp_ipfd= ip_open (icmp_port->icp_ipport, - icmp_port->icp_ipport, icmp_getdata, icmp_putdata, 0); + icmp_port->icp_ipfd= ip_open(icmp_port->icp_ipport, + icmp_port->icp_ipport, icmp_getdata, icmp_putdata, + 0 /* no put_pkt */, 0 /* no select_res */); if (icmp_port->icp_ipfd<0) { DBLOCK(1, printf("unable to open ip_port %d\n", @@ -159,6 +169,7 @@ int for_ioctl; nwio_ipopt_t *ipopt; acc_t *data; int result; + ev_arg_t ev_arg; icmp_port= &icmp_port_table[port]; @@ -177,9 +188,10 @@ int for_ioctl; } if (icmp_port->icp_flags & ICPF_WRITE_SP) { - icmp_port->icp_flags &= - ~(ICPF_WRITE_IP|ICPF_WRITE_SP); - icmp_write (icmp_port); + icmp_port->icp_flags &= ~ICPF_WRITE_SP; + ev_arg.ev_ptr= icmp_port; + ev_enqueue(&icmp_port->icp_event, icmp_write, + ev_arg); } return NW_OK; } @@ -202,9 +214,7 @@ int for_ioctl; return NW_OK; } -assert (count == sizeof (*ipopt)); data= bf_memreq (sizeof (*ipopt)); -assert (data->acc_length == sizeof(*ipopt)); ipopt= (nwio_ipopt_t *)ptr2acc_data(data); ipopt->nwio_flags= NWIO_COPY | NWIO_EN_LOC | NWIO_EN_BROAD | @@ -213,10 +223,10 @@ assert (data->acc_length == sizeof(*ipopt)); ipopt->nwio_proto= IPPROTO_ICMP; return data; default: - DBLOCK(1, printf("unknown state %d\n", - icmp_port->icp_state)); - return 0; + break; } + DBLOCK(1, printf("unknown state %d\n", icmp_port->icp_state)); + return NULL; } PRIVATE int icmp_putdata(port, offset, data, for_ioctl) @@ -232,7 +242,6 @@ int for_ioctl; if (icmp_port->icp_flags & ICPF_READ_IP) { -assert (!for_ioctl); if (!data) { result= (int)offset; @@ 
-266,10 +275,6 @@ icmp_port_t *icmp_port; { int result; -assert (!(icmp_port->icp_flags & (ICPF_READ_IP|ICPF_READ_SP) || - (icmp_port->icp_flags & (ICPF_READ_IP|ICPF_READ_SP)) == - (ICPF_READ_IP|ICPF_READ_SP))); - for (;;) { icmp_port->icp_flags |= ICPF_READ_IP; @@ -289,12 +294,17 @@ int port_nr; acc_t *pack; int code; { - acc_t *icmp_acc; icmp_hdr_t *icmp_hdr; icmp_port_t *icmp_port; - assert(0 <= port_nr && port_nr < ip_conf_nr); - icmp_port= &icmp_port_table[port_nr]; + if (port_nr >= 0 && port_nr < ip_conf_nr) + icmp_port= &icmp_port_table[port_nr]; + else + { + printf("icmp_snd_time_exceeded: strange port %d\n", port_nr); + bf_afree(pack); + return; + } pack= icmp_err_pack(pack, &icmp_hdr); if (pack == NULL) return; @@ -311,12 +321,17 @@ acc_t *pack; int code; ipaddr_t gw; { - acc_t *icmp_acc; icmp_hdr_t *icmp_hdr; icmp_port_t *icmp_port; - assert(0 <= port_nr && port_nr < ip_conf_nr); - icmp_port= &icmp_port_table[port_nr]; + if (port_nr >= 0 && port_nr < ip_conf_nr) + icmp_port= &icmp_port_table[port_nr]; + else + { + printf("icmp_snd_redirect: strange port %d\n", port_nr); + bf_afree(pack); + return; + } pack= icmp_err_pack(pack, &icmp_hdr); if (pack == NULL) return; @@ -335,12 +350,17 @@ int port_nr; acc_t *pack; int code; { - acc_t *icmp_acc; icmp_hdr_t *icmp_hdr; icmp_port_t *icmp_port; - assert(0 <= port_nr && port_nr < ip_conf_nr); - icmp_port= &icmp_port_table[port_nr]; + if (port_nr >= 0 && port_nr < ip_conf_nr) + icmp_port= &icmp_port_table[port_nr]; + else + { + printf("icmp_snd_unreachable: strange port %d\n", port_nr); + bf_afree(pack); + return; + } pack= icmp_err_pack(pack, &icmp_hdr); if (pack == NULL) return; @@ -351,6 +371,36 @@ int code; enqueue_pack(icmp_port, pack); } +PUBLIC void icmp_snd_mtu(port_nr, pack, mtu) +int port_nr; +acc_t *pack; +u16_t mtu; +{ + icmp_hdr_t *icmp_hdr; + icmp_port_t *icmp_port; + + if (port_nr >= 0 && port_nr < ip_conf_nr) + icmp_port= &icmp_port_table[port_nr]; + else + { + printf("icmp_snd_mtu: strange port 
%d\n", port_nr); + bf_afree(pack); + return; + } + + pack= icmp_err_pack(pack, &icmp_hdr); + if (pack == NULL) + return; + icmp_hdr->ih_type= ICMP_TYPE_DST_UNRCH; + icmp_hdr->ih_code= ICMP_FRAGM_AND_DF; + icmp_hdr->ih_hun.ihh_mtu.im_mtu= htons(mtu); + icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum, + (u16_t *)&icmp_hdr->ih_type, 2); + icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum, + (u16_t *)&icmp_hdr->ih_hun.ihh_mtu.im_mtu, 2); + enqueue_pack(icmp_port, pack); +} + PRIVATE void process_data(icmp_port, data) icmp_port_t *icmp_port; acc_t *data; @@ -378,16 +428,23 @@ acc_t *data; pack_len= bf_bufsize(data); pack_len -= ip_hdr_len; - if (pack_len < ICMP_MIN_HDR_LEN) + if (pack_len < ICMP_MIN_HDR_SIZE) { - DBLOCK(1, printf("got an incomplete icmp packet\n")); + if (pack_len == 0 && ip_hdr->ih_proto == 0) + { + /* IP layer reports new ip address, which can be + * ignored. + */ + } + else + DBLOCK(1, printf("got an incomplete icmp packet\n")); bf_afree(data); return; } icmp_data= bf_cut(data, ip_hdr_len, pack_len); - icmp_data= bf_packIffLess (icmp_data, ICMP_MIN_HDR_LEN); + icmp_data= bf_packIffLess (icmp_data, ICMP_MIN_HDR_SIZE); icmp_hdr= (icmp_hdr_t *)ptr2acc_data(icmp_data); if ((u16_t)~icmp_pack_oneCsum(icmp_data)) @@ -450,9 +507,10 @@ ip_hdr_t *ip_hdr; icmp_hdr_t *icmp_hdr; { acc_t *repl_ip_hdr, *repl_icmp; + ipaddr_t tmpaddr, locaddr, netmask; icmp_hdr_t *repl_icmp_hdr; i32_t tmp_chksum; - u16_t u16; + ip_port_t *ip_port; if (icmp_hdr->ih_code != 0) { @@ -463,16 +521,44 @@ icmp_hdr_t *icmp_hdr; bf_afree(icmp_data); return; } - if (icmp_len < ICMP_MIN_HDR_LEN + sizeof(icmp_id_seq_t)) + if (icmp_len < ICMP_MIN_HDR_SIZE + sizeof(icmp_id_seq_t)) { DBLOCK(1, printf("got an incomplete icmp echo request\n")); bf_afree(ip_data); bf_afree(icmp_data); return; } + tmpaddr= ntohl(ip_hdr->ih_dst); + if ((tmpaddr & 0xe0000000) == 0xe0000000 && + tmpaddr != 0xffffffff) + { + /* Respond only to the all hosts multicast address until + * a decent listening service has 
been implemented + */ + if (tmpaddr != 0xe0000001) + { + bf_afree(ip_data); + bf_afree(icmp_data); + return; + } + } + + /* Limit subnet broadcasts to the local net */ + ip_port= &ip_port_table[icmp_port->icp_ipport]; + locaddr= ip_port->ip_ipaddr; + netmask= ip_port->ip_subnetmask; + if (ip_hdr->ih_dst == (locaddr | ~netmask) && + (ip_port->ip_flags & IPF_SUBNET_BCAST) && + ((ip_hdr->ih_src ^ locaddr) & netmask) != 0) + { + /* Directed broadcast */ + bf_afree(ip_data); + bf_afree(icmp_data); + return; + } + repl_ip_hdr= make_repl_ip(ip_hdr, ip_len); - repl_icmp= bf_memreq (ICMP_MIN_HDR_LEN); -assert (repl_icmp->acc_length == ICMP_MIN_HDR_LEN); + repl_icmp= bf_memreq (ICMP_MIN_HDR_SIZE); repl_icmp_hdr= (icmp_hdr_t *)ptr2acc_data(repl_icmp); repl_icmp_hdr->ih_type= ICMP_TYPE_ECHO_REPL; repl_icmp_hdr->ih_code= 0; @@ -490,8 +576,8 @@ assert (repl_icmp->acc_length == ICMP_MIN_HDR_LEN); DBLOCK(2, printf("sending chksum 0x%x\n", repl_icmp_hdr->ih_chksum)); repl_ip_hdr->acc_next= repl_icmp; - repl_icmp->acc_next= bf_cut (icmp_data, ICMP_MIN_HDR_LEN, - icmp_len - ICMP_MIN_HDR_LEN); + repl_icmp->acc_next= bf_cut (icmp_data, ICMP_MIN_HDR_SIZE, + icmp_len - ICMP_MIN_HDR_SIZE); bf_afree(ip_data); bf_afree(icmp_data); @@ -508,8 +594,6 @@ acc_t *icmp_pack; int length; char byte_buf[2]; - assert (icmp_pack); - prev= 0; odd_byte= FALSE; @@ -560,7 +644,6 @@ int ip_len; repl_hdr_len= IP_MIN_HDR_SIZE; repl= bf_memreq(repl_hdr_len); -assert (repl->acc_length == repl_hdr_len); repl_ip_hdr= (ip_hdr_t *)ptr2acc_data(repl); @@ -578,6 +661,25 @@ PRIVATE void enqueue_pack(icmp_port, reply_ip_hdr) icmp_port_t *icmp_port; acc_t *reply_ip_hdr; { + int r; + ev_arg_t ev_arg; + + /* Check rate */ + if (icmp_port->icp_rate_count >= ICMP_MAX_RATE) + { + /* Something is going wrong; check policy */ + r= icmp_rate_limit(icmp_port, reply_ip_hdr); + if (r == -1) + { + bf_afree(reply_ip_hdr); + reply_ip_hdr= NULL; + return; + } + + /* OK, continue */ + } + icmp_port->icp_rate_count++; + 
reply_ip_hdr->acc_ext_link= 0; if (icmp_port->icp_head_queue) @@ -593,25 +695,116 @@ acc_t *reply_ip_hdr; icmp_port->icp_tail_queue= reply_ip_hdr; if (!(icmp_port->icp_flags & ICPF_WRITE_IP)) - icmp_write(icmp_port); + { + icmp_port->icp_flags |= ICPF_WRITE_IP; + ev_arg.ev_ptr= icmp_port; + ev_enqueue(&icmp_port->icp_event, icmp_write, ev_arg); + } } -PRIVATE void icmp_write(icmp_port) +PRIVATE int icmp_rate_limit(icmp_port, reply_ip_hdr) icmp_port_t *icmp_port; +acc_t *reply_ip_hdr; +{ + time_t t; + acc_t *pack; + ip_hdr_t *ip_hdr; + icmp_hdr_t *icmp_hdr; + int hdrlen, icmp_hdr_len, type; + + /* Check the time first */ + t= get_time(); + if (t >= icmp_port->icp_rate_lasttime + ICMP_RATE_INTERVAL) + { + icmp_port->icp_rate_lasttime= t; + icmp_port->icp_rate_count= 0; + return 0; + } + + icmp_port->icp_rate_count++; + + /* Adjust report limit if necessary */ + if (icmp_port->icp_rate_count > + icmp_port->icp_rate_report+ICMP_RATE_WARN) + { + icmp_port->icp_rate_report *= 2; + return -1; + } + + /* Do we need to report */ + if (icmp_port->icp_rate_count < icmp_port->icp_rate_report) + return -1; + + pack= bf_dupacc(reply_ip_hdr); + pack= bf_packIffLess(pack, IP_MIN_HDR_SIZE); + ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + printf("icmp[%d]: dropping ICMP packet #%d to ", + icmp_port->icp_ipport, icmp_port->icp_rate_count); + writeIpAddr(ip_hdr->ih_dst); + hdrlen= (ip_hdr->ih_vers_ihl & IH_IHL_MASK)*4; + pack= bf_packIffLess(pack, hdrlen+ICMP_MIN_HDR_SIZE); + ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + icmp_hdr= (icmp_hdr_t *)(ptr2acc_data(pack)+hdrlen); + type= icmp_hdr->ih_type; + printf(" type %d, code %d\n", type, icmp_hdr->ih_code); + switch(type) + { + case ICMP_TYPE_DST_UNRCH: + case ICMP_TYPE_SRC_QUENCH: + case ICMP_TYPE_REDIRECT: + case ICMP_TYPE_TIME_EXCEEDED: + case ICMP_TYPE_PARAM_PROBLEM: + icmp_hdr_len= offsetof(struct icmp_hdr, ih_dun); + pack= bf_packIffLess(pack, + hdrlen+icmp_hdr_len+IP_MIN_HDR_SIZE); + ip_hdr= (ip_hdr_t 
*)(ptr2acc_data(pack)+hdrlen+icmp_hdr_len); + icmp_hdr= (icmp_hdr_t *)(ptr2acc_data(pack)+hdrlen); + printf("\tinfo %08x, original dst ", + ntohs(icmp_hdr->ih_hun.ihh_unused)); + writeIpAddr(ip_hdr->ih_dst); + printf(", proto %d, length %u\n", + ip_hdr->ih_proto, ntohs(ip_hdr->ih_length)); + break; + default: + break; + } + bf_afree(pack); pack= NULL; + + return -1; +} + +PRIVATE void icmp_write(ev, ev_arg) +event_t *ev; +ev_arg_t ev_arg; { int result; + icmp_port_t *icmp_port; + acc_t *data; -assert (!(icmp_port->icp_flags & ICPF_WRITE_IP)); + icmp_port= ev_arg.ev_ptr; + assert(ev == &icmp_port->icp_event); + + assert (icmp_port->icp_flags & ICPF_WRITE_IP); + assert (!(icmp_port->icp_flags & ICPF_WRITE_SP)); while (icmp_port->icp_head_queue != NULL) { + data= icmp_port->icp_head_queue; + icmp_port->icp_head_queue= data->acc_ext_link; + + result= ip_send(icmp_port->icp_ipfd, data, + bf_bufsize(data)); + if (result != NW_WOULDBLOCK) + { + if (result == NW_OK) + continue; + DBLOCK(1, printf("icmp_write: error %d\n", result);); + continue; + } + assert(icmp_port->icp_write_pack == NULL); - icmp_port->icp_write_pack= icmp_port->icp_head_queue; - icmp_port->icp_head_queue= icmp_port->icp_head_queue-> - acc_ext_link; - - icmp_port->icp_flags |= ICPF_WRITE_IP; - + icmp_port->icp_write_pack= data; + result= ip_write(icmp_port->icp_ipfd, bf_bufsize(icmp_port->icp_write_pack)); if (result == NW_SUSPEND) @@ -619,8 +812,8 @@ assert (!(icmp_port->icp_flags & ICPF_WRITE_IP)); icmp_port->icp_flags |= ICPF_WRITE_SP; return; } - icmp_port->icp_flags &= ~ICPF_WRITE_IP; } + icmp_port->icp_flags &= ~ICPF_WRITE_IP; } PRIVATE void icmp_buffree(priority) @@ -679,6 +872,8 @@ icmp_hdr_t *icmp_hdr; ip_hdr_t *old_ip_hdr; int ip_port_nr; ipaddr_t dst, mask; + size_t old_pack_size; + u16_t new_mtu; if (icmp_len < 8 + IP_MIN_HDR_SIZE) { @@ -715,6 +910,24 @@ icmp_hdr_t *icmp_hdr; * It should be handed to the appropriate transport layer. 
*/ break; + case ICMP_FRAGM_AND_DF: + + DBLOCK(1, printf("icmp_dst_unreach: got mtu icmp from "); + writeIpAddr(ip_hdr->ih_src); + printf("; original destination: "); + writeIpAddr(old_ip_hdr->ih_dst); + printf("; protocol: %d\n", + old_ip_hdr->ih_proto)); + old_pack_size= ntohs(old_ip_hdr->ih_length); + if (!old_pack_size) + break; + new_mtu= ntohs(icmp_hdr->ih_hun.ihh_mtu.im_mtu); + if (!new_mtu || new_mtu > old_pack_size) + new_mtu= old_pack_size-1; + ipr_mtu(ip_port_nr, old_ip_hdr->ih_dst, new_mtu, + IPR_MTU_TIMEOUT); + break; + default: DBLOCK(1, printf("icmp_dst_unreach: got strange code %d from ", icmp_hdr->ih_code); @@ -785,9 +998,12 @@ icmp_hdr_t *icmp_hdr; { int entries; int entry_size; + u32_t addr; + i32_t pref; u16_t lifetime; int i; char *bufp; + ip_port_t *ip_port; if (icmp_len < 8) { @@ -836,12 +1052,15 @@ icmp_hdr_t *icmp_hdr; lifetime)); return; } + ip_port= &ip_port_table[icmp_port->icp_ipport]; for (i= 0, bufp= (char *)&icmp_hdr->ih_dun.uhd_data[0]; i< entries; i++, bufp += entry_size) { + addr= *(ipaddr_t *)bufp; + pref= ntohl(*(u32_t *)(bufp+4)); ipr_add_oroute(icmp_port->icp_ipport, HTONL(0L), HTONL(0L), - *(ipaddr_t *)bufp, lifetime * HZ, 1, 0, - ntohl(*(i32_t *)(bufp+4)), NULL); + addr, lifetime ? 
lifetime * HZ : 1, + 1, 0, 0, pref, NULL); } } @@ -890,55 +1109,78 @@ icmp_hdr_t *icmp_hdr; bf_afree(old_ip_pack); } -PRIVATE acc_t *icmp_err_pack(pack, icmp_hdr) +PRIVATE acc_t *icmp_err_pack(pack, icmp_hdr_pp) acc_t *pack; -icmp_hdr_t **icmp_hdr; +icmp_hdr_t **icmp_hdr_pp; { ip_hdr_t *ip_hdr; + icmp_hdr_t *icmp_hdr_p; acc_t *ip_pack, *icmp_pack, *tmp_pack; - int ip_hdr_len, icmp_hdr_len; - size_t size; + int ip_hdr_len, icmp_hdr_len, ih_type; + size_t size, pack_len; ipaddr_t dest, netmask; nettype_t nettype; pack= bf_packIffLess(pack, IP_MIN_HDR_SIZE); ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2; + pack_len= bf_bufsize(pack); - /* If the IP protocol is ICMP or the fragment offset is non-zero, + /* If the IP protocol is ICMP (except echo request/reply) or the + * fragment offset is non-zero, * drop the packet. Also check if the source address is valid. */ - if (ip_hdr->ih_proto == IPPROTO_ICMP || - (ntohs(ip_hdr->ih_flags_fragoff) & IH_FRAGOFF_MASK) != 0) + if ((ntohs(ip_hdr->ih_flags_fragoff) & IH_FRAGOFF_MASK) != 0) { bf_afree(pack); return NULL; } + if (ip_hdr->ih_proto == IPPROTO_ICMP) + { + if (ip_hdr_len>IP_MIN_HDR_SIZE) + { + pack= bf_packIffLess(pack, ip_hdr_len); + ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + } + + if (pack_len < ip_hdr_len+ICMP_MIN_HDR_SIZE) + { + bf_afree(pack); + return NULL; + } + icmp_pack= bf_cut(pack, ip_hdr_len, ICMP_MIN_HDR_SIZE); + icmp_pack= bf_packIffLess (icmp_pack, ICMP_MIN_HDR_SIZE); + icmp_hdr_p= (icmp_hdr_t *)ptr2acc_data(icmp_pack); + ih_type= icmp_hdr_p->ih_type; + bf_afree(icmp_pack); icmp_pack= NULL; + + if (ih_type != ICMP_TYPE_ECHO_REQ && + ih_type != ICMP_TYPE_ECHO_REPL) + { + bf_afree(pack); + return NULL; + } + } dest= ip_hdr->ih_src; nettype= ip_nettype(dest); netmask= ip_netmask(nettype); - if ((nettype != IPNT_CLASS_A && nettype != IPNT_LOCAL && - nettype != IPNT_CLASS_B && nettype != IPNT_CLASS_C) || - (dest & ~netmask) == 0 || (dest & ~netmask) == ~netmask) + 
if (nettype != IPNT_CLASS_A && nettype != IPNT_LOCAL && + nettype != IPNT_CLASS_B && nettype != IPNT_CLASS_C) { -#if !CRAMPED printf("icmp_err_pack: invalid source address: "); writeIpAddr(dest); printf("\n"); -#endif bf_afree(pack); return NULL; } /* Take the IP header and the first 64 bits of user data. */ size= ntohs(ip_hdr->ih_length); - ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2; - if (size < ip_hdr_len || bf_bufsize(pack) < size) + if (size < ip_hdr_len || pack_len < size) { -#if !CRAMPED printf("icmp_err_pack: wrong packet size:\n"); printf("\thdrlen= %d, ih_length= %d, bufsize= %d\n", - ip_hdr_len, size, bf_bufsize(pack)); -#endif + ip_hdr_len, size, pack_len); bf_afree(pack); return NULL; } @@ -955,12 +1197,13 @@ icmp_hdr_t **icmp_hdr; pack= bf_append(icmp_pack, pack); size += icmp_hdr_len; pack= bf_packIffLess(pack, icmp_hdr_len); - *icmp_hdr= (icmp_hdr_t *)ptr2acc_data(pack); - (*icmp_hdr)->ih_type= 0; - (*icmp_hdr)->ih_code= 0; - (*icmp_hdr)->ih_chksum= 0; - (*icmp_hdr)->ih_hun.ihh_unused= 0; - (*icmp_hdr)->ih_chksum= ~icmp_pack_oneCsum(pack); + icmp_hdr_p= (icmp_hdr_t *)ptr2acc_data(pack); + icmp_hdr_p->ih_type= 0; + icmp_hdr_p->ih_code= 0; + icmp_hdr_p->ih_chksum= 0; + icmp_hdr_p->ih_hun.ihh_unused= 0; + icmp_hdr_p->ih_chksum= ~icmp_pack_oneCsum(pack); + *icmp_hdr_pp= icmp_hdr_p; /* Create an IP header */ ip_hdr_len= IP_MIN_HDR_SIZE; @@ -982,5 +1225,5 @@ icmp_hdr_t **icmp_hdr; } /* - * $PchId: icmp.c,v 1.8 1996/12/17 07:53:34 philip Exp $ + * $PchId: icmp.c,v 1.23 2005/06/28 14:16:56 philip Exp $ */ diff --git a/servers/inet/generic/icmp.h b/servers/inet/generic/icmp.h index 7bd6b0b6b..12fe3b954 100644 --- a/servers/inet/generic/icmp.h +++ b/servers/inet/generic/icmp.h @@ -8,7 +8,14 @@ Copyright 1995 Philip Homburg #define ICMP_H #define ICMP_MAX_DATAGRAM 8196 -#define ICMP_DEF_TTL 60 +#define ICMP_DEF_TTL 96 + +/* Rate limit. The implementation is a bit sloppy and may send twice the + * number of packets. 
+ */ +#define ICMP_MAX_RATE 100 /* This many per interval */ +#define ICMP_RATE_INTERVAL (1*HZ) /* Interval in ticks */ +#define ICMP_RATE_WARN 10 /* Report this many dropped packets */ /* Prototypes */ @@ -19,5 +26,5 @@ void icmp_init ARGS(( void )); #endif /* ICMP_H */ /* - * $PchId: icmp.h,v 1.4 1995/11/21 06:45:27 philip Exp $ + * $PchId: icmp.h,v 1.7 2001/04/19 19:06:18 philip Exp $ */ diff --git a/servers/inet/generic/icmp_lib.h b/servers/inet/generic/icmp_lib.h index 9174f4cb2..7501f1a7a 100644 --- a/servers/inet/generic/icmp_lib.h +++ b/servers/inet/generic/icmp_lib.h @@ -16,9 +16,10 @@ void icmp_snd_time_exceeded ARGS(( int port_nr, acc_t *pack, int code )); void icmp_snd_unreachable ARGS(( int port_nr, acc_t *pack, int code )); void icmp_snd_redirect ARGS(( int port_nr, acc_t *pack, int code, ipaddr_t gw )); +void icmp_snd_mtu ARGS(( int port_nr, acc_t *pack, U16_t mtu )); #endif /* ICMP_LIB_H */ /* - * $PchId: icmp_lib.h,v 1.5 1996/12/17 07:54:09 philip Exp $ + * $PchId: icmp_lib.h,v 1.6 2002/06/08 21:32:44 philip Exp $ */ diff --git a/servers/inet/generic/io.c b/servers/inet/generic/io.c index df641b3ed..13c252ca1 100644 --- a/servers/inet/generic/io.c +++ b/servers/inet/generic/io.c @@ -30,5 +30,5 @@ ether_addr_t *addr; } /* - * $PchId: io.c,v 1.5 1995/11/21 06:45:27 philip Exp $ + * $PchId: io.c,v 1.6 1998/10/23 20:24:34 philip Exp $ */ diff --git a/servers/inet/generic/ip.c b/servers/inet/generic/ip.c index f04bac6ec..11771c987 100644 --- a/servers/inet/generic/ip.c +++ b/servers/inet/generic/ip.c @@ -25,6 +25,7 @@ THIS_FILE FORWARD void ip_close ARGS(( int fd )); FORWARD int ip_cancel ARGS(( int fd, int which_operation )); +FORWARD int ip_select ARGS(( int fd, unsigned operations )); FORWARD void ip_buffree ARGS(( int priority )); #ifdef BUF_CONSISTENCY_CHECK @@ -55,7 +56,6 @@ PUBLIC void ip_init() assert (BUF_S >= sizeof(nwio_ipopt_t)); assert (BUF_S >= sizeof(nwio_route_t)); -#if ZERO for (i=0, ip_ass= ip_ass_table; iia_frags= 0; @@ -68,20 +68,18 
@@ PUBLIC void ip_init() ip_fd->if_flags= IFF_EMPTY; ip_fd->if_rdbuf_head= 0; } -#endif for (i=0, ip_port= ip_port_table, icp= ip_conf; iip_port= i; -#if ZERO ip_port->ip_flags= IPF_EMPTY; -#endif ip_port->ip_dev_main= (ip_dev_t)ip_bad_callback; ip_port->ip_dev_set_ipaddr= (ip_dev_t)ip_bad_callback; ip_port->ip_dev_send= (ip_dev_send_t)ip_bad_callback; ip_port->ip_dl_type= icp->ic_devtype; - ip_port->ip_mss= IP_DEF_MSS; + ip_port->ip_mtu= IP_DEF_MTU; + ip_port->ip_mtu_max= IP_MAX_PACKSIZE; switch(ip_port->ip_dl_type) { @@ -92,7 +90,6 @@ PUBLIC void ip_init() continue; assert(result == NW_OK); break; -#if ENABLE_PSIP case IPDL_PSIP: ip_port->ip_dl.dl_ps.ps_port= icp->ic_port; result= ipps_init(ip_port); @@ -100,24 +97,21 @@ PUBLIC void ip_init() continue; assert(result == NW_OK); break; -#endif -#if !CRAMPED default: ip_panic(( "unknown ip_dl_type %d", ip_port->ip_dl_type )); -#endif + break; } -#if ZERO ip_port->ip_loopb_head= NULL; ip_port->ip_loopb_tail= NULL; ev_init(&ip_port->ip_loopb_event); -#endif + ip_port->ip_routeq_head= NULL; + ip_port->ip_routeq_tail= NULL; + ev_init(&ip_port->ip_routeq_event); ip_port->ip_flags |= IPF_CONFIGURED; -#if ZERO ip_port->ip_proto_any= NULL; for (j= 0; jip_proto[j]= NULL; -#endif } #ifndef BUF_CONSISTENCY_CHECK @@ -137,7 +131,7 @@ PUBLIC void ip_init() sr_add_minor(if2minor(ip_conf[i].ic_ifno, IP_DEV_OFF), i, ip_open, ip_close, ip_read, - ip_write, ip_ioctl, ip_cancel); + ip_write, ip_ioctl, ip_cancel, ip_select); (*ip_port->ip_dev_main)(ip_port); } @@ -156,8 +150,8 @@ int which_operation; switch (which_operation) { case SR_CANCEL_IOCTL: - assert (ip_fd->if_flags & IFF_GIPCONF_IP); - ip_fd->if_flags &= ~IFF_GIPCONF_IP; + assert (ip_fd->if_flags & IFF_IOCTL_IP); + ip_fd->if_flags &= ~IFF_IOCTL_IP; repl_res= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, (size_t)EINTR, (size_t)0, TRUE); assert (!repl_res); @@ -179,21 +173,29 @@ int which_operation; assert (!repl_res); break; #endif -#if !CRAMPED default: ip_panic(( "unknown cancel 
request" )); -#endif + break; } return NW_OK; } +PRIVATE int ip_select(fd, operations) +int fd; +unsigned operations; +{ + printf("ip_select: not implemented\n"); + return 0; +} -PUBLIC int ip_open (port, srfd, get_userdata, put_userdata, put_pkt) +PUBLIC int ip_open (port, srfd, get_userdata, put_userdata, put_pkt, + select_res) int port; int srfd; get_userdata_t get_userdata; put_userdata_t put_userdata; put_pkt_t put_pkt; +select_res_t select_res; { int i; ip_fd_t *ip_fd; @@ -228,6 +230,7 @@ put_pkt_t put_pkt; ip_fd->if_get_userdata= get_userdata; ip_fd->if_put_userdata= put_userdata; ip_fd->if_put_pkt= put_pkt; + return i; } @@ -295,7 +298,7 @@ int priority; if (priority == IP_PRI_PORTBUFS) { next_pack= ip_port->ip_dl.dl_ps.ps_send_head; - while(next_pack != NULL) + while (next_pack != NULL) { pack= next_pack; next_pack= pack->acc_ext_link; @@ -317,7 +320,7 @@ int priority; { if (ev_in_queue(&ip_port->ip_loopb_event)) { -#if !CRAMPED +#if DEBUG printf( "not freeing ip_loopb_head, ip_loopb_event enqueued\n"); #endif @@ -329,6 +332,30 @@ int priority; } } ip_port->ip_loopb_head= next_pack; + + next_pack= ip_port->ip_routeq_head; + while(next_pack && next_pack->acc_ext_link) + { + pack= next_pack; + next_pack= pack->acc_ext_link; + bf_afree(pack); + } + if (next_pack) + { + if (ev_in_queue(&ip_port->ip_routeq_event)) + { +#if DEBUG + printf( +"not freeing ip_loopb_head, ip_routeq_event enqueued\n"); +#endif + } + else + { + bf_afree(next_pack); + next_pack= NULL; + } + } + ip_port->ip_routeq_head= next_pack; } } if (priority == IP_PRI_FDBUFS_EXTRA) @@ -410,6 +437,11 @@ PRIVATE void ip_bufcheck() { bf_check_acc(pack); } + for (pack= ip_port->ip_routeq_head; pack; + pack= pack->acc_ext_link) + { + bf_check_acc(pack); + } } for (i= 0, ip_fd= ip_fd_table; iip_port )); -#endif } /* - * $PchId: ip.c,v 1.7 1996/12/17 07:54:47 philip Exp $ + * $PchId: ip.c,v 1.19 2005/06/28 14:17:40 philip Exp $ */ diff --git a/servers/inet/generic/ip.h b/servers/inet/generic/ip.h index 
927eced83..8d30f8723 100644 --- a/servers/inet/generic/ip.h +++ b/servers/inet/generic/ip.h @@ -15,7 +15,7 @@ void ip_prep ARGS(( void )); void ip_init ARGS(( void )); int ip_open ARGS(( int port, int srfd, get_userdata_t get_userdata, put_userdata_t put_userdata, - put_pkt_t put_pkt )); + put_pkt_t put_pkt, select_res_t select_res )); int ip_ioctl ARGS(( int fd, ioreq_t req )); int ip_read ARGS(( int fd, size_t count )); int ip_write ARGS(( int fd, size_t count )); @@ -24,5 +24,5 @@ int ip_send ARGS(( int fd, struct acc *data, size_t data_len )); #endif /* INET_IP_H */ /* - * $PchId: ip.h,v 1.6 1996/05/07 20:49:28 philip Exp $ + * $PchId: ip.h,v 1.8 2005/06/28 14:17:57 philip Exp $ */ diff --git a/servers/inet/generic/ip_eth.c b/servers/inet/generic/ip_eth.c index d05d77758..b94633f14 100644 --- a/servers/inet/generic/ip_eth.c +++ b/servers/inet/generic/ip_eth.c @@ -16,6 +16,8 @@ Copyright 1995 Philip Homburg #include "clock.h" #include "eth.h" #include "event.h" +#include "icmp_lib.h" +#include "io.h" #include "ip.h" #include "ip_int.h" @@ -27,8 +29,14 @@ typedef struct xmit_hdr ipaddr_t xh_ipaddr; } xmit_hdr_t; -PRIVATE ether_addr_t broadcast_ethaddr= { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -PRIVATE ipaddr_t broadcast_ipaddr= 0xFFFFFFFFL; +PRIVATE ether_addr_t broadcast_ethaddr= +{ + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } +}; +PRIVATE ether_addr_t ipmulticast_ethaddr= +{ + { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 } +}; FORWARD void do_eth_read ARGS(( ip_port_t *port )); FORWARD acc_t *get_eth_data ARGS(( int fd, size_t offset, @@ -39,7 +47,7 @@ FORWARD void ipeth_main ARGS(( ip_port_t *port )); FORWARD void ipeth_set_ipaddr ARGS(( ip_port_t *port )); FORWARD void ipeth_restart_send ARGS(( ip_port_t *ip_port )); FORWARD int ipeth_send ARGS(( struct ip_port *ip_port, ipaddr_t dest, - acc_t *pack, int broadcast )); + acc_t *pack, int type )); FORWARD void ipeth_arp_reply ARGS(( int ip_port_nr, ipaddr_t ipaddr, ether_addr_t *dst_ether_ptr )); FORWARD int 
ipeth_update_ttl ARGS(( time_t enq_time, time_t now, @@ -56,7 +64,8 @@ ip_port_t *ip_port; ip_port->ip_dl.dl_eth.de_fd= eth_open(ip_port-> ip_dl.dl_eth.de_port, ip_port->ip_port, - get_eth_data, put_eth_data, ip_eth_arrived); + get_eth_data, put_eth_data, ip_eth_arrived, + 0 /* no select_res */); if (ip_port->ip_dl.dl_eth.de_fd < 0) { DBLOCK(1, printf("ip.c: unable to open eth port\n")); @@ -71,15 +80,15 @@ ip_port_t *ip_port; ip_port->ip_dev_main= ipeth_main; ip_port->ip_dev_set_ipaddr= ipeth_set_ipaddr; ip_port->ip_dev_send= ipeth_send; - ip_port->ip_mss= ETH_MAX_PACK_SIZE-ETH_HDR_SIZE; + ip_port->ip_mtu= ETH_MAX_PACK_SIZE-ETH_HDR_SIZE; + ip_port->ip_mtu_max= ip_port->ip_mtu; return 0; } PRIVATE void ipeth_main(ip_port) ip_port_t *ip_port; { - int result, i; - ip_fd_t *ip_fd; + int result; switch (ip_port->ip_dl.dl_eth.de_state) { @@ -91,8 +100,8 @@ ip_port_t *ip_port; ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND; if (result<0) { - DBLOCK(1, printf("eth_ioctl(..,%lx)=%d\n", - NWIOSETHOPT, result)); + DBLOCK(1, printf("eth_ioctl(..,0x%lx)=%d\n", + (unsigned long)NWIOSETHOPT, result)); return; } if (ip_port->ip_dl.dl_eth.de_state != IES_SETPROTO) @@ -104,10 +113,8 @@ ip_port_t *ip_port; ipeth_arp_reply); if (result != NW_OK) { -#if !CRAMPED printf("ipeth_main: arp_set_cb failed: %d\n", result); -#endif return; } @@ -122,28 +129,11 @@ ip_port_t *ip_port; /* fall through */ case IES_GETIPADDR: ip_port->ip_dl.dl_eth.de_state= IES_MAIN; - for (i=0, ip_fd= ip_fd_table; iif_flags & IFF_INUSE)) - { - continue; - } - if (ip_fd->if_port != ip_port) - { - continue; - } - if (ip_fd->if_flags & IFF_GIPCONF_IP) - { - ip_ioctl (i, NWIOGIPCONF); - } - } do_eth_read(ip_port); return; -#if !CRAMPED default: ip_panic(( "unknown state: %d", ip_port->ip_dl.dl_eth.de_state)); -#endif } } @@ -208,11 +198,9 @@ int for_ioctl; assert (data); return data; default: -#if !CRAMPED printf( "get_eth_data(%d, 0x%d, 0x%d) called but ip_state=0x%x\n", fd, offset, count, 
ip_port->ip_dl.dl_eth.de_state); -#endif break; } return 0; @@ -225,7 +213,6 @@ acc_t *data; int for_ioctl; { ip_port_t *ip_port; - acc_t *pack; int result; ip_port= &ip_port_table[port]; @@ -261,11 +248,9 @@ int for_ioctl; ip_eth_arrived(port, data, bf_bufsize(data)); return NW_OK; } -#if !CRAMPED printf("ip_port->ip_dl.dl_eth.de_state= 0x%x", ip_port->ip_dl.dl_eth.de_state); ip_panic (( "strange status" )); -#endif } PRIVATE void ipeth_set_ipaddr(ip_port) @@ -276,19 +261,20 @@ ip_port_t *ip_port; ipeth_main(ip_port); } -PRIVATE int ipeth_send(ip_port, dest, pack, broadcast) +PRIVATE int ipeth_send(ip_port, dest, pack, type) struct ip_port *ip_port; ipaddr_t dest; acc_t *pack; -int broadcast; +int type; { - int r; + int i, r; acc_t *eth_pack, *tail; size_t pack_size; eth_hdr_t *eth_hdr; xmit_hdr_t *xmit_hdr; - ipaddr_t hostpart; + ipaddr_t hostpart, tmpaddr; time_t t; + u32_t *p; /* Start optimistic: the arp will succeed without blocking and the * ethernet packet can be sent without blocking also. 
Start with @@ -301,26 +287,41 @@ int broadcast; if (pack_size= 0; i--, p++) + { + *p= 0xdeadbeef; + } + eth_pack= bf_append(eth_pack, tail); } eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack); /* Lookup the ethernet address */ - if (broadcast) - eth_hdr->eh_dst= broadcast_ethaddr; + if (type != IP_LT_NORMAL) + { + if (type == IP_LT_BROADCAST) + eth_hdr->eh_dst= broadcast_ethaddr; + else + { + tmpaddr= ntohl(dest); + eth_hdr->eh_dst= ipmulticast_ethaddr; + eth_hdr->eh_dst.ea_addr[5]= tmpaddr & 0xff; + eth_hdr->eh_dst.ea_addr[4]= (tmpaddr >> 8) & 0xff; + eth_hdr->eh_dst.ea_addr[3]= (tmpaddr >> 16) & 0x7f; + } + } else { - if ((dest & ip_port->ip_subnetmask) != - (ip_port->ip_ipaddr & ip_port->ip_subnetmask)) + if ((dest ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) { -#if !CRAMPED ip_panic(( "invalid destination" )); -#endif } hostpart= (dest & ~ip_port->ip_subnetmask); - - assert(hostpart != 0); assert(dest != ip_port->ip_ipaddr); r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port, @@ -354,12 +355,12 @@ int broadcast; } /* If we have no write in progress, we can try to send the ethernet - * packet using eth_send. If the IP packet is larger than mss, - * unqueue the packet and let ipeth_restart_send deal with it. + * packet using eth_send. If the IP packet is larger than mtu, + * enqueue the packet and let ipeth_restart_send deal with it. */ pack_size= bf_bufsize(eth_pack); if (ip_port->ip_dl.dl_eth.de_frame == NULL && pack_size <= - ip_port->ip_mss + sizeof(*eth_hdr)) + ip_port->ip_mtu + sizeof(*eth_hdr)) { r= eth_send(ip_port->ip_dl.dl_eth.de_fd, eth_pack, pack_size); @@ -383,7 +384,7 @@ int broadcast; } /* Enqueue the packet, and store the current time, in the - * room for the ethernet source address. + * space for the ethernet source address. 
*/ t= get_time(); assert(sizeof(t) <= sizeof(eth_hdr->eh_src)); @@ -406,10 +407,11 @@ PRIVATE void ipeth_restart_send(ip_port) ip_port_t *ip_port; { time_t now, enq_time; - int r; + int i, r; acc_t *eth_pack, *ip_pack, *next_eth_pack, *next_part, *tail; size_t pack_size; eth_hdr_t *eth_hdr, *next_eth_hdr; + u32_t *p; now= get_time(); @@ -422,19 +424,22 @@ ip_port_t *ip_port; pack_size= bf_bufsize(eth_pack); - if (pack_size > ip_port->ip_mss+sizeof(*eth_hdr)) + if (pack_size > ip_port->ip_mtu+sizeof(*eth_hdr)) { /* Split the IP packet */ - ip_pack= eth_pack->acc_next; - next_part= ip_pack; + assert(eth_pack->acc_linkC == 1); + ip_pack= eth_pack->acc_next; eth_pack->acc_next= NULL; + next_part= ip_pack; ip_pack= NULL; ip_pack= ip_split_pack(ip_port, &next_part, - ip_port->ip_mss); + ip_port->ip_mtu); if (ip_pack == NULL) { bf_afree(eth_pack); continue; } + eth_pack->acc_next= ip_pack; ip_pack= NULL; + /* Allocate new ethernet header */ next_eth_pack= bf_memreq(sizeof(*next_eth_hdr)); next_eth_hdr= (eth_hdr_t *)ptr2acc_data(next_eth_pack); @@ -445,11 +450,12 @@ ip_port_t *ip_port; if (ip_port->ip_dl.dl_eth.de_q_head == NULL) ip_port->ip_dl.dl_eth.de_q_head= next_eth_pack; else + { ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link= next_eth_pack; + } ip_port->ip_dl.dl_eth.de_q_tail= next_eth_pack; - eth_pack->acc_next= ip_pack; pack_size= bf_bufsize(eth_pack); } @@ -459,8 +465,10 @@ ip_port_t *ip_port; r= ipeth_update_ttl(enq_time, now, eth_pack); if (r == ETIMEDOUT) { - ip_warning(( "should send ICMP ttl exceded" )); - bf_afree(eth_pack); + ip_pack= bf_delhead(eth_pack, sizeof(*eth_hdr)); + eth_pack= NULL; + icmp_snd_time_exceeded(ip_port->ip_port, + ip_pack, ICMP_TTL_EXC); continue; } assert(r == NW_OK); @@ -469,7 +477,17 @@ ip_port_t *ip_port; if (pack_size= 0; i--, p++) + { + *p= 0xdeadbeef; + } + eth_pack= bf_append(eth_pack, tail); + pack_size= ETH_MIN_PACK_SIZE; } assert(ip_port->ip_dl.dl_eth.de_frame == NULL); @@ -700,5 +718,5 @@ size_t pack_size; } /* - * $PchId: 
ip_eth.c,v 1.9 1996/12/17 07:55:21 philip Exp $ + * $PchId: ip_eth.c,v 1.25 2005/06/28 14:18:10 philip Exp $ */ diff --git a/servers/inet/generic/ip_int.h b/servers/inet/generic/ip_int.h index d09932df3..a9bbfa703 100644 --- a/servers/inet/generic/ip_int.h +++ b/servers/inet/generic/ip_int.h @@ -13,11 +13,15 @@ Copyright 1995 Philip Homburg #define IP_42BSD_BCAST 1 /* hostnumber 0 is also network broadcast */ +#define IP_LT_NORMAL 0 /* Normal */ +#define IP_LT_BROADCAST 1 /* Broadcast */ +#define IP_LT_MULTICAST 2 /* Multicast */ + struct ip_port; struct ip_fd; typedef void (*ip_dev_t) ARGS(( struct ip_port *ip_port )); typedef int (*ip_dev_send_t) ARGS(( struct ip_port *ip_port, ipaddr_t dest, - acc_t *pack, int broadcast )); + acc_t *pack, int type )); #define IP_PROTO_HASH_NR 32 @@ -47,16 +51,20 @@ typedef struct ip_port } dl_ps; } ip_dl; ipaddr_t ip_ipaddr; - ipaddr_t ip_netmask; ipaddr_t ip_subnetmask; + ipaddr_t ip_classfulmask; u16_t ip_frame_id; - u16_t ip_mss; + u16_t ip_mtu; + u16_t ip_mtu_max; /* Max MTU for this kind of network */ ip_dev_t ip_dev_main; ip_dev_t ip_dev_set_ipaddr; ip_dev_send_t ip_dev_send; acc_t *ip_loopb_head; acc_t *ip_loopb_tail; event_t ip_loopb_event; + acc_t *ip_routeq_head; + acc_t *ip_routeq_tail; + event_t ip_routeq_event; struct ip_fd *ip_proto_any; struct ip_fd *ip_proto[IP_PROTO_HASH_NR]; } ip_port_t; @@ -73,10 +81,11 @@ typedef struct ip_port #define IEF_READ_SP 0x20 #define IEF_WRITE_SP 0x80 -#define IPF_EMPTY 0x0 -#define IPF_CONFIGURED 0x1 -#define IPF_IPADDRSET 0x2 -#define IPF_NETMASKSET 0x4 +#define IPF_EMPTY 0x0 +#define IPF_CONFIGURED 0x1 +#define IPF_IPADDRSET 0x2 +#define IPF_NETMASKSET 0x4 +#define IPF_SUBNET_BCAST 0x8 /* Subnet supports subnet broadcasts */ #define IPDL_ETH NETTYPE_ETH #define IPDL_PSIP NETTYPE_PSIP @@ -105,14 +114,15 @@ typedef struct ip_fd put_pkt_t if_put_pkt; time_t if_exp_time; size_t if_rd_count; + ioreq_t if_ioctl; } ip_fd_t; -#define IFF_EMPTY 0x0 -#define IFF_INUSE 0x1 -#define 
IFF_OPTSET 0x2 -#define IFF_BUSY 0xC -# define IFF_READ_IP 0x4 -# define IFF_GIPCONF_IP 0x8 +#define IFF_EMPTY 0x00 +#define IFF_INUSE 0x01 +#define IFF_OPTSET 0x02 +#define IFF_BUSY 0x1C +# define IFF_READ_IP 0x04 +# define IFF_IOCTL_IP 0x08 typedef enum nettype { @@ -127,12 +137,15 @@ typedef enum nettype IPNT_BROADCAST /* 255.255.255.255 */ } nettype_t; +struct nwio_ipconf; + /* ip_eth.c */ int ipeth_init ARGS(( ip_port_t *ip_port )); /* ip_ioctl.c */ void ip_hash_proto ARGS(( ip_fd_t *ip_fd )); void ip_unhash_proto ARGS(( ip_fd_t *ip_fd )); +int ip_setconf ARGS(( int ip_port, struct nwio_ipconf *ipconfp )); /* ip_lib.c */ ipaddr_t ip_get_netmask ARGS(( ipaddr_t hostaddr )); @@ -146,18 +159,19 @@ char *ip_nettoa ARGS(( nettype_t nettype )); /* ip_ps.c */ int ipps_init ARGS(( ip_port_t *ip_port )); void ipps_get ARGS(( int ip_port_nr )); -void ipps_put ARGS(( int ip_port_nr, acc_t *pack )); +void ipps_put ARGS(( int ip_port_nr, ipaddr_t nexthop, acc_t *pack )); /* ip_read.c */ void ip_port_arrive ARGS(( ip_port_t *port, acc_t *pack, ip_hdr_t *ip_hdr )); void ip_arrived ARGS(( ip_port_t *port, acc_t *pack )); void ip_arrived_broadcast ARGS(( ip_port_t *port, acc_t *pack )); void ip_process_loopb ARGS(( event_t *ev, ev_arg_t arg )); +void ip_packet2user ARGS(( ip_fd_t *ip_fd, acc_t *pack, time_t exp_time, + size_t data_len )); /* ip_write.c */ void dll_eth_write_frame ARGS(( ip_port_t *port )); -acc_t *ip_split_pack ARGS(( ip_port_t *ip_port, acc_t **ref_last, - int first_size )); +acc_t *ip_split_pack ARGS(( ip_port_t *ip_port, acc_t **ref_last, int mtu )); void ip_hdr_chksum ARGS(( ip_hdr_t *ip_hdr, int ip_hdr_len )); @@ -171,5 +185,5 @@ extern ip_ass_t ip_ass_table[IP_ASS_NR]; #endif /* INET_IP_INT_H */ /* - * $PchId: ip_int.h,v 1.6 1996/12/17 07:59:36 philip Exp $ + * $PchId: ip_int.h,v 1.19 2004/08/03 16:24:23 philip Exp $ */ diff --git a/servers/inet/generic/ip_ioctl.c b/servers/inet/generic/ip_ioctl.c index a65ede67a..bbfaf247a 100644 --- 
a/servers/inet/generic/ip_ioctl.c +++ b/servers/inet/generic/ip_ioctl.c @@ -22,6 +22,7 @@ THIS_FILE FORWARD int ip_checkopt ARGS(( ip_fd_t *ip_fd )); FORWARD void reply_thr_get ARGS(( ip_fd_t *ip_fd, size_t reply, int for_ioctl )); +FORWARD void report_addr ARGS(( ip_port_t *ip_port )); PUBLIC int ip_ioctl (fd, req) int fd; @@ -31,6 +32,7 @@ ioreq_t req; ip_port_t *ip_port; nwio_ipopt_t *ipopt; nwio_ipopt_t oldopt, newopt; + nwio_ipconf2_t *ipconf2; nwio_ipconf_t *ipconf; nwio_route_t *route_ent; acc_t *data; @@ -38,8 +40,8 @@ ioreq_t req; unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags; unsigned long new_flags; - int old_ip_flags; - int ent_no; + int ent_no, r; + nwio_ipconf_t ipconf_var; assert (fd>=0 && fd<=IP_FD_NR); ip_fd= &ip_fd_table[fd]; @@ -49,6 +51,16 @@ ioreq_t req; switch (req) { case NWIOSIPOPT: + ip_port= ip_fd->if_port; + + if (!(ip_port->ip_flags & IPF_IPADDRSET)) + { + ip_fd->if_ioctl= NWIOSIPOPT; + ip_fd->if_flags |= IFF_IOCTL_IP; + return NW_SUSPEND; + } + ip_fd->if_flags &= ~IFF_IOCTL_IP; + data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, sizeof(nwio_ipopt_t), TRUE); @@ -167,69 +179,162 @@ ioreq_t req; return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, (acc_t *)0, TRUE); + case NWIOSIPCONF2: case NWIOSIPCONF: ip_port= ip_fd->if_port; - data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, - sizeof(nwio_ipconf_t), TRUE); - - data= bf_packIffLess (data, sizeof(nwio_ipconf_t)); - assert (data->acc_length == sizeof(nwio_ipconf_t)); - - old_ip_flags= ip_port->ip_flags; - - ipconf= (nwio_ipconf_t *)ptr2acc_data(data); - - if (ipconf->nwic_flags & ~NWIC_FLAGS) + if (req == NWIOSIPCONF2) { - bf_afree(data); - return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, - EBADMODE, (acc_t *)0, TRUE); - } + data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, + sizeof(*ipconf2), TRUE); + data= bf_packIffLess (data, sizeof(*ipconf2)); + assert (data->acc_length == sizeof(*ipconf2)); - if (ipconf->nwic_flags & NWIC_IPADDR_SET) - { - 
ip_port->ip_ipaddr= ipconf->nwic_ipaddr; - ip_port->ip_flags |= IPF_IPADDRSET; - ip_port->ip_netmask= - ip_netmask(ip_nettype(ipconf->nwic_ipaddr)); - if (!(ip_port->ip_flags & IPF_NETMASKSET)) { - ip_port->ip_subnetmask= ip_port->ip_netmask; - } - (*ip_port->ip_dev_set_ipaddr)(ip_port); - } - if (ipconf->nwic_flags & NWIC_NETMASK_SET) - { - ip_port->ip_subnetmask= ipconf->nwic_netmask; - ip_port->ip_flags |= IPF_NETMASKSET; - } + ipconf2= (nwio_ipconf2_t *)ptr2acc_data(data); + ipconf= &ipconf_var; + ipconf->nwic_flags= ipconf2->nwic_flags; + ipconf->nwic_ipaddr= ipconf2->nwic_ipaddr; + ipconf->nwic_netmask= ipconf2->nwic_netmask; + ipconf->nwic_flags &= ~NWIC_MTU_SET; + } + else + { + data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, + sizeof(*ipconf), TRUE); + data= bf_packIffLess (data, sizeof(*ipconf)); + assert (data->acc_length == sizeof(*ipconf)); + + ipconf= (nwio_ipconf_t *)ptr2acc_data(data); + } + r= ip_setconf(ip_port-ip_port_table, ipconf); bf_afree(data); - return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, NW_OK, + return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, r, (acc_t *)0, TRUE); - case NWIOGIPCONF: + case NWIOGIPCONF2: ip_port= ip_fd->if_port; if (!(ip_port->ip_flags & IPF_IPADDRSET)) { - ip_fd->if_flags |= IFF_GIPCONF_IP; + ip_fd->if_ioctl= NWIOGIPCONF2; + ip_fd->if_flags |= IFF_IOCTL_IP; return NW_SUSPEND; } - ip_fd->if_flags &= ~IFF_GIPCONF_IP; + ip_fd->if_flags &= ~IFF_IOCTL_IP; data= bf_memreq(sizeof(nwio_ipconf_t)); - ipconf= (nwio_ipconf_t *)ptr2acc_data(data); - ipconf->nwic_flags= NWIC_IPADDR_SET; - ipconf->nwic_ipaddr= ip_port->ip_ipaddr; - ipconf->nwic_netmask= ip_port->ip_subnetmask; + ipconf2= (nwio_ipconf2_t *)ptr2acc_data(data); + ipconf2->nwic_flags= NWIC_IPADDR_SET; + ipconf2->nwic_ipaddr= ip_port->ip_ipaddr; + ipconf2->nwic_netmask= ip_port->ip_subnetmask; if (ip_port->ip_flags & IPF_NETMASKSET) - ipconf->nwic_flags |= NWIC_NETMASK_SET; + ipconf2->nwic_flags |= NWIC_NETMASK_SET; result= 
(*ip_fd->if_put_userdata)(ip_fd->if_srfd, 0, data, TRUE); return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, (acc_t *)0, TRUE); + case NWIOGIPCONF: + ip_port= ip_fd->if_port; + + if (!(ip_port->ip_flags & IPF_IPADDRSET)) + { + ip_fd->if_ioctl= NWIOGIPCONF; + ip_fd->if_flags |= IFF_IOCTL_IP; + return NW_SUSPEND; + } + ip_fd->if_flags &= ~IFF_IOCTL_IP; + data= bf_memreq(sizeof(*ipconf)); + ipconf= (nwio_ipconf_t *)ptr2acc_data(data); + ipconf->nwic_flags= NWIC_IPADDR_SET; + ipconf->nwic_ipaddr= ip_port->ip_ipaddr; + ipconf->nwic_netmask= ip_port->ip_subnetmask; + if (ip_port->ip_flags & IPF_NETMASKSET) + ipconf->nwic_flags |= NWIC_NETMASK_SET; + ipconf->nwic_mtu= ip_port->ip_mtu; + + result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, 0, data, + TRUE); + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, + (acc_t *)0, TRUE); + + case NWIOGIPOROUTE: + data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, + 0, sizeof(nwio_route_t), TRUE); + if (data == NULL) + { + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + EFAULT, NULL, TRUE); + } + + data= bf_packIffLess (data, sizeof(nwio_route_t) ); + route_ent= (nwio_route_t *)ptr2acc_data(data); + ent_no= route_ent->nwr_ent_no; + bf_afree(data); + + data= bf_memreq(sizeof(nwio_route_t)); + route_ent= (nwio_route_t *)ptr2acc_data(data); + result= ipr_get_oroute(ent_no, route_ent); + if (result < 0) + bf_afree(data); + else + { + assert(result == NW_OK); + result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, 0, + data, TRUE); + } + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + result, (acc_t *)0, TRUE); + + case NWIOSIPOROUTE: + data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, + 0, sizeof(nwio_route_t), TRUE); + if (data == NULL) + { + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + EFAULT, NULL, TRUE); + } + if (!(ip_fd->if_port->ip_flags & IPF_IPADDRSET)) + { + /* Interface is down, no changes allowed */ + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + EINVAL, NULL, TRUE); + } + + data= bf_packIffLess (data, 
sizeof(nwio_route_t) ); + route_ent= (nwio_route_t *)ptr2acc_data(data); + result= ipr_add_oroute(ip_fd->if_port-ip_port_table, + route_ent->nwr_dest, route_ent->nwr_netmask, + route_ent->nwr_gateway, (time_t)0, + route_ent->nwr_dist, route_ent->nwr_mtu, + !!(route_ent->nwr_flags & NWRF_STATIC), + route_ent->nwr_pref, NULL); + bf_afree(data); + + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + result, (acc_t *)0, TRUE); + + case NWIODIPOROUTE: + data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, + 0, sizeof(nwio_route_t), TRUE); + if (data == NULL) + { + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + EFAULT, NULL, TRUE); + } + + data= bf_packIffLess (data, sizeof(nwio_route_t) ); + route_ent= (nwio_route_t *)ptr2acc_data(data); + result= ipr_del_oroute(ip_fd->if_port-ip_port_table, + route_ent->nwr_dest, route_ent->nwr_netmask, + route_ent->nwr_gateway, + !!(route_ent->nwr_flags & NWRF_STATIC)); + bf_afree(data); + + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + result, (acc_t *)0, TRUE); + case NWIOGIPIROUTE: data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, sizeof(nwio_route_t), TRUE); @@ -266,48 +371,27 @@ ioreq_t req; return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, EFAULT, NULL, TRUE); } + if (!(ip_fd->if_port->ip_flags & IPF_IPADDRSET)) + { + /* Interface is down, no changes allowed */ + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + EINVAL, NULL, TRUE); + } data= bf_packIffLess (data, sizeof(nwio_route_t) ); route_ent= (nwio_route_t *)ptr2acc_data(data); - result= ipr_add_iroute(ip_fd->if_port->ip_port, + result= ipr_add_iroute(ip_fd->if_port-ip_port_table, route_ent->nwr_dest, route_ent->nwr_netmask, route_ent->nwr_gateway, (route_ent->nwr_flags & NWRF_UNREACHABLE) ? 
IRTD_UNREACHABLE : route_ent->nwr_dist, + route_ent->nwr_mtu, !!(route_ent->nwr_flags & NWRF_STATIC), NULL); bf_afree(data); return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, (acc_t *)0, TRUE); - case NWIOGIPOROUTE: - data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, - 0, sizeof(nwio_route_t), TRUE); - if (data == NULL) - { - return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, - EFAULT, NULL, TRUE); - } - - data= bf_packIffLess (data, sizeof(nwio_route_t) ); - route_ent= (nwio_route_t *)ptr2acc_data(data); - ent_no= route_ent->nwr_ent_no; - bf_afree(data); - - data= bf_memreq(sizeof(nwio_route_t)); - route_ent= (nwio_route_t *)ptr2acc_data(data); - result= ipr_get_oroute(ent_no, route_ent); - if (result < 0) - bf_afree(data); - else - { - assert(result == NW_OK); - result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, 0, - data, TRUE); - } - return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, - result, (acc_t *)0, TRUE); - case NWIODIPIROUTE: data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, 0, sizeof(nwio_route_t), TRUE); @@ -319,44 +403,41 @@ ioreq_t req; data= bf_packIffLess (data, sizeof(nwio_route_t) ); route_ent= (nwio_route_t *)ptr2acc_data(data); - result= ipr_del_iroute(ip_fd->if_port->ip_port, + result= ipr_del_iroute(ip_fd->if_port-ip_port_table, route_ent->nwr_dest, route_ent->nwr_netmask, route_ent->nwr_gateway, - (route_ent->nwr_flags & NWRF_UNREACHABLE) ? - IRTD_UNREACHABLE : route_ent->nwr_dist, !!(route_ent->nwr_flags & NWRF_STATIC)); bf_afree(data); return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, (acc_t *)0, TRUE); - case NWIOSIPOROUTE: - data= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, - 0, sizeof(nwio_route_t), TRUE); - if (data == NULL) + /* The following ARP ioctls are only valid if the + * underlying device is an ethernet. 
+ */ + case NWIOARPGIP: + case NWIOARPGNEXT: + case NWIOARPSIP: + case NWIOARPDIP: + ip_port= ip_fd->if_port; + + if (ip_port->ip_dl_type != IPDL_ETH) { - return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, - EFAULT, NULL, TRUE); + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, + EBADIOCTL, (acc_t *)0, TRUE); } - - data= bf_packIffLess (data, sizeof(nwio_route_t) ); - route_ent= (nwio_route_t *)ptr2acc_data(data); - result= ipr_add_oroute(ip_fd->if_port->ip_port, - route_ent->nwr_dest, route_ent->nwr_netmask, - route_ent->nwr_gateway, (time_t)0, - route_ent->nwr_dist, - !!(route_ent->nwr_flags & NWRF_STATIC), - route_ent->nwr_pref, NULL); - bf_afree(data); - - return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, - result, (acc_t *)0, TRUE); + result= arp_ioctl(ip_port->ip_dl.dl_eth.de_port, + ip_fd->if_srfd, req, ip_fd->if_get_userdata, + ip_fd->if_put_userdata); + assert (result != SUSPEND); + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, + (acc_t *)0, TRUE); default: break; } - DBLOCK(1, printf("replying EBADIOCTL\n")); - return (*ip_fd->if_put_userdata)(ip_fd-> if_srfd, EBADIOCTL, + DBLOCK(1, printf("replying EBADIOCTL: 0x%x\n", req)); + return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, EBADIOCTL, (acc_t *)0, TRUE); } @@ -410,6 +491,84 @@ ip_fd_t *ip_fd; *ip_fd_p= curr->if_proto_next; } +PUBLIC int ip_setconf(ip_port_nr, ipconf) +int ip_port_nr; +nwio_ipconf_t *ipconf; +{ + int i, old_ip_flags, do_report; + ip_port_t *ip_port; + ip_fd_t *ip_fd; + ipaddr_t ipaddr; + u32_t mtu; + + ip_port= &ip_port_table[ip_port_nr]; + + old_ip_flags= ip_port->ip_flags; + + if (ipconf->nwic_flags & ~NWIC_FLAGS) + return EBADMODE; + + do_report= 0; + if (ipconf->nwic_flags & NWIC_MTU_SET) + { + mtu= ipconf->nwic_mtu; + if (mtu < IP_MIN_MTU || mtu > ip_port->ip_mtu_max) + return EINVAL; + ip_port->ip_mtu= mtu; + do_report= 1; + } + + if (ipconf->nwic_flags & NWIC_NETMASK_SET) + { + ip_port->ip_subnetmask= ipconf->nwic_netmask; + ip_port->ip_flags |= 
IPF_NETMASKSET|IPF_SUBNET_BCAST; + if (ntohl(ip_port->ip_subnetmask) >= 0xfffffffe) + ip_port->ip_flags &= ~IPF_SUBNET_BCAST; + do_report= 1; + } + if (ipconf->nwic_flags & NWIC_IPADDR_SET) + { + ipaddr= ipconf->nwic_ipaddr; + ip_port->ip_ipaddr= ipaddr; + ip_port->ip_flags |= IPF_IPADDRSET; + ip_port->ip_classfulmask= + ip_netmask(ip_nettype(ipaddr)); + if (!(ip_port->ip_flags & IPF_NETMASKSET)) + { + ip_port->ip_subnetmask= ip_port->ip_classfulmask; + } + if (ipaddr == HTONL(0x00000000)) + { + /* Special case. Use 0.0.0.0 to shut down the interface. */ + ip_port->ip_flags &= ~(IPF_IPADDRSET|IPF_NETMASKSET); + ip_port->ip_subnetmask= HTONL(0x00000000); + } + (*ip_port->ip_dev_set_ipaddr)(ip_port); + + /* Revive calls waiting for an IP address */ + for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++) + { + if (!(ip_fd->if_flags & IFF_INUSE)) + continue; + if (ip_fd->if_port != ip_port) + continue; + if (ip_fd->if_flags & IFF_IOCTL_IP) + ip_ioctl (i, ip_fd->if_ioctl); + } + + do_report= 1; + } + + ipr_chk_itab(ip_port-ip_port_table, ip_port->ip_ipaddr, + ip_port->ip_subnetmask); + ipr_chk_otab(ip_port-ip_port_table, ip_port->ip_ipaddr, + ip_port->ip_subnetmask); + if (do_report) + report_addr(ip_port); + + return 0; +} + PRIVATE int ip_checkopt (ip_fd) ip_fd_t *ip_fd; { @@ -417,7 +576,6 @@ ip_fd_t *ip_fd; unsigned long flags; unsigned int en_di_flags; - ip_port_t *port; acc_t *pack; int result; @@ -467,6 +625,47 @@ int for_ioctl; assert (!result); } +PRIVATE void report_addr(ip_port) +ip_port_t *ip_port; +{ + int i, hdr_len; + ip_fd_t *ip_fd; + acc_t *pack; + ip_hdr_t *ip_hdr; + + pack= bf_memreq(IP_MIN_HDR_SIZE); + ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + + hdr_len= IP_MIN_HDR_SIZE; + ip_hdr->ih_vers_ihl= (IP_VERSION << 4) | (hdr_len/4); + ip_hdr->ih_tos= 0; + ip_hdr->ih_length= htons(ip_port->ip_mtu); + ip_hdr->ih_id= 0; + ip_hdr->ih_flags_fragoff= 0; + ip_hdr->ih_ttl= 0; + ip_hdr->ih_proto= 0; + ip_hdr->ih_src= ip_port->ip_ipaddr; + ip_hdr->ih_dst= ip_port->ip_subnetmask; + ip_hdr_chksum(ip_hdr, hdr_len); 
+ + for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++) + { + if (!(ip_fd->if_flags & IFF_INUSE)) + { + continue; + } + if (ip_fd->if_port != ip_port) + { + continue; + } + + /* Deliver packet to user */ + pack->acc_linkC++; + ip_packet2user(ip_fd, pack, 255, IP_MIN_HDR_SIZE); + } + bf_afree(pack); pack= NULL; +} + /* - * $PchId: ip_ioctl.c,v 1.8 1996/12/17 07:56:18 philip Exp $ + * $PchId: ip_ioctl.c,v 1.22 2004/08/03 11:10:08 philip Exp $ */ diff --git a/servers/inet/generic/ip_lib.c b/servers/inet/generic/ip_lib.c index 151b08bb4..a9f6114c8 100644 --- a/servers/inet/generic/ip_lib.c +++ b/servers/inet/generic/ip_lib.c @@ -29,21 +29,21 @@ int optlen; strict_source_present= FALSE, record_route_present= FALSE, timestamp_present= FALSE; -assert (!(optlen & 3)); + assert (!(optlen & 3)); i= 0; while (iip_dl.dl_ps.ps_port, ip_port->ip_port); if (result == -1) return -1; -#if ZERO ip_port->ip_dl.dl_ps.ps_send_head= NULL; ip_port->ip_dl.dl_ps.ps_send_tail= NULL; -#endif ip_port->ip_dev_main= ipps_main; ip_port->ip_dev_set_ipaddr= ipps_set_ipaddr; ip_port->ip_dev_send= ipps_send; @@ -46,7 +44,8 @@ PUBLIC void ipps_get(ip_port_nr) int ip_port_nr; { int result; - acc_t *pack; + ipaddr_t dest; + acc_t *acc, *pack, *next_part; ip_port_t *ip_port; assert(ip_port_nr >= 0 && ip_port_nr < ip_conf_nr); @@ -57,12 +56,54 @@ int ip_port_nr; { pack= ip_port->ip_dl.dl_ps.ps_send_head; ip_port->ip_dl.dl_ps.ps_send_head= pack->acc_ext_link; - result= psip_send(ip_port->ip_dl.dl_ps.ps_port, pack); + + /* Extract nexthop address */ + pack= bf_packIffLess(pack, sizeof(dest)); + dest= *(ipaddr_t *)ptr2acc_data(pack); + pack= bf_delhead(pack, sizeof(dest)); + + if (bf_bufsize(pack) > ip_port->ip_mtu) + { + next_part= pack; + pack= ip_split_pack(ip_port, &next_part, + ip_port->ip_mtu); + if (pack == NULL) + continue; + + /* Prepend nexthop address */ + acc= bf_memreq(sizeof(dest)); + *(ipaddr_t *)(ptr2acc_data(acc))= dest; + acc->acc_next= next_part; + next_part= acc; acc= NULL; + + assert(next_part->acc_linkC == 1); + 
next_part->acc_ext_link= NULL; + if (ip_port->ip_dl.dl_ps.ps_send_head) + { + ip_port->ip_dl.dl_ps.ps_send_tail-> + acc_ext_link= next_part; + } + else + { + ip_port->ip_dl.dl_ps.ps_send_head= + next_part; + } + ip_port->ip_dl.dl_ps.ps_send_tail= next_part; + } + + result= psip_send(ip_port->ip_dl.dl_ps.ps_port, dest, pack); if (result != NW_SUSPEND) { assert(result == NW_OK); continue; } + + /* Prepend nexthop address */ + acc= bf_memreq(sizeof(dest)); + *(ipaddr_t *)(ptr2acc_data(acc))= dest; + acc->acc_next= pack; + pack= acc; acc= NULL; + pack->acc_ext_link= ip_port->ip_dl.dl_ps.ps_send_head; ip_port->ip_dl.dl_ps.ps_send_head= pack; if (pack->acc_ext_link == NULL) @@ -71,8 +112,9 @@ int ip_port_nr; } } -PUBLIC void ipps_put(ip_port_nr, pack) +PUBLIC void ipps_put(ip_port_nr, nexthop, pack) int ip_port_nr; +ipaddr_t nexthop; acc_t *pack; { ip_port_t *ip_port; @@ -80,7 +122,10 @@ acc_t *pack; assert(ip_port_nr >= 0 && ip_port_nr < ip_conf_nr); ip_port= &ip_port_table[ip_port_nr]; assert(ip_port->ip_dl_type == IPDL_PSIP); - ip_arrived(ip_port, pack); + if (nexthop == HTONL(0xffffffff)) + ip_arrived_broadcast(ip_port, pack); + else + ip_arrived(ip_port, pack); } PRIVATE void ipps_main(ip_port) @@ -92,57 +137,139 @@ ip_port_t *ip_port; PRIVATE void ipps_set_ipaddr(ip_port) ip_port_t *ip_port; { - int i; - ip_fd_t *ip_fd; - - /* revive calls waiting for an ip addresses */ - for (i=0, ip_fd= ip_fd_table; iif_flags & IFF_INUSE)) - { - continue; - } - if (ip_fd->if_port != ip_port) - { - continue; - } - if (ip_fd->if_flags & IFF_GIPCONF_IP) - { - ip_ioctl (i, NWIOGIPCONF); - } - } } -PRIVATE int ipps_send(ip_port, dest, pack, broadcast) +PRIVATE int ipps_send(ip_port, dest, pack, type) struct ip_port *ip_port; ipaddr_t dest; acc_t *pack; -int broadcast; +int type; { int result; + acc_t *acc, *next_part; - if (broadcast) + if (type != IP_LT_NORMAL) + { ip_arrived_broadcast(ip_port, bf_dupacc(pack)); - if (ip_port->ip_dl.dl_ps.ps_send_head == NULL) - { - result= 
psip_send(ip_port->ip_dl.dl_ps.ps_port, pack); - if (result != NW_SUSPEND) - { - assert(result == NW_OK); - return result; - } - assert (ip_port->ip_dl.dl_ps.ps_send_head == NULL); - ip_port->ip_dl.dl_ps.ps_send_head= pack; + /* Map all broadcasts to the on-link broadcast address. + * This saves the application from having to find out + * if the destination is a subnet broadcast. + */ + dest= HTONL(0xffffffff); } - else + + /* Note that allocating a packet may trigger a cleanup action, + * which may cause the send queue to become empty. + */ + while (ip_port->ip_dl.dl_ps.ps_send_head != NULL) + { + acc= bf_memreq(sizeof(dest)); + + if (ip_port->ip_dl.dl_ps.ps_send_head == NULL) + { + bf_afree(acc); acc= NULL; + continue; + } + + /* Prepend nexthop address */ + *(ipaddr_t *)(ptr2acc_data(acc))= dest; + acc->acc_next= pack; + pack= acc; acc= NULL; + + assert(pack->acc_linkC == 1); + pack->acc_ext_link= NULL; + ip_port->ip_dl.dl_ps.ps_send_tail->acc_ext_link= pack; - ip_port->ip_dl.dl_ps.ps_send_tail= pack; - pack->acc_ext_link= NULL; + ip_port->ip_dl.dl_ps.ps_send_tail= pack; + + return NW_OK; + } + + while (pack) + { + if (bf_bufsize(pack) > ip_port->ip_mtu) + { + next_part= pack; + pack= ip_split_pack(ip_port, &next_part, + ip_port->ip_mtu); + if (pack == NULL) + { + return NW_OK; + } + + /* Prepend nexthop address */ + acc= bf_memreq(sizeof(dest)); + *(ipaddr_t *)(ptr2acc_data(acc))= dest; + acc->acc_next= next_part; + next_part= acc; acc= NULL; + + assert(next_part->acc_linkC == 1); + next_part->acc_ext_link= NULL; + ip_port->ip_dl.dl_ps.ps_send_head= next_part; + ip_port->ip_dl.dl_ps.ps_send_tail= next_part; + } + result= psip_send(ip_port->ip_dl.dl_ps.ps_port, dest, pack); + if (result == NW_SUSPEND) + { + /* Prepend nexthop address */ + acc= bf_memreq(sizeof(dest)); + *(ipaddr_t *)(ptr2acc_data(acc))= dest; + acc->acc_next= pack; + pack= acc; acc= NULL; + + assert(pack->acc_linkC == 1); + pack->acc_ext_link= ip_port->ip_dl.dl_ps.ps_send_head; + 
ip_port->ip_dl.dl_ps.ps_send_head= pack; + if (!pack->acc_ext_link) + ip_port->ip_dl.dl_ps.ps_send_tail= pack; + break; + } + assert(result == NW_OK); + pack= ip_port->ip_dl.dl_ps.ps_send_head; + if (!pack) + break; + ip_port->ip_dl.dl_ps.ps_send_head= pack->acc_ext_link; + + /* Extract nexthop address */ + pack= bf_packIffLess(pack, sizeof(dest)); + dest= *(ipaddr_t *)ptr2acc_data(pack); + pack= bf_delhead(pack, sizeof(dest)); + } return NW_OK; } +#if 0 +int ipps_check(ip_port_t *ip_port) +{ + int n, bad; + acc_t *prev, *curr; + + for (n= 0, prev= NULL, curr= ip_port->ip_dl.dl_ps.ps_send_head_; + curr; prev= curr, curr= curr->acc_ext_link) + { + n++; + } + bad= 0; + if (prev != NULL && prev != ip_port->ip_dl.dl_ps.ps_send_tail_) + { + printf("ipps_check, ip[%d]: wrong tail: got %p, expected %p\n", + ip_port-ip_port_table, + ip_port->ip_dl.dl_ps.ps_send_tail_, prev); + bad++; + } + if (n != ip_port->ip_dl.dl_ps.ps_send_nr) + { + printf("ipps_check, ip[%d]: wrong count: got %d, expected %d\n", + ip_port-ip_port_table, + ip_port->ip_dl.dl_ps.ps_send_nr, n); + bad++; + } + return bad == 0; +} +#endif + /* - * $PchId: ip_ps.c,v 1.5 1995/11/21 06:45:27 philip Exp $ + * $PchId: ip_ps.c,v 1.15 2003/01/21 15:57:52 philip Exp $ */ diff --git a/servers/inet/generic/ip_read.c b/servers/inet/generic/ip_read.c index 7fc80b7c9..318beefb5 100644 --- a/servers/inet/generic/ip_read.c +++ b/servers/inet/generic/ip_read.c @@ -25,9 +25,8 @@ FORWARD acc_t *merge_frags ARGS(( acc_t *first, acc_t *second )); FORWARD int ip_frag_chk ARGS(( acc_t *pack )); FORWARD acc_t *reassemble ARGS(( ip_port_t *ip_port, acc_t *pack, ip_hdr_t *ip_hdr )); +FORWARD void route_packets ARGS(( event_t *ev, ev_arg_t ev_arg )); FORWARD int broadcast_dst ARGS(( ip_port_t *ip_port, ipaddr_t dest )); -FORWARD void packet2user ARGS(( ip_fd_t *ip_fd, acc_t *pack, - time_t exp_time )); PUBLIC int ip_read (fd, count) int fd; @@ -38,8 +37,10 @@ size_t count; ip_fd= &ip_fd_table[fd]; if (!(ip_fd->if_flags & 
IFF_OPTSET)) + { return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, EBADMODE, (acc_t *)0, FALSE); + } ip_fd->if_rd_count= count; @@ -50,7 +51,8 @@ size_t count; { pack= ip_fd->if_rdbuf_head; ip_fd->if_rdbuf_head= pack->acc_ext_link; - packet2user (ip_fd, pack, ip_fd->if_exp_time); + ip_packet2user (ip_fd, pack, ip_fd->if_exp_time, + bf_bufsize(pack)); assert(!(ip_fd->if_flags & IFF_READ_IP)); return NW_OK; } @@ -150,8 +152,17 @@ ip_hdr_t *pack_hdr; } if ((ass_ent->ia_min_ttl) * HZ + first_time < get_time()) + { + if (broadcast_dst(ip_port, pack_hdr->ih_dst)) + { + DBLOCK(1, printf( + "ip_read'reassemble: reassembly timeout for broadcast packet\n");); + bf_afree(pack); pack= NULL; + return NULL; + } icmp_snd_time_exceeded(ip_port->ip_port, pack, ICMP_FRAG_REASSEM); + } else return pack; } @@ -268,9 +279,9 @@ ipaddr_t dst; if (new_ass_ent->ia_frags) { - DBLOCK(1, printf("old frags id= %u, proto= %u, src= ", + DBLOCK(2, printf("old frags id= %u, proto= %u, src= ", ntohs(new_ass_ent->ia_id), - ntohs(new_ass_ent->ia_proto)); + new_ass_ent->ia_proto); writeIpAddr(new_ass_ent->ia_srcaddr); printf(" dst= "); writeIpAddr(new_ass_ent->ia_dstaddr); printf(": "); ip_print_frags(new_ass_ent->ia_frags); printf("\n")); @@ -283,8 +294,17 @@ ipaddr_t dst; } curr_acc= new_ass_ent->ia_frags; new_ass_ent->ia_frags= 0; - icmp_snd_time_exceeded(ip_port->ip_port, curr_acc, - ICMP_FRAG_REASSEM); + if (broadcast_dst(ip_port, new_ass_ent->ia_dstaddr)) + { + DBLOCK(1, printf( + "ip_read'find_ass_ent: reassembly timeout for broadcast packet\n")); + bf_afree(curr_acc); curr_acc= NULL; + } + else + { + icmp_snd_time_exceeded(ip_port->ip_port, + curr_acc, ICMP_FRAG_REASSEM); + } } new_ass_ent->ia_min_ttl= IP_MAX_TTL; new_ass_ent->ia_port= ip_port; @@ -348,15 +368,16 @@ acc_t *pack; return TRUE; } -PRIVATE void packet2user (ip_fd, pack, exp_time) +PUBLIC void ip_packet2user (ip_fd, pack, exp_time, data_len) ip_fd_t *ip_fd; acc_t *pack; time_t exp_time; +size_t data_len; { acc_t *tmp_pack; ip_hdr_t 
*ip_hdr; int result, ip_hdr_len; - size_t size, transf_size; + size_t transf_size; assert (ip_fd->if_flags & IFF_INUSE); if (!(ip_fd->if_flags & IFF_READ_IP)) @@ -380,22 +401,19 @@ time_t exp_time; return; } - size= bf_bufsize (pack); + assert (pack->acc_length >= IP_MIN_HDR_SIZE); + ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + if (ip_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY) { - - pack= bf_packIffLess (pack, IP_MIN_HDR_SIZE); - assert (pack->acc_length >= IP_MIN_HDR_SIZE); - - ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) * 4; - assert (size >= ip_hdr_len); - size -= ip_hdr_len; + assert (data_len > ip_hdr_len); + data_len -= ip_hdr_len; pack= bf_delhead(pack, ip_hdr_len); } - if (size>ip_fd->if_rd_count) + if (data_len > ip_fd->if_rd_count) { tmp_pack= bf_cut (pack, 0, ip_fd->if_rd_count); bf_afree(pack); @@ -403,7 +421,7 @@ time_t exp_time; transf_size= ip_fd->if_rd_count; } else - transf_size= size; + transf_size= data_len; if (ip_fd->if_put_pkt) { @@ -414,16 +432,17 @@ time_t exp_time; result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, (size_t)0, pack, FALSE); if (result >= 0) - if (size > transf_size) + { + if (data_len > transf_size) result= EPACKSIZE; else result= transf_size; + } ip_fd->if_flags &= ~IFF_READ_IP; result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result, (acc_t *)0, FALSE); assert (result >= 0); - return; } PUBLIC void ip_port_arrive (ip_port, pack, ip_hdr) @@ -432,9 +451,8 @@ acc_t *pack; ip_hdr_t *ip_hdr; { ip_fd_t *ip_fd, *first_fd, *share_fd; - ip_hdr_t *hdr; - int port_nr; unsigned long ip_pack_stat; + unsigned size; int i; int hash, proto; time_t exp_time; @@ -452,6 +470,14 @@ ip_hdr_t *ip_hdr; assert (!(ntohs(ip_hdr->ih_flags_fragoff) & (IH_FRAGOFF_MASK|IH_MORE_FRAGS))); } + size= ntohs(ip_hdr->ih_length); + if (size > bf_bufsize(pack)) + { + /* Should discard packet */ + assert(0); + bf_afree(pack); pack= NULL; + return; + } exp_time= get_time() + (ip_hdr->ih_ttl+1) * HZ; @@ -499,13 +525,13 @@ 
ip_hdr_t *ip_hdr; continue; } pack->acc_linkC++; - packet2user(ip_fd, pack, exp_time); + ip_packet2user(ip_fd, pack, exp_time, size); } if (share_fd) { pack->acc_linkC++; - packet2user(share_fd, pack, exp_time); + ip_packet2user(share_fd, pack, exp_time, size); } } if (first_fd) @@ -515,10 +541,10 @@ ip_hdr_t *ip_hdr; !(first_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY)) { (*first_fd->if_put_pkt)(first_fd->if_srfd, pack, - ntohs(ip_hdr->ih_length)); + size); } else - packet2user(first_fd, pack, exp_time); + ip_packet2user(first_fd, pack, exp_time, size); } else { @@ -541,15 +567,12 @@ PUBLIC void ip_arrived(ip_port, pack) ip_port_t *ip_port; acc_t *pack; { - ip_port_t *next_port; ip_hdr_t *ip_hdr; - iroute_t *iroute; ipaddr_t dest; - nettype_t nettype; - int ip_frag_len, ip_hdr_len; + int ip_frag_len, ip_hdr_len, highbyte; size_t pack_size; - acc_t *tmp_pack; - int broadcast; + acc_t *tmp_pack, *hdr_pack; + ev_arg_t ev_arg; pack_size= bf_bufsize(pack); @@ -567,16 +590,24 @@ assert (pack->acc_length >= IP_MIN_HDR_SIZE); ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2; if (ip_hdr_len>IP_MIN_HDR_SIZE) { - pack= bf_align(pack, IP_MIN_HDR_SIZE, 4); pack= bf_packIffLess(pack, ip_hdr_len); ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); } ip_frag_len= ntohs(ip_hdr->ih_length); - if (ip_frag_lenacc_length >= IP_MIN_HDR_SIZE); return; } + if (pack->acc_linkC != 1 || pack->acc_buffer->buf_linkC != 1) + { + /* Get a private copy of the IP header */ + hdr_pack= bf_memreq(ip_hdr_len); + memcpy(ptr2acc_data(hdr_pack), ip_hdr, ip_hdr_len); + pack= bf_delhead(pack, ip_hdr_len); + hdr_pack->acc_next= pack; + pack= hdr_pack; hdr_pack= NULL; + ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + } + assert(pack->acc_linkC == 1); + assert(pack->acc_buffer->buf_linkC == 1); + /* Try to decrement the ttl field with one. 
*/ if (ip_hdr->ih_ttl < 2) { - icmp_snd_time_exceeded(ip_port->ip_port, pack, ICMP_TTL_EXC); + icmp_snd_time_exceeded(ip_port->ip_port, pack, + ICMP_TTL_EXC); return; } ip_hdr->ih_ttl--; ip_hdr_chksum(ip_hdr, ip_hdr_len); /* Avoid routing to bad destinations. */ - nettype= ip_nettype(dest); - if (nettype != IPNT_CLASS_A && nettype != IPNT_CLASS_B && nettype != - IPNT_CLASS_C) + highbyte= ntohl(dest) >> 24; + if (highbyte == 0 || highbyte == 127 || + (highbyte == 169 && (((ntohl(dest) >> 16) & 0xff) == 254)) || + highbyte >= 0xe0) { /* Bogus destination address */ - if (nettype == IPNT_CLASS_D || nettype == IPNT_CLASS_E) - bf_afree(pack); - else - { - icmp_snd_unreachable(ip_port->ip_port, pack, - ICMP_HOST_UNRCH); - } - return; - } - iroute= iroute_frag(ip_port->ip_port, dest); - if (iroute == NULL || iroute->irt_dist == IRTD_UNREACHABLE) - { - /* Also unreachable */ - /* Finding out if we send a network unreachable is too much - * trouble. - */ - icmp_snd_unreachable(ip_port->ip_port, pack, - ICMP_HOST_UNRCH); - return; - } - next_port= &ip_port_table[iroute->irt_port]; - if (next_port != ip_port) - { - if (iroute->irt_gateway != 0) - { - /* Just send the packet to the next gateway */ - next_port->ip_dev_send(next_port, iroute->irt_gateway, - pack, /* no bradcast */ 0); - return; - } - /* The packet is for the attached network. Special addresses - * are the ip address of the interface and net.0 if - * no IP_42BSD_BCAST. 
- */ - if (dest == next_port->ip_ipaddr) - { - ip_port_arrive (next_port, pack, ip_hdr); - return; - } - if (dest == iroute->irt_dest) - { -#if IP_42BSD_BCAST - broadcast= 1; -#else - /* Bogus destination address */ - icmp_snd_dstunrch(pack); - return; -#endif - } - else if (dest == (iroute->irt_dest | ~iroute->irt_subnetmask)) - broadcast= 1; - else - broadcast= 0; - - /* Just send the packet to it's destination */ - next_port->ip_dev_send(next_port, dest, pack, broadcast); - return; - } - - /* Now we know that the packet should be route over the same network - * as it came from. If there is a next hop gateway, we can send - * the packet to that gateway and send a redirect ICMP to the sender - * if the sender is on the attached network. If there is no gateway - * complain. - */ - if (iroute->irt_gateway == 0) - { -#if !CRAMPED - printf("packet should not be here, src="); - writeIpAddr(ip_hdr->ih_src); - printf(" dst="); - writeIpAddr(ip_hdr->ih_dst); - printf("\n"); -#endif bf_afree(pack); return; } - if (((ip_hdr->ih_src ^ ip_port->ip_ipaddr) & - ip_port->ip_subnetmask) == 0) + + /* Further processing from an event handler */ + if (pack->acc_linkC != 1) { - /* Finding out if we can send a network redirect instead of - * a host redirect is too much trouble. 
- */ - pack->acc_linkC++; - icmp_snd_redirect(ip_port->ip_port, pack, - ICMP_REDIRECT_HOST, iroute->irt_gateway); - } - else - { -#if !CRAMPED - printf("packet is wrongly routed, src="); - writeIpAddr(ip_hdr->ih_src); - printf(" dst="); - writeIpAddr(ip_hdr->ih_dst); - printf("\n"); -#endif + tmp_pack= bf_dupacc(pack); bf_afree(pack); + pack= tmp_pack; + tmp_pack= NULL; + } + pack->acc_ext_link= NULL; + if (ip_port->ip_routeq_head) + { + ip_port->ip_routeq_tail->acc_ext_link= pack; + ip_port->ip_routeq_tail= pack; return; } - ip_port->ip_dev_send(ip_port, iroute->irt_gateway, pack, - /* no broadcast */ 0); + + ip_port->ip_routeq_head= pack; + ip_port->ip_routeq_tail= pack; + ev_arg.ev_ptr= ip_port; + ev_enqueue(&ip_port->ip_routeq_event, route_packets, ev_arg); } PUBLIC void ip_arrived_broadcast(ip_port, pack) @@ -773,19 +742,13 @@ assert (pack->acc_length >= IP_MIN_HDR_SIZE); if (!broadcast_dst(ip_port, ip_hdr->ih_dst)) { -#if !CRAMPED - /* this message isn't very useful, but is quite annoying on - * the console - */ - /* - printf("ip[%d]: broadcast packet for ip-nonbroadcast addr, src=", + printf( + "ip[%d]: broadcast packet for ip-nonbroadcast addr, src=", ip_port->ip_port); writeIpAddr(ip_hdr->ih_src); printf(" dst="); writeIpAddr(ip_hdr->ih_dst); printf("\n"); - */ -#endif bf_afree(pack); return; } @@ -793,10 +756,220 @@ assert (pack->acc_length >= IP_MIN_HDR_SIZE); ip_port_arrive (ip_port, pack, ip_hdr); } +PRIVATE void route_packets(ev, ev_arg) +event_t *ev; +ev_arg_t ev_arg; +{ + ip_port_t *ip_port; + ipaddr_t dest; + acc_t *pack; + iroute_t *iroute; + ip_port_t *next_port; + int r, type; + ip_hdr_t *ip_hdr; + size_t req_mtu; + + ip_port= ev_arg.ev_ptr; + assert(&ip_port->ip_routeq_event == ev); + + while (pack= ip_port->ip_routeq_head, pack != NULL) + { + ip_port->ip_routeq_head= pack->acc_ext_link; + + ip_hdr= (ip_hdr_t *)ptr2acc_data(pack); + dest= ip_hdr->ih_dst; + + iroute= iroute_frag(ip_port->ip_port, dest); + if (iroute == NULL || iroute->irt_dist 
== IRTD_UNREACHABLE) + { + /* Also unreachable */ + /* Finding out if we send a network unreachable is too + * much trouble. + */ + if (iroute == NULL) + { + printf("ip[%d]: no route to ", + ip_port-ip_port_table); + writeIpAddr(dest); + printf("\n"); + } + icmp_snd_unreachable(ip_port->ip_port, pack, + ICMP_HOST_UNRCH); + continue; + } + next_port= &ip_port_table[iroute->irt_port]; + + if (ip_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG)) + { + req_mtu= bf_bufsize(pack); + if (req_mtu > next_port->ip_mtu || + (iroute->irt_mtu && req_mtu>iroute->irt_mtu)) + { + icmp_snd_mtu(ip_port->ip_port, pack, + next_port->ip_mtu); + continue; + } + } + + if (next_port != ip_port) + { + if (iroute->irt_gateway != 0) + { + /* Just send the packet to the next gateway */ + pack->acc_linkC++; /* Extra ref for ICMP */ + r= next_port->ip_dev_send(next_port, + iroute->irt_gateway, + pack, IP_LT_NORMAL); + if (r == EDSTNOTRCH) + { + printf("ip[%d]: gw ", + ip_port-ip_port_table); + writeIpAddr(iroute->irt_gateway); + printf(" on ip[%d] is down for dest ", + next_port-ip_port_table); + writeIpAddr(dest); + printf("\n"); + icmp_snd_unreachable(next_port- + ip_port_table, pack, + ICMP_HOST_UNRCH); + pack= NULL; + } + else + { + assert(r == 0); + bf_afree(pack); pack= NULL; + } + continue; + } + /* The packet is for the attached network. Special + * addresses are the ip address of the interface and + * net.0 if no IP_42BSD_BCAST. 
+ */ + if (dest == next_port->ip_ipaddr) + { + ip_port_arrive (next_port, pack, ip_hdr); + continue; + } + if (dest == iroute->irt_dest) + { + /* Never forward obsolete directed broadcasts */ +#if IP_42BSD_BCAST && 0 + type= IP_LT_BROADCAST; +#else + /* Bogus destination address */ + DBLOCK(1, printf( + "ip[%d]: dropping old-fashioned directed broadcast ", + ip_port-ip_port_table); + writeIpAddr(dest); + printf("\n");); + icmp_snd_unreachable(next_port-ip_port_table, + pack, ICMP_HOST_UNRCH); + continue; +#endif + } + else if (dest == (iroute->irt_dest | + ~iroute->irt_subnetmask)) + { + if (!ip_forward_directed_bcast) + { + /* Do not forward directed broadcasts */ + DBLOCK(1, printf( + "ip[%d]: dropping directed broadcast ", + ip_port-ip_port_table); + writeIpAddr(dest); + printf("\n");); + icmp_snd_unreachable(next_port- + ip_port_table, pack, + ICMP_HOST_UNRCH); + continue; + } + else + type= IP_LT_BROADCAST; + } + else + type= IP_LT_NORMAL; + + /* Just send the packet to it's destination */ + pack->acc_linkC++; /* Extra ref for ICMP */ + r= next_port->ip_dev_send(next_port, dest, pack, type); + if (r == EDSTNOTRCH) + { + DBLOCK(1, printf("ip[%d]: next hop ", + ip_port-ip_port_table); + writeIpAddr(dest); + printf(" on ip[%d] is down\n", + next_port-ip_port_table);); + icmp_snd_unreachable(next_port-ip_port_table, + pack, ICMP_HOST_UNRCH); + pack= NULL; + } + else + { + assert(r == 0 || (printf("r = %d\n", r), 0)); + bf_afree(pack); pack= NULL; + } + continue; + } + + /* Now we know that the packet should be routed over the same + * network as it came from. If there is a next hop gateway, + * we can send the packet to that gateway and send a redirect + * ICMP to the sender if the sender is on the attached + * network. If there is no gateway complain. 
+ */ + if (iroute->irt_gateway == 0) + { + printf("ip_arrived: packet should not be here, src="); + writeIpAddr(ip_hdr->ih_src); + printf(" dst="); + writeIpAddr(ip_hdr->ih_dst); + printf("\n"); + bf_afree(pack); + continue; + } + if (((ip_hdr->ih_src ^ ip_port->ip_ipaddr) & + ip_port->ip_subnetmask) == 0) + { + /* Finding out if we can send a network redirect + * instead of a host redirect is too much trouble. + */ + pack->acc_linkC++; + icmp_snd_redirect(ip_port->ip_port, pack, + ICMP_REDIRECT_HOST, iroute->irt_gateway); + } + else + { + printf("ip_arrived: packet is wrongly routed, src="); + writeIpAddr(ip_hdr->ih_src); + printf(" dst="); + writeIpAddr(ip_hdr->ih_dst); + printf("\n"); + printf("in port %d, output %d, dest net ", + ip_port->ip_port, + iroute->irt_port); + writeIpAddr(iroute->irt_dest); + printf("/"); + writeIpAddr(iroute->irt_subnetmask); + printf(" next hop "); + writeIpAddr(iroute->irt_gateway); + printf("\n"); + bf_afree(pack); + continue; + } + /* No code for unreachable ICMPs here. The sender should + * process the ICMP redirect and figure it out. + */ + ip_port->ip_dev_send(ip_port, iroute->irt_gateway, pack, + IP_LT_NORMAL); + } +} + PRIVATE int broadcast_dst(ip_port, dest) ip_port_t *ip_port; ipaddr_t dest; { + ipaddr_t my_ipaddr, netmask, classmask; + /* Treat class D (multicast) address as broadcasts. 
*/ if ((dest & HTONL(0xF0000000)) == HTONL(0xE0000000)) { @@ -808,39 +981,45 @@ ipaddr_t dest; { return 1; } - - if (((ip_port->ip_ipaddr ^ dest) & ip_port->ip_netmask) != 0) - { - /* Two possibilities, 0 (iff IP_42BSD_BCAST) and -1 */ - if (dest == HTONL((ipaddr_t)-1)) - return 1; -#if IP_42BSD_BCAST - if (dest == HTONL((ipaddr_t)0)) - return 1; -#endif - return 0; - } - if (((ip_port->ip_ipaddr ^ dest) & ip_port->ip_subnetmask) != 0) - { - /* Two possibilities, netwerk.0 (iff IP_42BSD_BCAST) and - * netwerk.-1 - */ - if ((dest & ~ip_port->ip_netmask) == ~ip_port->ip_netmask) - return 1; -#if IP_42BSD_BCAST - if ((dest & ~ip_port->ip_netmask) == 0) - return 1; -#endif - return 0; - } - - /* Two possibilities, netwerk.subnet.0 (iff IP_42BSD_BCAST) and - * netwerk.subnet.-1 - */ - if ((dest & ~ip_port->ip_subnetmask) == ~ip_port->ip_subnetmask) + /* Two possibilities, 0 (iff IP_42BSD_BCAST) and -1 */ + if (dest == HTONL((ipaddr_t)-1)) return 1; #if IP_42BSD_BCAST - if ((dest & ~ip_port->ip_subnetmask) == 0) + if (dest == HTONL((ipaddr_t)0)) + return 1; +#endif + netmask= ip_port->ip_subnetmask; + my_ipaddr= ip_port->ip_ipaddr; + + if (((my_ipaddr ^ dest) & netmask) != 0) + { + classmask= ip_port->ip_classfulmask; + + /* Not a subnet broadcast, maybe a classful broadcast */ + if (((my_ipaddr ^ dest) & classmask) != 0) + { + return 0; + } + /* Two possibilities, net.0 (iff IP_42BSD_BCAST) and net.-1 */ + if ((dest & ~classmask) == ~classmask) + { + return 1; + } +#if IP_42BSD_BCAST + if ((dest & ~classmask) == 0) + return 1; +#endif + return 0; + } + + if (!(ip_port->ip_flags & IPF_SUBNET_BCAST)) + return 0; /* No subnet broadcasts on this network */ + + /* Two possibilities, subnet.0 (iff IP_42BSD_BCAST) and subnet.-1 */ + if ((dest & ~netmask) == ~netmask) + return 1; +#if IP_42BSD_BCAST + if ((dest & ~netmask) == 0) return 1; #endif return 0; @@ -856,7 +1035,7 @@ ev_arg_t arg; ip_port= arg.ev_ptr; assert(ev == &ip_port->ip_loopb_event); - while(pack= 
ip_port->ip_loopb_head) + while(pack= ip_port->ip_loopb_head, pack != NULL) { ip_port->ip_loopb_head= pack->acc_ext_link; ip_arrived(ip_port, pack); @@ -864,5 +1043,5 @@ ev_arg_t arg; } /* - * $PchId: ip_read.c,v 1.9 1997/01/31 08:51:39 philip Exp $ + * $PchId: ip_read.c,v 1.33 2005/06/28 14:18:50 philip Exp $ */ diff --git a/servers/inet/generic/ip_write.c b/servers/inet/generic/ip_write.c index 8840e57d7..5438837ab 100644 --- a/servers/inet/generic/ip_write.c +++ b/servers/inet/generic/ip_write.c @@ -59,11 +59,12 @@ size_t data_len; ip_port_t *ip_port; ip_fd_t *ip_fd; ip_hdr_t *ip_hdr, *tmp_hdr; - ipaddr_t dstaddr, netmask, nexthop, hostrep_dst; + ipaddr_t dstaddr, nexthop, hostrep_dst, my_ipaddr, netmask; u8_t *addrInBytes; acc_t *tmp_pack, *tmp_pack1; int hdr_len, hdr_opt_len, r; - int broadcast, ttl; + int type, ttl; + size_t req_mtu; ev_arg_t arg; ip_fd= &ip_fd_table[fd]; @@ -75,9 +76,16 @@ size_t data_len; return EBADMODE; } - data_len= bf_bufsize(data); + if (!(ip_fd->if_port->ip_flags & IPF_IPADDRSET)) + { + /* Interface is down. What kind of error do we want? For + * the moment, we return OK. 
+ */ + bf_afree(data); + return NW_OK; + } - assert(ip_fd->if_port->ip_flags & IPF_IPADDRSET); + data_len= bf_bufsize(data); if (ip_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY) { @@ -179,6 +187,9 @@ size_t data_len; if (ip_fd->if_ipopt.nwio_flags & NWIO_REMSPEC) ip_hdr->ih_dst= ip_fd->if_ipopt.nwio_rem; + netmask= ip_port->ip_subnetmask; + my_ipaddr= ip_port->ip_ipaddr; + dstaddr= ip_hdr->ih_dst; hostrep_dst= ntohl(dstaddr); r= 0; @@ -188,10 +199,13 @@ size_t data_len; ; /* OK, Multicast */ else if ((hostrep_dst & 0xf0000000l) == 0xf0000000l) r= EBADDEST; /* Bad class */ - else if ((dstaddr ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) + else if ((dstaddr ^ my_ipaddr) & netmask) ; /* OK, remote destination */ - else if (!(dstaddr & ~ip_port->ip_subnetmask)) + else if (!(dstaddr & ~netmask) && + (ip_port->ip_flags & IPF_SUBNET_BCAST)) + { r= EBADDEST; /* Zero host part */ + } if (r<0) { DIFBLOCK(1, r == EBADDEST, @@ -207,6 +221,20 @@ size_t data_len; assert (data->acc_length >= IP_MIN_HDR_SIZE); ip_hdr= (ip_hdr_t *)ptr2acc_data(data); + if (ip_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG)) + { + req_mtu= bf_bufsize(data); + if (req_mtu > ip_port->ip_mtu) + { + DBLOCK(1, printf( + "packet is larger than link MTU and DF is set\n")); + bf_afree(data); + return EPACKSIZE; + } + } + else + req_mtu= 0; + addrInBytes= (u8_t *)&dstaddr; if ((addrInBytes[0] & 0xff) == 0x7f) /* local loopback */ @@ -231,15 +259,24 @@ size_t data_len; return NW_OK; } - if (dstaddr == (ipaddr_t)-1) + if ((dstaddr & HTONL(0xe0000000)) == HTONL(0xe0000000)) { - r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data, - /* broadcast */ 1); - return r; + if (dstaddr == (ipaddr_t)-1) + { + r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data, + IP_LT_BROADCAST); + return r; + } + if (ip_nettype(dstaddr) == IPNT_CLASS_D) + { + /* Multicast, what about multicast routing? 
*/ + r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data, + IP_LT_MULTICAST); + return r; + } } - netmask= ip_get_netmask(dstaddr); - if (dstaddr == ip_port->ip_ipaddr) + if (dstaddr == my_ipaddr) { assert (data->acc_linkC == 1); @@ -258,17 +295,18 @@ size_t data_len; return NW_OK; } - if (((dstaddr ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) == 0) + if (((dstaddr ^ my_ipaddr) & netmask) == 0) { - broadcast= (dstaddr == (ip_port->ip_ipaddr | - ~ip_port->ip_subnetmask)); + type= ((dstaddr == (my_ipaddr | ~netmask) && + (ip_port->ip_flags & IPF_SUBNET_BCAST)) ? + IP_LT_BROADCAST : IP_LT_NORMAL); - r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data, - broadcast); + r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data, type); return r; } - r= oroute_frag (ip_port - ip_port_table, dstaddr, ttl, &nexthop); + r= oroute_frag (ip_port - ip_port_table, dstaddr, ttl, req_mtu, + &nexthop); if (r == NW_OK) { @@ -289,7 +327,7 @@ size_t data_len; else { r= (*ip_port->ip_dev_send)(ip_port, - nexthop, data, /* no broadcast */ 0); + nexthop, data, IP_LT_NORMAL); } } else @@ -308,18 +346,18 @@ int ip_hdr_len; ip_hdr->ih_hdr_chk= ~oneC_sum (0, (u16_t *)ip_hdr, ip_hdr_len); } -PUBLIC acc_t *ip_split_pack (ip_port, ref_last, first_size) +PUBLIC acc_t *ip_split_pack (ip_port, ref_last, mtu) ip_port_t *ip_port; acc_t **ref_last; -int first_size; +int mtu; { int pack_siz; ip_hdr_t *first_hdr, *second_hdr; int first_hdr_len, second_hdr_len; int first_data_len, second_data_len; - int new_first_data_len; + int data_len, max_data_len, nfrags, new_first_data_len; int first_opt_size, second_opt_size; - acc_t *first_pack, *second_pack, *tmp_pack, *tmp_pack1; + acc_t *first_pack, *second_pack, *tmp_pack; u8_t *first_optptr, *second_optptr; int i, optlen; @@ -327,26 +365,53 @@ int first_size; *ref_last= 0; second_pack= 0; + first_pack= bf_align(first_pack, IP_MIN_HDR_SIZE, 4); first_pack= bf_packIffLess(first_pack, IP_MIN_HDR_SIZE); assert (first_pack->acc_length >= IP_MIN_HDR_SIZE); first_hdr= 
(ip_hdr_t *)ptr2acc_data(first_pack); first_hdr_len= (first_hdr->ih_vers_ihl & IH_IHL_MASK) * 4; - - pack_siz= bf_bufsize(first_pack); - assert(pack_siz > first_size); - - if (first_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG)) + if (first_hdr_len>IP_MIN_HDR_SIZE) { - icmp_snd_unreachable(ip_port->ip_port, first_pack, - ICMP_FRAGM_AND_DF); - return NULL; + first_pack= bf_packIffLess(first_pack, first_hdr_len); + first_hdr= (ip_hdr_t *)ptr2acc_data(first_pack); } - first_data_len= ntohs(first_hdr->ih_length) - first_hdr_len; - new_first_data_len= (first_size- first_hdr_len) & ~7; - /* data goes in 8 byte chuncks */ - second_data_len= first_data_len-new_first_data_len; + pack_siz= bf_bufsize(first_pack); + assert(pack_siz > mtu); + + assert (!(first_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG))); + + if (first_pack->acc_linkC != 1 || + first_pack->acc_buffer->buf_linkC != 1) + { + /* Get a private copy of the IP header */ + tmp_pack= bf_memreq(first_hdr_len); + memcpy(ptr2acc_data(tmp_pack), first_hdr, first_hdr_len); + first_pack= bf_delhead(first_pack, first_hdr_len); + tmp_pack->acc_next= first_pack; + first_pack= tmp_pack; tmp_pack= NULL; + first_hdr= (ip_hdr_t *)ptr2acc_data(first_pack); + } + + data_len= ntohs(first_hdr->ih_length) - first_hdr_len; + + /* Try to split the packet evenly. 
*/ + assert(mtu > first_hdr_len); + max_data_len= mtu-first_hdr_len; + nfrags= (data_len/max_data_len)+1; + new_first_data_len= data_len/nfrags; + if (new_first_data_len < 8) + { + /* Special case for extremely small MTUs */ + new_first_data_len= 8; + } + new_first_data_len &= ~7; /* data goes in 8 byte chuncks */ + + assert(new_first_data_len >= 8); + assert(new_first_data_len+first_hdr_len <= mtu); + + second_data_len= data_len-new_first_data_len; second_pack= bf_cut(first_pack, first_hdr_len+ new_first_data_len, second_data_len); tmp_pack= first_pack; @@ -406,7 +471,7 @@ int first_size; } second_hdr_len= IP_MIN_HDR_SIZE + second_opt_size; - second_hdr->ih_vers_ihl= second_hdr->ih_vers_ihl & 0xf0 + second_hdr->ih_vers_ihl= (second_hdr->ih_vers_ihl & 0xf0) + (second_hdr_len/4); second_hdr->ih_length= htons(second_data_len+ second_hdr_len); @@ -421,7 +486,7 @@ int first_size; assert (!(second_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG))); ip_hdr_chksum(first_hdr, first_hdr_len); - if (second_data_len+second_hdr_len <= first_size) + if (second_data_len+second_hdr_len <= mtu) { /* second_pack will not be split any further, so we have to * calculate the header checksum. 
@@ -430,6 +495,7 @@ int first_size; } *ref_last= second_pack; + return first_pack; } @@ -440,12 +506,10 @@ int error; if ((*ip_fd->if_get_userdata)(ip_fd->if_srfd, (size_t)error, (size_t)0, FALSE)) { -#if !CRAMPED ip_panic(( "can't error_reply" )); -#endif } } /* - * $PchId: ip_write.c,v 1.7.1.1.1.1 2001/01/22 19:59:07 philip Exp $ + * $PchId: ip_write.c,v 1.22 2004/08/03 11:11:04 philip Exp $ */ diff --git a/servers/inet/generic/ipr.c b/servers/inet/generic/ipr.c index cb2996b9b..f9137caef 100644 --- a/servers/inet/generic/ipr.c +++ b/servers/inet/generic/ipr.c @@ -17,7 +17,7 @@ Copyright 1995 Philip Homburg THIS_FILE -#define OROUTE_NR 32 +#define OROUTE_NR 128 #define OROUTE_STATIC_NR 16 #define OROUTE_HASH_ASS_NR 4 #define OROUTE_HASH_NR 32 @@ -40,7 +40,7 @@ PRIVATE oroute_t *oroute_head; PRIVATE int static_oroute_nr; PRIVATE oroute_hash_t oroute_hash_table[OROUTE_HASH_NR][OROUTE_HASH_ASS_NR]; -#define IROUTE_NR (sizeof(int) == 2 ? 64 : 512) +#define IROUTE_NR 512 #define IROUTE_HASH_ASS_NR 4 #define IROUTE_HASH_NR 32 #define IROUTE_HASH_MASK (IROUTE_HASH_NR-1) @@ -64,8 +64,8 @@ FORWARD oroute_t *oroute_find_ent ARGS(( int port_nr, ipaddr_t dest )); FORWARD void oroute_del ARGS(( oroute_t *oroute )); FORWARD oroute_t *sort_dists ARGS(( oroute_t *oroute )); FORWARD oroute_t *sort_gws ARGS(( oroute_t *oroute )); -FORWARD oroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask )); -FORWARD iroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask )); +FORWARD void oroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask )); +FORWARD void iroute_uncache_nw ARGS(( ipaddr_t dest, ipaddr_t netmask )); PUBLIC void ipr_init() { @@ -73,17 +73,13 @@ PUBLIC void ipr_init() oroute_t *oroute; iroute_t *iroute; -#if ZERO for (i= 0, oroute= oroute_table; iort_flags= ORTF_EMPTY; static_oroute_nr= 0; -#endif assert(OROUTE_HASH_ASS_NR == 4); -#if ZERO for (i= 0, iroute= iroute_table; iirt_flags= IRTF_EMPTY; -#endif assert(IROUTE_HASH_ASS_NR == 4); } @@ -92,14 +88,12 @@ PUBLIC 
iroute_t *iroute_frag(port_nr, dest) int port_nr; ipaddr_t dest; { - int hash, i, r_hash_ind; + int hash, i; iroute_hash_t *iroute_hash; iroute_hash_t tmp_hash; iroute_t *iroute, *bestroute; - time_t currtim; unsigned long hash_tmp; - - currtim= get_time(); + u32_t tmp_mask; hash= hash_iroute(port_nr, dest, hash_tmp); iroute_hash= &iroute_hash_table[hash][0]; @@ -150,11 +144,12 @@ ipaddr_t dest; /* More specific netmasks are better */ if (iroute->irt_subnetmask != bestroute->irt_subnetmask) { - if (ntohl(iroute->irt_subnetmask) > - ntohl(bestroute->irt_subnetmask)) - { + /* Using two ntohl macros in one expression + * is not allowed (tmp_l is modified twice) + */ + tmp_mask= ntohl(iroute->irt_subnetmask); + if (tmp_mask > ntohl(bestroute->irt_subnetmask)) bestroute= iroute; - } continue; } @@ -189,10 +184,11 @@ ipaddr_t dest; return bestroute; } -PUBLIC int oroute_frag(port_nr, dest, ttl, nexthop) +PUBLIC int oroute_frag(port_nr, dest, ttl, msgsize, nexthop) int port_nr; ipaddr_t dest; int ttl; +size_t msgsize; ipaddr_t *nexthop; { oroute_t *oroute; @@ -200,6 +196,11 @@ ipaddr_t *nexthop; oroute= oroute_find_ent(port_nr, dest); if (!oroute || oroute->ort_dist > ttl) return EDSTNOTRCH; + if (msgsize && oroute->ort_mtu && + oroute->ort_mtu < msgsize) + { + return EPACKSIZE; + } *nexthop= oroute->ort_gateway; return NW_OK; @@ -207,13 +208,14 @@ ipaddr_t *nexthop; PUBLIC int ipr_add_oroute(port_nr, dest, subnetmask, gateway, - timeout, dist, static_route, preference, oroute_p) + timeout, dist, mtu, static_route, preference, oroute_p) int port_nr; ipaddr_t dest; ipaddr_t subnetmask; ipaddr_t gateway; time_t timeout; int dist; +int mtu; int static_route; i32_t preference; oroute_t **oroute_p; @@ -222,24 +224,30 @@ oroute_t **oroute_p; ip_port_t *ip_port; oroute_t *oroute, *oldest_route, *prev, *nw_route, *gw_route, *prev_route; - time_t currtim; + time_t currtim, exp_tim, exp_tim_orig; oldest_route= 0; currtim= get_time(); + if (timeout) + exp_tim= timeout+currtim; + 
else + exp_tim= 0; DBLOCK(0x10, - printf("adding oroute to "); writeIpAddr(dest); + printf("ip[%d]: adding oroute to ", port_nr); + writeIpAddr(dest); printf("["); writeIpAddr(subnetmask); printf("] through "); writeIpAddr(gateway); - printf(" timeout: %lds, distance %d\n", - (long)timeout/HZ, dist)); + printf(" timeout: %lds, distance %d, pref %ld, mtu %d\n", + (long)timeout/HZ, dist, (long)preference, mtu)); ip_port= &ip_port_table[port_nr]; /* Validate gateway */ if (((gateway ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) != 0) { - DBLOCK(2, printf("ipr_add_oroute: invalid gateway: "); writeIpAddr(gateway); printf("\n")); + DBLOCK(1, printf("ip[%d]: (ipr_add_oroute) invalid gateway: ", + port_nr); writeIpAddr(gateway); printf("\n")); return EINVAL; } @@ -273,24 +281,27 @@ oroute_t **oroute_p; continue; if (oroute->ort_dist > dist) continue; - if (oroute->ort_dist == dist && - oroute->ort_pref == preference) - { - if (timeout) - oroute->ort_exp_tim= currtim + timeout; - else - oroute->ort_exp_tim= 0; - oroute->ort_timestamp= currtim; - assert(oroute->ort_port == port_nr); - if (oroute_p != NULL) - *oroute_p= oroute; - return NW_OK; - } break; } if (oroute) { assert(oroute->ort_port == port_nr); + if (dest != 0) + { + /* The new expire should not be later + * than the old expire time. Except for + * default routes, where the expire time + * is simple set to the new value. 
+ */ + exp_tim_orig= oroute->ort_exp_tim; + if (!exp_tim) + exp_tim= exp_tim_orig; + else if (exp_tim_orig && + exp_tim > exp_tim_orig) + { + exp_tim= exp_tim_orig; + } + } oroute_del(oroute); oroute->ort_flags= 0; oldest_route= oroute; @@ -341,12 +352,10 @@ oroute_t **oroute_p; oldest_route->ort_dest= dest; oldest_route->ort_gateway= gateway; oldest_route->ort_subnetmask= subnetmask; - if (timeout) - oldest_route->ort_exp_tim= currtim + timeout; - else - oldest_route->ort_exp_tim= 0; + oldest_route->ort_exp_tim= exp_tim; oldest_route->ort_timestamp= currtim; oldest_route->ort_dist= dist; + oldest_route->ort_mtu= mtu; oldest_route->ort_port= port_nr; oldest_route->ort_flags= ORTF_INUSE; oldest_route->ort_pref= preference; @@ -357,12 +366,12 @@ oroute_t **oroute_p; * and insert the entry during the reconstruction. */ for (prev= 0, nw_route= oroute_head; nw_route; - prev= nw_route, nw_route= nw_route->ort_nextnw) + prev= nw_route, nw_route= nw_route->ort_nextnw) { if (nw_route->ort_port != port_nr) continue; if (nw_route->ort_dest == dest && - nw_route->ort_subnetmask == subnetmask) + nw_route->ort_subnetmask == subnetmask) { if (prev) prev->ort_nextnw= nw_route->ort_nextnw; @@ -373,7 +382,7 @@ oroute_t **oroute_p; } prev_route= nw_route; for(prev= NULL, gw_route= nw_route; gw_route; - prev= gw_route, gw_route= gw_route->ort_nextgw) + prev= gw_route, gw_route= gw_route->ort_nextgw) { if (gw_route->ort_gateway == gateway) { @@ -399,6 +408,92 @@ oroute_t **oroute_p; return NW_OK; } +PUBLIC int ipr_del_oroute(port_nr, dest, subnetmask, gateway, static_route) +int port_nr; +ipaddr_t dest; +ipaddr_t subnetmask; +ipaddr_t gateway; +int static_route; +{ + int i; + oroute_t *oroute; + + for(i= 0, oroute= oroute_table; iort_flags & ORTF_INUSE) == 0) + continue; + if (oroute->ort_port != port_nr || + oroute->ort_dest != dest || + oroute->ort_subnetmask != subnetmask || + oroute->ort_gateway != gateway) + { + continue; + } + if (!!(oroute->ort_flags & ORTF_STATIC) != 
static_route) + continue; + break; + } + + if (i == OROUTE_NR) + return ESRCH; + + if (static_route) + static_oroute_nr--; + + oroute_del(oroute); + oroute->ort_flags &= ~ORTF_INUSE; + return NW_OK; +} + + + +PUBLIC void ipr_chk_otab(port_nr, addr, mask) +int port_nr; +ipaddr_t addr; +ipaddr_t mask; +{ + int i; + oroute_t *oroute; + + DBLOCK(1, + printf("ip[%d] (ipr_chk_otab): addr ", port_nr); + writeIpAddr(addr); + printf(" mask "); + writeIpAddr(mask); + printf("\n"); + ); + + if (addr == 0) + { + /* Special hack to flush entries for an interface that + * goes down. + */ + addr= mask= HTONL(0xffffffff); + } + + for(i= 0, oroute= oroute_table; iort_flags & ORTF_INUSE) == 0) + continue; + if (oroute->ort_port != port_nr || + ((oroute->ort_gateway ^ addr) & mask) == 0) + { + continue; + } + DBLOCK(1, printf("ip[%d] (ipr_chk_otab): deleting route to ", + port_nr); + writeIpAddr(oroute->ort_dest); + printf(" gw "); + writeIpAddr(oroute->ort_gateway); + printf("\n")); + + if (oroute->ort_flags & ORTF_STATIC) + static_oroute_nr--; + oroute_del(oroute); + oroute->ort_flags &= ~ORTF_INUSE; + } +} + PUBLIC void ipr_gateway_down(port_nr, gateway, timeout) int port_nr; @@ -421,7 +516,8 @@ time_t timeout; continue; result= ipr_add_oroute(port_nr, route_ind->ort_dest, route_ind->ort_subnetmask, gateway, - timeout, ORTD_UNREACHABLE, FALSE, 0, NULL); + timeout, ORTD_UNREACHABLE, route_ind->ort_mtu, + FALSE, 0, NULL); assert(result == NW_OK); } } @@ -440,13 +536,14 @@ time_t timeout; if (!oroute) { - DBLOCK(1, printf("got a dest unreachable for "); + DBLOCK(1, printf("ip[%d]: got a dest unreachable for ", + port_nr); writeIpAddr(dest); printf("but no route present\n")); return; } result= ipr_add_oroute(port_nr, dest, netmask, oroute->ort_gateway, - timeout, ORTD_UNREACHABLE, FALSE, 0, NULL); + timeout, ORTD_UNREACHABLE, oroute->ort_mtu, FALSE, 0, NULL); assert(result == NW_OK); } @@ -461,29 +558,41 @@ ipaddr_t new_gateway; time_t timeout; { oroute_t *oroute; + ip_port_t 
*ip_port; int result; + ip_port= &ip_port_table[port_nr]; oroute= oroute_find_ent(port_nr, dest); if (!oroute) { - DBLOCK(1, printf("got a redirect for "); + DBLOCK(1, printf("ip[%d]: got a redirect for ", port_nr); writeIpAddr(dest); printf("but no route present\n")); return; } if (oroute->ort_gateway != old_gateway) { - DBLOCK(1, printf("got a redirect from "); + DBLOCK(1, printf("ip[%d]: got a redirect from ", port_nr); writeIpAddr(old_gateway); printf(" for "); writeIpAddr(dest); printf(" but curr gateway is "); writeIpAddr(oroute->ort_gateway); printf("\n")); return; } + if ((new_gateway ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) + { + DBLOCK(1, printf("ip[%d]: redirect from ", port_nr); + writeIpAddr(old_gateway); printf(" for "); + writeIpAddr(dest); printf(" but new gateway "); + writeIpAddr(new_gateway); + printf(" is not on local subnet\n")); + return; + } if (oroute->ort_flags & ORTF_STATIC) { if (oroute->ort_dest == dest) { - DBLOCK(1, printf("got a redirect for "); + DBLOCK(1, printf("ip[%d]: got a redirect for ", + port_nr); writeIpAddr(dest); printf("but route is fixed\n")); return; @@ -493,11 +602,11 @@ time_t timeout; { result= ipr_add_oroute(port_nr, dest, netmask, oroute->ort_gateway, HZ, ORTD_UNREACHABLE, - FALSE, 0, NULL); + oroute->ort_mtu, FALSE, 0, NULL); assert(result == NW_OK); } result= ipr_add_oroute(port_nr, dest, netmask, new_gateway, - timeout, 1, FALSE, 0, NULL); + timeout, 1, oroute->ort_mtu, FALSE, 0, NULL); assert(result == NW_OK); } @@ -516,18 +625,20 @@ time_t timeout; if (!oroute) { - DBLOCK(1, printf("got a ttl exceeded for "); + DBLOCK(1, printf("ip[%d]: got a ttl exceeded for ", + port_nr); writeIpAddr(dest); printf("but no route present\n")); return; } new_dist= oroute->ort_dist * 2; - if (new_dist>IP_MAX_TTL) + if (new_dist > IP_DEF_TTL) { new_dist= oroute->ort_dist+1; - if (new_dist>IP_MAX_TTL) + if (new_dist >= IP_DEF_TTL) { - DBLOCK(1, printf("got a ttl exceeded for "); + DBLOCK(1, printf("ip[%d]: got a ttl 
exceeded for ", + port_nr); writeIpAddr(dest); printf(" but dist is %d\n", oroute->ort_dist)); @@ -536,7 +647,37 @@ time_t timeout; } result= ipr_add_oroute(port_nr, dest, netmask, oroute->ort_gateway, - timeout, new_dist, FALSE, 0, NULL); + timeout, new_dist, oroute->ort_mtu, FALSE, 0, NULL); + assert(result == NW_OK); +} + +PUBLIC void ipr_mtu(port_nr, dest, mtu, timeout) +int port_nr; +ipaddr_t dest; +u16_t mtu; +time_t timeout; +{ + oroute_t *oroute; + int result; + + oroute= oroute_find_ent(port_nr, dest); + + if (!oroute) + { + DBLOCK(1, printf("ip[%d]: got a mtu exceeded for ", + port_nr); + writeIpAddr(dest); printf("but no route present\n")); + return; + } + + if (mtu < IP_MIN_MTU) + return; + if (oroute->ort_mtu && mtu >= oroute->ort_mtu) + return; /* Only decrease mtu */ + + result= ipr_add_oroute(port_nr, dest, HTONL(0xffffffff), + oroute->ort_gateway, timeout, oroute->ort_dist, mtu, + FALSE, 0, NULL); assert(result == NW_OK); } @@ -572,6 +713,7 @@ nwio_route_t *route_ent; route_ent->nwr_flags |= NWRF_STATIC; } route_ent->nwr_pref= oroute->ort_pref; + route_ent->nwr_mtu= oroute->ort_mtu; route_ent->nwr_ifaddr= ip_get_ifaddr(oroute->ort_port); return NW_OK; } @@ -581,12 +723,13 @@ PRIVATE oroute_t *oroute_find_ent(port_nr, dest) int port_nr; ipaddr_t dest; { - int hash, i, r_hash_ind; + int hash; oroute_hash_t *oroute_hash; oroute_hash_t tmp_hash; oroute_t *oroute, *bestroute; time_t currtim; unsigned long hash_tmp; + u32_t tmp_mask; currtim= get_time(); @@ -645,8 +788,11 @@ ipaddr_t dest; continue; } assert(oroute->ort_dest != bestroute->ort_dest); - if (ntohl(oroute->ort_subnetmask) > - ntohl(bestroute->ort_subnetmask)) + /* Using two ntohl macros in one expression + * is not allowed (tmp_l is modified twice) + */ + tmp_mask= ntohl(oroute->ort_subnetmask); + if (tmp_mask > ntohl(bestroute->ort_subnetmask)) { bestroute= oroute; continue; @@ -670,6 +816,19 @@ oroute_t *oroute; { oroute_t *prev, *nw_route, *gw_route, *dist_route, *prev_route; + 
DBLOCK(0x10, + printf("ip[%d]: deleting oroute to ", oroute->ort_port); + writeIpAddr(oroute->ort_dest); + printf("["); writeIpAddr(oroute->ort_subnetmask); + printf("] through "); + writeIpAddr(oroute->ort_gateway); + printf( + " timestamp %lds, timeout: %lds, distance %d pref %ld mtu %ld ", + (long)oroute->ort_timestamp/HZ, + (long)oroute->ort_exp_tim/HZ, oroute->ort_dist, + (long)oroute->ort_pref, (long)oroute->ort_mtu); + printf("flags 0x%x\n", oroute->ort_flags)); + for (prev= NULL, nw_route= oroute_head; nw_route; prev= nw_route, nw_route= nw_route->ort_nextnw) { @@ -735,6 +894,8 @@ oroute_t *oroute; int best_dist, best_pref; best= NULL; + best_dist= best_pref= 0; + best_prev= NULL; for (prev= NULL, r= oroute; r; prev= r, r= r->ort_nextdist) { if (best == NULL) @@ -777,6 +938,8 @@ oroute_t *oroute; int best_dist, best_pref; best= NULL; + best_dist= best_pref= 0; + best_prev= NULL; for (prev= NULL, r= oroute; r; prev= r, r= r->ort_nextgw) { if (best == NULL) @@ -812,7 +975,7 @@ oroute_t *oroute; } -PRIVATE oroute_uncache_nw(dest, netmask) +PRIVATE void oroute_uncache_nw(dest, netmask) ipaddr_t dest; ipaddr_t netmask; { @@ -849,6 +1012,7 @@ nwio_route_t *route_ent; iroute= &iroute_table[ent_no]; + route_ent->nwr_ent_no= ent_no; route_ent->nwr_ent_count= IROUTE_NR; route_ent->nwr_dest= iroute->irt_dest; route_ent->nwr_netmask= iroute->irt_subnetmask; @@ -864,23 +1028,38 @@ nwio_route_t *route_ent; route_ent->nwr_flags |= NWRF_UNREACHABLE; } route_ent->nwr_pref= 0; + route_ent->nwr_mtu= iroute->irt_mtu; route_ent->nwr_ifaddr= ip_get_ifaddr(iroute->irt_port); return NW_OK; } PUBLIC int ipr_add_iroute(port_nr, dest, subnetmask, gateway, - dist, static_route, iroute_p) + dist, mtu, static_route, iroute_p) int port_nr; ipaddr_t dest; ipaddr_t subnetmask; ipaddr_t gateway; int dist; +int mtu; int static_route; iroute_t **iroute_p; { int i; iroute_t *iroute, *unused_route; + ip_port_t *ip_port; + + ip_port= &ip_port_table[port_nr]; + + /* Check gateway */ + if 
(((gateway ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) != 0 && + gateway != 0) + { + DBLOCK(1, printf("ip[%d] (ipr_add_iroute): invalid gateway: ", + port_nr); + writeIpAddr(gateway); printf("\n")); + return EINVAL; + } unused_route= NULL; if (static_route) @@ -932,6 +1111,7 @@ iroute_t **iroute_p; iroute->irt_subnetmask= subnetmask; iroute->irt_gateway= gateway; iroute->irt_dist= dist; + iroute->irt_mtu= mtu; iroute->irt_flags= IRTF_INUSE; if (static_route) iroute->irt_flags |= IRTF_STATIC; @@ -943,13 +1123,11 @@ iroute_t **iroute_p; } -PUBLIC int ipr_del_iroute(port_nr, dest, subnetmask, gateway, - dist, static_route) +PUBLIC int ipr_del_iroute(port_nr, dest, subnetmask, gateway, static_route) int port_nr; ipaddr_t dest; ipaddr_t subnetmask; ipaddr_t gateway; -int dist; int static_route; { int i; @@ -983,7 +1161,63 @@ int static_route; } -PRIVATE iroute_uncache_nw(dest, netmask) +PUBLIC void ipr_chk_itab(port_nr, addr, mask) +int port_nr; +ipaddr_t addr; +ipaddr_t mask; +{ + int i; + iroute_t *iroute; + + DBLOCK(1, + printf("ip[%d] (ipr_chk_itab): addr ", port_nr); + writeIpAddr(addr); + printf(" mask "); + writeIpAddr(mask); + printf("\n"); + ); + + if (addr == 0) + { + /* Special hack to flush entries for an interface that + * goes down. + */ + addr= mask= HTONL(0xffffffff); + } + + for(i= 0, iroute= iroute_table; iirt_flags & IRTF_INUSE) == 0) + continue; + if (iroute->irt_port != port_nr) + continue; + if (iroute->irt_gateway == 0) + { + /* Special case: attached network. */ + if (iroute->irt_subnetmask == mask && + iroute->irt_dest == (addr & mask)) + { + /* Nothing changed. 
*/ + continue; + } + } + if (((iroute->irt_gateway ^ addr) & mask) == 0) + continue; + + DBLOCK(1, printf("ip[%d] (ipr_chk_itab): deleting route to ", + port_nr); + writeIpAddr(iroute->irt_dest); + printf(" gw "); + writeIpAddr(iroute->irt_gateway); + printf("\n")); + + iroute_uncache_nw(iroute->irt_dest, iroute->irt_subnetmask); + iroute->irt_flags &= ~IRTF_INUSE; + } +} + + +PRIVATE void iroute_uncache_nw(dest, netmask) ipaddr_t dest; ipaddr_t netmask; { @@ -1008,9 +1242,5 @@ ipaddr_t netmask; /* - * Debugging, management - */ - -/* - * $PchId: ipr.c,v 1.9 1996/07/31 17:26:33 philip Exp $ + * $PchId: ipr.c,v 1.23 2003/01/22 11:49:58 philip Exp $ */ diff --git a/servers/inet/generic/ipr.h b/servers/inet/generic/ipr.h index 552515d76..bece6b056 100644 --- a/servers/inet/generic/ipr.h +++ b/servers/inet/generic/ipr.h @@ -14,6 +14,7 @@ typedef struct oroute ipaddr_t ort_subnetmask; int ort_dist; i32_t ort_pref; + u32_t ort_mtu; ipaddr_t ort_gateway; time_t ort_exp_tim; time_t ort_timestamp; @@ -36,6 +37,7 @@ typedef struct iroute ipaddr_t irt_gateway; ipaddr_t irt_subnetmask; int irt_dist; + u32_t irt_mtu; int irt_port; int irt_flags; } iroute_t; @@ -50,22 +52,28 @@ typedef struct iroute #define IPR_TTL_TIMEOUT (60L * HZ) #define IPR_REDIRECT_TIMEOUT (20 * 60L * HZ) #define IPR_GW_DOWN_TIMEOUT (60L * HZ) +#define IPR_MTU_TIMEOUT (10*60L * HZ) /* RFC-1191 */ /* Prototypes */ iroute_t *iroute_frag ARGS(( int port_nr, ipaddr_t dest )); -int oroute_frag ARGS(( int port_nr, ipaddr_t dest, int ttl, +int oroute_frag ARGS(( int port_nr, ipaddr_t dest, int ttl, size_t msgsize, ipaddr_t *nexthop )); void ipr_init ARGS(( void )); int ipr_get_iroute ARGS(( int ent_no, nwio_route_t *route_ent )); int ipr_add_iroute ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, - ipaddr_t gateway, int dist, int static_route, iroute_t **route_p )); + ipaddr_t gateway, int dist, int mtu, int static_route, + iroute_t **route_p )); int ipr_del_iroute ARGS(( int port_nr, ipaddr_t dest, 
ipaddr_t subnetmask, - ipaddr_t gateway, int dist, int static_route )); + ipaddr_t gateway, int static_route )); +void ipr_chk_itab ARGS(( int port_nr, ipaddr_t addr, ipaddr_t mask )); int ipr_get_oroute ARGS(( int ent_no, nwio_route_t *route_ent )); int ipr_add_oroute ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, - ipaddr_t gateway, time_t timeout, int dist, int static_route, + ipaddr_t gateway, time_t timeout, int dist, int mtu, int static_route, i32_t preference, oroute_t **route_p )); +int ipr_del_oroute ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, + ipaddr_t gateway, int static_route )); +void ipr_chk_otab ARGS(( int port_nr, ipaddr_t addr, ipaddr_t mask )); void ipr_gateway_down ARGS(( int port_nr, ipaddr_t gateway, time_t timeout )); void ipr_redirect ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, ipaddr_t old_gateway, ipaddr_t new_gateway, time_t timeout )); @@ -73,9 +81,10 @@ void ipr_destunrch ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, time_t timeout )); void ipr_ttl_exc ARGS(( int port_nr, ipaddr_t dest, ipaddr_t subnetmask, time_t timeout )); +void ipr_mtu ARGS(( int port_nr, ipaddr_t dest, U16_t mtu, time_t timeout )); #endif /* IPR_H */ /* - * $PchId: ipr.h,v 1.4 1995/11/21 06:45:27 philip Exp $ + * $PchId: ipr.h,v 1.8 2002/06/09 07:48:11 philip Exp $ */ diff --git a/servers/inet/generic/psip.c b/servers/inet/generic/psip.c index 7fdeb443b..586056442 100644 --- a/servers/inet/generic/psip.c +++ b/servers/inet/generic/psip.c @@ -17,8 +17,6 @@ Copyright 1995 Philip Homburg #include "psip.h" #include "sr.h" -#if ENABLE_PSIP - THIS_FILE typedef struct psip_port @@ -55,21 +53,24 @@ typedef struct psip_fd #define PFF_INUSE 1 #define PFF_READ_IP 2 #define PFF_PROMISC 4 +#define PFF_NEXTHOP 8 PRIVATE psip_port_t *psip_port_table; PRIVATE psip_fd_t psip_fd_table[PSIP_FD_NR]; FORWARD int psip_open ARGS(( int port, int srfd, get_userdata_t get_userdata, put_userdata_t put_userdata, - put_pkt_t pkt_pkt )); + put_pkt_t 
pkt_pkt, select_res_t select_res )); FORWARD int psip_ioctl ARGS(( int fd, ioreq_t req )); FORWARD int psip_read ARGS(( int fd, size_t count )); FORWARD int psip_write ARGS(( int fd, size_t count )); +FORWARD int psip_select ARGS(( int port_nr, unsigned operations )); FORWARD void psip_close ARGS(( int fd )); FORWARD int psip_cancel ARGS(( int fd, int which_operation )); FORWARD void promisc_restart_read ARGS(( psip_port_t *psip_port )); FORWARD int psip_setopt ARGS(( psip_fd_t *psip_fd, nwio_psipopt_t *newoptp )); FORWARD void psip_buffree ARGS(( int priority )); +FORWARD void check_promisc ARGS(( psip_port_t *psip_port )); #ifdef BUF_CONSISTENCY_CHECK FORWARD void psip_bufcheck ARGS(( void )); #endif @@ -89,22 +90,18 @@ PUBLIC void psip_init() psip_port_t *psip_port; psip_fd_t *psip_fd; -#if ZERO for (i=0, psip_port= psip_port_table; ipp_flags= PPF_EMPTY; for (i=0, psip_fd= psip_fd_table; ipf_flags= PFF_EMPTY; -#endif for (i=0, psip_port= psip_port_table; ipp_flags |= PPF_CONFIGURED; -#if ZERO psip_port->pp_opencnt= 0; psip_port->pp_rd_head= NULL; psip_port->pp_promisc_head= NULL; -#endif } #ifndef BUF_CONSISTENCY_CHECK @@ -120,30 +117,34 @@ int ip_port_nr; { psip_port_t *psip_port; - assert(port_nr >= 0 && port_nr < psip_conf_nr); + assert(port_nr >= 0); + if (port_nr >= psip_conf_nr) + return -1; psip_port= &psip_port_table[port_nr]; - assert(psip_port->pp_flags & PPF_CONFIGURED); + if (!(psip_port->pp_flags &PPF_CONFIGURED)) + return -1; psip_port->pp_ipdev= ip_port_nr; psip_port->pp_flags |= PPF_ENABLED; sr_add_minor(if2minor(psip_conf[port_nr].pc_ifno, PSIP_DEV_OFF), port_nr, psip_open, psip_close, psip_read, - psip_write, psip_ioctl, psip_cancel); + psip_write, psip_ioctl, psip_cancel, psip_select); return NW_OK; } -PUBLIC int psip_send(port_nr, pack) +PUBLIC int psip_send(port_nr, dest, pack) int port_nr; +ipaddr_t dest; acc_t *pack; { psip_port_t *psip_port; psip_fd_t *psip_fd, *mark_fd; int i, result, result1; - size_t buf_size; - acc_t *hdr_pack; + 
size_t buf_size, extrasize; + acc_t *hdr_pack, *acc; psip_io_hdr_t *hdr; assert(port_nr >= 0 && port_nr < psip_conf_nr); @@ -182,8 +183,13 @@ acc_t *pack; assert(psip_fd->pf_flags & PFF_READ_IP); psip_fd->pf_flags &= ~PFF_READ_IP; + if (psip_fd->pf_flags & PFF_NEXTHOP) + extrasize= sizeof(dest); + else + extrasize= 0; + buf_size= bf_bufsize(pack); - if (buf_size <= psip_fd->pf_rd_count) + if (buf_size+extrasize <= psip_fd->pf_rd_count) { if (psip_port->pp_flags & PPF_PROMISC) { @@ -192,6 +198,7 @@ acc_t *pack; hdr= (psip_io_hdr_t *)ptr2acc_data(hdr_pack); memset(hdr, '\0', sizeof(*hdr)); hdr->pih_flags |= PF_LOC2REM; + hdr->pih_nexthop= dest; pack->acc_linkC++; hdr_pack->acc_next= pack; @@ -212,6 +219,17 @@ acc_t *pack; promisc_restart_read(psip_port); } } + + if (extrasize) + { + /* Prepend nexthop address */ + acc= bf_memreq(sizeof(dest)); + *(ipaddr_t *)(ptr2acc_data(acc))= dest; + acc->acc_next= pack; + pack= acc; acc= NULL; + buf_size += extrasize; + } + result= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd, (size_t)0, pack, FALSE); if (result == NW_OK) @@ -230,12 +248,14 @@ acc_t *pack; return NW_SUSPEND; } -PRIVATE int psip_open(port, srfd, get_userdata, put_userdata, put_pkt) +PRIVATE int psip_open(port, srfd, get_userdata, put_userdata, put_pkt, + select_res) int port; int srfd; get_userdata_t get_userdata; put_userdata_t put_userdata; put_pkt_t put_pkt; +select_res_t select_res; { psip_port_t *psip_port; psip_fd_t *psip_fd; @@ -272,6 +292,7 @@ ioreq_t req; int result; psip_fd_t *psip_fd; acc_t *data; + nwio_ipconf_t *ipconfp; nwio_psipopt_t *psip_opt, *newoptp; assert(fd >= 0 && fd < PSIP_FD_NR); @@ -279,6 +300,22 @@ ioreq_t req; switch(req) { + case NWIOSIPCONF: + data= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd, 0, + sizeof(*ipconfp), TRUE); + if (!data) + { + result= EFAULT; + break; + } + data= bf_packIffLess(data, sizeof(*ipconfp)); + assert (data->acc_length == sizeof(*ipconfp)); + + ipconfp= (nwio_ipconf_t *)ptr2acc_data(data); + result= 
ip_setconf(psip_fd->pf_port->pp_ipdev, ipconfp); + bf_afree(data); + reply_thr_get(psip_fd, result, TRUE); + break; case NWIOSPSIPOPT: data= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd, 0, sizeof(*psip_opt), TRUE); @@ -303,7 +340,15 @@ ioreq_t req; else { psip_fd->pf_flags &= ~PFF_PROMISC; - /* XXX check port flags */ + check_promisc(psip_fd->pf_port); + } + if (psip_fd->pf_psipopt.nwpo_flags & NWPO_EN_NEXTHOP) + { + psip_fd->pf_flags |= PFF_NEXTHOP; + } + else + { + psip_fd->pf_flags &= ~PFF_NEXTHOP; } } reply_thr_get(psip_fd, result, TRUE); @@ -331,11 +376,36 @@ size_t count; { psip_port_t *psip_port; psip_fd_t *psip_fd; + acc_t *pack; + size_t buf_size; + int result, result1; assert(fd >= 0 && fd < PSIP_FD_NR); psip_fd= &psip_fd_table[fd]; psip_port= psip_fd->pf_port; + if ((psip_fd->pf_flags & PFF_PROMISC) && psip_port->pp_promisc_head) + { + /* Deliver a queued packet. */ + pack= psip_port->pp_promisc_head; + buf_size= bf_bufsize(pack); + if (buf_size <= count) + { + psip_port->pp_promisc_head= pack->acc_ext_link; + result= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd, + (size_t)0, pack, FALSE); + if (result == NW_OK) + result= buf_size; + } + else + result= EPACKSIZE; + + result1= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd, + (size_t)result, NULL, FALSE); + assert(result1 == NW_OK); + return NW_OK; + } + psip_fd->pf_rd_count= count; if (psip_port->pp_rd_head == NULL) psip_port->pp_rd_head= psip_fd; @@ -345,9 +415,7 @@ size_t count; psip_port->pp_rd_tail= psip_fd; psip_fd->pf_flags |= PFF_READ_IP; - if (psip_fd->pf_flags & PFF_PROMISC) - promisc_restart_read(psip_port); - else + if (!(psip_fd->pf_flags & PFF_PROMISC)) ipps_get(psip_port->pp_ipdev); if (psip_fd->pf_flags & PFF_READ_IP) return NW_SUSPEND; @@ -362,6 +430,8 @@ size_t count; psip_fd_t *psip_fd; acc_t *pack, *hdr_pack; psip_io_hdr_t *hdr; + size_t pack_len; + ipaddr_t nexthop; assert(fd >= 0 && fd < PSIP_FD_NR); psip_fd= &psip_fd_table[fd]; @@ -376,6 +446,33 @@ size_t count; assert(pack == NULL); 
return NW_OK; } + + if (psip_fd->pf_flags & PFF_NEXTHOP) + { + pack_len= bf_bufsize(pack); + if (pack_len <= sizeof(nexthop)) + { + /* Something strange */ + bf_afree(pack); pack= NULL; + pack= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd, + (size_t)EPACKSIZE, (size_t)0, FALSE); + assert(pack == NULL); + return NW_OK; + } + pack= bf_packIffLess(pack, sizeof(nexthop)); + nexthop= *(ipaddr_t *)ptr2acc_data(pack); + pack= bf_delhead(pack, sizeof(nexthop)); + + /* Map multicast to broadcast */ + if ((nexthop & HTONL(0xE0000000)) == HTONL(0xE0000000)) + nexthop= HTONL(0xffffffff); + } + else + { + /* Assume point to point */ + nexthop= HTONL(0x00000000); + } + if (psip_port->pp_flags & PPF_PROMISC) { /* Deal with promiscuous mode. */ @@ -383,6 +480,7 @@ size_t count; hdr= (psip_io_hdr_t *)ptr2acc_data(hdr_pack); memset(hdr, '\0', sizeof(*hdr)); hdr->pih_flags |= PF_REM2LOC; + hdr->pih_nexthop= nexthop; pack->acc_linkC++; hdr_pack->acc_next= pack; @@ -391,69 +489,55 @@ size_t count; { /* Append at the end. */ psip_port->pp_promisc_tail->acc_ext_link= hdr_pack; + psip_port->pp_promisc_tail= hdr_pack; } else { /* First packet. */ psip_port->pp_promisc_head= hdr_pack; + psip_port->pp_promisc_tail= hdr_pack; if (psip_port->pp_rd_head) promisc_restart_read(psip_port); } } - ipps_put(psip_port->pp_ipdev, pack); + ipps_put(psip_port->pp_ipdev, nexthop, pack); pack= (*psip_fd->pf_get_userdata)(psip_fd->pf_srfd, (size_t)count, (size_t)0, FALSE); assert(pack == NULL); return NW_OK; } +PRIVATE int psip_select(fd, operations) +int fd; +unsigned operations; +{ + printf("psip_select: not implemented\n"); + return 0; +} + PRIVATE void psip_close(fd) int fd; { psip_port_t *psip_port; psip_fd_t *psip_fd; - acc_t *acc, *acc_next; - int i; assert(fd >= 0 && fd < PSIP_FD_NR); psip_fd= &psip_fd_table[fd]; psip_port= psip_fd->pf_port; + if (psip_fd->pf_flags & PFF_PROMISC) + { + /* Check if the port should still be in promiscuous mode. 
+ */ + psip_fd->pf_flags &= ~PFF_PROMISC; + check_promisc(psip_fd->pf_port); + } + assert(psip_port->pp_opencnt >0); psip_port->pp_opencnt--; psip_fd->pf_flags= PFF_EMPTY; ipps_get(psip_port->pp_ipdev); - /* Check if the port should still be in promiscuous mode. */ - if (psip_port->pp_flags & PPF_PROMISC) - { - psip_port->pp_flags &= ~PPF_PROMISC; - for (i= 0, psip_fd= psip_fd_table; ipf_flags & (PFF_INUSE|PFF_PROMISC)) != - (PFF_INUSE|PFF_PROMISC)) - { - continue; - } - if (psip_fd->pf_port != psip_port) - continue; - psip_port->pp_flags |= PPF_PROMISC; - break; - } - if (!(psip_port->pp_flags & PPF_PROMISC)) - { - /* Delete queued packets. */ - acc= psip_port->pp_promisc_head; - psip_port->pp_promisc_head= NULL; - while (acc) - { - acc_next= acc->acc_ext_link; - bf_afree(acc); - acc= acc_next; - } - } - } } PRIVATE int psip_cancel(fd, which_operation) @@ -472,22 +556,18 @@ int which_operation; switch(which_operation) { -#if !CRAMPED case SR_CANCEL_IOCTL: ip_panic(( "should not be here" )); -#endif case SR_CANCEL_READ: assert(psip_fd->pf_flags & PFF_READ_IP); for (prev_fd= NULL, tmp_fd= psip_port->pp_rd_head; tmp_fd; - prev_fd= tmp_fd, tmp_fd= tmp_fd->pf_rd_next) + prev_fd= tmp_fd, tmp_fd= tmp_fd->pf_rd_next) { if (tmp_fd == psip_fd) break; } -#if !CRAMPED if (tmp_fd == NULL) ip_panic(( "unable to find to request to cancel" )); -#endif if (prev_fd == NULL) psip_port->pp_rd_head= psip_fd->pf_rd_next; else @@ -499,12 +579,10 @@ int which_operation; (size_t)EINTR, NULL, FALSE); assert(result == NW_OK); break; -#if !CRAMPED case SR_CANCEL_WRITE: ip_panic(( "should not be here" )); default: ip_panic(( "invalid operation for cancel" )); -#endif } return NW_OK; } @@ -512,39 +590,36 @@ int which_operation; PRIVATE void promisc_restart_read(psip_port) psip_port_t *psip_port; { - psip_fd_t *psip_fd, *mark_fd; + psip_fd_t *psip_fd, *prev, *next; acc_t *pack; size_t buf_size; - int i, result, result1; + int result, result1; - while (psip_port->pp_promisc_head) + /* Overkill 
at the moment: just one reader in promiscious mode is + * allowed. + */ + pack= psip_port->pp_promisc_head; + if (!pack) + return; + assert(pack->acc_ext_link == NULL); + + for(psip_fd= psip_port->pp_rd_head, prev= NULL; psip_fd; + prev= psip_fd, psip_fd= psip_fd->pf_rd_next) { - mark_fd= psip_port->pp_rd_tail; - - for(i= 0; ipp_rd_head; - if (!psip_fd) - return; - psip_port->pp_rd_head= psip_fd->pf_rd_next; - if (psip_fd->pf_flags & PFF_PROMISC) - break; - psip_fd->pf_rd_next= NULL; - if (psip_port->pp_rd_head == NULL) - psip_port->pp_rd_head= psip_fd; - else - psip_port->pp_rd_tail->pf_rd_next= psip_fd; - psip_port->pp_rd_tail= psip_fd; - if (psip_fd == mark_fd) - return; - } - if (i == PSIP_FD_NR) - ip_panic(( "psip'promisc_restart_read: loop" )); +again: + if (!(psip_fd->pf_flags & PFF_PROMISC)) + continue; + next= psip_fd->pf_rd_next; + if (prev) + prev->pf_rd_next= next; + else + psip_port->pp_rd_head= next; + if (!next) + psip_port->pp_rd_tail= prev; assert(psip_fd->pf_flags & PFF_READ_IP); psip_fd->pf_flags &= ~PFF_READ_IP; - pack= psip_port->pp_promisc_head; buf_size= bf_bufsize(pack); if (buf_size <= psip_fd->pf_rd_count) { @@ -560,6 +635,16 @@ psip_port_t *psip_port; result1= (*psip_fd->pf_put_userdata)(psip_fd->pf_srfd, (size_t)result, NULL, FALSE); assert(result1 == NW_OK); + + if (psip_port->pp_promisc_head) + { + /* Restart from the beginning */ + assert(result == EPACKSIZE); + psip_fd= psip_port->pp_rd_head; + prev= NULL; + goto again; + } + break; } } @@ -568,11 +653,8 @@ psip_fd_t *psip_fd; nwio_psipopt_t *newoptp; { nwio_psipopt_t oldopt; - int result; - unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags, - all_flags, flags; + unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags; unsigned long new_flags; - int i; oldopt= psip_fd->pf_psipopt; @@ -593,6 +675,15 @@ nwio_psipopt_t *newoptp; } new_flags= ((unsigned long)new_di_flags << 16) | new_en_flags; + if ((new_flags & NWPO_EN_PROMISC) && + 
(psip_fd->pf_port->pp_flags & PPF_PROMISC)) + { + printf("psip_setopt: EBUSY for port %d, flags 0x%x\n", + psip_fd->pf_port - psip_port_table, + psip_fd->pf_port->pp_flags); + /* We can support only one at a time. */ + return EBUSY; + } psip_fd->pf_psipopt= *newoptp; psip_fd->pf_psipopt.nwpo_flags= new_flags; @@ -600,6 +691,48 @@ nwio_psipopt_t *newoptp; return NW_OK; } +PRIVATE void check_promisc(psip_port) +psip_port_t *psip_port; +{ + int i; + psip_fd_t *psip_fd; + acc_t *acc, *acc_next; + + /* Check if the port should still be in promiscuous mode. Overkill + * at the moment. + */ + if (!(psip_port->pp_flags & PPF_PROMISC)) + return; + + psip_port->pp_flags &= ~PPF_PROMISC; + for (i= 0, psip_fd= psip_fd_table; ipf_flags & (PFF_INUSE|PFF_PROMISC)) != + (PFF_INUSE|PFF_PROMISC)) + { + continue; + } + if (psip_fd->pf_port != psip_port) + continue; + printf("check_promisc: setting PROMISC for port %d\n", + psip_port-psip_port_table); + psip_port->pp_flags |= PPF_PROMISC; + break; + } + if (!(psip_port->pp_flags & PPF_PROMISC)) + { + /* Delete queued packets. 
*/ + acc= psip_port->pp_promisc_head; + psip_port->pp_promisc_head= NULL; + while (acc) + { + acc_next= acc->acc_ext_link; + bf_afree(acc); + acc= acc_next; + } + } +} + PRIVATE void psip_buffree (priority) int priority; { @@ -679,8 +812,7 @@ int for_ioctl; assert (!result); } -#endif /* ENABLE_PSIP */ /* - * $PchId: psip.c,v 1.6 1996/05/07 20:50:31 philip Exp $ + * $PchId: psip.c,v 1.15 2005/06/28 14:19:29 philip Exp $ */ diff --git a/servers/inet/generic/psip.h b/servers/inet/generic/psip.h index 570ccc7e0..22f38d359 100644 --- a/servers/inet/generic/psip.h +++ b/servers/inet/generic/psip.h @@ -14,10 +14,10 @@ Copyright 1995 Philip Homburg void psip_prep ARGS(( void )); void psip_init ARGS(( void )); int psip_enable ARGS(( int port_nr, int ip_port_nr )); -int psip_send ARGS(( int port_nr, acc_t *pack )); +int psip_send ARGS(( int port_nr, ipaddr_t dest, acc_t *pack )); #endif /* PSIP_H */ /* - * $PchId: psip.h,v 1.4 1995/11/21 06:45:27 philip Exp $ + * $PchId: psip.h,v 1.6 2001/04/19 21:16:22 philip Exp $ */ diff --git a/servers/inet/generic/rand256.c b/servers/inet/generic/rand256.c new file mode 100644 index 000000000..736cdbe33 --- /dev/null +++ b/servers/inet/generic/rand256.c @@ -0,0 +1,37 @@ +/* +rand256.c + +Created: Oct 2000 by Philip Homburg + +Generate 256-bit random numbers +*/ + +#include +#include "inet.h" +#include "rand256.h" + +PRIVATE u32_t base_bits[8]; + +PUBLIC void init_rand256(bits) +u8_t bits[32]; +{ + memcpy(base_bits, bits, sizeof(base_bits)); +} + +PUBLIC void rand256(bits) +u8_t bits[32]; +{ + u32_t a; + SHA256_CTX ctx; + + a= ++base_bits[0]; + if (a == 0) + base_bits[1]++; + SHA256_Init(&ctx); + SHA256_Update(&ctx, (unsigned char *)base_bits, sizeof(base_bits)); + SHA256_Final(bits, &ctx); +} + +/* + * $PchId: rand256.c,v 1.1 2005/06/28 14:13:43 philip Exp $ + */ diff --git a/servers/inet/generic/rand256.h b/servers/inet/generic/rand256.h new file mode 100644 index 000000000..0fd544413 --- /dev/null +++ b/servers/inet/generic/rand256.h 
@@ -0,0 +1,14 @@ +/* +rand256.h + +Created: Oct 2000 by Philip Homburg + +Provide 256-bit random numbers +*/ + +void init_rand256 ARGS(( u8_t bits[32] )); +void rand256 ARGS(( u8_t bits[32] )); + +/* + * $PchId: rand256.h,v 1.1 2005/06/28 14:14:05 philip Exp $ + */ diff --git a/servers/inet/generic/sr.h b/servers/inet/generic/sr.h index 11c41a00f..86cdaf2ba 100644 --- a/servers/inet/generic/sr.h +++ b/servers/inet/generic/sr.h @@ -13,6 +13,11 @@ Copyright 1995 Philip Homburg #define SR_CANCEL_READ 2 #define SR_CANCEL_WRITE 3 +#define SR_SELECT_READ 0x01 +#define SR_SELECT_WRITE 0x02 +#define SR_SELECT_EXCEPTION 0x04 +#define SR_SELECT_POLL 0x10 + /* Forward struct declarations */ struct acc; @@ -21,21 +26,24 @@ struct acc; typedef int (*sr_open_t) ARGS(( int port, int srfd, get_userdata_t get_userdata, put_userdata_t put_userdata, - put_pkt_t put_pkt )); + put_pkt_t put_pkt, select_res_t select_res )); typedef void (*sr_close_t) ARGS(( int fd )); typedef int (*sr_read_t) ARGS(( int fd, size_t count )); typedef int (*sr_write_t) ARGS(( int fd, size_t count )); typedef int (*sr_ioctl_t) ARGS(( int fd, ioreq_t req )); typedef int (*sr_cancel_t) ARGS(( int fd, int which_operation )); +typedef int (*sr_select_t) ARGS(( int fd, unsigned operations )); void sr_init ARGS(( void )); void sr_add_minor ARGS(( int minor, int port, sr_open_t openf, sr_close_t closef, sr_read_t sr_read, sr_write_t sr_write, - sr_ioctl_t ioctlf, sr_cancel_t cancelf )); + sr_ioctl_t ioctlf, sr_cancel_t cancelf, sr_select_t selectf )); #endif /* SR_H */ +/* Track TCP connections back into sr (for lsof, identd, etc.) 
*/ +EXTERN sr_cancel_t tcp_cancel_f; /* - * $PchId: sr.h,v 1.6 1996/05/07 20:50:51 philip Exp $ + * $PchId: sr.h,v 1.9 2005/06/28 14:19:51 philip Exp $ */ diff --git a/servers/inet/generic/tcp.c b/servers/inet/generic/tcp.c index 2f788380c..2add222fe 100644 --- a/servers/inet/generic/tcp.c +++ b/servers/inet/generic/tcp.c @@ -10,12 +10,11 @@ Copyright 1995 Philip Homburg #include "event.h" #include "type.h" -#if !CRAMPED #include "io.h" #include "ip.h" -#endif #include "sr.h" #include "assert.h" +#include "rand256.h" #include "tcp.h" #include "tcp_int.h" @@ -24,8 +23,10 @@ THIS_FILE PUBLIC tcp_port_t *tcp_port_table; PUBLIC tcp_fd_t tcp_fd_table[TCP_FD_NR]; PUBLIC tcp_conn_t tcp_conn_table[TCP_CONN_NR]; +PUBLIC sr_cancel_t tcp_cancel_f; FORWARD void tcp_main ARGS(( tcp_port_t *port )); +FORWARD int tcp_select ARGS(( int fd, unsigned operations )); FORWARD acc_t *tcp_get_data ARGS(( int fd, size_t offset, size_t count, int for_ioctl )); FORWARD int tcp_put_data ARGS(( int fd, size_t offset, @@ -49,22 +50,23 @@ FORWARD tcp_conn_t *find_best_conn ARGS(( ip_hdr_t *ip_hdr, tcp_hdr_t *tcp_hdr )); FORWARD int maybe_listen ARGS(( ipaddr_t locaddr, Tcpport_t locport, ipaddr_t remaddr, Tcpport_t remport )); -FORWARD int conn_right4fd ARGS(( tcp_conn_t *tcp_conn, tcp_fd_t *tcp_fd )); FORWARD int tcp_su4connect ARGS(( tcp_fd_t *tcp_fd )); FORWARD void tcp_buffree ARGS(( int priority )); #ifdef BUF_CONSISTENCY_CHECK FORWARD void tcp_bufcheck ARGS(( void )); #endif -FORWARD void tcp_setup_conn ARGS(( tcp_conn_t *tcp_conn )); +FORWARD void tcp_setup_conn ARGS(( tcp_port_t *tcp_port, + tcp_conn_t *tcp_conn )); +FORWARD u32_t tcp_rand32 ARGS(( void )); PUBLIC void tcp_prep() { - tcp_port_table= alloc(ip_conf_nr * sizeof(tcp_port_table[0])); + tcp_port_table= alloc(tcp_conf_nr * sizeof(tcp_port_table[0])); } PUBLIC void tcp_init() { - int i, j, k; + int i, j, k, ifno; tcp_fd_t *tcp_fd; tcp_port_t *tcp_port; tcp_conn_t *tcp_conn; @@ -74,7 +76,6 @@ PUBLIC void tcp_init() assert (BUF_S 
>= sizeof(struct nwio_tcpconf)); assert (BUF_S >= IP_MAX_HDR_SIZE + TCP_MAX_HDR_SIZE); -#if ZERO for (i=0, tcp_fd= tcp_fd_table; itf_flags= TFF_EMPTY; @@ -86,7 +87,6 @@ PUBLIC void tcp_init() tcp_conn->tc_flags= TCF_EMPTY; tcp_conn->tc_busy= 0; } -#endif #ifndef BUF_CONSISTENCY_CHECK bf_logon(tcp_buffree); @@ -94,17 +94,15 @@ PUBLIC void tcp_init() bf_logon(tcp_buffree, tcp_bufcheck); #endif - for (i=0, tcp_port= tcp_port_table; itp_ipdev= i; + tcp_port->tp_ipdev= tcp_conf[i].tc_port; -#if ZERO tcp_port->tp_flags= TPF_EMPTY; tcp_port->tp_state= TPS_EMPTY; tcp_port->tp_snd_head= NULL; tcp_port->tp_snd_tail= NULL; ev_init(&tcp_port->tp_snd_event); -#endif for (j= 0; jtp_ipdev].ic_ifno; + sr_add_minor(if2minor(ifno, TCP_DEV_OFF), i, tcp_open, tcp_close, tcp_read, - tcp_write, tcp_ioctl, tcp_cancel); + tcp_write, tcp_ioctl, tcp_cancel, tcp_select); tcp_main(tcp_port); } + tcp_cancel_f= tcp_cancel; } PRIVATE void tcp_main(tcp_port) @@ -135,7 +135,7 @@ tcp_port_t *tcp_port; tcp_port->tp_state= TPS_SETPROTO; tcp_port->tp_ipfd= ip_open(tcp_port->tp_ipdev, tcp_port->tp_ipdev, tcp_get_data, - tcp_put_data, tcp_put_pkt); + tcp_put_data, tcp_put_pkt, 0 /* no select_res */); if (tcp_port->tp_ipfd < 0) { tcp_port->tp_state= TPS_ERROR; @@ -206,8 +206,12 @@ tcp_port_t *tcp_port; tcp_conn->tc_rt_dead= TCP_DEF_RT_DEAD; tcp_conn->tc_stt= 0; tcp_conn->tc_0wnd_to= 0; + tcp_conn->tc_artt= TCP_DEF_RTT*TCP_RTT_SCALE; + tcp_conn->tc_drtt= 0; tcp_conn->tc_rtt= TCP_DEF_RTT; - tcp_conn->tc_mss= TCP_DEF_MSS; + tcp_conn->tc_max_mtu= tcp_port->tp_mtu; + tcp_conn->tc_mtu= tcp_conn->tc_max_mtu; + tcp_conn->tc_mtutim= 0; tcp_conn->tc_error= NW_OK; tcp_conn->tc_snd_wnd= TCP_MAX_SND_WND_SIZE; tcp_conn->tc_snd_cinc= @@ -233,13 +237,59 @@ tcp_port_t *tcp_port; read_ip_packets(tcp_port); return; -#if !CRAMPED default: ip_panic(( "unknown state" )); -#endif + break; } } +PRIVATE int tcp_select(fd, operations) +int fd; +unsigned operations; +{ + unsigned resops; + + tcp_fd_t *tcp_fd; + tcp_conn_t 
*tcp_conn; + + tcp_fd= &tcp_fd_table[fd]; + assert (tcp_fd->tf_flags & TFF_INUSE); + + resops= 0; + if (operations & SR_SELECT_READ) + { + if (!(tcp_fd->tf_flags & TFF_CONNECTED)) + return ENOTCONN; /* Is this right? */ + + tcp_conn= tcp_fd->tf_conn; + + if (tcp_conn->tc_state == TCS_CLOSED || tcp_sel_read(tcp_conn)) + resops |= SR_SELECT_READ; + else if (!(operations & SR_SELECT_POLL)) + tcp_fd->tf_flags |= TFF_SEL_READ; + } + if (operations & SR_SELECT_WRITE) + { + if (!(tcp_fd->tf_flags & TFF_CONNECTED)) + return ENOTCONN; /* Is this right? */ + tcp_conn= tcp_fd->tf_conn; + + if (tcp_conn->tc_state == TCS_CLOSED || + tcp_conn->tc_flags & TCF_FIN_SENT || + tcp_sel_write(tcp_conn)) + { + resops |= SR_SELECT_WRITE; + } + else if (!(operations & SR_SELECT_POLL)) + tcp_fd->tf_flags |= TFF_SEL_WRITE; + } + if (operations & SR_SELECT_EXCEPTION) + { + printf("tcp_select: not implemented for exceptions\n"); + } + return resops; +} + PRIVATE acc_t *tcp_get_data (port, offset, count, for_ioctl) int port; size_t offset; @@ -325,10 +375,8 @@ assert (count == sizeof(struct nwio_ipopt)); } break; default: -#if !CRAMPED printf("tcp_get_data(%d, 0x%x, 0x%x) called but tp_state= 0x%x\n", port, offset, count, tcp_port->tp_state); -#endif break; } return NW_OK; @@ -368,6 +416,8 @@ int for_ioctl; ipconf= (struct nwio_ipconf *)ptr2acc_data(data); assert (ipconf->nwic_flags & NWIC_IPADDR_SET); tcp_port->tp_ipaddr= ipconf->nwic_ipaddr; + tcp_port->tp_subnetmask= ipconf->nwic_netmask; + tcp_port->tp_mtu= ipconf->nwic_mtu; bf_afree(data); } break; @@ -397,10 +447,9 @@ assert (ipconf->nwic_flags & NWIC_IPADDR_SET); } break; default: -#if !CRAMPED - printf("tcp_put_data(%d, 0x%x, 0x%x) called but tp_state= 0x%x\n", - fd, offset, data, tcp_port->tp_state); -#endif + printf( + "tcp_put_data(%d, 0x%x, %p) called but tp_state= 0x%x\n", + fd, offset, data, tcp_port->tp_state); break; } return NW_OK; @@ -421,10 +470,10 @@ size_t datalen; tcp_hdr_t *tcp_hdr; acc_t *ip_pack, *tcp_pack; size_t 
ip_datalen, tcp_datalen, ip_hdr_len, tcp_hdr_len; - u16_t sum; + u16_t sum, mtu; u32_t bits; - int hash; - ipaddr_t srcaddr, dstaddr; + int i, hash; + ipaddr_t srcaddr, dstaddr, ipaddr, mask; tcpport_t srcport, dstport; tcp_port= &tcp_port_table[fd]; @@ -435,7 +484,32 @@ size_t datalen; ip_datalen= datalen - ip_hdr_len; if (ip_datalen == 0) { - DBLOCK(1, printf("tcp_put_pkt: no TCP header\n")); + if (ip_hdr->ih_proto == 0) + { + /* IP layer reports new IP address */ + ipaddr= ip_hdr->ih_src; + mask= ip_hdr->ih_dst; + mtu= ntohs(ip_hdr->ih_length); + tcp_port->tp_ipaddr= ipaddr; + tcp_port->tp_subnetmask= mask; + tcp_port->tp_mtu= mtu; + DBLOCK(1, printf("tcp_put_pkt: using address "); + writeIpAddr(ipaddr); + printf(", netmask "); + writeIpAddr(mask); + printf(", mtu %u\n", mtu)); + for (i= 0, tcp_conn= tcp_conn_table+i; + itc_flags & TCF_INUSE)) + continue; + if (tcp_conn->tc_port != tcp_port) + continue; + tcp_conn->tc_locaddr= ipaddr; + } + } + else + DBLOCK(1, printf("tcp_put_pkt: no TCP header\n")); bf_afree(data); return; } @@ -549,7 +623,7 @@ size_t datalen; } else tcp_conn= NULL; - if (tcp_conn != NULL && tcp_conn->tc_state == TCS_CLOSED || + if ((tcp_conn != NULL && tcp_conn->tc_state == TCS_CLOSED) || (tcp_hdr->th_flags & THF_SYN)) { tcp_conn= NULL; @@ -583,12 +657,14 @@ size_t datalen; } -PUBLIC int tcp_open (port, srfd, get_userdata, put_userdata, put_pkt) +PUBLIC int tcp_open (port, srfd, get_userdata, put_userdata, put_pkt, + select_res) int port; int srfd; get_userdata_t get_userdata; put_userdata_t put_userdata; put_pkt_t put_pkt; +select_res_t select_res; { int i; tcp_fd_t *tcp_fd; @@ -603,7 +679,7 @@ put_pkt_t put_pkt; tcp_fd= &tcp_fd_table[i]; tcp_fd->tf_flags= TFF_INUSE; - tcp_fd->tf_flags |= TFF_PUSH_DATA; /* XXX */ + tcp_fd->tf_flags |= TFF_PUSH_DATA; tcp_fd->tf_port= &tcp_port_table[port]; tcp_fd->tf_srfd= srfd; @@ -613,6 +689,7 @@ put_pkt_t put_pkt; tcp_fd->tf_tcpopt.nwto_flags= TCP_DEF_OPT; tcp_fd->tf_get_userdata= get_userdata; 
tcp_fd->tf_put_userdata= put_userdata; + tcp_fd->tf_select_res= select_res; tcp_fd->tf_conn= 0; return i; } @@ -730,9 +807,31 @@ assert (conf_acc->acc_length == sizeof(*tcp_conf)); tcp_conn->tc_busy--; tcp_conn_write(tcp_conn, 0); if (!(tcp_fd->tf_flags & TFF_IOCTL_IP)) - return NW_OK; + result= NW_OK; else - return NW_SUSPEND; + result= NW_SUSPEND; + break; + case NWIOTCPPUSH: + if (!(tcp_fd->tf_flags & TFF_CONNECTED)) + { + tcp_fd->tf_flags &= ~TFF_IOCTL_IP; + reply_thr_get (tcp_fd, ENOTCONN, TRUE); + result= NW_OK; + break; + } + tcp_conn= tcp_fd->tf_conn; + tcp_conn->tc_SND_PSH= tcp_conn->tc_SND_NXT; + tcp_conn->tc_flags &= ~TCF_NO_PUSH; + tcp_conn->tc_flags |= TCF_PUSH_NOW; + + /* Start the timer (if necessary) */ + if (tcp_conn->tc_SND_TRM == tcp_conn->tc_SND_UNA) + tcp_set_send_timer(tcp_conn); + + tcp_conn_write(tcp_conn, 0); + tcp_fd->tf_flags &= ~TFF_IOCTL_IP; + reply_thr_get (tcp_fd, NW_OK, TRUE); + result= NW_OK; break; default: tcp_fd->tf_flags &= ~TFF_IOCTL_IP; @@ -754,8 +853,6 @@ tcp_fd_t *tcp_fd; nwio_tcpconf_t *tcpconf; nwio_tcpconf_t oldconf, newconf; acc_t *data; - int result; - tcpport_t port; tcp_fd_t *fd_ptr; unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags, all_flags, flags; @@ -947,12 +1044,8 @@ tcp_fd_t *tcp_fd; nwio_tcpopt_t *tcpopt; nwio_tcpopt_t oldopt, newopt; acc_t *data; - int result; - tcpport_t port; - tcp_fd_t *fd_ptr; unsigned int new_en_flags, new_di_flags, - old_en_flags, old_di_flags, all_flags, flags; - int i; + old_en_flags, old_di_flags; data= (*tcp_fd->tf_get_userdata) (tcp_fd->tf_srfd, 0, sizeof(nwio_tcpopt_t), TRUE); @@ -968,11 +1061,9 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t)); newopt= *tcpopt; old_en_flags= oldopt.nwto_flags & 0xffff; - old_di_flags= (oldopt.nwto_flags >> 16) & - 0xffff; + old_di_flags= (oldopt.nwto_flags >> 16) & 0xffff; new_en_flags= newopt.nwto_flags & 0xffff; - new_di_flags= (newopt.nwto_flags >> 16) & - 0xffff; + new_di_flags= (newopt.nwto_flags >> 16) & 0xffff; if 
(new_en_flags & new_di_flags) { tcp_fd->tf_flags &= ~TFF_IOCTL_IP; @@ -981,33 +1072,24 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t)); } /* NWTO_SND_URG_MASK */ - if (!((new_en_flags | new_di_flags) & - NWTO_SND_URG_MASK)) + if (!((new_en_flags | new_di_flags) & NWTO_SND_URG_MASK)) { - new_en_flags |= (old_en_flags & - NWTO_SND_URG_MASK); - new_di_flags |= (old_di_flags & - NWTO_SND_URG_MASK); + new_en_flags |= (old_en_flags & NWTO_SND_URG_MASK); + new_di_flags |= (old_di_flags & NWTO_SND_URG_MASK); } /* NWTO_RCV_URG_MASK */ - if (!((new_en_flags | new_di_flags) & - NWTO_RCV_URG_MASK)) + if (!((new_en_flags | new_di_flags) & NWTO_RCV_URG_MASK)) { - new_en_flags |= (old_en_flags & - NWTO_RCV_URG_MASK); - new_di_flags |= (old_di_flags & - NWTO_RCV_URG_MASK); + new_en_flags |= (old_en_flags & NWTO_RCV_URG_MASK); + new_di_flags |= (old_di_flags & NWTO_RCV_URG_MASK); } /* NWTO_BSD_URG_MASK */ - if (!((new_en_flags | new_di_flags) & - NWTO_BSD_URG_MASK)) + if (!((new_en_flags | new_di_flags) & NWTO_BSD_URG_MASK)) { - new_en_flags |= (old_en_flags & - NWTO_BSD_URG_MASK); - new_di_flags |= (old_di_flags & - NWTO_BSD_URG_MASK); + new_en_flags |= (old_en_flags & NWTO_BSD_URG_MASK); + new_di_flags |= (old_di_flags & NWTO_BSD_URG_MASK); } else { @@ -1021,17 +1103,21 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t)); } /* NWTO_DEL_RST_MASK */ - if (!((new_en_flags | new_di_flags) & - NWTO_DEL_RST_MASK)) + if (!((new_en_flags | new_di_flags) & NWTO_DEL_RST_MASK)) { - new_en_flags |= (old_en_flags & - NWTO_DEL_RST_MASK); - new_di_flags |= (old_di_flags & - NWTO_DEL_RST_MASK); + new_en_flags |= (old_en_flags & NWTO_DEL_RST_MASK); + new_di_flags |= (old_di_flags & NWTO_DEL_RST_MASK); } - newopt.nwto_flags= ((unsigned long)new_di_flags - << 16) | new_en_flags; + /* NWTO_BULK_MASK */ + if (!((new_en_flags | new_di_flags) & NWTO_BULK_MASK)) + { + new_en_flags |= (old_en_flags & NWTO_BULK_MASK); + new_di_flags |= (old_di_flags & NWTO_BULK_MASK); + } + + newopt.nwto_flags= 
((unsigned long)new_di_flags << 16) | + new_en_flags; tcp_fd->tf_tcpopt= newopt; if (newopt.nwto_flags & NWTO_SND_URG) tcp_fd->tf_flags |= TFF_WR_URG; @@ -1046,13 +1132,9 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t)); if (tcp_fd->tf_conn) { if (newopt.nwto_flags & NWTO_BSD_URG) - { tcp_fd->tf_conn->tc_flags |= TCF_BSD_URG; - } else - { tcp_fd->tf_conn->tc_flags &= ~TCF_BSD_URG; - } } if (newopt.nwto_flags & NWTO_DEL_RST) @@ -1060,6 +1142,11 @@ assert (data->acc_length == sizeof(nwio_tcpopt_t)); else tcp_fd->tf_flags &= ~TFF_DEL_RST; + if (newopt.nwto_flags & NWTO_BULK) + tcp_fd->tf_flags &= ~TFF_PUSH_DATA; + else + tcp_fd->tf_flags |= TFF_PUSH_DATA; + bf_afree(data); tcp_fd->tf_flags &= ~TFF_IOCTL_IP; reply_thr_get(tcp_fd, NW_OK, TRUE); @@ -1072,20 +1159,20 @@ int fd; { tcpport_t port, nw_port; - nw_port= htons(0xC000+fd); - if (is_unused_port(nw_port)) - return nw_port; - - for (port= 0xC000+TCP_FD_NR; port < 0xFFFF; port++) + for (port= 0x8000+fd; port < 0xffff-TCP_FD_NR; port+= TCP_FD_NR) + { + nw_port= htons(port); + if (is_unused_port(nw_port)) + return nw_port; + } + for (port= 0x8000; port < 0xffff; port++) { nw_port= htons(port); if (is_unused_port(nw_port)) return nw_port; } -#if !CRAMPED ip_panic(( "unable to find unused port (shouldn't occur)" )); return 0; -#endif } PRIVATE int is_unused_port(port) @@ -1103,9 +1190,9 @@ tcpport_t port; if (tcp_fd->tf_tcpconf.nwtc_locport == port) return FALSE; } - for (i= ip_conf_nr, tcp_conn= tcp_conn_table+i; + for (i= tcp_conf_nr, tcp_conn= tcp_conn_table+i; itc_flags & TCF_INUSE)) continue; @@ -1115,8 +1202,7 @@ tcpport_t port; return TRUE; } -PRIVATE int -reply_thr_put(tcp_fd, reply, for_ioctl) +PRIVATE int reply_thr_put(tcp_fd, reply, for_ioctl) tcp_fd_t *tcp_fd; int reply; int for_ioctl; @@ -1142,7 +1228,6 @@ PUBLIC int tcp_su4listen(tcp_fd) tcp_fd_t *tcp_fd; { tcp_conn_t *tcp_conn; - acc_t *tmp_acc; tcp_conn= tcp_fd->tf_conn; @@ -1157,8 +1242,7 @@ tcp_fd_t *tcp_fd; else tcp_conn->tc_remaddr= 0; - 
tcp_setup_conn(tcp_conn); - tcp_conn->tc_port= tcp_fd->tf_port; + tcp_setup_conn(tcp_fd->tf_port, tcp_conn); tcp_conn->tc_fd= tcp_fd; tcp_conn->tc_connInprogress= 1; tcp_conn->tc_orglisten= TRUE; @@ -1179,11 +1263,10 @@ PRIVATE tcp_conn_t *find_empty_conn() { int i; tcp_conn_t *tcp_conn; - int state; - for (i=ip_conf_nr, tcp_conn= tcp_conn_table+i; + for (i=tcp_conf_nr, tcp_conn= tcp_conn_table+i; itc_flags == TCF_EMPTY) @@ -1300,9 +1383,9 @@ tcp_hdr_t *tcp_hdr; best_level= 0; best_conn= NULL; listen_conn= NULL; - for (i= ip_conf_nr, tcp_conn= tcp_conn_table+i; + for (i= tcp_conf_nr, tcp_conn= tcp_conn_table+i; itc_flags & TCF_INUSE)) @@ -1379,7 +1462,7 @@ tcp_hdr_t *tcp_hdr; return NULL; } - for (i=0, tcp_conn= tcp_conn_table; itc_flags & TCF_INUSE)) @@ -1538,14 +1621,14 @@ size_t count; tcp_fd->tf_write_offset= 0; tcp_fd->tf_write_count= count; + /* New data may cause a segment to be sent. Clear PUSH_NOW + * from last NWIOTCPPUSH ioctl. + */ + tcp_conn->tc_flags &= ~(TCF_NO_PUSH|TCF_PUSH_NOW); + /* Start the timer (if necessary) */ - if (tcp_conn->tc_SND_UNA == tcp_conn->tc_SND_NXT && - tcp_conn->tc_transmit_seq == tcp_conn->tc_SND_UNA) - { + if (tcp_conn->tc_SND_TRM == tcp_conn->tc_SND_UNA) tcp_set_send_timer(tcp_conn); - } - assert(tcp_conn->tc_transmit_timer.tim_active || - (tcp_print_conn(tcp_conn), printf("\n"), 0)); assert(tcp_conn->tc_busy == 0); tcp_conn->tc_busy++; @@ -1677,7 +1760,6 @@ int which_operation; { tcp_fd_t *tcp_fd; tcp_conn_t *tcp_conn; - int i; tcp_fd= &tcp_fd_table[fd]; @@ -1733,10 +1815,9 @@ assert (tcp_fd->tf_flags & TFF_IOCTL_IP); break; } break; -#if !CRAMPED default: ip_panic(( "unknown cancel request" )); -#endif + break; } return NW_OK; } @@ -1749,7 +1830,6 @@ PRIVATE int tcp_connect(tcp_fd) tcp_fd_t *tcp_fd; { tcp_conn_t *tcp_conn; - int state; if (!(tcp_fd->tf_flags & TFF_CONF_SET)) { @@ -1803,7 +1883,6 @@ PRIVATE int tcp_su4connect(tcp_fd) tcp_fd_t *tcp_fd; { tcp_conn_t *tcp_conn; - acc_t *tmp_acc; tcp_conn= tcp_fd->tf_conn; @@ 
-1815,10 +1894,9 @@ tcp_fd_t *tcp_fd; tcp_conn->tc_remport= tcp_fd->tf_tcpconf.nwtc_remport; tcp_conn->tc_remaddr= tcp_fd->tf_tcpconf.nwtc_remaddr; - tcp_setup_conn(tcp_conn); + tcp_setup_conn(tcp_fd->tf_port, tcp_conn); tcp_conn->tc_fd= tcp_fd; - tcp_conn->tc_port= tcp_fd->tf_port; tcp_conn->tc_connInprogress= 1; tcp_conn->tc_orglisten= FALSE; tcp_conn->tc_state= TCS_SYN_SENT; @@ -1835,30 +1913,6 @@ tcp_fd_t *tcp_fd; return NW_OK; } -PRIVATE int conn_right4fd(tcp_conn, tcp_fd) -tcp_fd_t *tcp_fd; -tcp_conn_t *tcp_conn; -{ - unsigned long flags; - - flags= tcp_fd->tf_tcpconf.nwtc_flags; - - if (tcp_fd->tf_tcpconf.nwtc_locport != tcp_conn->tc_locport) - return FALSE; - - if ((flags & NWTC_SET_RA) && tcp_fd->tf_tcpconf.nwtc_remaddr != - tcp_conn->tc_remaddr) - return FALSE; - - if ((flags & NWTC_SET_RP) && tcp_fd->tf_tcpconf.nwtc_remport != - tcp_conn->tc_remport) - return FALSE; - - if (tcp_fd->tf_port != tcp_conn->tc_port) - return FALSE; - - return TRUE; -} /* tcp_listen @@ -1868,7 +1922,6 @@ PRIVATE int tcp_listen(tcp_fd) tcp_fd_t *tcp_fd; { tcp_conn_t *tcp_conn; - int state; if (!(tcp_fd->tf_flags & TFF_CONF_SET)) { @@ -2005,7 +2058,7 @@ PRIVATE void tcp_bufcheck() tcp_conn_t *tcp_conn; tcp_port_t *tcp_port; - for (i= 0, tcp_port= tcp_port_table; itp_pack) bf_check_acc(tcp_port->tp_pack); @@ -2041,7 +2094,7 @@ tcp_conn_t *tcp_conn; tcp_close_connection(tcp_conn, EDSTNOTRCH); return; } - else if (new_ttl == TCP_DEF_TTL) + else if (new_ttl < TCP_DEF_TTL_NEXT) new_ttl= TCP_DEF_TTL_NEXT; else { @@ -2055,14 +2108,154 @@ tcp_conn_t *tcp_conn; tcp_conn_write(tcp_conn, 1); } +FORWARD u32_t mtu_table[]= +{ /* From RFC-1191 */ +/* Plateau MTU Comments Reference */ +/* ------ --- -------- --------- */ +/* 65535 Official maximum MTU RFC 791 */ +/* 65535 Hyperchannel RFC 1044 */ + 65535, + 32000, /* Just in case */ +/* 17914 16Mb IBM Token Ring ref. 
[6] */ + 17914, +/* 8166 IEEE 802.4 RFC 1042 */ + 8166, +/* 4464 IEEE 802.5 (4Mb max) RFC 1042 */ +/* 4352 FDDI (Revised) RFC 1188 */ + 4352, /* (1%) */ +/* 2048 Wideband Network RFC 907 */ +/* 2002 IEEE 802.5 (4Mb recommended) RFC 1042 */ + 2002, /* (2%) */ +/* 1536 Exp. Ethernet Nets RFC 895 */ +/* 1500 Ethernet Networks RFC 894 */ +/* 1500 Point-to-Point (default) RFC 1134 */ +/* 1492 IEEE 802.3 RFC 1042 */ + 1492, /* (3%) */ +/* 1006 SLIP RFC 1055 */ +/* 1006 ARPANET BBN 1822 */ + 1006, +/* 576 X.25 Networks RFC 877 */ +/* 544 DEC IP Portal ref. [10] */ +/* 512 NETBIOS RFC 1088 */ +/* 508 IEEE 802/Source-Rt Bridge RFC 1042 */ +/* 508 ARCNET RFC 1051 */ + 508, /* (13%) */ +/* 296 Point-to-Point (low delay) RFC 1144 */ + 296, + 68, /* Official minimum MTU RFC 791 */ + 0, /* End of list */ +}; + +PUBLIC void tcp_mtu_exceeded(tcp_conn) +tcp_conn_t *tcp_conn; +{ + u16_t mtu; + int i; + clock_t curr_time; + + if (!(tcp_conn->tc_flags & TCF_PMTU)) + { + /* Strange, got MTU exceeded but DF is not set. Ignore + * the error. If the problem persists, the connection will + * time-out. + */ + return; + } + curr_time= get_time(); + + /* We get here in cases. Either were are trying to find an MTU + * that works at all, or we are trying see how far we can increase + * the current MTU. If the last change to the MTU was a long time + * ago, we assume the second case. 
+ */ + if (curr_time >= tcp_conn->tc_mtutim + TCP_PMTU_INCR_IV) + { + mtu= tcp_conn->tc_mtu; + mtu -= mtu/TCP_PMTU_INCR_FRAC; + tcp_conn->tc_mtu= mtu; + tcp_conn->tc_mtutim= curr_time; + DBLOCK(1, printf( + "tcp_mtu_exceeded: new (lowered) mtu %d for conn %d\n", + mtu, tcp_conn-tcp_conn_table)); + tcp_conn->tc_stt= 0; + tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA; + tcp_conn_write(tcp_conn, 1); + return; + } + + tcp_conn->tc_mtutim= curr_time; + mtu= tcp_conn->tc_mtu; + for (i= 0; mtu_table[i] >= mtu; i++) + ; /* Nothing to do */ + mtu= mtu_table[i]; + if (mtu >= TCP_MIN_PATH_MTU) + { + tcp_conn->tc_mtu= mtu; + } + else + { + /* Small MTUs can be used for denial-of-service attacks. + * Switch-off PMTU if the MTU becomes too small. + */ + tcp_conn->tc_flags &= ~TCF_PMTU; + tcp_conn->tc_mtu= TCP_MIN_PATH_MTU; + DBLOCK(1, printf( + "tcp_mtu_exceeded: clearing TCF_PMTU for conn %d\n", + tcp_conn-tcp_conn_table);); + + } + DBLOCK(1, printf("tcp_mtu_exceeded: new mtu %d for conn %d\n", + mtu, tcp_conn-tcp_conn_table);); + tcp_conn->tc_stt= 0; + tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA; + tcp_conn_write(tcp_conn, 1); +} + +PUBLIC void tcp_mtu_incr(tcp_conn) +tcp_conn_t *tcp_conn; +{ + clock_t curr_time; + u32_t mtu; + + assert(tcp_conn->tc_mtu < tcp_conn->tc_max_mtu); + if (!(tcp_conn->tc_flags & TCF_PMTU)) + { + /* Use a much longer time-out for retrying PMTU discovery + * after is has been disabled. Note that PMTU discovery + * can be disabled during a short loss of connectivity. 
+ */ + curr_time= get_time(); + if (curr_time > tcp_conn->tc_mtutim+TCP_PMTU_EN_IV) + { + tcp_conn->tc_flags |= TCF_PMTU; + DBLOCK(1, printf( + "tcp_mtu_incr: setting TCF_PMTU for conn %d\n", + tcp_conn-tcp_conn_table);); + } + return; + } + + mtu= tcp_conn->tc_mtu; + mtu += mtu/TCP_PMTU_INCR_FRAC; + if (mtu > tcp_conn->tc_max_mtu) + mtu= tcp_conn->tc_max_mtu; + tcp_conn->tc_mtu= mtu; + DBLOCK(0x1, printf("tcp_mtu_incr: new mtu %ld for conn %d\n", + mtu, tcp_conn-tcp_conn_table);); +} + /* tcp_setup_conn */ -PRIVATE void tcp_setup_conn(tcp_conn) +PRIVATE void tcp_setup_conn(tcp_port, tcp_conn) +tcp_port_t *tcp_port; tcp_conn_t *tcp_conn; { + u16_t mss; + assert(!tcp_conn->tc_connInprogress); + tcp_conn->tc_port= tcp_port; if (tcp_conn->tc_flags & TCF_INUSE) { assert (tcp_conn->tc_state == TCS_CLOSED); @@ -2082,13 +2275,13 @@ tcp_conn_t *tcp_conn; } if (!tcp_conn->tc_ISS) { - tcp_conn->tc_ISS= (get_time()/HZ)*ISS_INC_FREQ; + tcp_conn->tc_ISS= tcp_rand32(); } tcp_conn->tc_SND_UNA= tcp_conn->tc_ISS; tcp_conn->tc_SND_TRM= tcp_conn->tc_ISS; tcp_conn->tc_SND_NXT= tcp_conn->tc_ISS+1; tcp_conn->tc_SND_UP= tcp_conn->tc_ISS; - tcp_conn->tc_SND_PSH= tcp_conn->tc_ISS; + tcp_conn->tc_SND_PSH= tcp_conn->tc_ISS+1; tcp_conn->tc_IRS= 0; tcp_conn->tc_RCV_LO= tcp_conn->tc_IRS; tcp_conn->tc_RCV_NXT= tcp_conn->tc_IRS; @@ -2098,6 +2291,9 @@ tcp_conn_t *tcp_conn; assert(tcp_conn->tc_rcvd_data == NULL); assert(tcp_conn->tc_adv_data == NULL); assert(tcp_conn->tc_send_data == NULL); + + tcp_conn->tc_ka_time= TCP_DEF_KEEPALIVE; + tcp_conn->tc_remipopt= NULL; tcp_conn->tc_tcpopt= NULL; @@ -2106,10 +2302,15 @@ tcp_conn_t *tcp_conn; tcp_conn->tc_stt= 0; tcp_conn->tc_rt_dead= TCP_DEF_RT_DEAD; tcp_conn->tc_0wnd_to= 0; + tcp_conn->tc_artt= TCP_DEF_RTT*TCP_RTT_SCALE; + tcp_conn->tc_drtt= 0; tcp_conn->tc_rtt= TCP_DEF_RTT; - tcp_conn->tc_mss= TCP_DEF_MSS; + tcp_conn->tc_max_mtu= tcp_conn->tc_port->tp_mtu; + tcp_conn->tc_mtu= tcp_conn->tc_max_mtu; + tcp_conn->tc_mtutim= 0; tcp_conn->tc_error= NW_OK; - 
tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + 2*tcp_conn->tc_mss; + mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE; + tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + 2*mss; tcp_conn->tc_snd_cthresh= TCP_MAX_SND_WND_SIZE; tcp_conn->tc_snd_cinc= (long)TCP_DEF_MSS*TCP_DEF_MSS/TCP_MAX_SND_WND_SIZE+1; @@ -2118,11 +2319,20 @@ tcp_conn_t *tcp_conn; tcp_conn->tc_rt_seq= 0; tcp_conn->tc_rt_threshold= tcp_conn->tc_ISS; tcp_conn->tc_flags= TCF_INUSE; + tcp_conn->tc_flags |= TCF_PMTU; clck_untimer(&tcp_conn->tc_transmit_timer); tcp_conn->tc_transmit_seq= 0; } +PRIVATE u32_t tcp_rand32() +{ + u8_t bits[32]; + + rand256(bits); + return bits[0] | (bits[1] << 8) | (bits[2] << 16) | (bits[3] << 24); +} + /* - * $PchId: tcp.c,v 1.14.2.2 1999/11/17 22:05:27 philip Exp $ + * $PchId: tcp.c,v 1.34 2005/06/28 14:20:27 philip Exp $ */ diff --git a/servers/inet/generic/tcp.h b/servers/inet/generic/tcp.h index 53194d07c..7b96d44e1 100644 --- a/servers/inet/generic/tcp.h +++ b/servers/inet/generic/tcp.h @@ -7,19 +7,18 @@ Copyright 1995 Philip Homburg #ifndef TCP_H #define TCP_H -#define ISS_INC_FREQ 250000L #define TCP_MAX_DATAGRAM 8192 #ifndef TCP_MAX_SND_WND_SIZE -#define TCP_MAX_SND_WND_SIZE ((CRAMPED ? 4 : 16) * 1024) +#define TCP_MAX_SND_WND_SIZE (32*1024) #endif #ifndef TCP_MIN_RCV_WND_SIZE -#define TCP_MIN_RCV_WND_SIZE (4 * 1024) +#define TCP_MIN_RCV_WND_SIZE (4*1024) #endif #ifndef TCP_MAX_RCV_WND_SIZE -#define TCP_MAX_RCV_WND_SIZE ((CRAMPED ? 
4 : 8) * 1024) +#define TCP_MAX_RCV_WND_SIZE (TCP_MIN_RCV_WND_SIZE + 28*1024) #endif #define TCP_DEF_TOS 0 @@ -50,22 +49,39 @@ Copyright 1995 Philip Homburg #define TCP_RTT_MAX (10*HZ) /* The maximum retransmission interval * is TCP_RTT_MAX ticks */ +#define TCP_RTT_SMOOTH 16 /* weight is 15/16 */ +#define TCP_DRTT_MULT 4 /* weight of the deviation */ +#define TCP_RTT_SCALE 256 /* Scaled values for more accuracy */ + +#ifndef TCP_DEF_KEEPALIVE +#define TCP_DEF_KEEPALIVE (20L*60*HZ) /* Keepalive interval */ +#endif #ifndef TCP_DEF_MSS #define TCP_DEF_MSS 1400 #endif +#define TCP_MIN_PATH_MTU 500 +#define TCP_PMTU_INCR_IV (1L*60*HZ) /* 1 minute in ticks */ +#define TCP_PMTU_EN_IV (10L*60*HZ) /* 10 minutes in ticks */ +#define TCP_PMTU_INCR_FRAC 100 /* Add 1% each time */ +#define TCP_PMTU_BLACKHOLE (10*HZ) /* Assume a PMTU blackhole + * after 10 seconds. + */ + #define TCP_DEF_CONF (NWTC_COPY | NWTC_LP_UNSET | NWTC_UNSET_RA | \ NWTC_UNSET_RP) #define TCP_DEF_OPT (NWTO_NOFLAG) +#define TCP_DACK_RETRANS 3 /* # dup ACKs to start fast retrans. 
*/ + struct acc; void tcp_prep ARGS(( void )); void tcp_init ARGS(( void )); int tcp_open ARGS(( int port, int srfd, get_userdata_t get_userdata, put_userdata_t put_userdata, - put_pkt_t put_pkt )); + put_pkt_t put_pkt, select_res_t select_res )); int tcp_read ARGS(( int fd, size_t count)); int tcp_write ARGS(( int fd, size_t count)); int tcp_ioctl ARGS(( int fd, ioreq_t req)); @@ -75,5 +91,5 @@ void tcp_close ARGS(( int fd)); #endif /* TCP_H */ /* - * $PchId: tcp.h,v 1.8 1996/05/07 20:51:37 philip Exp $ + * $PchId: tcp.h,v 1.17 2005/06/28 14:20:54 philip Exp $ */ diff --git a/servers/inet/generic/tcp_int.h b/servers/inet/generic/tcp_int.h index 0e59ca55a..8d6f47987 100644 --- a/servers/inet/generic/tcp_int.h +++ b/servers/inet/generic/tcp_int.h @@ -7,6 +7,8 @@ Copyright 1995 Philip Homburg #ifndef TCP_INT_H #define TCP_INT_H +#define IP_TCP_MIN_HDR_SIZE (IP_MIN_HDR_SIZE+TCP_MIN_HDR_SIZE) + #define TCP_CONN_HASH_SHIFT 4 #define TCP_CONN_HASH_NR (1 << TCP_CONN_HASH_SHIFT) @@ -18,6 +20,8 @@ typedef struct tcp_port int tp_ipfd; acc_t *tp_pack; ipaddr_t tp_ipaddr; + ipaddr_t tp_subnetmask; + u16_t tp_mtu; struct tcp_conn *tp_snd_head; struct tcp_conn *tp_snd_tail; event_t tp_snd_event; @@ -48,6 +52,7 @@ typedef struct tcp_fd nwio_tcpopt_t tf_tcpopt; get_userdata_t tf_get_userdata; put_userdata_t tf_put_userdata; + select_res_t tf_select_res; struct tcp_conn *tf_conn; size_t tf_write_offset; size_t tf_write_count; @@ -57,23 +62,26 @@ typedef struct tcp_fd #define TFF_EMPTY 0x0 #define TFF_INUSE 0x1 -#define TFF_IOCTL_IP 0x2 -#define TFF_CONF_SET 0x4 -#define TFF_IOC_INIT_SP 0x8 -#define TFF_CONNECT 0x20 -#define TFF_WRITE_IP 0x80 +#define TFF_READ_IP 0x2 +#define TFF_WRITE_IP 0x4 +#define TFF_IOCTL_IP 0x8 +#define TFF_CONF_SET 0x10 +#define TFF_IOC_INIT_SP 0x20 +#define TFF_CONNECT 0x40 +#define TFF_CONNECTED 0x80 #define TFF_WR_URG 0x100 #define TFF_PUSH_DATA 0x200 -#define TFF_READ_IP 0x400 -#define TFF_RECV_URG 0x800 -#define TFF_CONNECTED 0x1000 -#define TFF_DEL_RST 
0x2000 +#define TFF_RECV_URG 0x400 +#define TFF_DEL_RST 0x800 +#define TFF_SEL_READ 0x1000 +#define TFF_SEL_WRITE 0x2000 +#define TFF_SEL_EXCEPT 0x4000 typedef struct tcp_conn { int tc_flags; int tc_state; - int tc_busy; /* do not steal buffer when a counnection is + int tc_busy; /* do not steal buffer when a connection is * busy */ tcp_port_t *tc_port; @@ -84,11 +92,9 @@ typedef struct tcp_conn tcpport_t tc_remport; ipaddr_t tc_remaddr; -#if 1 int tc_connInprogress; -#endif int tc_orglisten; - time_t tc_senddis; + clock_t tc_senddis; /* Sending side */ u32_t tc_ISS; /* initial sequence number */ @@ -104,12 +110,15 @@ typedef struct tcp_conn u32_t tc_snd_cthresh; /* threshold for send window */ u32_t tc_snd_cinc; /* increment for send window threshold */ u16_t tc_snd_wnd; /* max send queue size */ + u16_t tc_snd_dack; /* # of duplicate ACKs */ /* round trip calculation. */ - time_t tc_rt_time; + clock_t tc_rt_time; u32_t tc_rt_seq; u32_t tc_rt_threshold; - time_t tc_rtt; + clock_t tc_artt; /* Avg. retransmission time. Scaled. */ + clock_t tc_drtt; /* Diviation, also scaled. */ + clock_t tc_rtt; /* Computed retrans time */ acc_t *tc_send_data; acc_t *tc_frag2send; @@ -127,17 +136,28 @@ typedef struct tcp_conn acc_t *tc_adv_data; u32_t tc_adv_seq; + /* Keep alive. Record SDN_NXT and RCV_NXT in tc_ka_snd and + * tc_ka_rcv when setting the keepalive timer to detect + * any activity that may have happend before the timer + * expired. + */ + u32_t tc_ka_snd; + u32_t tc_ka_rcv; + clock_t tc_ka_time; + acc_t *tc_remipopt; acc_t *tc_tcpopt; u8_t tc_tos; u8_t tc_ttl; - u16_t tc_mss; + u16_t tc_max_mtu; /* Max. 
negotiated (or selected) MTU */ + u16_t tc_mtu; /* discovered PMTU */ + clock_t tc_mtutim; /* Last time MTU/TCF_PMTU flag was changed */ struct timer tc_transmit_timer; u32_t tc_transmit_seq; - time_t tc_0wnd_to; - time_t tc_stt; /* time of first send after last ack */ - time_t tc_rt_dead; + clock_t tc_0wnd_to; + clock_t tc_stt; /* time of first send after last ack */ + clock_t tc_rt_dead; int tc_error; int tc_inconsistent; @@ -151,6 +171,9 @@ typedef struct tcp_conn #define TCF_SEND_ACK 0x10 #define TCF_FIN_SENT 0x20 #define TCF_BSD_URG 0x40 +#define TCF_NO_PUSH 0x80 +#define TCF_PUSH_NOW 0x100 +#define TCF_PMTU 0x200 #if DEBUG & 0x200 #define TCF_DEBUG 0x1000 @@ -167,13 +190,18 @@ typedef struct tcp_conn void tcp_frag2conn ARGS(( tcp_conn_t *tcp_conn, ip_hdr_t *ip_hdr, tcp_hdr_t *tcp_hdr, acc_t *tcp_data, size_t data_len )); void tcp_fd_read ARGS(( tcp_conn_t *tcp_conn, int enq )); +unsigned tcp_sel_read ARGS(( tcp_conn_t *tcp_conn )); +void tcp_rsel_read ARGS(( tcp_conn_t *tcp_conn )); /* tcp_send.c */ void tcp_conn_write ARGS(( tcp_conn_t *tcp_conn, int enq )); void tcp_release_retrans ARGS(( tcp_conn_t *tcp_conn, u32_t seg_ack, U16_t new_win )); +void tcp_fast_retrans ARGS(( tcp_conn_t *tcp_conn )); void tcp_set_send_timer ARGS(( tcp_conn_t *tcp_conn )); void tcp_fd_write ARGS(( tcp_conn_t *tcp_conn )); +unsigned tcp_sel_write ARGS(( tcp_conn_t *tcp_conn )); +void tcp_rsel_write ARGS(( tcp_conn_t *tcp_conn )); void tcp_close_connection ARGS(( tcp_conn_t *tcp_conn, int error )); void tcp_port_write ARGS(( tcp_port_t *tcp_port )); @@ -183,7 +211,7 @@ void tcp_shutdown ARGS(( tcp_conn_t *tcp_conn )); void tcp_extract_ipopt ARGS(( tcp_conn_t *tcp_conn, ip_hdr_t *ip_hdr )); void tcp_extract_tcpopt ARGS(( tcp_conn_t *tcp_conn, - tcp_hdr_t *tcp_hdr )); + tcp_hdr_t *tcp_hdr, size_t *mssp )); void tcp_get_ipopt ARGS(( tcp_conn_t *tcp_conn, ip_hdropt_t *ip_hdropt )); void tcp_get_tcpopt ARGS(( tcp_conn_t *tcp_conn, tcp_hdropt_t @@ -207,6 +235,8 @@ void tcp_reply_ioctl 
ARGS(( tcp_fd_t *tcp_fd, int reply )); void tcp_reply_write ARGS(( tcp_fd_t *tcp_fd, size_t reply )); void tcp_reply_read ARGS(( tcp_fd_t *tcp_fd, size_t reply )); void tcp_notreach ARGS(( tcp_conn_t *tcp_conn )); +void tcp_mtu_exceeded ARGS(( tcp_conn_t *tcp_conn )); +void tcp_mtu_incr ARGS(( tcp_conn_t *tcp_conn )); #define TCP_FD_NR (10*IP_PORT_MAX) #define TCP_CONN_NR (2*TCP_FD_NR) @@ -223,5 +253,5 @@ EXTERN tcp_fd_t tcp_fd_table[TCP_FD_NR]; #endif /* TCP_INT_H */ /* - * $PchId: tcp_int.h,v 1.10 1996/05/07 20:51:59 philip Exp $ + * $PchId: tcp_int.h,v 1.17 2005/06/28 14:21:08 philip Exp $ */ diff --git a/servers/inet/generic/tcp_lib.c b/servers/inet/generic/tcp_lib.c index 243262e68..ddd849e44 100644 --- a/servers/inet/generic/tcp_lib.c +++ b/servers/inet/generic/tcp_lib.c @@ -16,8 +16,6 @@ Copyright 1995 Philip Homburg THIS_FILE -#if you_want_to_be_complete - #undef tcp_LEmod4G PUBLIC int tcp_LEmod4G(n1, n2) u32_t n1; @@ -49,7 +47,6 @@ u32_t n2; { return !!((u32_t)(n2-n1) & 0x80000000L); } -#endif PUBLIC void tcp_extract_ipopt(tcp_conn, ip_hdr) tcp_conn_t *tcp_conn; @@ -64,17 +61,62 @@ ip_hdr_t *ip_hdr; DBLOCK(1, printf("ip_hdr options NOT supported (yet?)\n")); } -PUBLIC void tcp_extract_tcpopt(tcp_conn, tcp_hdr) +PUBLIC void tcp_extract_tcpopt(tcp_conn, tcp_hdr, mssp) tcp_conn_t *tcp_conn; tcp_hdr_t *tcp_hdr; +size_t *mssp; { - int tcp_hdr_len; + int i, tcp_hdr_len, type, len; + u8_t *cp; + u16_t mss; + + *mssp= 0; /* No mss */ tcp_hdr_len= (tcp_hdr->th_data_off & TH_DO_MASK) >> 2; if (tcp_hdr_len == TCP_MIN_HDR_SIZE) return; - - DBLOCK(2, printf("tcp_hdr options NOT supported (yet?)\n")); + i= TCP_MIN_HDR_SIZE; + while (i tcp_hdr_len) + break; /* No length field */ + len= cp[1]; + if (i+len > tcp_hdr_len) + break; /* Truncated option */ + i += len; + switch(type) + { + case TCP_OPT_MSS: + if (len != 4) + break; + mss= (cp[2] << 8) | cp[3]; + DBLOCK(1, printf("tcp_extract_tcpopt: got mss %d\n", + mss);); + *mssp= mss; + break; + case TCP_OPT_WSOPT: /* window 
scale option */ + case TCP_OPT_SACKOK: /* SACK permitted */ + case TCP_OPT_TS: /* Timestamps option */ + case TCP_OPT_CCNEW: /* new connection count */ + /* Ignore this option. */ + break; + default: + DBLOCK(0x1, + printf( + "tcp_extract_tcpopt: unknown option %d, len %d\n", + type, len)); + break; + } + } } PUBLIC u16_t tcp_pack_oneCsum(ip_hdr, tcp_pack) @@ -239,6 +281,8 @@ acc_t *data; ip_hdr->ih_src= tcp_conn->tc_locaddr; ip_hdr->ih_dst= tcp_conn->tc_remaddr; ip_hdr->ih_flags_fragoff= 0; + if (tcp_conn->tc_flags & TCF_PMTU) + ip_hdr->ih_flags_fragoff |= HTONS(IH_DONT_FRAG); tcp_hdr->th_srcport= tcp_conn->tc_locport; tcp_hdr->th_dstport= tcp_conn->tc_remport; @@ -251,10 +295,10 @@ acc_t *data; return hdr_acc; } -#if !CRAMPED PUBLIC void tcp_print_state (tcp_conn) tcp_conn_t *tcp_conn; { +#if DEBUG printf("tcp_conn_table[%d]->tc_state= ", tcp_conn- tcp_conn_table); if (!(tcp_conn->tc_flags & TCF_INUSE)) @@ -272,8 +316,8 @@ tcp_conn_t *tcp_conn; case TCS_CLOSING: printf("CLOSING"); break; default: printf("unknown (=%d)", tcp_conn->tc_state); break; } -} #endif +} PUBLIC int tcp_check_conn(tcp_conn) tcp_conn_t *tcp_conn; @@ -286,9 +330,7 @@ tcp_conn_t *tcp_conn; if (tcp_conn->tc_inconsistent) { assert(tcp_conn->tc_inconsistent == 1); -#if !CRAMPED printf("tcp_check_conn: connection is inconsistent\n"); -#endif return allright; } @@ -308,38 +350,34 @@ tcp_conn_t *tcp_conn; size= hi_queue-lo_queue; if (size<0) { -#if !CRAMPED printf("rcv hi_queue-lo_queue < 0\n"); - printf("SND_NXT= 0x%x, SND_UNA= 0x%x\n", - tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA); - printf("lo_queue= 0x%x, hi_queue= 0x%x\n", - lo_queue, hi_queue); + printf("SND_NXT= 0x%lx, SND_UNA= 0x%lx\n", + (unsigned long)tcp_conn->tc_SND_NXT, + (unsigned long)tcp_conn->tc_SND_UNA); + printf("lo_queue= 0x%lx, hi_queue= 0x%lx\n", + (unsigned long)lo_queue, + (unsigned long)hi_queue); printf("size= %d\n", size); -#endif allright= FALSE; } else if (!tcp_conn->tc_rcvd_data) { if (size) { -#if !CRAMPED 
printf("RCV_NXT-RCV_LO != 0\n"); tcp_print_conn(tcp_conn); printf("lo_queue= %lu, hi_queue= %lu\n", lo_queue, hi_queue); -#endif allright= FALSE; } } else if (size != bf_bufsize(tcp_conn->tc_rcvd_data)) { -#if !CRAMPED printf("RCV_NXT-RCV_LO != sizeof tc_rcvd_data\n"); tcp_print_conn(tcp_conn); printf( "lo_queue= %lu, hi_queue= %lu, sizeof tc_rcvd_data= %d\n", lo_queue, hi_queue, bf_bufsize(tcp_conn->tc_rcvd_data)); -#endif allright= FALSE; } else if (size != 0 && (tcp_conn->tc_state == TCS_CLOSED || @@ -347,18 +385,15 @@ tcp_conn_t *tcp_conn; tcp_conn->tc_state == TCS_SYN_RECEIVED || tcp_conn->tc_state == TCS_SYN_SENT)) { -#if !CRAMPED printf("received data but not connected\n"); tcp_print_conn(tcp_conn); -#endif allright= FALSE; } if (tcp_Lmod4G(tcp_conn->tc_RCV_HI, tcp_conn->tc_RCV_NXT)) { -#if !CRAMPED - printf("tc_RCV_HI (%d) < tc_RCV_NXT (%d)\n", - tcp_conn->tc_RCV_HI, tcp_conn->tc_RCV_NXT); -#endif + printf("tc_RCV_HI (0x%lx) < tc_RCV_NXT (0x%lx)\n", + (unsigned long)tcp_conn->tc_RCV_HI, + (unsigned long)tcp_conn->tc_RCV_NXT); allright= FALSE; } @@ -380,71 +415,64 @@ tcp_conn_t *tcp_conn; size= hi_queue-lo_queue; if (size<0) { -#if !CRAMPED printf("snd hi_queue-lo_queue < 0\n"); - printf("SND_ISS= 0x%x, SND_UNA= 0x%x, SND_NXT= 0x%x\n", - tcp_conn->tc_ISS, tcp_conn->tc_SND_UNA, - tcp_conn->tc_SND_NXT); - printf("hi_queue= 0x%x, lo_queue= 0x%x, size= %d\n", - hi_queue, lo_queue, size); -#endif + printf("SND_ISS= 0x%lx, SND_UNA= 0x%lx, SND_NXT= 0x%lx\n", + (unsigned long)tcp_conn->tc_ISS, + (unsigned long)tcp_conn->tc_SND_UNA, + (unsigned long)tcp_conn->tc_SND_NXT); + printf("hi_queue= 0x%lx, lo_queue= 0x%lx, size= %d\n", + (unsigned long)hi_queue, (unsigned long)lo_queue, + size); allright= FALSE; } else if (!tcp_conn->tc_send_data) { if (size) { -#if !CRAMPED printf("SND_NXT-SND_UNA != 0\n"); - printf("SND_NXT= %d, SND_UNA= %d\n", - tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA); - printf("lo_queue= %d, hi_queue= %d\n", - lo_queue, hi_queue); -#endif + 
printf("SND_NXT= 0x%lx, SND_UNA= 0x%lx\n", + (unsigned long)tcp_conn->tc_SND_NXT, + (unsigned long)tcp_conn->tc_SND_UNA); + printf("lo_queue= 0x%lx, hi_queue= 0x%lx\n", + (unsigned long)lo_queue, + (unsigned long)hi_queue); allright= FALSE; } } else if (size != bf_bufsize(tcp_conn->tc_send_data)) { -#if !CRAMPED printf("SND_NXT-SND_UNA != sizeof tc_send_data\n"); - printf("SND_NXT= %d, SND_UNA= %d\n", - tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA); - printf("lo_queue= %d, lo_queue= %d\n", - lo_queue, hi_queue); + printf("SND_NXT= 0x%lx, SND_UNA= 0x%lx\n", + (unsigned long)tcp_conn->tc_SND_NXT, + (unsigned long)tcp_conn->tc_SND_UNA); + printf("lo_queue= 0x%lx, lo_queue= 0x%lx\n", + (unsigned long)lo_queue, + (unsigned long)hi_queue); printf("bf_bufsize(data)= %d\n", bf_bufsize(tcp_conn->tc_send_data)); -#endif + allright= FALSE; } /* checking counters */ if (!tcp_GEmod4G(tcp_conn->tc_SND_UNA, tcp_conn->tc_ISS)) { -#if !CRAMPED printf("SND_UNA < ISS\n"); -#endif allright= FALSE; } if (!tcp_GEmod4G(tcp_conn->tc_SND_NXT, tcp_conn->tc_SND_UNA)) { -#if !CRAMPED printf("SND_NXTtc_SND_TRM, tcp_conn->tc_SND_UNA)) { -#if !CRAMPED printf("SND_TRMtc_SND_NXT, tcp_conn->tc_SND_TRM)) { -#if !CRAMPED printf("SND_NXTth_flags & THF_PSH) printf(" "); if (tcp_hdr->th_flags & THF_ACK) - printf(" ", ntohl(tcp_hdr->th_ack_nr), + printf(" ", ntohl(tcp_hdr->th_ack_nr), ntohs(tcp_hdr->th_window)); if (tcp_hdr->th_flags & THF_URG) printf(" ", tcp_hdr->th_urgptr); @@ -488,13 +515,12 @@ tcp_hdr_t *tcp_hdr; if (tcp_hdr_len != TCP_MIN_HDR_SIZE) printf(" ", tcp_hdr_len-TCP_MIN_HDR_SIZE); } -#endif -#if !CRAMPED PUBLIC void tcp_print_conn(tcp_conn) tcp_conn_t *tcp_conn; { - int iss, irs; + u32_t iss, irs; + tcp_fd_t *tcp_fd; iss= tcp_conn->tc_ISS; irs= tcp_conn->tc_IRS; @@ -505,6 +531,10 @@ tcp_conn_t *tcp_conn; iss, tcp_conn->tc_SND_UNA-iss, tcp_conn->tc_SND_UNA, tcp_conn->tc_SND_TRM-iss, tcp_conn->tc_SND_TRM, tcp_conn->tc_SND_NXT-iss, tcp_conn->tc_SND_NXT); + printf( + " UP +0x%lx(0x%lx) PSH 
+0x%lx(0x%lx) ", + tcp_conn->tc_SND_UP-iss, tcp_conn->tc_SND_UP, + tcp_conn->tc_SND_PSH-iss, tcp_conn->tc_SND_PSH); printf(" snd_cwnd +0x%lx(0x%lx)", tcp_conn->tc_snd_cwnd-tcp_conn->tc_SND_UNA, tcp_conn->tc_snd_cwnd); @@ -516,7 +546,7 @@ tcp_conn_t *tcp_conn; printf("+0x%lx(0x%lx)", tcp_conn->tc_transmit_seq-iss, tcp_conn->tc_transmit_seq); } - printf(" IRS 0x%lx LO +0x%x(0x%lx) NXT +0x%x(0x%lx) HI +0x%x(0x%lx)", + printf(" IRS 0x%lx LO +0x%lx(0x%lx) NXT +0x%lx(0x%lx) HI +0x%lx(0x%lx)", irs, tcp_conn->tc_RCV_LO-irs, tcp_conn->tc_RCV_LO, tcp_conn->tc_RCV_NXT-irs, tcp_conn->tc_RCV_NXT, tcp_conn->tc_RCV_HI-irs, tcp_conn->tc_RCV_HI); @@ -532,9 +562,31 @@ tcp_conn_t *tcp_conn; printf(" TCF_SEND_ACK"); if (tcp_conn->tc_flags & TCF_FIN_SENT) printf(" TCF_FIN_SENT"); + if (tcp_conn->tc_flags & TCF_BSD_URG) + printf(" TCF_BSD_URG"); + if (tcp_conn->tc_flags & TCF_NO_PUSH) + printf(" TCF_NO_PUSH"); + if (tcp_conn->tc_flags & TCF_PUSH_NOW) + printf(" TCF_PUSH_NOW"); + if (tcp_conn->tc_flags & TCF_PMTU) + printf(" TCF_PMTU"); + printf("\n"); + writeIpAddr(tcp_conn->tc_locaddr); + printf(", %u -> ", ntohs(tcp_conn->tc_locport)); + writeIpAddr(tcp_conn->tc_remaddr); + printf(", %u\n", ntohs(tcp_conn->tc_remport)); + tcp_fd= tcp_conn->tc_fd; + if (!tcp_fd) + printf("tc_fd NULL"); + else + { + printf("tc_fd #%d: flags 0x%x, r %u@%u, w %u@%u", + tcp_fd-tcp_fd_table, tcp_fd->tf_flags, + tcp_fd->tf_read_count, tcp_fd->tf_read_offset, + tcp_fd->tf_write_count, tcp_fd->tf_write_offset); + } } -#endif /* - * $PchId: tcp_lib.c,v 1.7 1995/11/21 06:45:27 philip Exp $ + * $PchId: tcp_lib.c,v 1.14 2005/01/31 21:41:38 philip Exp $ */ diff --git a/servers/inet/generic/tcp_recv.c b/servers/inet/generic/tcp_recv.c index ed73fb3af..0a510bee4 100644 --- a/servers/inet/generic/tcp_recv.c +++ b/servers/inet/generic/tcp_recv.c @@ -9,6 +9,7 @@ Copyright 1995 Philip Homburg #include "clock.h" #include "event.h" #include "type.h" +#include "sr.h" #include "io.h" #include "tcp_int.h" @@ -34,9 +35,10 @@ 
size_t data_len; tcp_fd_t *connuser; int tcp_hdr_flags; int ip_hdr_len, tcp_hdr_len; - u32_t seg_ack, seg_seq, rcv_hi; - u16_t seg_wnd; - int acceptable_ACK, segm_acceptable; + u32_t seg_ack, seg_seq, rcv_hi, snd_una, snd_nxt; + u16_t seg_wnd, mtu; + size_t mss; + int acceptable_ACK, segm_acceptable, send_rst; ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2; tcp_hdr_len= (tcp_hdr->th_data_off & TH_DO_MASK) >> 2; @@ -46,6 +48,11 @@ size_t data_len; seg_seq= ntohl(tcp_hdr->th_seq_nr); seg_wnd= ntohs(tcp_hdr->th_window); +#if 0 + { where(); tcp_print_conn(tcp_conn); printf("\n"); + tcp_print_pack(ip_hdr, tcp_hdr); printf("\n"); } +#endif + switch (tcp_conn->tc_state) { case TCS_CLOSED: @@ -105,7 +112,24 @@ LISTEN: if (tcp_hdr_flags & THF_SYN) { tcp_extract_ipopt(tcp_conn, ip_hdr); - tcp_extract_tcpopt(tcp_conn, tcp_hdr); + tcp_extract_tcpopt(tcp_conn, tcp_hdr, &mss); + mtu= mss+IP_TCP_MIN_HDR_SIZE; + if (mtu < IP_MIN_MTU) + { + /* No or unrealistic mss, use default MTU */ + mtu= IP_DEF_MTU; + } + if (mtu < tcp_conn->tc_max_mtu) + { + tcp_conn->tc_max_mtu= mtu; + tcp_conn->tc_mtu= mtu; + DBLOCK(1, printf( + "tcp[%d]: conn[%d]: mtu = %d\n", + tcp_conn->tc_port-tcp_port_table, + tcp_conn-tcp_conn_table, + mtu);); + } + tcp_conn->tc_RCV_LO= seg_seq+1; tcp_conn->tc_RCV_NXT= seg_seq+1; tcp_conn->tc_RCV_HI= tcp_conn->tc_RCV_LO+ @@ -194,8 +218,12 @@ SYN-SENT: break; else { + /* HACK: force sending a RST, + * normally, RSTs are not send + * if the segment is an ACK. 
+ */ create_RST (tcp_conn, ip_hdr, - tcp_hdr, data_len); + tcp_hdr, data_len+1); tcp_conn_write(tcp_conn, 1); break; } @@ -219,6 +247,24 @@ SYN-SENT: } if (tcp_hdr_flags & THF_SYN) { + tcp_extract_ipopt(tcp_conn, ip_hdr); + tcp_extract_tcpopt(tcp_conn, tcp_hdr, &mss); + mtu= mss+IP_TCP_MIN_HDR_SIZE; + if (mtu < IP_MIN_MTU) + { + /* No or unrealistic mss, use default MTU */ + mtu= IP_DEF_MTU; + } + if (mtu < tcp_conn->tc_max_mtu) + { + tcp_conn->tc_max_mtu= mtu; + tcp_conn->tc_mtu= mtu; + DBLOCK(1, printf( + "tcp[%d]: conn[%d]: mtu = %d\n", + tcp_conn->tc_port-tcp_port_table, + tcp_conn-tcp_conn_table, + mtu);); + } tcp_conn->tc_RCV_LO= seg_seq+1; tcp_conn->tc_RCV_NXT= seg_seq+1; tcp_conn->tc_RCV_HI= tcp_conn->tc_RCV_LO + @@ -262,6 +308,7 @@ SYN-SENT: /* SYN-RECEIVED: test if segment is acceptable: + Segment Receive Test Length Window 0 0 SEG.SEQ == RCV.NXT @@ -271,10 +318,16 @@ SYN-RECEIVED: || (RCV.NXT <= SEG.SEQ+SEG.LEN-1 && SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND) for urgent data: use RCV.WND+1 for RCV.WND + + Special: Send RST if SEG.SEQ < IRS or SEG.SEQ > RCV.NXT+64K (and + the packet is not a RST packet itself). 
*/ rcv_hi= tcp_conn->tc_RCV_HI; if (tcp_hdr_flags & THF_URG) rcv_hi++; + send_rst= tcp_Lmod4G(seg_seq, tcp_conn->tc_IRS) || + tcp_Gmod4G(seg_seq, tcp_conn->tc_RCV_NXT+0x10000); + if (!data_len) { if (rcv_hi == tcp_conn->tc_RCV_NXT) @@ -316,7 +369,15 @@ SYN-RECEIVED: */ if (!segm_acceptable) { - if (!(tcp_hdr_flags & THF_RST)) + if (tcp_hdr_flags & THF_RST) + ; /* do nothing */ + else if (send_rst) + { + create_RST(tcp_conn, ip_hdr, tcp_hdr, + data_len); + tcp_conn_write(tcp_conn, 1); + } + else { tcp_conn->tc_flags |= TCF_SEND_ACK; tcp_conn_write(tcp_conn, 1); @@ -343,6 +404,10 @@ SYN-RECEIVED: tcp_conn->tc_fd= NULL; tcp_close_connection (tcp_conn, ECONNREFUSED); + + /* Pick a new ISS next time */ + tcp_conn->tc_ISS= 0; + if (connuser) (void)tcp_su4listen(connuser); break; @@ -510,6 +575,12 @@ TIME-WAIT: { tcp_fd_read(tcp_conn, 1); } + if (tcp_conn->tc_fd && + (tcp_conn->tc_fd->tf_flags & + TFF_SEL_READ)) + { + tcp_rsel_read(tcp_conn); + } } } break; @@ -592,49 +663,61 @@ TIME-WAIT: if (tcp_conn->tc_state != TCS_CLOSING) tcp_conn->tc_stt= 0; - if (seg_ack == tcp_conn->tc_SND_UNA) + snd_una= tcp_conn->tc_SND_UNA; + snd_nxt= tcp_conn->tc_SND_NXT; + if (seg_ack == snd_una) { + + if (tcp_Gmod4G(snd_nxt, snd_una)) + { + /* Duplicate ACK */ + if (++tcp_conn->tc_snd_dack == + TCP_DACK_RETRANS) + { + tcp_fast_retrans(tcp_conn); + } + } + /* This ACK doesn't acknowledge any new data, this * is a likely situation if we are only receiving * data. We only update the window if we are * actually sending or if we currently have a * zero window. */ - if (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_UNA && + if (tcp_conn->tc_snd_cwnd == snd_una && seg_wnd != 0) { DBLOCK(2, printf("zero window opened\n")); /* The other side opened up its receive * window. 
*/ - if (seg_wnd > 2*tcp_conn->tc_mss) - seg_wnd= 2*tcp_conn->tc_mss; - tcp_conn->tc_snd_cwnd= - tcp_conn->tc_SND_UNA+seg_wnd; + mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE; + if (seg_wnd > 2*mss) + seg_wnd= 2*mss; + tcp_conn->tc_snd_cwnd= snd_una+seg_wnd; tcp_conn_write(tcp_conn, 1); } if (seg_wnd == 0) { tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_TRM= - tcp_conn->tc_SND_UNA; + snd_una; } } - else if (tcp_Lmod4G(tcp_conn->tc_SND_UNA, seg_ack) - && tcp_LEmod4G(seg_ack, tcp_conn-> - tc_SND_NXT)) + else if (tcp_Lmod4G(snd_una, seg_ack) && + tcp_LEmod4G(seg_ack, snd_nxt)) { tcp_release_retrans(tcp_conn, seg_ack, seg_wnd); if (tcp_conn->tc_state == TCS_CLOSED) break; } else if (tcp_Gmod4G(seg_ack, - tcp_conn->tc_SND_NXT)) + snd_nxt)) { tcp_conn->tc_flags |= TCF_SEND_ACK; tcp_conn_write(tcp_conn, 1); DBLOCK(1, printf( "got an ack of something I haven't send\n"); printf( "seg_ack= %lu, SND_NXT= %lu\n", - seg_ack, tcp_conn->tc_SND_NXT)); + seg_ack, snd_nxt)); break; } @@ -642,7 +725,7 @@ TIME-WAIT: process data... 
*/ tcp_extract_ipopt(tcp_conn, ip_hdr); - tcp_extract_tcpopt(tcp_conn, tcp_hdr); + tcp_extract_tcpopt(tcp_conn, tcp_hdr, &mss); if (data_len) { @@ -695,13 +778,16 @@ TIME-WAIT: { tcp_fd_read(tcp_conn, 1); } + if (tcp_conn->tc_fd && + (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ)) + { + tcp_rsel_read(tcp_conn); + } } break; default: -#if !CRAMPED printf("tcp_frag2conn: unknown state "); tcp_print_state(tcp_conn); -#endif break; } if (tcp_data != NULL) @@ -717,7 +803,7 @@ acc_t *tcp_data; int data_len; { u32_t lo_seq, hi_seq, urg_seq, seq_nr, adv_seq, nxt; - u16_t urgptr; + u32_t urgptr; int tcp_hdr_flags; unsigned int offset; acc_t *tmp_data, *rcvd_data, *adv_data; @@ -736,11 +822,43 @@ int data_len; lo_seq= seq_nr; tcp_hdr_flags= tcp_hdr->th_flags & TH_FLAGS_MASK; + if (tcp_Lmod4G(lo_seq, tcp_conn->tc_RCV_NXT)) + { + DBLOCK(0x10, + printf("segment is a retransmission\n")); + offset= tcp_conn->tc_RCV_NXT-lo_seq; + tcp_data= bf_delhead(tcp_data, offset); + lo_seq += offset; + data_len -= offset; + if (tcp_hdr_flags & THF_URG) + { + printf("process_data: updating urgent pointer\n"); + if (urgptr >= offset) + urgptr -= offset; + else + tcp_hdr_flags &= ~THF_URG; + } + } + assert (lo_seq == tcp_conn->tc_RCV_NXT); + + if (tcp_hdr_flags & THF_URG) + { + if (!(tcp_conn->tc_flags & TCF_BSD_URG)) + { + /* Update urgent pointer to point past the urgent + * data + */ + urgptr++; + } + if (urgptr == 0) + tcp_hdr_flags &= ~THF_URG; + } + if (tcp_hdr_flags & THF_URG) { if (urgptr > data_len) urgptr= data_len; - urg_seq= lo_seq+ urgptr; + urg_seq= lo_seq+urgptr; if (tcp_GEmod4G(urg_seq, tcp_conn->tc_RCV_HI)) urg_seq= tcp_conn->tc_RCV_HI; @@ -764,11 +882,18 @@ int data_len; { tcp_fd_read(tcp_conn, 1); } + if (tcp_conn->tc_fd && + (tcp_conn->tc_fd->tf_flags & + TFF_SEL_READ)) + { + tcp_rsel_read(tcp_conn); + } return; } } if (tcp_Gmod4G(urg_seq, tcp_conn->tc_RCV_UP)) tcp_conn->tc_RCV_UP= urg_seq; +#if 0 if (urgptr < data_len) { data_len= urgptr; @@ -777,6 +902,7 @@ int data_len; 
tcp_data= tmp_data; tcp_hdr_flags &= ~THF_FIN; } +#endif tcp_conn->tc_flags |= TCF_RCV_PUSH; } else @@ -789,17 +915,6 @@ int data_len; tcp_conn->tc_flags |= TCF_RCV_PUSH; } - if (tcp_Lmod4G(lo_seq, tcp_conn->tc_RCV_NXT)) - { - DBLOCK(0x10, - printf("segment is a retransmission\n")); - offset= tcp_conn->tc_RCV_NXT-lo_seq; - tcp_data= bf_delhead(tcp_data, offset); - lo_seq += offset; - data_len -= offset; - } - assert (lo_seq == tcp_conn->tc_RCV_NXT); - hi_seq= lo_seq+data_len; if (tcp_Gmod4G(hi_seq, tcp_conn->tc_RCV_HI)) { @@ -828,6 +943,8 @@ int data_len; if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_READ_IP)) tcp_fd_read(tcp_conn, 1); + if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ)) + tcp_rsel_read(tcp_conn); DIFBLOCK(2, (tcp_conn->tc_RCV_NXT == tcp_conn->tc_RCV_HI), printf("conn[[%d] full receive buffer\n", @@ -837,10 +954,8 @@ int data_len; return; if (tcp_hdr_flags & THF_FIN) { -#if !CRAMPED printf("conn[%d]: advanced data after FIN\n", tcp_conn-tcp_conn_table); -#endif tcp_data= tcp_conn->tc_adv_data; tcp_conn->tc_adv_data= NULL; bf_afree(tcp_data); @@ -884,6 +999,8 @@ int data_len; if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_READ_IP)) tcp_fd_read(tcp_conn, 1); + if (tcp_conn->tc_fd && (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ)) + tcp_rsel_read(tcp_conn); adv_data= tcp_conn->tc_adv_data; if (adv_data != NULL) @@ -932,6 +1049,11 @@ int data_len; { tcp_fd_read(tcp_conn, 1); } + if (tcp_conn->tc_fd && + (tcp_conn->tc_fd->tf_flags & TFF_SEL_READ)) + { + tcp_rsel_read(tcp_conn); + } } } @@ -956,6 +1078,7 @@ int data_len; if (tcp_hdr->th_flags & THF_URG) return; /* Urgent data is to complicated */ + if (tcp_hdr->th_flags & THF_PSH) tcp_conn->tc_flags |= TCF_RCV_PUSH; seq= ntohl(tcp_hdr->th_seq_nr); @@ -1003,13 +1126,10 @@ tcp_hdr_t *tcp_hdr; int data_len; { acc_t *tmp_ipopt, *tmp_tcpopt, *tcp_pack; - ip_hdropt_t ip_hdropt; - tcp_hdropt_t tcp_hdropt; acc_t *RST_acc; ip_hdr_t *RST_ip_hdr; tcp_hdr_t *RST_tcp_hdr; - char 
*ptr2RSThdr; - size_t pack_size, ip_hdr_len; + size_t pack_size, ip_hdr_len, mss; DBLOCK(0x10, printf("in create_RST, bad pack is:\n"); tcp_print_pack(ip_hdr, tcp_hdr); tcp_print_state(tcp_conn); @@ -1043,7 +1163,7 @@ int data_len; tmp_tcpopt->acc_linkC++; tcp_extract_ipopt (tcp_conn, ip_hdr); - tcp_extract_tcpopt (tcp_conn, tcp_hdr); + tcp_extract_tcpopt (tcp_conn, tcp_hdr, &mss); RST_acc= tcp_make_header (tcp_conn, &RST_ip_hdr, &RST_tcp_hdr, (acc_t *)0); @@ -1107,6 +1227,7 @@ int enq; /* Enqueue writes. */ acc_t *data; int fin_recv, urg, push, result; i32_t old_window, new_window; + u16_t mss; assert(tcp_conn->tc_busy); @@ -1130,7 +1251,13 @@ int enq; /* Enqueue writes. */ if (fin_recv) data_size--; if (urg) + { +#if DEBUG + printf("tcp_fd_read: RCV_UP = 0x%x, RCV_LO = 0x%x\n", + tcp_conn->tc_RCV_UP, tcp_conn->tc_RCV_LO); +#endif read_size= tcp_conn->tc_RCV_UP-tcp_conn->tc_RCV_LO; + } else read_size= data_size; @@ -1214,16 +1341,34 @@ int enq; /* Enqueue writes. */ tcp_conn->tc_RCV_LO += read_size; data_size -= read_size; } - if (tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_LO <= (tcp_conn-> - tc_rcv_wnd-tcp_conn->tc_mss)) + + /* Update IRS and often RCV_UP every 0.5GB */ + if (tcp_conn->tc_RCV_LO - tcp_conn->tc_IRS > 0x40000000) + { + tcp_conn->tc_IRS += 0x20000000; + DBLOCK(1, printf("tcp_fd_read: updating IRS to 0x%lx\n", + (unsigned long)tcp_conn->tc_IRS);); + if (tcp_Lmod4G(tcp_conn->tc_RCV_UP, tcp_conn->tc_IRS)) + { + tcp_conn->tc_RCV_UP= tcp_conn->tc_IRS; + DBLOCK(1, printf( + "tcp_fd_read: updating RCV_UP to 0x%lx\n", + (unsigned long)tcp_conn->tc_RCV_UP);); + } + DBLOCK(1, printf("tcp_fd_read: RCP_LO = 0x%lx\n", + (unsigned long)tcp_conn->tc_RCV_LO);); + } + + mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE; + if (tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_LO <= + tcp_conn->tc_rcv_wnd-mss) { old_window= tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_NXT; tcp_conn->tc_RCV_HI= tcp_conn->tc_RCV_LO + tcp_conn->tc_rcv_wnd; new_window= tcp_conn->tc_RCV_HI-tcp_conn->tc_RCV_NXT; 
assert(old_window >=0 && new_window >= old_window); - if (old_window < tcp_conn->tc_mss && - new_window >= tcp_conn->tc_mss) + if (old_window < mss && new_window >= mss) { tcp_conn->tc_flags |= TCF_SEND_ACK; DBLOCK(2, printf("opening window\n")); @@ -1236,18 +1381,67 @@ int enq; /* Enqueue writes. */ /* Out of data, clear PUSH flag and reply to a read. */ tcp_conn->tc_flags &= ~TCF_RCV_PUSH; } - if (fin_recv || urg || !tcp_fd->tf_read_count) - { - tcp_reply_read (tcp_fd, tcp_fd->tf_read_offset); - return; - } - if (tcp_fd->tf_read_offset) + if (fin_recv || urg || tcp_fd->tf_read_offset || + !tcp_fd->tf_read_count) { tcp_reply_read (tcp_fd, tcp_fd->tf_read_offset); return; } } +PUBLIC unsigned +tcp_sel_read(tcp_conn) +tcp_conn_t *tcp_conn; +{ + tcp_fd_t *tcp_fd; + size_t data_size; + int fin_recv, urg, push; + + tcp_fd= tcp_conn->tc_fd; + + if (tcp_conn->tc_state == TCS_CLOSED) + return 1; + + fin_recv= (tcp_conn->tc_flags & TCF_FIN_RECV); + if (fin_recv) + return 1; + + data_size= tcp_conn->tc_RCV_NXT-tcp_conn->tc_RCV_LO; + if (data_size == 0) + { + /* No data, and no end of file. */ + return 0; + } + + urg= tcp_Gmod4G(tcp_conn->tc_RCV_UP, tcp_conn->tc_RCV_LO); + push= (tcp_conn->tc_flags & TCF_RCV_PUSH); + + if (!push && !urg && data_size < TCP_MIN_RCV_WND_SIZE) + { + /* Defer until later. 
*/ + return 0; + } + + return 1; +} + +PUBLIC void +tcp_rsel_read(tcp_conn) +tcp_conn_t *tcp_conn; +{ + tcp_fd_t *tcp_fd; + + if (tcp_sel_read(tcp_conn) == 0) + return; + + tcp_fd= tcp_conn->tc_fd; + tcp_fd->tf_flags &= ~TFF_SEL_READ; + if (tcp_fd->tf_select_res) + tcp_fd->tf_select_res(tcp_fd->tf_srfd, SR_SELECT_READ); + else + printf("tcp_rsel_read: no select_res\n"); +} + /* - * $PchId: tcp_recv.c,v 1.13.2.1 2000/05/02 18:53:06 philip Exp $ + * $PchId: tcp_recv.c,v 1.30 2005/06/28 14:21:35 philip Exp $ */ diff --git a/servers/inet/generic/tcp_send.c b/servers/inet/generic/tcp_send.c index 45803381e..dca1f9ad0 100644 --- a/servers/inet/generic/tcp_send.c +++ b/servers/inet/generic/tcp_send.c @@ -9,6 +9,7 @@ Copyright 1995 Philip Homburg #include "clock.h" #include "event.h" #include "type.h" +#include "sr.h" #include "assert.h" #include "io.h" @@ -35,7 +36,13 @@ int enq; /* Writes need to be enqueued. */ if (tcp_conn->tc_flags & TCF_MORE2WRITE) return; - /* XXX - do we really have something to send here? */ + /* Do we really have something to send here? 
*/ + if (tcp_conn->tc_SND_UNA == tcp_conn->tc_SND_NXT && + !(tcp_conn->tc_flags & TCF_SEND_ACK) && + !tcp_conn->tc_frag2send) + { + return; + } tcp_conn->tc_flags |= TCF_MORE2WRITE; tcp_conn->tc_send_link= NULL; @@ -109,12 +116,17 @@ tcp_port_t *tcp_port; { if (r == NW_WOULDBLOCK) break; + if (r == EPACKSIZE) + { + tcp_mtu_exceeded(tcp_conn); + continue; + } if (r == EDSTNOTRCH) { tcp_notreach(tcp_conn); continue; } - else if (r == EBADDEST) + if (r == EBADDEST) continue; } assert(r == NW_OK || @@ -151,21 +163,23 @@ tcp_conn_t *tcp_conn; acc_t *pack2write, *tmp_pack, *tcp_pack; tcp_hdr_t *tcp_hdr; ip_hdr_t *ip_hdr; - int tot_hdr_size, ip_hdr_len; + int tot_hdr_size, ip_hdr_len, no_push, head, more2write; u32_t seg_seq, seg_lo_data, queue_lo_data, seg_hi, seg_hi_data; - u16_t seg_up; + u16_t seg_up, mss; u8_t seg_flags; - time_t new_dis; size_t pack_size; - time_t curr_time; + clock_t curr_time, new_dis; u8_t *optptr; + mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE; + assert(tcp_conn->tc_busy); curr_time= get_time(); switch (tcp_conn->tc_state) { case TCS_CLOSED: - return 0; + case TCS_LISTEN: + return NULL; case TCS_SYN_RECEIVED: case TCS_SYN_SENT: @@ -177,14 +191,19 @@ tcp_conn_t *tcp_conn; tcp_conn->tc_flags &= ~TCF_SEND_ACK; + /* Advertise a mss based on the port mtu. The current mtu may + * be lower if the other side sends a smaller mss. + */ + mss= tcp_conn->tc_port->tp_mtu-IP_TCP_MIN_HDR_SIZE; + /* Include a max segment size option. 
*/ assert(tcp_conn->tc_tcpopt == NULL); tcp_conn->tc_tcpopt= bf_memreq(4); optptr= (u8_t *)ptr2acc_data(tcp_conn->tc_tcpopt); optptr[0]= TCP_OPT_MSS; optptr[1]= 4; - optptr[2]= tcp_conn->tc_mss >> 8; - optptr[3]= tcp_conn->tc_mss & 0xFF; + optptr[2]= mss >> 8; + optptr[3]= mss & 0xFF; pack2write= tcp_make_header(tcp_conn, &ip_hdr, &tcp_hdr, (acc_t *)0); @@ -216,7 +235,7 @@ tcp_conn_t *tcp_conn; tcp_hdr->th_seq_nr= htonl(seg_seq); tcp_hdr->th_ack_nr= htonl(tcp_conn->tc_RCV_NXT); tcp_hdr->th_flags= seg_flags; - tcp_hdr->th_window= htons(tcp_conn->tc_mss); + tcp_hdr->th_window= htons(mss); /* Initially we allow one segment */ ip_hdr->ih_length= htons(tot_hdr_size); @@ -293,15 +312,71 @@ tcp_conn_t *tcp_conn; } tot_hdr_size= bf_bufsize(pack2write); - if (seg_hi_data - seg_lo_data > tcp_conn->tc_mss - - tot_hdr_size) + + no_push= (tcp_LEmod4G(tcp_conn->tc_SND_PSH, seg_seq)); + head= (seg_seq == tcp_conn->tc_SND_UNA); + if (no_push) { - seg_hi_data= seg_lo_data + tcp_conn->tc_mss - - tot_hdr_size; + /* Shutdown sets SND_PSH */ + seg_flags &= ~THF_FIN; + if (seg_hi_data-seg_lo_data <= 1) + { + /* Allways keep at least one byte + * for a future push. 
+ */ + DBLOCK(0x20, + printf("no data: no push\n")); + if (head) + { + DBLOCK(0x1, printf( + "no data: setting TCF_NO_PUSH\n")); + tcp_conn->tc_flags |= + TCF_NO_PUSH; + } + goto after_data; + } + seg_hi_data--; + } + + if (tot_hdr_size != IP_TCP_MIN_HDR_SIZE) + { + printf( + "tcp_write`make_pack: tot_hdr_size = %d\n", + tot_hdr_size); + mss= tcp_conn->tc_mtu-tot_hdr_size; + } + if (seg_hi_data - seg_lo_data > mss) + { + /* Truncate to at most one segment */ + seg_hi_data= seg_lo_data + mss; seg_hi= seg_hi_data; seg_flags &= ~THF_FIN; } + if (no_push && + seg_hi_data-seg_lo_data != mss) + { + DBLOCK(0x20, printf( + "no data: no push for partial segment\n")); + more2write= (tcp_conn->tc_fd && + (tcp_conn->tc_fd->tf_flags & + TFF_WRITE_IP)); + DIFBLOCK(2, more2write, + printf( + "tcp_send`make_pack: more2write -> !TCF_NO_PUSH\n"); + ); + if (head && !more2write) + { + DBLOCK(0x1, printf( + "partial segment: setting TCF_NO_PUSH\n")); + tcp_conn->tc_flags |= TCF_NO_PUSH; + tcp_print_conn(tcp_conn); + printf("\n"); + } + goto after_data; + } + + if (tcp_Gmod4G(seg_hi, tcp_conn->tc_snd_cwnd)) { seg_hi_data= tcp_conn->tc_snd_cwnd; @@ -309,6 +384,23 @@ tcp_conn_t *tcp_conn; seg_flags &= ~THF_FIN; } + if (!head && + seg_hi_data-seg_lo_data < mss) + { + if (tcp_conn->tc_flags & TCF_PUSH_NOW) + { + DBLOCK(0x20, + printf("push: no Nagle\n")); + } + else + { + DBLOCK(0x20, + printf("no data: partial packet\n")); + seg_flags &= ~THF_FIN; + goto after_data; + } + } + if (seg_hi-seg_seq == 0) { DBLOCK(0x20, @@ -316,18 +408,10 @@ tcp_conn_t *tcp_conn; goto after_data; } - if (seg_seq != tcp_conn->tc_SND_UNA && - seg_hi_data-seg_lo_data+tot_hdr_size < - tcp_conn->tc_mss) - { - DBLOCK(0x20, - printf("no data: partial packet\n")); - seg_flags &= ~THF_FIN; - goto after_data; - } - if (tcp_GEmod4G(tcp_conn->tc_SND_UP, seg_lo_data)) { + extern int killer_inet; + if (tcp_GEmod4G(tcp_conn->tc_SND_UP, seg_hi_data)) { @@ -338,7 +422,8 @@ tcp_conn_t *tcp_conn; seg_up= 
tcp_conn->tc_SND_UP-seg_seq; } seg_flags |= THF_URG; - if ((tcp_conn->tc_flags & TCF_BSD_URG) && + if (!killer_inet && + (tcp_conn->tc_flags & TCF_BSD_URG) && seg_up == 0) { /* A zero urgent pointer doesn't mean @@ -432,11 +517,9 @@ after_data: tcp_conn->tc_senddis= new_dis; return pack2write; -#if !CRAMPED default: DBLOCK(1, tcp_print_conn(tcp_conn); printf("\n")); ip_panic(( "Illegal state" )); -#endif } assert(0); return NULL; @@ -451,17 +534,24 @@ tcp_conn_t *tcp_conn; u32_t seg_ack; u16_t new_win; { + tcp_fd_t *tcp_fd; size_t size, offset; acc_t *pack; - time_t retrans_time, curr_time, rtt; + clock_t retrans_time, curr_time, rtt, artt, drtt, srtt; u32_t queue_lo, queue_hi; u16_t mss, cthresh; unsigned window; + DBLOCK(0x10, printf("tcp_release_retrans, conn[%d]: ack %lu, win %u\n", + tcp_conn-tcp_conn_table, (unsigned long)seg_ack, new_win);); + assert(tcp_conn->tc_busy); assert (tcp_GEmod4G(seg_ack, tcp_conn->tc_SND_UNA)); assert (tcp_LEmod4G(seg_ack, tcp_conn->tc_SND_NXT)); + tcp_conn->tc_snd_dack= 0; + mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE; + curr_time= get_time(); if (tcp_conn->tc_rt_seq != 0 && tcp_Gmod4G(seg_ack, tcp_conn->tc_rt_seq)) @@ -470,11 +560,6 @@ u16_t new_win; retrans_time= curr_time-tcp_conn->tc_rt_time; rtt= tcp_conn->tc_rtt; - DBLOCK(0x20, printf( - "tcp_release_retrans, conn[%d]: retrans_time= %ld ms\n", - tcp_conn-tcp_conn_table, retrans_time*1000/HZ)); - - tcp_conn->tc_rt_seq= 0; if (rtt == TCP_RTT_GRAN*CLOCK_GRAN && @@ -482,18 +567,25 @@ u16_t new_win; { /* Common in fast networks. Nothing to do. */ } - else if (rtt >= retrans_time && rtt <= 2*retrans_time) + else { - /* Nothing to do. We assume that a factor 2 for - * variance is enough. - */ - } - else if (retrans_time > rtt) - { - /* Retrans time is really too small. 
*/ + srtt= retrans_time * TCP_RTT_SCALE; - tcp_conn->tc_rtt= rtt*2; - if (tcp_conn->tc_rtt > TCP_RTT_MAX) + artt= tcp_conn->tc_artt; + artt= ((TCP_RTT_SMOOTH-1)*artt+srtt)/TCP_RTT_SMOOTH; + + srtt -= artt; + if (srtt < 0) + srtt= -srtt; + drtt= tcp_conn->tc_drtt; + drtt= ((TCP_RTT_SMOOTH-1)*drtt+srtt)/TCP_RTT_SMOOTH; + + rtt= (artt+TCP_DRTT_MULT*drtt-1)/TCP_RTT_SCALE+1; + if (rtt < TCP_RTT_GRAN*CLOCK_GRAN) + { + rtt= TCP_RTT_GRAN*CLOCK_GRAN; + } + else if (rtt > TCP_RTT_MAX) { #if DEBUG static int warned /* = 0 */; @@ -506,50 +598,33 @@ u16_t new_win; warned= 1; } #endif - tcp_conn->tc_rtt= TCP_RTT_MAX; + rtt= TCP_RTT_MAX; } - assert (tcp_conn->tc_rtt); + DBLOCK(0x10, printf( + "tcp_release_retrans, conn[%d]: retrans_time= %ld ms, rtt = %ld ms\n", + tcp_conn-tcp_conn_table, + retrans_time*1000/HZ, + rtt*1000/HZ)); DBLOCK(0x10, printf( -"tcp_release_retrans, conn[%d]: (was too small) retrans_time= %ld ms, rtt= %ld ms\n", - tcp_conn-tcp_conn_table, retrans_time*1000/HZ, - tcp_conn->tc_rtt*1000/HZ)); + "tcp_release_retrans: artt= %ld -> %ld, drtt= %ld -> %ld\n", + tcp_conn->tc_artt, artt, + tcp_conn->tc_drtt, drtt)); - - } - else if (seg_ack - tcp_conn->tc_rt_seq == tcp_conn->tc_mss) - { - /* Retrans time is really too big. */ - rtt= (rtt*3)>>2; - if (rtt < TCP_RTT_GRAN*CLOCK_GRAN) - rtt= TCP_RTT_GRAN*CLOCK_GRAN; + tcp_conn->tc_artt= artt; + tcp_conn->tc_drtt= drtt; tcp_conn->tc_rtt= rtt; - assert (tcp_conn->tc_rtt); - - DBLOCK(0x10, printf( -"tcp_release_retrans, conn[%d]: (was too big) retrans_time= %ld ms, rtt= %ld ms\n", - tcp_conn-tcp_conn_table, retrans_time*1000/HZ, - tcp_conn->tc_rtt*1000/HZ)); } - else - { - /* Retrans time might be too big. Try a bit smaller. 
*/ - rtt= (rtt*31)>>5; - if (rtt < TCP_RTT_GRAN*CLOCK_GRAN) - rtt= TCP_RTT_GRAN*CLOCK_GRAN; - tcp_conn->tc_rtt= rtt; - assert (tcp_conn->tc_rtt); - DBLOCK(0x20, printf( -"tcp_release_retrans, conn[%d]: (maybe too big) retrans_time= %ld ms, rtt= %ld ms\n", - tcp_conn-tcp_conn_table, retrans_time*1000/HZ, - tcp_conn->tc_rtt*1000/HZ)); + if (tcp_conn->tc_mtu != tcp_conn->tc_max_mtu && + curr_time > tcp_conn->tc_mtutim+TCP_PMTU_INCR_IV) + { + tcp_mtu_incr(tcp_conn); } } /* Update the current window. */ window= tcp_conn->tc_snd_cwnd-tcp_conn->tc_SND_UNA; - mss= tcp_conn->tc_mss; assert(seg_ack != tcp_conn->tc_SND_UNA); /* For every real ACK we try to increase the current window @@ -588,6 +663,22 @@ u16_t new_win; } assert(tcp_GEmod4G(tcp_conn->tc_snd_cwnd, seg_ack)); + /* Advance ISS every 0.5GB to avoid problem with wrap around */ + if (tcp_conn->tc_SND_UNA - tcp_conn->tc_ISS > 0x40000000) + { + tcp_conn->tc_ISS += 0x20000000; + DBLOCK(1, printf( + "tcp_release_retrans: updating ISS to 0x%lx\n", + (unsigned long)tcp_conn->tc_ISS);); + if (tcp_Lmod4G(tcp_conn->tc_SND_UP, tcp_conn->tc_ISS)) + { + tcp_conn->tc_SND_UP= tcp_conn->tc_ISS; + DBLOCK(1, printf( + "tcp_release_retrans: updating SND_UP to 0x%lx\n", + (unsigned long)tcp_conn->tc_SND_UP);); + } + } + if (queue_lo == tcp_conn->tc_ISS) queue_lo++; @@ -608,10 +699,6 @@ u16_t new_win; if (!size) { bf_afree(pack); - - /* Reset window if a write is completed */ - tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + - 2*tcp_conn->tc_mss; } else { @@ -622,18 +709,25 @@ u16_t new_win; if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_snd_cwnd)) tcp_conn->tc_SND_TRM= tcp_conn->tc_snd_cwnd; - /* Copy in new data if a write request is pending and - * SND_NXT-SND_TRM is less than 1 mss. + /* Copy in new data if an ioctl is pending or if a write request is + * pending and either the write can be completed or at least one + * mss buffer space is available. 
*/ - if (tcp_conn->tc_fd) + tcp_fd= tcp_conn->tc_fd; + if (tcp_fd) { - if ((tcp_conn->tc_fd->tf_flags & - (TFF_WRITE_IP|TFF_IOCTL_IP)) && - tcp_conn->tc_SND_NXT-tcp_conn->tc_SND_TRM < - tcp_conn->tc_mss) + if (tcp_fd->tf_flags & TFF_IOCTL_IP) { tcp_fd_write(tcp_conn); } + if ((tcp_fd->tf_flags & TFF_WRITE_IP) && + (size+tcp_fd->tf_write_count <= TCP_MAX_SND_WND_SIZE || + size <= TCP_MAX_SND_WND_SIZE-mss)) + { + tcp_fd_write(tcp_conn); + } + if (tcp_fd->tf_flags & TFF_SEL_WRITE) + tcp_rsel_write(tcp_conn); } else { @@ -647,6 +741,12 @@ u16_t new_win; } } + if (!size && !tcp_conn->tc_send_data) + { + /* Reset window if a write is completed */ + tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + mss; + } + DIFBLOCK(2, (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_TRM), printf("not sending: zero window\n")); @@ -659,105 +759,13 @@ u16_t new_win; } /* -tcp_send_timeout +tcp_fast_retrans */ -PRIVATE void tcp_send_timeout(conn, timer) -int conn; -struct timer *timer; +PUBLIC void tcp_fast_retrans(tcp_conn) +tcp_conn_t *tcp_conn; { - tcp_conn_t *tcp_conn; u16_t mss, mss2; - time_t curr_time, stt, timeout; - - curr_time= get_time(); - - tcp_conn= &tcp_conn_table[conn]; - assert(tcp_conn->tc_flags & TCF_INUSE); - assert(tcp_conn->tc_state != TCS_CLOSED); - assert(tcp_conn->tc_state != TCS_LISTEN); - - if (tcp_conn->tc_SND_NXT == tcp_conn->tc_SND_UNA) - { - /* Nothing to do */ - assert(tcp_conn->tc_SND_TRM == tcp_conn->tc_SND_UNA); - - /* A new write sets the timer if tc_transmit_seq == SND_UNA */ - tcp_conn->tc_transmit_seq= tcp_conn->tc_SND_UNA; - tcp_conn->tc_stt= 0; - tcp_conn->tc_0wnd_to= 0; - assert(!tcp_conn->tc_fd || - !(tcp_conn->tc_fd->tf_flags & TFF_WRITE_IP)); - return; - } - - if (tcp_conn->tc_transmit_seq != tcp_conn->tc_SND_UNA) - { - /* Some data has been acknowledged since the last time the - * timer was set, set the timer again. 
*/ - tcp_conn->tc_transmit_seq= tcp_conn->tc_SND_UNA; - tcp_conn->tc_stt= 0; - tcp_conn->tc_0wnd_to= 0; - - DBLOCK(0x20, printf( - "tcp_send_timeout: conn[%d] setting timer to %ld ms (+%ld ms)\n", - tcp_conn-tcp_conn_table, - (curr_time+tcp_conn->tc_rtt)*1000/HZ, - tcp_conn->tc_rtt*1000/HZ)); - - clck_timer(&tcp_conn->tc_transmit_timer, - curr_time+tcp_conn->tc_rtt, - tcp_send_timeout, tcp_conn-tcp_conn_table); - return; - } - - if (tcp_conn->tc_stt == 0) - { - /* Some packet arrived but did not acknowledge any data. - * Apparently, the other side is still alive and has a - * reason to transmit. We can asume a zero window. - */ - - DBLOCK(0x10, printf("conn[%d] setting zero window timer\n", - tcp_conn-tcp_conn_table)); - - if (tcp_conn->tc_0wnd_to < TCP_0WND_MIN) - tcp_conn->tc_0wnd_to= TCP_0WND_MIN; - else if (tcp_conn->tc_0wnd_to < tcp_conn->tc_rtt) - tcp_conn->tc_0wnd_to= tcp_conn->tc_rtt; - else - { - tcp_conn->tc_0wnd_to *= 2; - if (tcp_conn->tc_0wnd_to > TCP_0WND_MAX) - tcp_conn->tc_0wnd_to= TCP_0WND_MAX; - } - tcp_conn->tc_stt= curr_time; - - tcp_conn->tc_rt_seq= 0; - - DBLOCK(0x20, printf( - "tcp_send_timeout: conn[%d] setting timer to %ld ms (+%ld ms)\n", - tcp_conn-tcp_conn_table, - (curr_time+tcp_conn->tc_0wnd_to)*1000/HZ, - tcp_conn->tc_0wnd_to*1000/HZ)); - - clck_timer(&tcp_conn->tc_transmit_timer, - curr_time+tcp_conn->tc_0wnd_to, - tcp_send_timeout, tcp_conn-tcp_conn_table); - return; - } - - DIFBLOCK(0x10, (tcp_conn->tc_fd == 0), - printf("conn[%d] timeout in abondoned connection\n", - tcp_conn-tcp_conn_table)); - - /* At this point, we have do a retransmission, or send a zero window - * probe, which is almost the same. - */ - - DBLOCK(0x20, printf("tcp_send_timeout: conn[%d] una= %u, rtt= %dms\n", - tcp_conn-tcp_conn_table, - tcp_conn->tc_SND_UNA, tcp_conn->tc_rtt*1000/HZ)); /* Update threshold sequence number for retransmission calculation. 
*/ if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_rt_threshold)) @@ -765,7 +773,7 @@ struct timer *timer; tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA; - mss= tcp_conn->tc_mss; + mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE; mss2= 2*mss; if (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_UNA) @@ -781,8 +789,231 @@ struct timer *timer; tcp_conn->tc_snd_cthresh= mss2; } + tcp_conn_write(tcp_conn, 1); +} + +#if 0 +PUBLIC void do_tcp_timeout(tcp_conn) +tcp_conn_t *tcp_conn; +{ + tcp_send_timeout(tcp_conn-tcp_conn_table, + &tcp_conn->tc_transmit_timer); +} +#endif + +/* +tcp_send_timeout +*/ + +PRIVATE void tcp_send_timeout(conn, timer) +int conn; +struct timer *timer; +{ + tcp_conn_t *tcp_conn; + u16_t mss, mss2; + u32_t snd_una, snd_nxt; + clock_t curr_time, rtt, stt, timeout; + acc_t *pkt; + int new_ttl, no_push; + + DBLOCK(0x20, printf("tcp_send_timeout: conn[%d]\n", conn)); + + curr_time= get_time(); + + tcp_conn= &tcp_conn_table[conn]; + assert(tcp_conn->tc_flags & TCF_INUSE); + assert(tcp_conn->tc_state != TCS_CLOSED); + assert(tcp_conn->tc_state != TCS_LISTEN); + + snd_una= tcp_conn->tc_SND_UNA; + snd_nxt= tcp_conn->tc_SND_NXT; + no_push= (tcp_conn->tc_flags & TCF_NO_PUSH); + if (snd_nxt == snd_una || no_push) + { + /* Nothing more to send */ + assert(tcp_conn->tc_SND_TRM == snd_una || no_push); + + /* A new write sets the timer if tc_transmit_seq == SND_UNA */ + tcp_conn->tc_transmit_seq= tcp_conn->tc_SND_UNA; + tcp_conn->tc_stt= 0; + tcp_conn->tc_0wnd_to= 0; + assert(!tcp_conn->tc_fd || + !(tcp_conn->tc_fd->tf_flags & TFF_WRITE_IP) || + (tcp_print_conn(tcp_conn), printf("\n"), 0)); + + if (snd_nxt != snd_una) + { + assert(no_push); + DBLOCK(1, printf("not setting keepalive timer\n");); + + /* No point in setting the keepalive timer if we + * still have to send more data. 
+ */ + return; + } + + assert(tcp_conn->tc_send_data == NULL); + DBLOCK(0x20, printf("keep alive timer\n")); + if (tcp_conn->tc_ka_snd != tcp_conn->tc_SND_NXT || + tcp_conn->tc_ka_rcv != tcp_conn->tc_RCV_NXT) + { + tcp_conn->tc_ka_snd= tcp_conn->tc_SND_NXT; + tcp_conn->tc_ka_rcv= tcp_conn->tc_RCV_NXT; + DBLOCK(0x20, printf( +"tcp_send_timeout: conn[%d] setting keepalive timer (+%ld ms)\n", + tcp_conn-tcp_conn_table, + tcp_conn->tc_ka_time*1000/HZ)); + clck_timer(&tcp_conn->tc_transmit_timer, + curr_time+tcp_conn->tc_ka_time, + tcp_send_timeout, + tcp_conn-tcp_conn_table); + return; + } + DBLOCK(0x10, printf( + "tcp_send_timeout, conn[%d]: triggering keep alive probe\n", + tcp_conn-tcp_conn_table)); + tcp_conn->tc_ka_snd--; + if (!(tcp_conn->tc_flags & TCF_FIN_SENT)) + { + pkt= bf_memreq(1); + *ptr2acc_data(pkt)= '\xff'; /* a random char */ + tcp_conn->tc_send_data= pkt; pkt= NULL; + } + tcp_conn->tc_SND_UNA--; + if (tcp_conn->tc_SND_UNA == tcp_conn->tc_ISS) + { + /* We didn't send anything so far. Retrying the + * SYN is too hard. Decrement ISS and hope + * that the other side doesn't care. + */ + tcp_conn->tc_ISS--; + } + + /* Set tc_transmit_seq and tc_stt to trigger packet */ + tcp_conn->tc_transmit_seq= tcp_conn->tc_SND_UNA; + tcp_conn->tc_stt= curr_time; + + /* Set tc_rt_seq for round trip measurements */ + tcp_conn->tc_rt_time= curr_time; + tcp_conn->tc_rt_seq= tcp_conn->tc_SND_UNA; + + /* Set PSH to make sure that data gets sent */ + tcp_conn->tc_SND_PSH= tcp_conn->tc_SND_NXT; + assert(tcp_check_conn(tcp_conn)); + + /* Fall through */ + } + + rtt= tcp_conn->tc_rtt; + + if (tcp_conn->tc_transmit_seq != tcp_conn->tc_SND_UNA) + { + /* Some data has been acknowledged since the last time the + * timer was set, set the timer again. 
*/ + tcp_conn->tc_transmit_seq= tcp_conn->tc_SND_UNA; + tcp_conn->tc_stt= 0; + tcp_conn->tc_0wnd_to= 0; + + DBLOCK(0x20, printf( + "tcp_send_timeout: conn[%d] setting timer to %ld ms (+%ld ms)\n", + tcp_conn-tcp_conn_table, + (curr_time+rtt)*1000/HZ, rtt*1000/HZ)); + + clck_timer(&tcp_conn->tc_transmit_timer, + curr_time+rtt, tcp_send_timeout, + tcp_conn-tcp_conn_table); + return; + } + stt= tcp_conn->tc_stt; + if (stt == 0) + { + /* Some packet arrived but did not acknowledge any data. + * Apparently, the other side is still alive and has a + * reason to transmit. We can assume a zero window. + */ + + DBLOCK(0x10, printf("conn[%d] setting zero window timer\n", + tcp_conn-tcp_conn_table)); + + if (tcp_conn->tc_0wnd_to < TCP_0WND_MIN) + tcp_conn->tc_0wnd_to= TCP_0WND_MIN; + else if (tcp_conn->tc_0wnd_to < rtt) + tcp_conn->tc_0wnd_to= rtt; + else + { + tcp_conn->tc_0wnd_to *= 2; + if (tcp_conn->tc_0wnd_to > TCP_0WND_MAX) + tcp_conn->tc_0wnd_to= TCP_0WND_MAX; + } + tcp_conn->tc_stt= curr_time; + tcp_conn->tc_rt_seq= 0; + + DBLOCK(0x10, printf( + "tcp_send_timeout: conn[%d] setting timer to %ld ms (+%ld ms)\n", + tcp_conn-tcp_conn_table, + (curr_time+tcp_conn->tc_0wnd_to)*1000/HZ, + tcp_conn->tc_0wnd_to*1000/HZ)); + + clck_timer(&tcp_conn->tc_transmit_timer, + curr_time+tcp_conn->tc_0wnd_to, + tcp_send_timeout, tcp_conn-tcp_conn_table); + return; + } assert(stt <= curr_time); + + DIFBLOCK(0x10, (tcp_conn->tc_fd == 0), + printf("conn[%d] timeout in abondoned connection\n", + tcp_conn-tcp_conn_table)); + + /* At this point, we have to do a retransmission, or send a zero window + * probe, which is almost the same. + */ + + DBLOCK(0x20, printf("tcp_send_timeout: conn[%d] una= %lu, rtt= %ldms\n", + tcp_conn-tcp_conn_table, + (unsigned long)tcp_conn->tc_SND_UNA, rtt*1000/HZ)); + + /* Update threshold sequence number for retransmission calculation.
*/ + if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_rt_threshold)) + tcp_conn->tc_rt_threshold= tcp_conn->tc_SND_TRM; + + tcp_conn->tc_SND_TRM= tcp_conn->tc_SND_UNA; + + if (tcp_conn->tc_flags & TCF_PMTU && + curr_time > stt+TCP_PMTU_BLACKHOLE) + { + /* We can't tell the difference between a PMTU blackhole + * and a broken link. Assume a PMTU blackhole, and switch + * off PMTU discovery. + */ + DBLOCK(1, printf( + "tcp[%d]: PMTU blackhole (or broken link) on route to ", + tcp_conn-tcp_conn_table); + writeIpAddr(tcp_conn->tc_remaddr); + printf(", max mtu = %u\n", tcp_conn->tc_max_mtu);); + tcp_conn->tc_flags &= ~TCF_PMTU; + tcp_conn->tc_mtutim= curr_time; + if (tcp_conn->tc_max_mtu > IP_DEF_MTU) + tcp_conn->tc_mtu= IP_DEF_MTU; + } + + mss= tcp_conn->tc_mtu-IP_TCP_MIN_HDR_SIZE; + mss2= 2*mss; + + if (tcp_conn->tc_snd_cwnd == tcp_conn->tc_SND_UNA) + tcp_conn->tc_snd_cwnd++; + if (tcp_Gmod4G(tcp_conn->tc_snd_cwnd, tcp_conn->tc_SND_UNA + mss2)) + { + tcp_conn->tc_snd_cwnd= tcp_conn->tc_SND_UNA + mss2; + if (tcp_Gmod4G(tcp_conn->tc_SND_TRM, tcp_conn->tc_snd_cwnd)) + tcp_conn->tc_SND_TRM= tcp_conn->tc_snd_cwnd; + + tcp_conn->tc_snd_cthresh /= 2; + if (tcp_conn->tc_snd_cthresh < mss2) + tcp_conn->tc_snd_cthresh= mss2; + } + if (curr_time-stt > tcp_conn->tc_rt_dead) { tcp_close_connection(tcp_conn, ETIMEDOUT); @@ -790,8 +1021,8 @@ struct timer *timer; } timeout= (curr_time-stt) >> 3; - if (timeout < tcp_conn->tc_rtt) - timeout= tcp_conn->tc_rtt; + if (timeout < rtt) + timeout= rtt; timeout += curr_time; DBLOCK(0x20, printf( @@ -802,11 +1033,24 @@ struct timer *timer; clck_timer(&tcp_conn->tc_transmit_timer, timeout, tcp_send_timeout, tcp_conn-tcp_conn_table); +#if 0 if (tcp_conn->tc_rt_seq == 0) { - tcp_conn->tc_rt_time= curr_time-tcp_conn->tc_rtt; + printf("tcp_send_timeout: conn[%d]: setting tc_rt_time\n", + tcp_conn-tcp_conn_table); + tcp_conn->tc_rt_time= curr_time-rtt; tcp_conn->tc_rt_seq= tcp_conn->tc_SND_UNA; } +#endif + + if (tcp_conn->tc_state == TCS_SYN_SENT || 
+ (curr_time-stt >= tcp_conn->tc_ttl*HZ)) + { + new_ttl= tcp_conn->tc_ttl+1; + if (new_ttl> IP_MAX_TTL) + new_ttl= IP_MAX_TTL; + tcp_conn->tc_ttl= new_ttl; + } tcp_conn_write(tcp_conn, 0); } @@ -818,8 +1062,8 @@ tcp_conn_t *tcp_conn; tcp_fd_t *tcp_fd; int urg, nourg, push; u32_t max_seq; - size_t max_count, max_trans, write_count, send_count; - acc_t *data, *tmp_acc, *send_data; + size_t max_trans, write_count; + acc_t *data, *send_data; assert(tcp_conn->tc_busy); tcp_fd= tcp_conn->tc_fd; @@ -872,10 +1116,7 @@ tcp_conn_t *tcp_conn; urg= (tcp_fd->tf_flags & TFF_WR_URG); push= (tcp_fd->tf_flags & TFF_PUSH_DATA); - max_seq= tcp_conn->tc_SND_UNA + tcp_conn->tc_snd_wnd; - if (urg) - max_seq++; - max_count= max_seq - tcp_conn->tc_SND_UNA; + max_seq= tcp_conn->tc_SND_UNA + TCP_MAX_SND_WND_SIZE; max_trans= max_seq - tcp_conn->tc_SND_NXT; if (tcp_fd->tf_write_count <= max_trans) write_count= tcp_fd->tf_write_count; @@ -937,6 +1178,63 @@ tcp_conn_t *tcp_conn; } } +PUBLIC unsigned tcp_sel_write(tcp_conn) +tcp_conn_t *tcp_conn; +{ + tcp_fd_t *tcp_fd; + int urg, nourg; + u32_t max_seq; + size_t max_trans; + + tcp_fd= tcp_conn->tc_fd; + + if (tcp_conn->tc_state == TCS_CLOSED) + return 1; + + urg= (tcp_fd->tf_flags & TFF_WR_URG); + + max_seq= tcp_conn->tc_SND_UNA + TCP_MAX_SND_WND_SIZE; + max_trans= max_seq - tcp_conn->tc_SND_NXT; + if (max_trans) + { + if (tcp_conn->tc_flags & TCF_BSD_URG) + { + if (tcp_Gmod4G(tcp_conn->tc_SND_NXT, + tcp_conn->tc_SND_UNA)) + { + nourg= tcp_LEmod4G(tcp_conn->tc_SND_UP, + tcp_conn->tc_SND_UNA); + if ((urg && nourg) || (!urg && !nourg)) + { + DBLOCK(0x20, + printf("not sending\n")); + return 0; + } + } + } + return 1; + } + + return 0; +} + +PUBLIC void +tcp_rsel_write(tcp_conn) +tcp_conn_t *tcp_conn; +{ + tcp_fd_t *tcp_fd; + + if (tcp_sel_write(tcp_conn) == 0) + return; + + tcp_fd= tcp_conn->tc_fd; + tcp_fd->tf_flags &= ~TFF_SEL_WRITE; + if (tcp_fd->tf_select_res) + tcp_fd->tf_select_res(tcp_fd->tf_srfd, SR_SELECT_WRITE); + else + 
printf("tcp_rsel_write: no select_res\n"); +} + /* tcp_shutdown */ @@ -957,40 +1255,39 @@ tcp_conn_t *tcp_conn; if (tcp_conn->tc_flags & TCF_FIN_SENT) return; tcp_conn->tc_flags |= TCF_FIN_SENT; + tcp_conn->tc_flags &= ~TCF_NO_PUSH; tcp_conn->tc_SND_NXT++; + tcp_conn->tc_SND_PSH= tcp_conn->tc_SND_NXT; assert (tcp_check_conn(tcp_conn) || (tcp_print_conn(tcp_conn), printf("\n"), 0)); tcp_conn_write(tcp_conn, 1); - /* Start the timer (if necessary) */ + /* Start the timer */ tcp_set_send_timer(tcp_conn); } PUBLIC void tcp_set_send_timer(tcp_conn) tcp_conn_t *tcp_conn; { - time_t curr_time; + clock_t curr_time; + clock_t rtt; assert(tcp_conn->tc_state != TCS_CLOSED); assert(tcp_conn->tc_state != TCS_LISTEN); curr_time= get_time(); - - /* Start the timer */ + rtt= tcp_conn->tc_rtt; DBLOCK(0x20, printf( "tcp_set_send_timer: conn[%d] setting timer to %ld ms (+%ld ms)\n", tcp_conn-tcp_conn_table, - (curr_time+tcp_conn->tc_rtt)*1000/HZ, - tcp_conn->tc_rtt*1000/HZ)); + (curr_time+rtt)*1000/HZ, rtt*1000/HZ)); + /* Start the timer */ clck_timer(&tcp_conn->tc_transmit_timer, - curr_time+tcp_conn->tc_rtt, - tcp_send_timeout, tcp_conn-tcp_conn_table); - tcp_conn->tc_stt= curr_time; - + curr_time+rtt, tcp_send_timeout, tcp_conn-tcp_conn_table); tcp_conn->tc_stt= curr_time; } @@ -1007,7 +1304,8 @@ int error; tcp_fd_t *tcp_fd; tcp_conn_t *tc; - assert (tcp_check_conn(tcp_conn)); + assert (tcp_check_conn(tcp_conn) || + (tcp_print_conn(tcp_conn), printf("\n"), 0)); assert (tcp_conn->tc_flags & TCF_INUSE); tcp_conn->tc_error= error; @@ -1027,6 +1325,8 @@ int error; if (tcp_fd->tf_flags & TFF_READ_IP) tcp_fd_read (tcp_conn, 1); assert (!(tcp_fd->tf_flags & TFF_READ_IP)); + if (tcp_fd->tf_flags & TFF_SEL_READ) + tcp_rsel_read (tcp_conn); if (tcp_fd->tf_flags & TFF_WRITE_IP) { @@ -1041,12 +1341,15 @@ int error; } if (tcp_fd->tf_flags & TFF_IOCTL_IP) assert(tcp_fd->tf_ioreq != NWIOTCPSHUTDOWN); + if (tcp_fd->tf_flags & TFF_SEL_WRITE) + tcp_rsel_write(tcp_conn); if 
(tcp_conn->tc_connInprogress) tcp_restart_connect(tcp_conn->tc_fd); assert (!tcp_conn->tc_connInprogress); assert (!(tcp_fd->tf_flags & TFF_IOCTL_IP) || - (printf("req= 0x%lx\n", tcp_fd->tf_ioreq), 0)); + (printf("req= 0x%lx\n", + (unsigned long)tcp_fd->tf_ioreq), 0)); tcp_conn->tc_busy--; } @@ -1120,5 +1423,5 @@ int error; } /* - * $PchId: tcp_send.c,v 1.12 1996/12/17 07:57:11 philip Exp $ + * $PchId: tcp_send.c,v 1.32 2005/06/28 14:21:52 philip Exp $ */ diff --git a/servers/inet/generic/type.h b/servers/inet/generic/type.h index d4a23b0ab..7a588b051 100644 --- a/servers/inet/generic/type.h +++ b/servers/inet/generic/type.h @@ -12,9 +12,10 @@ typedef struct acc *(*get_userdata_t) ARGS(( int fd, size_t offset, typedef int (*put_userdata_t) ARGS(( int fd, size_t offset, struct acc *data, int for_ioctl )); typedef void (*put_pkt_t) ARGS(( int fd, struct acc *data, size_t datalen )); +typedef void (*select_res_t) ARGS(( int fd, unsigned ops )); #endif /* INET_TYPE_H */ /* - * $PchId: type.h,v 1.5 1995/11/21 06:51:58 philip Exp $ + * $PchId: type.h,v 1.6 2005/06/28 14:22:04 philip Exp $ */ diff --git a/servers/inet/generic/udp.c b/servers/inet/generic/udp.c index 95211a7c8..9fdf0016d 100644 --- a/servers/inet/generic/udp.c +++ b/servers/inet/generic/udp.c @@ -15,74 +15,22 @@ Copyright 1995 Philip Homburg #include "ip.h" #include "sr.h" #include "udp.h" +#include "udp_int.h" THIS_FILE -#define UDP_FD_NR (4*IP_PORT_MAX) -#define UDP_PORT_HASH_NR 16 /* Must be a power of 2 */ - -typedef struct udp_port -{ - int up_flags; - int up_state; - int up_ipfd; - int up_ipdev; - acc_t *up_wr_pack; - ipaddr_t up_ipaddr; - struct udp_fd *up_next_fd; - struct udp_fd *up_write_fd; - struct udp_fd *up_port_any; - struct udp_fd *up_port_hash[UDP_PORT_HASH_NR]; -} udp_port_t; - -#define UPF_EMPTY 0x0 -#define UPF_WRITE_IP 0x1 -#define UPF_WRITE_SP 0x2 -#define UPF_READ_IP 0x4 -#define UPF_READ_SP 0x8 -#define UPF_SUSPEND 0x10 -#define UPF_MORE2WRITE 0x20 - -#define UPS_EMPTY 0 -#define 
UPS_SETPROTO 1 -#define UPS_GETCONF 2 -#define UPS_MAIN 3 -#define UPS_ERROR 4 - -typedef struct udp_fd -{ - int uf_flags; - udp_port_t *uf_port; - ioreq_t uf_ioreq; - int uf_srfd; - nwio_udpopt_t uf_udpopt; - get_userdata_t uf_get_userdata; - put_userdata_t uf_put_userdata; - acc_t *uf_rdbuf_head; - acc_t *uf_rdbuf_tail; - size_t uf_rd_count; - size_t uf_wr_count; - time_t uf_exp_tim; - struct udp_fd *uf_port_next; -} udp_fd_t; - -#define UFF_EMPTY 0x0 -#define UFF_INUSE 0x1 -#define UFF_IOCTL_IP 0x2 -#define UFF_READ_IP 0x4 -#define UFF_WRITE_IP 0x8 -#define UFF_OPTSET 0x10 - FORWARD void read_ip_packets ARGS(( udp_port_t *udp_port )); FORWARD void udp_buffree ARGS(( int priority )); #ifdef BUF_CONSISTENCY_CHECK FORWARD void udp_bufcheck ARGS(( void )); #endif FORWARD void udp_main ARGS(( udp_port_t *udp_port )); +FORWARD int udp_select ARGS(( int fd, unsigned operations )); FORWARD acc_t *udp_get_data ARGS(( int fd, size_t offset, size_t count, int for_ioctl )); FORWARD int udp_put_data ARGS(( int fd, size_t offset, acc_t *data, int for_ioctl )); +FORWARD int udp_peek ARGS(( udp_fd_t * )); FORWARD void udp_restart_write_port ARGS(( udp_port_t *udp_port )); FORWARD void udp_ip_arrived ARGS(( int port, acc_t *pack, size_t pack_size )); FORWARD void reply_thr_put ARGS(( udp_fd_t *udp_fd, int reply, @@ -96,24 +44,23 @@ FORWARD int udp_packet2user ARGS(( udp_fd_t *udp_fd )); FORWARD void restart_write_fd ARGS(( udp_fd_t *udp_fd )); FORWARD u16_t pack_oneCsum ARGS(( acc_t *pack )); FORWARD void udp_rd_enqueue ARGS(( udp_fd_t *udp_fd, acc_t *pack, - time_t exp_tim )); + clock_t exp_tim )); FORWARD void hash_fd ARGS(( udp_fd_t *udp_fd )); FORWARD void unhash_fd ARGS(( udp_fd_t *udp_fd )); -PRIVATE udp_port_t *udp_port_table; -PRIVATE udp_fd_t udp_fd_table[UDP_FD_NR]; +PUBLIC udp_port_t *udp_port_table; +PUBLIC udp_fd_t udp_fd_table[UDP_FD_NR]; PUBLIC void udp_prep() { - udp_port_table= alloc(ip_conf_nr * sizeof(udp_port_table[0])); + udp_port_table= alloc(udp_conf_nr * 
sizeof(udp_port_table[0])); } PUBLIC void udp_init() { udp_fd_t *udp_fd; udp_port_t *udp_port; - struct ip_conf *icp; - int i, j; + int i, j, ifno; assert (BUF_S >= sizeof(struct nwio_ipopt)); assert (BUF_S >= sizeof(struct nwio_ipconf)); @@ -122,13 +69,11 @@ PUBLIC void udp_init() assert (UDP_HDR_SIZE == sizeof(udp_hdr_t)); assert (UDP_IO_HDR_SIZE == sizeof(udp_io_hdr_t)); -#if ZERO for (i= 0, udp_fd= udp_fd_table; iuf_flags= UFF_EMPTY; udp_fd->uf_rdbuf_head= NULL; } -#endif #ifndef BUF_CONSISTENCY_CHECK bf_logon(udp_buffree); @@ -136,31 +81,147 @@ PUBLIC void udp_init() bf_logon(udp_buffree, udp_bufcheck); #endif - for (i= 0, udp_port= udp_port_table, icp= ip_conf; - iup_ipdev= i; + udp_port->up_ipdev= udp_conf[i].uc_port; -#if ZERO udp_port->up_flags= UPF_EMPTY; udp_port->up_state= UPS_EMPTY; -#endif udp_port->up_next_fd= udp_fd_table; -#if ZERO udp_port->up_write_fd= NULL; udp_port->up_port_any= NULL; for (j= 0; jup_port_hash[j]= NULL; -#endif - sr_add_minor(if2minor(icp->ic_ifno, UDP_DEV_OFF), + ifno= ip_conf[udp_port->up_ipdev].ic_ifno; + sr_add_minor(if2minor(ifno, UDP_DEV_OFF), i, udp_open, udp_close, udp_read, - udp_write, udp_ioctl, udp_cancel); + udp_write, udp_ioctl, udp_cancel, udp_select); udp_main(udp_port); } } +PUBLIC int udp_open (port, srfd, get_userdata, put_userdata, put_pkt, + select_res) +int port; +int srfd; +get_userdata_t get_userdata; +put_userdata_t put_userdata; +put_pkt_t put_pkt; +select_res_t select_res; +{ + int i; + udp_fd_t *udp_fd; + + for (i= 0; i= UDP_FD_NR) + { + DBLOCK(1, printf("out of fds\n")); + return EAGAIN; + } + + udp_fd= &udp_fd_table[i]; + + udp_fd->uf_flags= UFF_INUSE; + udp_fd->uf_port= &udp_port_table[port]; + udp_fd->uf_srfd= srfd; + udp_fd->uf_udpopt.nwuo_flags= UDP_DEF_OPT; + udp_fd->uf_get_userdata= get_userdata; + udp_fd->uf_put_userdata= put_userdata; + assert(udp_fd->uf_rdbuf_head == NULL); + udp_fd->uf_port_next= NULL; + + return i; + +} + +PUBLIC int udp_ioctl (fd, req) +int fd; +ioreq_t req; +{ + 
udp_fd_t *udp_fd; + udp_port_t *udp_port; + nwio_udpopt_t *udp_opt; + acc_t *opt_acc; + int result; + + udp_fd= &udp_fd_table[fd]; + +assert (udp_fd->uf_flags & UFF_INUSE); + + udp_port= udp_fd->uf_port; + udp_fd->uf_flags |= UFF_IOCTL_IP; + udp_fd->uf_ioreq= req; + + if (udp_port->up_state != UPS_MAIN) + return NW_SUSPEND; + + switch(req) + { + case NWIOSUDPOPT: + result= udp_setopt(udp_fd); + break; + case NWIOGUDPOPT: + opt_acc= bf_memreq(sizeof(*udp_opt)); +assert (opt_acc->acc_length == sizeof(*udp_opt)); + udp_opt= (nwio_udpopt_t *)ptr2acc_data(opt_acc); + + *udp_opt= udp_fd->uf_udpopt; + udp_opt->nwuo_locaddr= udp_fd->uf_port->up_ipaddr; + result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, 0, opt_acc, + TRUE); + if (result == NW_OK) + reply_thr_put(udp_fd, NW_OK, TRUE); + break; + case NWIOUDPPEEK: + result= udp_peek(udp_fd); + break; + default: + reply_thr_get(udp_fd, EBADIOCTL, TRUE); + result= NW_OK; + break; + } + if (result != NW_SUSPEND) + udp_fd->uf_flags &= ~UFF_IOCTL_IP; + return result; +} + +PUBLIC int udp_read (fd, count) +int fd; +size_t count; +{ + udp_fd_t *udp_fd; + acc_t *tmp_acc, *next_acc; + + udp_fd= &udp_fd_table[fd]; + if (!(udp_fd->uf_flags & UFF_OPTSET)) + { + reply_thr_put(udp_fd, EBADMODE, FALSE); + return NW_OK; + } + + udp_fd->uf_rd_count= count; + + if (udp_fd->uf_rdbuf_head) + { + if (get_time() <= udp_fd->uf_exp_tim) + return udp_packet2user (udp_fd); + tmp_acc= udp_fd->uf_rdbuf_head; + while (tmp_acc) + { + next_acc= tmp_acc->acc_ext_link; + bf_afree(tmp_acc); + tmp_acc= next_acc; + } + udp_fd->uf_rdbuf_head= NULL; + } + udp_fd->uf_flags |= UFF_READ_IP; + return NW_SUSPEND; +} + PRIVATE void udp_main(udp_port) udp_port_t *udp_port; { @@ -174,7 +235,7 @@ udp_port_t *udp_port; udp_port->up_ipfd= ip_open(udp_port->up_ipdev, udp_port->up_ipdev, udp_get_data, udp_put_data, - udp_ip_arrived); + udp_ip_arrived, 0 /* no select_res */); if (udp_port->up_ipfd < 0) { udp_port->up_state= UPS_ERROR; @@ -220,47 +281,20 @@ udp_port_t 
*udp_port; } read_ip_packets(udp_port); return; -#if !CRAMPED default: DBLOCK(1, printf("udp_port_table[%d].up_state= %d\n", udp_port->up_ipdev, udp_port->up_state)); ip_panic(( "unknown state" )); -#endif + break; } } -int udp_open (port, srfd, get_userdata, put_userdata, put_pkt) -int port; -int srfd; -get_userdata_t get_userdata; -put_userdata_t put_userdata; -put_pkt_t put_pkt; +PRIVATE int udp_select(fd, operations) +int fd; +unsigned operations; { - int i; - udp_fd_t *udp_fd; - - for (i= 0; i= UDP_FD_NR) - { - DBLOCK(1, printf("out of fds\n")); - return EAGAIN; - } - - udp_fd= &udp_fd_table[i]; - - udp_fd->uf_flags= UFF_INUSE; - udp_fd->uf_port= &udp_port_table[port]; - udp_fd->uf_srfd= srfd; - udp_fd->uf_udpopt.nwuo_flags= UDP_DEF_OPT; - udp_fd->uf_get_userdata= get_userdata; - udp_fd->uf_put_userdata= put_userdata; - assert(udp_fd->uf_rdbuf_head == NULL); - udp_fd->uf_port_next= NULL; - - return i; - + printf("udp_select: not implemented\n"); + return 0; } PRIVATE acc_t *udp_get_data (port, offset, count, for_ioctl) @@ -342,10 +376,8 @@ assert (udp_port->up_wr_pack); } break; default: -#if !CRAMPED printf("udp_get_data(%d, 0x%x, 0x%x) called but up_state= 0x%x\n", port, offset, count, udp_port->up_state); -#endif break; } return NULL; @@ -412,72 +444,20 @@ assert (!offset); /* This isn't a valid assertion but ip sends only udp_ip_arrived(fd, data, bf_bufsize(data)); } break; -#if !CRAMPED default: ip_panic(( - "udp_put_data(%d, 0x%x, 0x%x) called but up_state= 0x%x\n", + "udp_put_data(%d, 0x%x, %p) called but up_state= 0x%x\n", fd, offset, data, udp_port->up_state )); -#endif } return NW_OK; } -int udp_ioctl (fd, req) -int fd; -ioreq_t req; -{ - udp_fd_t *udp_fd; - udp_port_t *udp_port; - nwio_udpopt_t *udp_opt; - acc_t *opt_acc; - int result; - - udp_fd= &udp_fd_table[fd]; - -assert (udp_fd->uf_flags & UFF_INUSE); - - udp_port= udp_fd->uf_port; - udp_fd->uf_flags |= UFF_IOCTL_IP; - udp_fd->uf_ioreq= req; - - if (udp_port->up_state != UPS_MAIN) - return 
NW_SUSPEND; - - switch(req) - { - case NWIOSUDPOPT: - result= udp_setopt(udp_fd); - break; - case NWIOGUDPOPT: - opt_acc= bf_memreq(sizeof(*udp_opt)); -assert (opt_acc->acc_length == sizeof(*udp_opt)); - udp_opt= (nwio_udpopt_t *)ptr2acc_data(opt_acc); - - *udp_opt= udp_fd->uf_udpopt; - udp_opt->nwuo_locaddr= udp_fd->uf_port->up_ipaddr; - result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, 0, opt_acc, - TRUE); - if (result == NW_OK) - reply_thr_put(udp_fd, NW_OK, TRUE); - break; - default: - reply_thr_get(udp_fd, EBADIOCTL, TRUE); - result= NW_OK; - break; - } - if (result != NW_SUSPEND) - udp_fd->uf_flags &= ~UFF_IOCTL_IP; - return result; -} - PRIVATE int udp_setopt(udp_fd) udp_fd_t *udp_fd; { udp_fd_t *fd_ptr; nwio_udpopt_t oldopt, newopt; acc_t *data; - int result; - udpport_t port; unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags, all_flags, flags; unsigned long new_flags; @@ -675,20 +655,20 @@ int fd; { udpport_t port, nw_port; - nw_port= htons(0xC000+fd); - if (is_unused_port(nw_port)) - return nw_port; - - for (port= 0xC000+UDP_FD_NR; port < 0xFFFF; port++) + for (port= 0x8000+fd; port < 0xffff-UDP_FD_NR; port+= UDP_FD_NR) + { + nw_port= htons(port); + if (is_unused_port(nw_port)) + return nw_port; + } + for (port= 0x8000; port < 0xffff; port++) { nw_port= htons(port); if (is_unused_port(nw_port)) return nw_port; } -#if !CRAMPED ip_panic(( "unable to find unused port (shouldn't occur)" )); return 0; -#endif } /* @@ -759,26 +739,34 @@ assert(result == NW_OK); } -PUBLIC int udp_read (fd, count) -int fd; -size_t count; +PRIVATE int udp_peek (udp_fd) +udp_fd_t *udp_fd; { - udp_fd_t *udp_fd; - acc_t *tmp_acc, *next_acc; + acc_t *pack, *tmp_acc, *next_acc; + int result; - udp_fd= &udp_fd_table[fd]; if (!(udp_fd->uf_flags & UFF_OPTSET)) { - reply_thr_put(udp_fd, EBADMODE, FALSE); + udp_fd->uf_flags &= ~UFF_IOCTL_IP; + reply_thr_put(udp_fd, EBADMODE, TRUE); return NW_OK; } - udp_fd->uf_rd_count= count; - if (udp_fd->uf_rdbuf_head) { if 
(get_time() <= udp_fd->uf_exp_tim) - return udp_packet2user (udp_fd); + { + pack= bf_cut(udp_fd->uf_rdbuf_head, 0, + sizeof(udp_io_hdr_t)); + result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, + (size_t)0, pack, TRUE); + + udp_fd->uf_flags &= ~UFF_IOCTL_IP; + result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, + result, (acc_t *)0, TRUE); + assert (result == 0); + return result; + } tmp_acc= udp_fd->uf_rdbuf_head; while (tmp_acc) { @@ -788,7 +776,7 @@ size_t count; } udp_fd->uf_rdbuf_head= NULL; } - udp_fd->uf_flags |= UFF_READ_IP; + udp_fd->uf_flags |= UFF_PEEK_IP; return NW_SUSPEND; } @@ -847,7 +835,7 @@ udp_fd_t *udp_fd; udp_fd->uf_flags &= ~UFF_READ_IP; result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, result, (acc_t *)0, FALSE); -assert (result == 0); + assert (result == 0); return result; } @@ -864,12 +852,12 @@ size_t pack_size; udp_hdr_t *udp_hdr; udp_io_hdr_t *udp_io_hdr; size_t ip_hdr_size, udp_size, data_size, opt_size; - ipaddr_t src_addr, dst_addr; + ipaddr_t src_addr, dst_addr, ipaddr; udpport_t src_port, dst_port; u8_t u16[2]; u16_t chksum; unsigned long dst_type, flags; - time_t exp_tim; + clock_t exp_tim; int i, delivered, hash; udp_port= &udp_port_table[port]; @@ -886,19 +874,29 @@ size_t pack_size; ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_hdr_acc); } - udp_acc= bf_delhead(pack, ip_hdr_size); - pack= NULL; - pack_size -= ip_hdr_size; if (pack_size < UDP_HDR_SIZE) { - DBLOCK(1, printf("packet too small\n")); + if (pack_size == 0 && ip_hdr->ih_proto == 0) + { + /* IP layer reports new IP address */ + ipaddr= ip_hdr->ih_src; + udp_port->up_ipaddr= ipaddr; + DBLOCK(1, printf("udp_ip_arrived: using address "); + writeIpAddr(ipaddr); printf("\n")); + } + else + DBLOCK(1, printf("packet too small\n")); bf_afree(ip_hdr_acc); - bf_afree(udp_acc); + bf_afree(pack); return; } + udp_acc= bf_delhead(pack, ip_hdr_size); + pack= NULL; + + udp_acc= bf_packIffLess(udp_acc, UDP_HDR_SIZE); udp_hdr= (udp_hdr_t *)ptr2acc_data(udp_acc); udp_size= 
ntohs(udp_hdr->uh_length); @@ -1262,12 +1260,10 @@ assert (!udp_port->up_wr_pack); ip_hdr->ih_vers_ihl= (IP_MIN_HDR_SIZE+ip_opt_size) >> 2; ip_hdr->ih_tos= UDP_TOS; ip_hdr->ih_flags_fragoff= HTONS(UDP_IP_FLAGS); - ip_hdr->ih_ttl= UDP_TTL; + ip_hdr->ih_ttl= IP_DEF_TTL; ip_hdr->ih_proto= IPPROTO_UDP; if (flags & NWUO_RA_SET) { - DBLOCK(1, printf("NWUO_RA_SET\n")); - ip_hdr->ih_dst= udp_fd->uf_udpopt.nwuo_remaddr; } else @@ -1445,12 +1441,11 @@ assert (udp_fd->uf_flags & UFF_WRITE_IP); case SR_CANCEL_IOCTL: assert (udp_fd->uf_flags & UFF_IOCTL_IP); udp_fd->uf_flags &= ~UFF_IOCTL_IP; + udp_fd->uf_flags &= ~UFF_PEEK_IP; reply_thr_get(udp_fd, EINTR, TRUE); break; -#if !CRAMPED default: ip_panic(( "got unknown cancel request" )); -#endif } return NW_OK; } @@ -1459,9 +1454,8 @@ PRIVATE void udp_buffree (priority) int priority; { int i; - time_t curr_tim; udp_fd_t *udp_fd; - acc_t *tmp_acc, *next_acc; + acc_t *tmp_acc; if (priority == UDP_PRI_FDBUFS_EXTRA) { @@ -1494,9 +1488,10 @@ int priority; PRIVATE void udp_rd_enqueue(udp_fd, pack, exp_tim) udp_fd_t *udp_fd; acc_t *pack; -time_t exp_tim; +clock_t exp_tim; { acc_t *tmp_acc; + int result; if (pack->acc_linkC != 1) { @@ -1513,6 +1508,20 @@ time_t exp_tim; else udp_fd->uf_rdbuf_tail->acc_ext_link= pack; udp_fd->uf_rdbuf_tail= pack; + + if (udp_fd->uf_flags & UFF_PEEK_IP) + { + pack= bf_cut(udp_fd->uf_rdbuf_head, 0, + sizeof(udp_io_hdr_t)); + result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, + (size_t)0, pack, TRUE); + + udp_fd->uf_flags &= ~UFF_IOCTL_IP; + udp_fd->uf_flags &= ~UFF_PEEK_IP; + result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, + result, (acc_t *)0, TRUE); + assert (result == 0); + } } PRIVATE void hash_fd(udp_fd) @@ -1581,7 +1590,7 @@ PRIVATE void udp_bufcheck() udp_fd_t *udp_fd; acc_t *tmp_acc; - for (i= 0, udp_port= udp_port_table; iup_wr_pack) bf_check_acc(udp_port->up_wr_pack); @@ -1599,5 +1608,5 @@ PRIVATE void udp_bufcheck() #endif /* - * $PchId: udp.c,v 1.10 1996/08/06 06:48:05 philip Exp $ + * 
$PchId: udp.c,v 1.25 2005/06/28 14:14:44 philip Exp $ */ diff --git a/servers/inet/generic/udp.h b/servers/inet/generic/udp.h index b2190ca78..4a0b58290 100644 --- a/servers/inet/generic/udp.h +++ b/servers/inet/generic/udp.h @@ -12,7 +12,6 @@ Copyright 1995 Philip Homburg #define UDP_READ_EXP_TIME (10L * HZ) #define UDP_TOS 0 #define UDP_IP_FLAGS 0 -#define UDP_TTL 30 #define UDP0 0 @@ -22,7 +21,7 @@ void udp_prep ARGS(( void )); void udp_init ARGS(( void )); int udp_open ARGS(( int port, int srfd, get_userdata_t get_userdata, put_userdata_t put_userdata, - put_pkt_t put_pkt )); + put_pkt_t put_pkt, select_res_t select_res )); int udp_ioctl ARGS(( int fd, ioreq_t req )); int udp_read ARGS(( int fd, size_t count )); int udp_write ARGS(( int fd, size_t count )); @@ -33,5 +32,5 @@ int udp_cancel ARGS(( int fd, int which_operation )); /* - * $PchId: udp.h,v 1.6 1996/05/07 20:53:31 philip Exp $ + * $PchId: udp.h,v 1.9 2005/06/28 14:12:05 philip Exp $ */ diff --git a/servers/inet/generic/udp_int.h b/servers/inet/generic/udp_int.h new file mode 100644 index 000000000..9977f5c48 --- /dev/null +++ b/servers/inet/generic/udp_int.h @@ -0,0 +1,70 @@ +/* +generic/udp_int.h + +Created: March 2001 by Philip Homburg + +Some internals of the UDP module +*/ + +#define UDP_FD_NR (4*IP_PORT_MAX) +#define UDP_PORT_HASH_NR 16 /* Must be a power of 2 */ + +typedef struct udp_port +{ + int up_flags; + int up_state; + int up_ipfd; + int up_ipdev; + acc_t *up_wr_pack; + ipaddr_t up_ipaddr; + struct udp_fd *up_next_fd; + struct udp_fd *up_write_fd; + struct udp_fd *up_port_any; + struct udp_fd *up_port_hash[UDP_PORT_HASH_NR]; +} udp_port_t; + +#define UPF_EMPTY 0x0 +#define UPF_WRITE_IP 0x1 +#define UPF_WRITE_SP 0x2 +#define UPF_READ_IP 0x4 +#define UPF_READ_SP 0x8 +#define UPF_SUSPEND 0x10 +#define UPF_MORE2WRITE 0x20 + +#define UPS_EMPTY 0 +#define UPS_SETPROTO 1 +#define UPS_GETCONF 2 +#define UPS_MAIN 3 +#define UPS_ERROR 4 + +typedef struct udp_fd +{ + int uf_flags; + udp_port_t 
*uf_port; + ioreq_t uf_ioreq; + int uf_srfd; + nwio_udpopt_t uf_udpopt; + get_userdata_t uf_get_userdata; + put_userdata_t uf_put_userdata; + acc_t *uf_rdbuf_head; + acc_t *uf_rdbuf_tail; + size_t uf_rd_count; + size_t uf_wr_count; + clock_t uf_exp_tim; + struct udp_fd *uf_port_next; +} udp_fd_t; + +#define UFF_EMPTY 0x0 +#define UFF_INUSE 0x1 +#define UFF_IOCTL_IP 0x2 +#define UFF_READ_IP 0x4 +#define UFF_WRITE_IP 0x8 +#define UFF_OPTSET 0x10 +#define UFF_PEEK_IP 0x20 + +EXTERN udp_port_t *udp_port_table; +EXTERN udp_fd_t udp_fd_table[UDP_FD_NR]; + +/* + * $PchId: udp_int.h,v 1.4 2004/08/03 11:12:01 philip Exp $ + */ diff --git a/servers/inet/inet.c b/servers/inet/inet.c index 7cc6114d7..1970af81d 100644 --- a/servers/inet/inet.c +++ b/servers/inet/inet.c @@ -3,12 +3,6 @@ Copyright 1995 Philip Homburg -Changes: - Oct 10, 2004 Get own process number with SYS_GETINFO (Jorrit N. Herder) - Sep 30, 2004 Updated system calls done in clock.c. (Jorrit N. Herder) - Sep 15, 2004 Exit on HARD_STOP notification (Jorrit N. Herder) - Aug 24, 2004 Alarms no longer from SYNALRM task (Jorrit N. 
Herder) - The valid messages and their parameters are: from FS: @@ -35,65 +29,175 @@ from FS: | NW_CANCEL | minor dev | proc nr | | | | |_______________|___________|_________|_______|__________|_________| -from the Ethernet task: +from DL_ETH: _______________________________________________________________________ | | | | | | | | m_type | DL_PORT | DL_PROC | DL_COUNT | DL_STAT | DL_TIME | |_______________|___________|_________|__________|____________|_________| | | | | | | | -| DL_TASK_INT | minor dev | proc nr | rd_count | 0 | stat | time | +| DL_INIT_REPLY | minor dev | proc nr | rd_count | 0 | stat | time | |_______________|___________|_________|__________|____________|_________| | | | | | | | -| DL_TASK_REPLY | minor dev | proc nr | rd_count | err | stat | time | | +| DL_TASK_REPLY | minor dev | proc nr | rd_count | err | stat | time | |_______________|___________|_________|__________|____________|_________| */ #include "inet.h" -#define _MINIX 1 +#define _MINIX_SOURCE 1 +#include +#include #include #include -#include -#include #include "mq.h" +#include "qp.h" #include "proto.h" #include "generic/type.h" +#include "generic/arp.h" #include "generic/assert.h" #include "generic/buf.h" #include "generic/clock.h" #include "generic/eth.h" #include "generic/event.h" -#if !CRAMPED -#include "generic/arp.h" #include "generic/ip.h" #include "generic/psip.h" +#include "generic/rand256.h" #include "generic/sr.h" #include "generic/tcp.h" #include "generic/udp.h" -#endif THIS_FILE +#define RANDOM_DEV_NAME "/dev/random" + int this_proc; /* Process number of this server. 
*/ +#ifdef __minix_vmd +static int synal_tasknr= ANY; +#endif + +/* Killing Solaris */ +int killer_inet= 0; + #ifdef BUF_CONSISTENCY_CHECK extern int inet_buf_debug; #endif _PROTOTYPE( void main, (void) ); +FORWARD _PROTOTYPE( void nw_conf, (void) ); FORWARD _PROTOTYPE( void nw_init, (void) ); PUBLIC void main() { mq_t *mq; int r; - int source; + int source, timerand, fd; + struct fssignon device; +#ifdef __minix_vmd + struct systaskinfo info; +#endif + u8_t randbits[32]; + struct timeval tv; - DBLOCK(1, printf("%s\n", version)); + printf("Hello, in inet\n"); +#if DEBUG + printf("Starting inet...\n"); + printf("%s\n", version); +#endif + + /* Read configuration. */ + nw_conf(); + + /* Get a random number */ + timerand= 1; + fd= open(RANDOM_DEV_NAME, O_RDONLY | O_NONBLOCK); + if (fd != -1) + { + r= read(fd, randbits, sizeof(randbits)); + if (r == sizeof(randbits)) + timerand= 0; + else + { + printf("unable to read random data from %s: %s\n", + RANDOM_DEV_NAME, r == -1 ? strerror(errno) : + r == 0 ? "EOF" : "not enough data"); + } + close(fd); + } + else + { + printf("unable to open random device %s: %s\n", + RANDOM_DEV_NAME, strerror(errno)); + } + if (timerand) + { + printf("using current time for random-number seed\n"); +#ifdef __minix_vmd + r= sysutime(UTIME_TIMEOFDAY, &tv); +#else /* Minix 3 */ + r= gettimeofday(&tv, NULL); +#endif + if (r == -1) + { + printf("sysutime failed: %s\n", strerror(errno)); + exit(1); + } + memcpy(randbits, &tv, sizeof(tv)); + } + init_rand256(randbits); + + /* Sign on as a server at all offices in the proper order. */ + if (svrctl(MMSIGNON, (void *) NULL) == -1) { + printf("inet: server signon failed\n"); + exit(1); + } +#ifdef __minix_vmd + if (svrctl(SYSSIGNON, (void *) &info) == -1) pause(); + + /* Our new identity as a server. */ + this_proc = info.proc_nr; +#else /* Minix 3 */ + if (svrctl(SYSSIGNON, (void *) NULL) == -1) pause(); + + /* Our new identity as a server. 
*/ + if (getprocnr(&this_proc) != OK) + ip_panic(( "unable to get own process nr\n")); +#endif + + /* Register the device group. */ + device.dev= ip_dev; + device.style= STYLE_CLONE; + if (svrctl(FSSIGNON, (void *) &device) == -1) { + printf("inet: error %d on registering ethernet devices\n", + errno); + pause(); + } + +#ifdef BUF_CONSISTENCY_CHECK + inet_buf_debug= (getenv("inetbufdebug") && + (strcmp(getenv("inetbufdebug"), "on") == 0)); + inet_buf_debug= 100; + if (inet_buf_debug) + { + ip_warning(( "buffer consistency check enabled" )); + } +#endif + + if (getenv("killerinet")) + { + ip_warning(( "killer inet active" )); + killer_inet= 1; + } + +#ifdef __minix_vmd + r= sys_findproc(SYN_AL_NAME, &synal_tasknr, 0); + if (r != OK) + ip_panic(( "unable to find synchronous alarm task: %d\n", r )); +#endif nw_init(); while (TRUE) @@ -103,7 +207,7 @@ PUBLIC void main() { static int buf_debug_count= 0; - if (buf_debug_count++ > inet_buf_debug) + if (++buf_debug_count >= inet_buf_debug) { buf_debug_count= 0; if (!bf_consistency_check()) @@ -126,22 +230,36 @@ PUBLIC void main() ip_panic(("out of messages")); r= receive (ANY, &mq->mq_mess); - if (r<0) { + if (r<0) + { ip_panic(("unable to receive: %d", r)); } reset_time(); source= mq->mq_mess.m_source; - if (source == FS_PROC_NR) { - sr_rec(mq); - } else if (mq->mq_mess.m_type & NOTIFICATION ) + if (source == FS_PROC_NR) { - if (mq->mq_mess.m_type == SYN_ALARM) { + sr_rec(mq); + } +#ifdef __minix_vmd + else if (source == synal_tasknr) + { + clck_tick (&mq->mq_mess); + mq_free(mq); + } +#else /* Minix 3 */ + else if (mq->mq_mess.m_type & NOTIFICATION) + { + if (mq->mq_mess.m_type == SYN_ALARM) + { clck_tick(&mq->mq_mess); mq_free(mq); - } else if (mq->mq_mess.m_type == HARD_STOP) { + } + else if (mq->mq_mess.m_type == HARD_STOP) + { sys_exit(0); } - } + } +#endif else { compare(mq->mq_mess.m_type, ==, DL_TASK_REPLY); @@ -152,12 +270,8 @@ compare(mq->mq_mess.m_type, ==, DL_TASK_REPLY); ip_panic(("task is not allowed to 
terminate")); } -PRIVATE void nw_init() +PRIVATE void nw_conf() { - struct fssignon device; - int pnr; - - /* Read configuration. */ read_conf(); eth_prep(); arp_prep(); @@ -165,59 +279,23 @@ PRIVATE void nw_init() ip_prep(); tcp_prep(); udp_prep(); +} - - /* Sign on as a server at all offices in the proper order. */ - if (svrctl(MMSIGNON, (void *) NULL) == -1) { - printf("inet: server signon failed\n"); - exit(1); - } - if (svrctl(SYSSIGNON, (void *) NULL) == -1) pause(); - - /* Our new identity as a server. */ - if (getprocnr(&this_proc) != OK) - ip_panic(( "unable to get own process nr\n")); - - /* Register the device group. */ - device.dev= ip_dev; - device.style= STYLE_CLONE; - if (svrctl(FSSIGNON, (void *) &device) == -1) { - printf("inet: error %d on registering ethernet devices\n", - errno); - pause(); - } - - -#ifdef BUF_CONSISTENCY_CHECK - inet_buf_debug= 100; - if (inet_buf_debug) - { - ip_warning(( "buffer consistency check enabled" )); - } -#endif +PRIVATE void nw_init() +{ mq_init(); + qp_init(); bf_init(); clck_init(); sr_init(); eth_init(); -#if ENABLE_ARP arp_init(); -#endif -#if ENABLE_PSIP psip_init(); -#endif -#if ENABLE_IP ip_init(); -#endif -#if ENABLE_TCP tcp_init(); -#endif -#if ENABLE_UDP udp_init(); -#endif } -#if !CRAMPED PUBLIC void panic0(file, line) char *file; int line; @@ -229,19 +307,13 @@ PUBLIC void inet_panic() { printf("\ninet stacktrace: "); stacktrace(); - panic("INET","aborted due to a panic",NO_NUM); -} - -#else /* CRAMPED */ - -PUBLIC void inet_panic(file, line) -char *file; -int line; -{ - printf("panic at %s, %d\n", file, line); - panic("INET","aborted due to a panic",NO_NUM); -} +#ifdef __minix_vmd + sys_abort(RBT_PANIC); +#else /* Minix 3 */ + (panic)("INET","aborted due to a panic",NO_NUM); #endif + for(;;); +} #if !NDEBUG PUBLIC void bad_assertion(file, line, what) @@ -251,7 +323,7 @@ char *what; { panic0(file, line); printf("assertion \"%s\" failed", what); - inet_panic(); + panic(); } @@ -264,10 +336,10 @@ int rhs; 
{ panic0(file, line); printf("compare (%d) %s (%d) failed", lhs, what, rhs); - inet_panic(); + panic(); } #endif /* !NDEBUG */ /* - * $PchId: inet.c,v 1.12 1996/12/17 07:58:19 philip Exp $ + * $PchId: inet.c,v 1.23 2005/06/28 14:27:22 philip Exp $ */ diff --git a/servers/inet/inet.h b/servers/inet/inet.h index 3e5840fea..0b1ace85e 100644 --- a/servers/inet/inet.h +++ b/servers/inet/inet.h @@ -11,24 +11,35 @@ Copyright 1995 Philip Homburg #define _SYSTEM 1 /* get OK and negative error codes */ -#include - -#define CRAMPED (_EM_WSIZE==2) /* 64K code and data is quite cramped. */ -#define ZERO 0 /* Used to comment out initialization code that does nothing. */ - #include -#include #include #include #include #include +#ifdef __minix_vmd + +#include +#include +#include + +#else /* Assume at least Minix 3.x */ + +#include +#include #include #include +#include + +#define _NORETURN /* Should be non empty for GCC */ + +typedef int ioreq_t; + +#endif + #include #include #include -#include #include #include #include @@ -43,11 +54,14 @@ Copyright 1995 Philip Homburg #include #include #include +#include #include #include #include #include #include + +#include #include #include "const.h" @@ -58,25 +72,14 @@ Copyright 1995 Philip Homburg #define PRIVATE static #define FORWARD static -typedef int ioreq_t; - #define THIS_FILE static char *this_file= __FILE__; -#if CRAMPED - -/* Minimum panic info. */ -#define ip_panic(print_list) inet_panic(this_file, __LINE__) -_PROTOTYPE( void inet_panic, (char *file, int line) ); - -#else /* !CRAMPED */ - -/* Maximum panic info. 
*/ -#define ip_panic(print_list) \ - (panic0(this_file, __LINE__), printf print_list, inet_panic()) _PROTOTYPE( void panic0, (char *file, int line) ); -_PROTOTYPE( void inet_panic, (void) ); +_PROTOTYPE( void inet_panic, (void) ) _NORETURN; -#endif /* !CRAMPED */ +#define ip_panic(print_list) \ + (panic0(this_file, __LINE__), printf print_list, panic()) +#define panic() inet_panic() #if DEBUG #define ip_warning(print_list) \ @@ -86,6 +89,9 @@ _PROTOTYPE( void inet_panic, (void) ); printf("\ninet stacktrace: "), \ stacktrace() \ ) +#else +#define ip_warning(print_list) ((void) 0) +#endif #define DBLOCK(level, code) \ do { if ((level) & DEBUG) { where(); code; } } while(0) @@ -93,21 +99,19 @@ _PROTOTYPE( void inet_panic, (void) ); do { if (((level) & DEBUG) && (condition)) \ { where(); code; } } while(0) -#else /* !DEBUG */ -#define ip_warning(print_list) 0 -#define DBLOCK(level, code) 0 -#define DIFBLOCK(level, condition, code) 0 -#endif +#if _ANSI +#define ARGS(x) x +#else /* _ANSI */ +#define ARGS(x) () +#endif /* _ANSI */ -#define ARGS(x) _ARGS(x) - -extern char version[]; extern int this_proc; +extern char version[]; void stacktrace ARGS(( void )); #endif /* INET__INET_H */ /* - * $PchId: inet.h,v 1.8 1996/05/07 21:05:04 philip Exp $ + * $PchId: inet.h,v 1.16 2005/06/28 14:27:54 philip Exp $ */ diff --git a/servers/inet/inet_config.c b/servers/inet/inet_config.c index c2554f341..735232e79 100644 --- a/servers/inet/inet_config.c +++ b/servers/inet/inet_config.c @@ -9,42 +9,41 @@ Modified: Apr 07, 2001 by Kees J. Bot Copyright 1995 Philip Homburg */ -#define _MINIX 1 +#define _MINIX_SOURCE 1 +#define _POSIX_SOURCE 1 #include +#include #include #include #include #include #include -#include #include #include -#include -#include #include "inet_config.h" -#define CRAMPED (_EM_WSIZE==2) /* 64K code and data is quite cramped. 
*/ -#if CRAMPED -#endif - struct eth_conf eth_conf[IP_PORT_MAX]; struct psip_conf psip_conf[IP_PORT_MAX]; struct ip_conf ip_conf[IP_PORT_MAX]; +struct tcp_conf tcp_conf[IP_PORT_MAX]; +struct udp_conf udp_conf[IP_PORT_MAX]; dev_t ip_dev; int eth_conf_nr; -#if ENABLE_PSIP int psip_conf_nr; -#endif int ip_conf_nr; +int tcp_conf_nr; +int udp_conf_nr; + +int ip_forward_directed_bcast= 0; /* Default is off */ static u8_t iftype[IP_PORT_MAX]; /* Interface in use as? */ static int ifdefault= -1; /* Default network interface. */ static void fatal(char *label) { - printf("init: %s: Error %d\n", label, errno); + printf("init: %s: %s\n", label, strerror(errno)); exit(1); } @@ -206,7 +205,7 @@ static unsigned number(char *str, unsigned max) void read_conf(void) { - int i, j, ifno, type, port; + int i, j, ifno, type, port, enable; struct eth_conf *ecp; struct psip_conf *pcp; struct ip_conf *icp; @@ -226,13 +225,25 @@ void read_conf(void) type= NETTYPE_ETH; port= eth_conf_nr; token(1); - ecp->ec_task= alloc(strlen(word)+1); - strcpy(ecp->ec_task, word); - token(1); - ecp->ec_port= number(word, IP_PORT_MAX-1); + if (strcmp(word, "vlan") == 0) { + token(1); + ecp->ec_vlan= number(word, (1<<12)-1); + token(1); + if (strncmp(word, "eth", 3) != 0) { + printf( + "inet: VLAN eth%d can't be built on %s\n", + ifno, word); + exit(1); + } + ecp->ec_port= number(word+3, IP_PORT_MAX-1); + } else { + ecp->ec_task= alloc(strlen(word)+1); + strcpy(ecp->ec_task, word); + token(1); + ecp->ec_port= number(word, IP_PORT_MAX-1); + } ecp++; eth_conf_nr++; -#if ENABLE_PSIP } else if (strncmp(word, "psip", 4) == 0) { pcp->pc_ifno= ifno= number(word+4, IP_PORT_MAX-1); @@ -240,7 +251,6 @@ void read_conf(void) port= psip_conf_nr; pcp++; psip_conf_nr++; -#endif } else { printf("inet: Unknown device '%s'\n", word); error(); @@ -249,27 +259,58 @@ void read_conf(void) icp->ic_ifno= ifno; icp->ic_devtype= type; icp->ic_port= port; + tcp_conf[tcp_conf_nr].tc_port= ip_conf_nr; + udp_conf[udp_conf_nr].uc_port= 
ip_conf_nr; + + enable= 7; /* 1 = IP, 2 = TCP, 4 = UDP */ token(0); if (word[0] == '{') { token(0); - if (strcmp(word, "default") == 0) { - if (ifdefault != -1) { - printf( - "inet: ip%d and ip%d can't both be default\n", - ifdefault, ifno); - error(); + while (word[0] != '}') { + if (strcmp(word, "default") == 0) { + if (ifdefault != -1) { + printf( + "inet: ip%d and ip%d can't both be default\n", + ifdefault, ifno); + error(); + } + ifdefault= ifno; + token(0); + } else + if (strcmp(word, "no") == 0) { + token(1); + if (strcmp(word, "ip") == 0) { + enable= 0; + } else + if (strcmp(word, "tcp") == 0) { + enable &= ~2; + } else + if (strcmp(word, "udp") == 0) { + enable &= ~4; + } else { + printf( + "inet: Can't do 'no %s'\n", + word); + exit(1); + } + token(0); + } else { + printf("inet: Unknown option '%s'\n", + word); + exit(1); } - ifdefault= ifno; - token(0); + if (word[0] == ';') token(0); + else + if (word[0] != '}') error(); } - if (word[0] == ';') token(0); - if (word[0] != '}') error(); token(0); } if (word[0] != ';' && word[0] != 0) error(); - icp++; - ip_conf_nr++; + + if (enable & 1) icp++, ip_conf_nr++; + if (enable & 2) tcp_conf_nr++; + if (enable & 4) udp_conf_nr++; } if (ifdefault == -1) { @@ -277,6 +318,27 @@ void read_conf(void) exit(1); } + /* Translate VLAN network references to port numbers. */ + for (i= 0; i < eth_conf_nr; i++) { + ecp= ð_conf[i]; + if (eth_is_vlan(ecp)) { + for (j= 0; j < eth_conf_nr; j++) { + if (eth_conf[j].ec_ifno == ecp->ec_port + && !eth_is_vlan(ð_conf[j]) + ) { + ecp->ec_port= j; + break; + } + } + if (j == eth_conf_nr) { + printf( + "inet: VLAN eth%d can't be built on eth%d\n", + ecp->ec_ifno, ecp->ec_port); + exit(1); + } + } + } + /* Set umask 0 so we can creat mode 666 devices. 
*/ (void) umask(0); @@ -300,5 +362,5 @@ void *alloc(size_t size) } /* - * $PchId: inet_config.c,v 1.6 1998/10/23 20:15:27 philip Exp $ + * $PchId: inet_config.c,v 1.10 2003/08/21 09:26:02 philip Exp $ */ diff --git a/servers/inet/inet_config.h b/servers/inet/inet_config.h index 3f0ba6e7f..b4d6fce48 100644 --- a/servers/inet/inet_config.h +++ b/servers/inet/inet_config.h @@ -12,28 +12,26 @@ Copyright 1995 Philip Homburg #ifndef INET__INET_CONFIG_H #define INET__INET_CONFIG_H -#define ENABLE_ARP 1 -#define ENABLE_IP 1 -#define ENABLE_PSIP 1 -#define ENABLE_TCP 1 -#define ENABLE_UDP 1 - /* Inet configuration file. */ #define PATH_INET_CONF "/etc/inet.conf" -#define IP_PORT_MAX (1*sizeof(char*)) /* Up to this many network devices */ +#define IP_PORT_MAX 32 /* Up to this many network devices */ extern int eth_conf_nr; /* Number of ethernets */ extern int psip_conf_nr; /* Number of Pseudo IP networks */ -extern int ip_conf_nr; /* Number of configured TCP/IP layers */ +extern int ip_conf_nr; /* Number of configured IP layers */ +extern int tcp_conf_nr; /* Number of configured TCP layers */ +extern int udp_conf_nr; /* Number of configured UDP layers */ extern dev_t ip_dev; /* Device number of /dev/ip */ struct eth_conf { - char *ec_task; /* Kernel ethernet task name */ - u8_t ec_port; /* Task port */ + char *ec_task; /* Kernel ethernet task name if nonnull */ + u8_t ec_port; /* Task port (!vlan) or Ethernet port (vlan) */ u8_t ec_ifno; /* Interface number of /dev/eth* */ + u16_t ec_vlan; /* VLAN number of this net if task == NULL */ }; +#define eth_is_vlan(ecp) ((ecp)->ec_task == NULL) struct psip_conf { @@ -47,6 +45,16 @@ struct ip_conf u8_t ic_ifno; /* Interface number of /dev/ip*, tcp*, udp* */ }; +struct tcp_conf +{ + u8_t tc_port; /* IP port number */ +}; + +struct udp_conf +{ + u8_t uc_port; /* IP port number */ +}; + /* Types of networks. 
*/ #define NETTYPE_ETH 1 #define NETTYPE_PSIP 2 @@ -64,12 +72,17 @@ struct ip_conf extern struct eth_conf eth_conf[IP_PORT_MAX]; extern struct psip_conf psip_conf[IP_PORT_MAX]; extern struct ip_conf ip_conf[IP_PORT_MAX]; +extern struct tcp_conf tcp_conf[IP_PORT_MAX]; +extern struct udp_conf udp_conf[IP_PORT_MAX]; void read_conf(void); extern char *sbrk(int); void *alloc(size_t size); +/* Options */ +extern int ip_forward_directed_bcast; + #endif /* INET__INET_CONFIG_H */ /* - * $PchId: inet_config.h,v 1.6 1998/10/23 20:14:28 philip Exp $ + * $PchId: inet_config.h,v 1.10 2003/08/21 09:24:33 philip Exp $ */ diff --git a/servers/inet/minix3/queryparam.c b/servers/inet/minix3/queryparam.c new file mode 100644 index 000000000..d8b37c163 --- /dev/null +++ b/servers/inet/minix3/queryparam.c @@ -0,0 +1,151 @@ +/* queryparam() - allow program parameters to be queried + * Author: Kees J. Bot + * 21 Apr 1994 + */ +#define nil 0 +#include +#include +#include +#include +#include + +#if EXAMPLE +struct stat st[2]; + +struct export_param_list ex_st_list[]= { + QP_VARIABLE(st), + QP_ARRAY(st), + QP_FIELD(st_dev, struct stat), + QP_FIELD(st_ino, struct stat), + ... + QP_END() +}; + +struct buf { block_t b_blocknr; ... } *buf; +size_t nr_bufs; + +struct export_param_list ex_buf_list[]= + QP_VECTOR(buf, buf, nr_bufs), + QP_FIELD(b_blocknr), + ... + QP_END() +}; + +struct export_params ex_st= { ex_st_list, 0 }; +struct export_params ex_buf= { ex_buf_list, 0 }; +#endif + +#define between(a, c, z) ((unsigned) ((c) - (a)) <= (unsigned) ((z) - (a))) + +static int isvar(int c) +{ + return between('a', c, 'z') || between('A', c, 'Z') + || between('0', c, '9') || c == '_'; +} + +static struct export_params *params; + +void qp_export(struct export_params *ex_params) +{ + /* Add a set of exported parameters. 
*/ + + if (ex_params->next == nil) { + ex_params->next= params; + params= ex_params; + } +} + +int queryparam(int qgetc(void), void **poffset, size_t *psize) +{ + char *prefix; + struct export_params *ep; + struct export_param_list *epl; + size_t offset= 0; + size_t size= -1; + size_t n; + static size_t retval; + int c, firstc; + + firstc= c= (*qgetc)(); + if (c == '&' || c == '$') c= (*qgetc)(); + if (!isvar(c)) goto fail; + + if ((ep= params) == nil) goto fail; + epl= ep->list; + + while (c != 0 && c != ',') { + prefix= "x"; + n= 0; + + for (;;) { + while (epl->name == nil) { + if ((ep= ep->next) == nil) goto fail; + epl= ep->list; + } + if (strncmp(prefix, epl->name, n) == 0) { + prefix= epl->name; + while (prefix[n] != 0 && c == prefix[n]) { + n++; + c= (*qgetc)(); + } + } + if (prefix[n] == 0 && (!isvar(c) || prefix[0] == '[')) { + /* Got a match. */ + break; + } + epl++; + } + + if (prefix[0] == '[') { + /* Array reference. */ + size_t idx= 0, cnt= 1, max= size / epl->size; + + while (between('0', c, '9')) { + idx= idx * 10 + (c - '0'); + if (idx > max) goto fail; + c= (*qgetc)(); + } + if (c == ':') { + cnt= 0; + while (between('0', (c= (*qgetc)()), '9')) { + cnt= cnt * 10 + (c - '0'); + } + } + if (c != ']') goto fail; + if (idx + cnt > max) cnt= max - idx; + offset+= idx * epl->size; + size= cnt * epl->size; + c= (*qgetc)(); + } else + if (epl->size == -1) { + /* Vector. */ + offset= (size_t) * (void **) epl->offset; + size= (* (size_t *) epl[1].offset) * epl[1].size; + } else { + /* Variable or struct field. */ + offset+= (size_t) epl->offset; + if ((size_t) epl->offset > size) goto fail; + size-= (size_t) epl->offset; + if (size < epl->size) goto fail; + size= epl->size; + } + } + if (firstc == '&' || firstc == '$') { + retval= firstc == '&' ? 
offset : size; + offset= (size_t) &retval; + size= sizeof(retval); + } + if (c != 0 && c != ',') goto fail; + *poffset= (void *) offset; + *psize= size; + return c != 0; +fail: + while (c != 0 && c != ',') c= (*qgetc)(); + *poffset= nil; + *psize= 0; + return c != 0; +} + +/* + * $PchId: queryparam.c,v 1.1 2005/06/28 14:30:56 philip Exp $ + */ diff --git a/servers/inet/minix3/queryparam.h b/servers/inet/minix3/queryparam.h new file mode 100644 index 000000000..7415fab43 --- /dev/null +++ b/servers/inet/minix3/queryparam.h @@ -0,0 +1,45 @@ +/* queryparam.h - query program parameters Author: Kees J. Bot + * 22 Apr 1994 + */ +#ifndef _MINIX__QUERYPARAM_H +#define _MINIX__QUERYPARAM_H + +#include + +typedef size_t _mnx_size_t; + +struct export_param_list { + char *name; /* "variable", "[", ".field", or NULL. */ + void *offset; /* Address of a variable or field offset. */ + size_t size; /* Size of the resulting object. */ +}; + +struct export_params { + struct export_param_list *list; /* List of exported parameters. */ + struct export_params *next; /* Link several sets of parameters. */ +}; + +#ifdef __STDC__ +#define qp_stringize(var) #var +#define qp_dotstringize(var) "." 
#var +#else +#define qp_stringize(var) "var" +#define qp_dotstringize(var) ".var" +#endif +#define QP_VARIABLE(var) { qp_stringize(var), &(var), sizeof(var) } +#define QP_ARRAY(var) { "[", 0, sizeof((var)[0]) } +#define QP_VECTOR(var,ptr,len) { qp_stringize(var), &(ptr), -1 },\ + { "[", &(len), sizeof(*(ptr)) } +#define QP_FIELD(field, type) { qp_dotstringize(field), \ + (void *)offsetof(type, field), \ + sizeof(((type *)0)->field) } +#define QP_END() { 0, 0, 0 } + +void qp_export _ARGS((struct export_params *_ex_params)); +int queryparam _ARGS((int (*_qgetc) _ARGS((void)), void **_paddress, + _mnx_size_t *_psize)); +_mnx_size_t paramvalue _ARGS((char **_value, void *_address, + _mnx_size_t _size)); +#endif /* _MINIX__QUERYPARAM_H */ + +/* $PchId: queryparam.h,v 1.1 2005/06/28 14:31:26 philip Exp $ */ diff --git a/servers/inet/mnx_eth.c b/servers/inet/mnx_eth.c index f8bd24a43..bdc7d54c7 100644 --- a/servers/inet/mnx_eth.c +++ b/servers/inet/mnx_eth.c @@ -18,12 +18,10 @@ Copyright 1995 Philip Homburg #include "generic/eth_int.h" #include "generic/sr.h" -#include -#define _MINIX -#include - THIS_FILE +static int recv_debug= 0; + FORWARD _PROTOTYPE( void setup_read, (eth_port_t *eth_port) ); FORWARD _PROTOTYPE( void read_int, (eth_port_t *eth_port, int count) ); FORWARD _PROTOTYPE( void write_int, (eth_port_t *eth_port) ); @@ -33,26 +31,30 @@ FORWARD _PROTOTYPE( eth_port_t *find_port, (message *m) ); PUBLIC void osdep_eth_init() { - int i, r, tasknr; + int i, r, tasknr, rport; struct eth_conf *ecp; - eth_port_t *eth_port; - message mess, repl_mess; + eth_port_t *eth_port, *rep; + message mess; - for (i= 0, eth_port= eth_port_table, ecp= eth_conf; - iec_task, strlen(ecp->ec_task)); -#endif + if (eth_is_vlan(ecp)) + continue; +#ifdef __minix_vmd + r= sys_findproc(ecp->ec_task, &tasknr, 0); +#else /* Minix 3 */ r = findproc(ecp->ec_task, &tasknr); +#endif if (r != OK) { - ip_panic(( "unable to find task %s: %d\n", - ecp->ec_task, r )); + printf("eth%d: unable to find 
task %s: %d\n", + i, ecp->ec_task, r); + continue; } - - eth_port->etp_osdep.etp_port= ecp->ec_port; + eth_port->etp_osdep.etp_port= ecp->ec_port; eth_port->etp_osdep.etp_task= tasknr; ev_init(ð_port->etp_osdep.etp_recvev); @@ -64,11 +66,9 @@ PUBLIC void osdep_eth_init() r= send(eth_port->etp_osdep.etp_task, &mess); if (r<0) { -#if !CRAMPED printf( "osdep_eth_init: unable to send to ethernet task, error= %d\n", r); -#endif continue; } @@ -77,29 +77,79 @@ PUBLIC void osdep_eth_init() if (mess.m3_i1 == ENXIO) { -#if !CRAMPED printf( "osdep_eth_init: no ethernet device at task=%d,port=%d\n", - eth_port->etp_osdep.etp_task, + eth_port->etp_osdep.etp_task, eth_port->etp_osdep.etp_port); -#endif continue; } - if (mess.m3_i1 != eth_port->etp_osdep.etp_port) - ip_panic(("osdep_eth_init: DL_INIT error or wrong port: %d\n", + if (mess.m3_i1 < 0) + ip_panic(("osdep_eth_init: DL_INIT returned error %d\n", mess.m3_i1)); + + if (mess.m3_i1 != eth_port->etp_osdep.etp_port) + { + ip_panic(( + "osdep_eth_init: got reply for wrong port (got %d, expected %d)\n", + mess.m3_i1, eth_port->etp_osdep.etp_port)); + } eth_port->etp_ethaddr= *(ether_addr_t *)mess.m3_ca1; sr_add_minor(if2minor(ecp->ec_ifno, ETH_DEV_OFF), i, eth_open, eth_close, eth_read, - eth_write, eth_ioctl, eth_cancel); + eth_write, eth_ioctl, eth_cancel, eth_select); eth_port->etp_flags |= EPF_ENABLED; + eth_port->etp_vlan= 0; + eth_port->etp_vlan_port= NULL; eth_port->etp_wr_pack= 0; eth_port->etp_rd_pack= 0; setup_read (eth_port); - eth_port++; + } + + /* And now come the VLANs */ + for (i= 0, ecp= eth_conf, eth_port= eth_port_table; + ietp_osdep.etp_port= ecp->ec_port; + eth_port->etp_osdep.etp_task= ANY; + ev_init(ð_port->etp_osdep.etp_recvev); + + rport= eth_port->etp_osdep.etp_port; + assert(rport >= 0 && rport < eth_conf_nr); + rep= ð_port_table[rport]; + if (!rep->etp_flags & EPF_ENABLED) + { + printf( + "eth%d: underlying ethernet device %d not enabled", + i, rport); + continue; + } + if (rep->etp_vlan != 0) + { 
+ printf( + "eth%d: underlying ethernet device %d is a VLAN", + i, rport); + continue; + } + + eth_port->etp_ethaddr= rep->etp_ethaddr; + + sr_add_minor(if2minor(ecp->ec_ifno, ETH_DEV_OFF), + i, eth_open, eth_close, eth_read, + eth_write, eth_ioctl, eth_cancel, eth_select); + + eth_port->etp_flags |= EPF_ENABLED; + eth_port->etp_vlan= ecp->ec_vlan; + eth_port->etp_vlan_port= rep; + assert(eth_port->etp_vlan != 0); + eth_port->etp_wr_pack= 0; + eth_port->etp_rd_pack= 0; + eth_reg_vlan(rep, eth_port); } } @@ -116,6 +166,9 @@ acc_t *pack; int multicast, r; ev_arg_t ev_arg; + assert(!no_ethWritePort); + assert(!eth_port->etp_vlan); + assert(eth_port->etp_wr_pack == NULL); eth_port->etp_wr_pack= pack; @@ -173,7 +226,8 @@ acc_t *pack; ip_panic(("unable to receive")); loc_port= eth_port; - if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT) + if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT || + loc_port->etp_osdep.etp_task != block_msg.m_source) { loc_port= find_port(&block_msg); } @@ -187,6 +241,12 @@ acc_t *pack; } if (block_msg.DL_STAT & DL_PACK_RECV) { + if (recv_debug) + { + printf( + "eth_write_port(block_msg): eth%d got DL_PACK_RECV\n", + loc_port-eth_port_table); + } loc_port->etp_osdep.etp_recvrepl= block_msg; ev_arg.ev_ptr= loc_port; ev_enqueue(&loc_port->etp_osdep.etp_recvev, @@ -202,12 +262,18 @@ acc_t *pack; ip_panic(("unable to receive")); assert(mess1.m_type == DL_TASK_REPLY && - mess1.DL_PORT == mess1.DL_PORT && + mess1.DL_PORT == eth_port->etp_osdep.etp_port && mess1.DL_PROC == this_proc); assert((mess1.DL_STAT >> 16) == OK); if (mess1.DL_STAT & DL_PACK_RECV) { + if (recv_debug) + { + printf( + "eth_write_port(mess1): eth%d got DL_PACK_RECV\n", + mess1.DL_PORT); + } eth_port->etp_osdep.etp_recvrepl= mess1; ev_arg.ev_ptr= eth_port; ev_enqueue(ð_port->etp_osdep.etp_recvev, eth_recvev, @@ -220,7 +286,7 @@ acc_t *pack; } /* If the port is in promiscuous mode or the packet is - * broadcasted/multicasted, enqueue the reply packet. 
+ * broad- or multicast, enqueue the reply packet. */ eth_dst_ptr= (u8_t *)ptr2acc_data(pack); multicast= (*eth_dst_ptr & 1); /* low order bit indicates multicast */ @@ -268,18 +334,25 @@ message *m; if (stat & DL_PACK_SEND) write_int(loc_port); if (stat & DL_PACK_RECV) + { + if (recv_debug) + { + printf("eth_rec: eth%d got DL_PACK_RECV\n", + m->DL_PORT); + } read_int(loc_port, m->DL_COUNT); + } } -#ifndef notdef PUBLIC int eth_get_stat(eth_port, eth_stat) eth_port_t *eth_port; eth_stat_t *eth_stat; { - acc_t *acc; int result; message mess, mlocked; + assert(!eth_port->etp_vlan); + mess.m_type= DL_GETSTAT; mess.DL_PORT= eth_port->etp_osdep.etp_port; mess.DL_PROC= this_proc; @@ -314,9 +387,7 @@ assert (result == 0); } return OK; } -#endif -#ifndef notdef PUBLIC void eth_set_rec_conf (eth_port, flags) eth_port_t *eth_port; u32_t flags; @@ -325,6 +396,8 @@ u32_t flags; unsigned dl_flags; message mess, repl_mess; + assert(!eth_port->etp_vlan); + dl_flags= DL_NOMODE; if (flags & NWEO_EN_BROAD) dl_flags |= DL_BROAD_REQ; @@ -341,10 +414,10 @@ u32_t flags; do { result= send (eth_port->etp_osdep.etp_task, &mess); - if (result == ELOCKED) - /* Ethernet task is sending to this task, I hope */ + if (result == ELOCKED) /* etp_task is sending to this task, + I hope */ { - if (receive (eth_port->etp_osdep.etp_task, + if (receive (eth_port->etp_osdep.etp_task, &repl_mess)< 0) { ip_panic(("unable to receive")); @@ -368,7 +441,6 @@ u32_t flags; } eth_port->etp_osdep.etp_recvconf= flags; } -#endif PRIVATE void write_int(eth_port) eth_port_t *eth_port; @@ -383,7 +455,13 @@ eth_port_t *eth_port; eth_dst_ptr= (u8_t *)ptr2acc_data(pack); multicast= (*eth_dst_ptr & 1); /* low order bit indicates multicast */ if (multicast || (eth_port->etp_osdep.etp_recvconf & NWEO_EN_PROMISC)) + { + assert(!no_ethWritePort); + no_ethWritePort= 1; eth_arrive(eth_port, pack, bf_bufsize(pack)); + assert(no_ethWritePort); + no_ethWritePort= 0; + } else bf_afree(pack); @@ -402,7 +480,11 @@ int count; 
cut_pack= bf_cut(pack, 0, count); bf_afree(pack); + assert(!no_ethWritePort); + no_ethWritePort= 1; eth_arrive(eth_port, cut_pack, count); + assert(no_ethWritePort); + no_ethWritePort= 0; eth_port->etp_flags &= ~(EPF_READ_IP|EPF_READ_SP); setup_read(eth_port); @@ -418,6 +500,7 @@ eth_port_t *eth_port; ev_arg_t ev_arg; int i, r; + assert(!eth_port->etp_vlan); assert(!(eth_port->etp_flags & (EPF_READ_IP|EPF_READ_SP))); do @@ -425,7 +508,7 @@ eth_port_t *eth_port; assert (!eth_port->etp_rd_pack); iovec= eth_port->etp_osdep.etp_rd_iovec; - pack= bf_memreq (ETH_MAX_PACK_SIZE); + pack= bf_memreq (ETH_MAX_PACK_SIZE_TAGGED); for (i=0, pack_ptr= pack; iacc_next) @@ -443,6 +526,11 @@ eth_port_t *eth_port; for (;;) { + if (recv_debug) + { + printf("eth%d: sending DL_READV\n", + mess1.DL_PORT); + } r= send (eth_port->etp_osdep.etp_task, &mess1); if (r != ELOCKED) break; @@ -453,7 +541,9 @@ eth_port_t *eth_port; ip_panic(("unable to receive")); loc_port= eth_port; - if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT) + if (loc_port->etp_osdep.etp_port != block_msg.DL_PORT || + loc_port->etp_osdep.etp_task != + block_msg.m_source) { loc_port= find_port(&block_msg); } @@ -468,6 +558,12 @@ eth_port_t *eth_port; } if (block_msg.DL_STAT & DL_PACK_RECV) { + if (recv_debug) + { + printf( + "setup_read(block_msg): eth%d got DL_PACK_RECV\n", + block_msg.DL_PORT); + } assert(loc_port != eth_port); loc_port->etp_osdep.etp_recvrepl= block_msg; ev_arg.ev_ptr= loc_port; @@ -490,11 +586,21 @@ eth_port_t *eth_port; if (mess1.DL_STAT & DL_PACK_RECV) { + if (recv_debug) + { + printf( + "setup_read(mess1): eth%d: got DL_PACK_RECV\n", + mess1.DL_PORT); + } /* packet received */ pack_ptr= bf_cut(pack, 0, mess1.DL_COUNT); bf_afree(pack); + assert(!no_ethWritePort); + no_ethWritePort= 1; eth_arrive(eth_port, pack_ptr, mess1.DL_COUNT); + assert(no_ethWritePort); + no_ethWritePort= 0; } else { @@ -525,11 +631,17 @@ ev_arg_t ev_arg; m_ptr= ð_port->etp_osdep.etp_recvrepl; assert(m_ptr->m_type == 
DL_TASK_REPLY); - assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT); + assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT && + eth_port->etp_osdep.etp_task == m_ptr->m_source); assert(m_ptr->DL_STAT & DL_PACK_RECV); m_ptr->DL_STAT &= ~DL_PACK_RECV; + if (recv_debug) + { + printf("eth_recvev: eth%d got DL_PACK_RECV\n", m_ptr->DL_PORT); + } + read_int(eth_port, m_ptr->DL_COUNT); } @@ -545,7 +657,8 @@ ev_arg_t ev_arg; m_ptr= ð_port->etp_osdep.etp_sendrepl; assert (m_ptr->m_type == DL_TASK_REPLY); - assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT); + assert(eth_port->etp_osdep.etp_port == m_ptr->DL_PORT && + eth_port->etp_osdep.etp_task == m_ptr->m_source); assert(m_ptr->DL_STAT & DL_PACK_SEND); m_ptr->DL_STAT &= ~DL_PACK_SEND; @@ -562,7 +675,8 @@ message *m; for (i=0, loc_port= eth_port_table; ietp_osdep.etp_port == m->DL_PORT) + if (loc_port->etp_osdep.etp_port == m->DL_PORT && + loc_port->etp_osdep.etp_task == m->m_source) break; } assert (i +*/ + +#include "inet.h" + +#include +#ifdef __minix_vmd +#include +#else /* Minix 3 */ +#include +#endif + +#include "generic/buf.h" +#include "generic/clock.h" +#include "generic/event.h" +#include "generic/type.h" +#include "generic/sr.h" + +#include "generic/tcp_int.h" +#include "generic/udp_int.h" +#include "mq.h" +#include "qp.h" +#include "sr_int.h" + +FORWARD int get_userdata ARGS(( int proc, vir_bytes vaddr, vir_bytes vlen, + void *buffer )); +FORWARD int put_userdata ARGS(( int proc, vir_bytes vaddr, vir_bytes vlen, + void *buffer )); +FORWARD int iqp_getc ARGS(( void )); +FORWARD void iqp_putc ARGS(( int c )); + +PRIVATE struct export_param_list inet_ex_list[]= +{ + QP_VARIABLE(sr_fd_table), + QP_VARIABLE(ip_dev), + QP_VARIABLE(tcp_fd_table), + QP_VARIABLE(tcp_conn_table), + QP_VARIABLE(tcp_cancel_f), + QP_VECTOR(udp_port_table, udp_port_table, ip_conf_nr), + QP_VARIABLE(udp_fd_table), + QP_END() +}; + +PRIVATE struct export_params inet_ex_params= { inet_ex_list, NULL }; + +PRIVATE struct queryvars { + 
int proc; + struct svrqueryparam qpar; + char parbuf[256], valbuf[256]; + char *param, *value; + int r; +} *qvars; + +PUBLIC void qp_init() +{ + qp_export(&inet_ex_params); +} + +PUBLIC int qp_query(proc, argp) +int proc; +vir_bytes argp; +{ + /* Return values, sizes, or addresses of variables in MM space. */ + + struct queryvars qv; + void *addr; + size_t n, size; + int byte; + int more; + static char hex[]= "0123456789ABCDEF"; + + qv.r= get_userdata(proc, argp, sizeof(qv.qpar), &qv.qpar); + + /* Export these to mq_getc() and mq_putc(). */ + qvars= &qv; + qv.proc= proc; + qv.param= qv.parbuf + sizeof(qv.parbuf); + qv.value= qv.valbuf; + + do { + more= queryparam(iqp_getc, &addr, &size); + for (n= 0; n < size; n++) { + byte= ((u8_t *) addr)[n]; + iqp_putc(hex[byte >> 4]); + iqp_putc(hex[byte & 0x0F]); + } + iqp_putc(more ? ',' : 0); + } while (more); + return qv.r; +} + + +PRIVATE int iqp_getc() +{ + /* Return one character of the names to search for. */ + struct queryvars *qv= qvars; + size_t n; + + if (qv->r != OK || qv->qpar.psize == 0) return 0; + if (qv->param == qv->parbuf + sizeof(qv->parbuf)) { + /* Need to fill the parameter buffer. */ + n= sizeof(qv->parbuf); + if (qv->qpar.psize < n) n= qv->qpar.psize; + qv->r= get_userdata(qv->proc, (vir_bytes) qv->qpar.param, n, + qv->parbuf); + if (qv->r != OK) return 0; + qv->qpar.param+= n; + qv->param= qv->parbuf; + } + qv->qpar.psize--; + return (u8_t) *qv->param++; +} + + +PRIVATE void iqp_putc(c) +int c; +{ + /* Send one character back to the user. */ + struct queryvars *qv= qvars; + size_t n; + + if (qv->r != OK || qv->qpar.vsize == 0) return; + *qv->value++= c; + qv->qpar.vsize--; + if (qv->value == qv->valbuf + sizeof(qv->valbuf) + || c == 0 || qv->qpar.vsize == 0) { + /* Copy the value buffer to user space. 
*/ + n= qv->value - qv->valbuf; + qv->r= put_userdata(qv->proc, (vir_bytes) qv->qpar.value, n, + qv->valbuf); + qv->qpar.value+= n; + qv->value= qv->valbuf; + } +} + +PRIVATE int get_userdata(proc, vaddr, vlen, buffer) +int proc; +vir_bytes vaddr; +vir_bytes vlen; +void *buffer; +{ +#ifdef __minix_vmd + return sys_copy(proc, SEG_D, (phys_bytes)vaddr, this_proc, SEG_D, + (phys_bytes)buffer, (phys_bytes)vlen); +#else /* Minix 3 */ + return sys_vircopy(proc, D, vaddr, SELF, D, (vir_bytes)buffer, vlen); +#endif +} + + +PRIVATE int put_userdata(proc, vaddr, vlen, buffer) +int proc; +vir_bytes vaddr; +vir_bytes vlen; +void *buffer; +{ +#ifdef __minix_vmd + return sys_copy(this_proc, SEG_D, (phys_bytes)buffer, + proc, SEG_D, (phys_bytes)vaddr, (phys_bytes)vlen); +#else /* Minix 3 */ + return sys_vircopy(SELF, D, (vir_bytes)buffer, proc, D, vaddr, vlen); +#endif +} + + + +/* + * $PchId: qp.c,v 1.7 2005/06/28 14:25:25 philip Exp $ + */ diff --git a/servers/inet/qp.h b/servers/inet/qp.h new file mode 100644 index 000000000..f4e67165e --- /dev/null +++ b/servers/inet/qp.h @@ -0,0 +1,21 @@ +/* +inet/qp.h + +Handle queryparams requests + +Created: June 1995 by Philip Homburg + +Copyright 1995 Philip Homburg +*/ + +#ifndef INET__QP_H +#define INET__QP_H + +void qp_init ARGS(( void )); +int qp_query ARGS(( int proc, vir_bytes argp )); + +#endif /* INET__QP_H */ + +/* + * $PchId: qp.h,v 1.4 2005/01/29 18:08:06 philip Exp $ + */ diff --git a/servers/inet/sha2.c b/servers/inet/sha2.c new file mode 100644 index 000000000..1e14d9fe7 --- /dev/null +++ b/servers/inet/sha2.c @@ -0,0 +1,1095 @@ +/* $FreeBSD: src/sys/crypto/sha2/sha2.c,v 1.2.2.2 2002/03/05 08:36:47 ume Exp $ */ +/* $KAME: sha2.c,v 1.8 2001/11/08 01:07:52 itojun Exp $ */ + +/* + * sha2.c + * + * Version 1.0.0beta1 + * + * Written by Aaron D. Gifford + * + * Copyright 2000 Aaron D. Gifford. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + + +#include +/* #include */ +/* #include */ +/* #include */ +#include "sha2.h" + +/* + * ASSERT NOTE: + * Some sanity checking code is included using assert(). On my FreeBSD + * system, this additional code can be removed by compiling with NDEBUG + * defined. Check your own systems manpage on assert() to see how to + * compile WITHOUT the sanity checking code on your system. 
+ * + * UNROLLED TRANSFORM LOOP NOTE: + * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform + * loop version for the hash transform rounds (defined using macros + * later in this file). Either define on the command line, for example: + * + * cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c + * + * or define below: + * + * #define SHA2_UNROLL_TRANSFORM + * + */ + +#if defined(__bsdi__) || defined(__FreeBSD__) +#define assert(x) +#endif + + +/*** SHA-256/384/512 Machine Architecture Definitions *****************/ +/* + * SHA2_BYTE_ORDER NOTE: + * + * Please make sure that your system defines SHA2_BYTE_ORDER. If your + * architecture is little-endian, make sure it also defines + * SHA2_LITTLE_ENDIAN and that the two (SHA2_BYTE_ORDER and SHA2_LITTLE_ENDIAN) are + * equivalent. + * + * If your system does not define the above, then you can do so by + * hand like this: + * + * #define SHA2_LITTLE_ENDIAN 1234 + * #define SHA2_BIG_ENDIAN 4321 + * + * And for little-endian machines, add: + * + * #define SHA2_BYTE_ORDER SHA2_LITTLE_ENDIAN + * + * Or for big-endian machines: + * + * #define SHA2_BYTE_ORDER SHA2_BIG_ENDIAN + * + * The FreeBSD machine this was written on defines BYTE_ORDER + * appropriately by including <sys/types.h> (which in turn includes + * <machine/endian.h> where the appropriate definitions are actually + * made). + */ +#if !defined(SHA2_BYTE_ORDER) || (SHA2_BYTE_ORDER != SHA2_LITTLE_ENDIAN && SHA2_BYTE_ORDER != SHA2_BIG_ENDIAN) +#error Define SHA2_BYTE_ORDER to be equal to either SHA2_LITTLE_ENDIAN or SHA2_BIG_ENDIAN +#endif + +/* + * Define the following sha2_* types to types of the correct length on + * the native architecture. Most BSD systems and Linux define u_intXX_t + * types. Machines with very recent ANSI C headers can use the + * uintXX_t definitions from inttypes.h by defining SHA2_USE_INTTYPES_H + * during compile or in the sha2.h header file. 
+ * + * Machines that support neither u_intXX_t nor inttypes.h's uintXX_t + * will need to define these three typedefs below (and the appropriate + * ones in sha.h too) by hand according to their system architecture. + * + * Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t + * types and pointing out recent ANSI C support for uintXX_t in inttypes.h. + */ +#if 0 /*def SHA2_USE_INTTYPES_H*/ + +typedef uint8_t sha2_byte; /* Exactly 1 byte */ +typedef uint32_t sha2_word32; /* Exactly 4 bytes */ +typedef uint64_t sha2_word64; /* Exactly 8 bytes */ + +#else /* SHA2_USE_INTTYPES_H */ + +typedef u_int8_t sha2_byte; /* Exactly 1 byte */ +typedef u_int32_t sha2_word32; /* Exactly 4 bytes */ +typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ + +#endif /* SHA2_USE_INTTYPES_H */ + + +/*** SHA-256/384/512 Various Length Definitions ***********************/ +/* NOTE: Most of these are in sha2.h */ +#define SHA256_SHORT_BLOCK_LENGTH (SHA256_BLOCK_LENGTH - 8) +#define SHA384_SHORT_BLOCK_LENGTH (SHA384_BLOCK_LENGTH - 16) +#define SHA512_SHORT_BLOCK_LENGTH (SHA512_BLOCK_LENGTH - 16) + + +/*** ENDIAN REVERSAL MACROS *******************************************/ +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN +#define REVERSE32(w,x) { \ + sha2_word32 tmp = (w); \ + tmp = (tmp >> 16) | (tmp << 16); \ + (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \ +} +#define REVERSE64(w,x) { \ + sha2_word64 tmp = (w); \ + tmp = (tmp >> 32) | (tmp << 32); \ + tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \ + ((tmp & 0x00ff00ff00ff00ffULL) << 8); \ + (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \ + ((tmp & 0x0000ffff0000ffffULL) << 16); \ +} +#if MINIX_64BIT +#undef REVERSE64 +#define REVERSE64(w,x) { \ + u32_t hi, lo; \ + REVERSE32(ex64hi((w)), lo); \ + REVERSE32(ex64lo((w)), hi); \ + (x) = make64(lo, hi); \ +} +#endif /* MINIX_64BIT */ +#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + +/* + * Macro for incrementally adding the unsigned 64-bit integer n to the + * 
unsigned 128-bit integer (represented using a two-element array of + * 64-bit words): + */ +#define ADDINC128(w,n) { \ + (w)[0] += (sha2_word64)(n); \ + if ((w)[0] < (n)) { \ + (w)[1]++; \ + } \ +} + +/*** THE SIX LOGICAL FUNCTIONS ****************************************/ +/* + * Bit shifting and rotation (used by the six SHA-XYZ logical functions: + * + * NOTE: The naming of R and S appears backwards here (R is a SHIFT and + * S is a ROTATION) because the SHA-256/384/512 description document + * (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this + * same "backwards" definition. + */ +/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */ +#define R(b,x) ((x) >> (b)) +/* 32-bit Rotate-right (used in SHA-256): */ +#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) +/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ +#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) + +/* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */ +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +/* Four of six logical functions used in SHA-256: */ +#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) +#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) +#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) +#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) + +/* Four of six logical functions used in SHA-384 and SHA-512: */ +#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) +#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) +#define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x))) +#define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x))) + +/*** INTERNAL FUNCTION PROTOTYPES *************************************/ +/* NOTE: These should not be accessed directly from outside this + * library -- they are intended for private internal visibility/use + * only. 
+ */ +void SHA512_Last(SHA512_CTX*); +void SHA256_Transform(SHA256_CTX*, const sha2_word32*); +void SHA512_Transform(SHA512_CTX*, const sha2_word64*); + + +/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ +/* Hash constant words K for SHA-256: */ +const static sha2_word32 K256[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* Initial hash value H for SHA-256: */ +const static sha2_word32 sha256_initial_hash_value[8] = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL +}; + +#if !NO_64BIT +/* Hash constant words K for SHA-384 and SHA-512: */ +const static sha2_word64 K512[80] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 
0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +/* Initial hash value H for SHA-384 */ +const static sha2_word64 sha384_initial_hash_value[8] = { + 0xcbbb9d5dc1059ed8ULL, + 0x629a292a367cd507ULL, + 0x9159015a3070dd17ULL, + 0x152fecd8f70e5939ULL, + 0x67332667ffc00b31ULL, + 0x8eb44a8768581511ULL, + 0xdb0c2e0d64f98fa7ULL, + 0x47b5481dbefa4fa4ULL +}; + +/* Initial hash value H for SHA-512 */ +const static sha2_word64 sha512_initial_hash_value[8] = { + 0x6a09e667f3bcc908ULL, + 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, + 
0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, + 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, + 0x5be0cd19137e2179ULL +}; +#endif /* !NO_64BIT */ + +/* + * Constant used by SHA256/384/512_End() functions for converting the + * digest to a readable hexadecimal character string: + */ +static const char *sha2_hex_digits = "0123456789abcdef"; + + +/*** SHA-256: *********************************************************/ +void SHA256_Init(SHA256_CTX* context) { + if (context == (SHA256_CTX*)0) { + return; + } + bcopy(sha256_initial_hash_value, context->state, SHA256_DIGEST_LENGTH); + bzero(context->buffer, SHA256_BLOCK_LENGTH); +#if MINIX_64BIT + context->bitcount= cvu64(0); +#else /* !MINIX_64BIT */ + context->bitcount = 0; +#endif /* MINIX_64BIT */ +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-256 round macros: */ + +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + +#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ + REVERSE32(*data++, W256[j]); \ + T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ + K256[j] + W256[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ + j++ + + +#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + +#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ + T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ + K256[j] + (W256[j] = *data++); \ + (d) += T1; \ + (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ + j++ + +#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + +#define ROUND256(a,b,c,d,e,f,g,h) \ + s0 = W256[(j+1)&0x0f]; \ + s0 = sigma0_256(s0); \ + s1 = W256[(j+14)&0x0f]; \ + s1 = sigma1_256(s1); \ + T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + \ + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ + j++ + +void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { + sha2_word32 a, b, c, d, e, f, g, h, s0, s1; + sha2_word32 T1, *W256; + int j; + + W256 = (sha2_word32*)context->buffer; + + /* Initialize registers with the prev. 
intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { + /* Rounds 0 to 15 (unrolled): */ + ROUND256_0_TO_15(a,b,c,d,e,f,g,h); + ROUND256_0_TO_15(h,a,b,c,d,e,f,g); + ROUND256_0_TO_15(g,h,a,b,c,d,e,f); + ROUND256_0_TO_15(f,g,h,a,b,c,d,e); + ROUND256_0_TO_15(e,f,g,h,a,b,c,d); + ROUND256_0_TO_15(d,e,f,g,h,a,b,c); + ROUND256_0_TO_15(c,d,e,f,g,h,a,b); + ROUND256_0_TO_15(b,c,d,e,f,g,h,a); + } while (j < 16); + + /* Now for the remaining rounds to 64: */ + do { + ROUND256(a,b,c,d,e,f,g,h); + ROUND256(h,a,b,c,d,e,f,g); + ROUND256(g,h,a,b,c,d,e,f); + ROUND256(f,g,h,a,b,c,d,e); + ROUND256(e,f,g,h,a,b,c,d); + ROUND256(d,e,f,g,h,a,b,c); + ROUND256(c,d,e,f,g,h,a,b); + ROUND256(b,c,d,e,f,g,h,a); + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} + +#else /* SHA2_UNROLL_TRANSFORM */ + +void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { + sha2_word32 a, b, c, d, e, f, g, h, s0, s1; + sha2_word32 T1, T2, *W256; + int j; + + W256 = (sha2_word32*)context->buffer; + + /* Initialize registers with the prev. 
intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + /* Copy data while converting to host byte order */ + REVERSE32(*data++,W256[j]); + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; +#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + /* Apply the SHA-256 compression function to update a..h with copy */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j] = *data++); +#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do { + /* Part of the message block expansion: */ + s0 = W256[(j+1)&0x0f]; + s0 = sigma0_256(s0); + s1 = W256[(j+14)&0x0f]; + s1 = sigma1_256(s1); + + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} + +#endif /* SHA2_UNROLL_TRANSFORM */ + +void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) { + unsigned int freespace, usedspace; + + if (len == 0) { + /* Calling with no data is valid - we do nothing */ + return; + } + + /* Sanity check: */ + assert(context != (SHA256_CTX*)0 && data != 
(sha2_byte*)0); + +#if MINIX_64BIT + usedspace= rem64u(context->bitcount, SHA256_BLOCK_LENGTH*8)/8; +#else /* !MINIX_64BIT */ + usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; +#endif /* MINIX_64BIT */ + if (usedspace > 0) { + /* Calculate how much free space is available in the buffer */ + freespace = SHA256_BLOCK_LENGTH - usedspace; + + if (len >= freespace) { + /* Fill the buffer completely and process it */ + bcopy(data, &context->buffer[usedspace], freespace); +#if MINIX_64BIT + context->bitcount= add64u(context->bitcount, + freespace << 3); +#else /* !MINIX_64BIT */ + context->bitcount += freespace << 3; +#endif /* MINIX_64BIT */ + len -= freespace; + data += freespace; + SHA256_Transform(context, (sha2_word32*)context->buffer); + } else { + /* The buffer is not yet full */ + bcopy(data, &context->buffer[usedspace], len); +#if MINIX_64BIT + context->bitcount= add64u(context->bitcount, len << 3); +#else /* !MINIX_64BIT */ + context->bitcount += len << 3; +#endif /* MINIX_64BIT */ + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= SHA256_BLOCK_LENGTH) { + /* Process as many complete blocks as we can */ + SHA256_Transform(context, (const sha2_word32*)data); +#if MINIX_64BIT + context->bitcount= add64u(context->bitcount, + SHA256_BLOCK_LENGTH << 3); +#else /* !MINIX_64BIT */ + context->bitcount += SHA256_BLOCK_LENGTH << 3; +#endif /* MINIX_64BIT */ + len -= SHA256_BLOCK_LENGTH; + data += SHA256_BLOCK_LENGTH; + } + if (len > 0) { + /* There's left-overs, so save 'em */ + bcopy(data, context->buffer, len); +#if MINIX_64BIT + context->bitcount= add64u(context->bitcount, len << 3); +#else /* !MINIX_64BIT */ + context->bitcount += len << 3; +#endif /* MINIX_64BIT */ + } + /* Clean up: */ + usedspace = freespace = 0; +} + +void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) { + sha2_word32 *d = (sha2_word32*)digest; + unsigned int usedspace; + + /* Sanity check: */ + assert(context != (SHA256_CTX*)0); + + /* If no digest 
buffer is passed, we don't bother doing this: */ + if (digest != (sha2_byte*)0) { +#if MINIX_64BIT + usedspace= rem64u(context->bitcount, SHA256_BLOCK_LENGTH*8)/8; +#else /* !MINIX_64BIT */ + usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; +#endif /* MINIX_64BIT */ +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount,context->bitcount); +#endif + if (usedspace > 0) { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) { + /* Set-up for the last transform: */ + bzero(&context->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace); + } else { + if (usedspace < SHA256_BLOCK_LENGTH) { + bzero(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA256_Transform(context, (sha2_word32*)context->buffer); + + /* And set-up for the last transform: */ + bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH); + } + } else { + /* Set-up for the last transform: */ + bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Set the bit count (already byte-swapped above on little-endian hosts): */ + *(sha2_word64*)&context->buffer[SHA256_SHORT_BLOCK_LENGTH] = context->bitcount; + + /* Final transform: */ + SHA256_Transform(context, (sha2_word32*)context->buffer); + +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + { + /* Convert TO host byte order */ + int j; + for (j = 0; j < 8; j++) { + REVERSE32(context->state[j],context->state[j]); + *d++ = context->state[j]; + } + } +#else + bcopy(context->state, d, SHA256_DIGEST_LENGTH); +#endif + } + + /* Clean up state data: wipe the whole structure, not just sizeof(pointer) bytes */ + bzero(context, sizeof(*context)); + usedspace = 0; +} + +char *SHA256_End(SHA256_CTX* context, char buffer[]) { + sha2_byte digest[SHA256_DIGEST_LENGTH], *d = digest; + int i; + + /* Sanity check: */ + assert(context != (SHA256_CTX*)0); + + if (buffer != (char*)0) { + SHA256_Final(digest, context); + + for (i = 0; i 
< SHA256_DIGEST_LENGTH; i++) { + buffer[2 * i] = sha2_hex_digits[(*d & 0xf0) >> 4]; /* indexed store keeps 'buffer' at the start */ + buffer[2 * i + 1] = sha2_hex_digits[*d & 0x0f]; + d++; + } + buffer[2 * SHA256_DIGEST_LENGTH] = (char)0; + } else { + bzero(context, sizeof(*context)); /* wipe the whole context, not sizeof(pointer) bytes */ + } + bzero(digest, SHA256_DIGEST_LENGTH); + return buffer; /* start of the hex string, or NULL if no buffer was given */ +} + +char* SHA256_Data(const sha2_byte* data, size_t len, char digest[SHA256_DIGEST_STRING_LENGTH]) { + SHA256_CTX context; + + SHA256_Init(&context); + SHA256_Update(&context, data, len); + return SHA256_End(&context, digest); +} + +#if !NO_64BIT + +/*** SHA-512: *********************************************************/ +void SHA512_Init(SHA512_CTX* context) { + if (context == (SHA512_CTX*)0) { + return; + } + bcopy(sha512_initial_hash_value, context->state, SHA512_DIGEST_LENGTH); + bzero(context->buffer, SHA512_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-512 round macros: */ +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + +#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ + REVERSE64(*data++, W512[j]); \ + T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \ + K512[j] + W512[j]; \ + (d) += T1, \ + (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)), \ + j++ + + +#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + +#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) \ + T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + \ + K512[j] + (W512[j] = *data++); \ + (d) += T1; \ + (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \ + j++ + +#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + +#define ROUND512(a,b,c,d,e,f,g,h) \ + s0 = W512[(j+1)&0x0f]; \ + s0 = sigma0_512(s0); \ + s1 = W512[(j+14)&0x0f]; \ + s1 = sigma1_512(s1); \ + T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + K512[j] + \ + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)); \ + j++ + +void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) { + sha2_word64 a, b, c, d, e, f, g, h, s0, s1; + sha2_word64 T1, *W512 = 
(sha2_word64*)context->buffer; + int j; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { + ROUND512_0_TO_15(a,b,c,d,e,f,g,h); + ROUND512_0_TO_15(h,a,b,c,d,e,f,g); + ROUND512_0_TO_15(g,h,a,b,c,d,e,f); + ROUND512_0_TO_15(f,g,h,a,b,c,d,e); + ROUND512_0_TO_15(e,f,g,h,a,b,c,d); + ROUND512_0_TO_15(d,e,f,g,h,a,b,c); + ROUND512_0_TO_15(c,d,e,f,g,h,a,b); + ROUND512_0_TO_15(b,c,d,e,f,g,h,a); + } while (j < 16); + + /* Now for the remaining rounds up to 79: */ + do { + ROUND512(a,b,c,d,e,f,g,h); + ROUND512(h,a,b,c,d,e,f,g); + ROUND512(g,h,a,b,c,d,e,f); + ROUND512(f,g,h,a,b,c,d,e); + ROUND512(e,f,g,h,a,b,c,d); + ROUND512(d,e,f,g,h,a,b,c); + ROUND512(c,d,e,f,g,h,a,b); + ROUND512(b,c,d,e,f,g,h,a); + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} + +#else /* SHA2_UNROLL_TRANSFORM */ + +void SHA512_Transform(SHA512_CTX* context, const sha2_word64* data) { + sha2_word64 a, b, c, d, e, f, g, h, s0, s1; + sha2_word64 T1, T2, *W512 = (sha2_word64*)context->buffer; + int j; + + /* Initialize registers with the prev. 
intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do { +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + /* Convert TO host byte order */ + REVERSE64(*data++, W512[j]); + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j]; +#else /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + /* Apply the SHA-512 compression function to update a..h with copy */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + (W512[j] = *data++); +#endif /* SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN */ + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do { + /* Part of the message block expansion: */ + s0 = W512[(j+1)&0x0f]; + s0 = sigma0_512(s0); + s1 = W512[(j+14)&0x0f]; + s1 = sigma1_512(s1); + + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} + +#endif /* SHA2_UNROLL_TRANSFORM */ + +void SHA512_Update(SHA512_CTX* context, const sha2_byte *data, size_t len) { + unsigned int freespace, usedspace; + + if (len == 0) { + /* Calling with no data is valid - we do nothing */ + return; + } + + /* Sanity check: */ + assert(context != (SHA512_CTX*)0 && data != (sha2_byte*)0); + + 
usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; + if (usedspace > 0) { + /* Calculate how much free space is available in the buffer */ + freespace = SHA512_BLOCK_LENGTH - usedspace; + + if (len >= freespace) { + /* Fill the buffer completely and process it */ + bcopy(data, &context->buffer[usedspace], freespace); + ADDINC128(context->bitcount, freespace << 3); + len -= freespace; + data += freespace; + SHA512_Transform(context, (sha2_word64*)context->buffer); + } else { + /* The buffer is not yet full */ + bcopy(data, &context->buffer[usedspace], len); + ADDINC128(context->bitcount, len << 3); + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= SHA512_BLOCK_LENGTH) { + /* Process as many complete blocks as we can */ + SHA512_Transform(context, (const sha2_word64*)data); + ADDINC128(context->bitcount, SHA512_BLOCK_LENGTH << 3); + len -= SHA512_BLOCK_LENGTH; + data += SHA512_BLOCK_LENGTH; + } + if (len > 0) { + /* There's left-overs, so save 'em */ + bcopy(data, context->buffer, len); + ADDINC128(context->bitcount, len << 3); + } + /* Clean up: */ + usedspace = freespace = 0; +} + +void SHA512_Last(SHA512_CTX* context) { + unsigned int usedspace; + + usedspace = (context->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount[0],context->bitcount[0]); + REVERSE64(context->bitcount[1],context->bitcount[1]); +#endif + if (usedspace > 0) { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= SHA512_SHORT_BLOCK_LENGTH) { + /* Set-up for the last transform: */ + bzero(&context->buffer[usedspace], SHA512_SHORT_BLOCK_LENGTH - usedspace); + } else { + if (usedspace < SHA512_BLOCK_LENGTH) { + bzero(&context->buffer[usedspace], SHA512_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA512_Transform(context, (sha2_word64*)context->buffer); + + /* And set-up for the last 
transform: */ + bzero(context->buffer, SHA512_BLOCK_LENGTH - 2); + } + } else { + /* Prepare for final transform: */ + bzero(context->buffer, SHA512_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Store the length of input data (in bits), high 64-bit word first: */ + *(sha2_word64*)&context->buffer[SHA512_SHORT_BLOCK_LENGTH] = context->bitcount[1]; + *(sha2_word64*)&context->buffer[SHA512_SHORT_BLOCK_LENGTH+8] = context->bitcount[0]; + + /* Final transform: */ + SHA512_Transform(context, (sha2_word64*)context->buffer); +} + +void SHA512_Final(sha2_byte digest[], SHA512_CTX* context) { + sha2_word64 *d = (sha2_word64*)digest; + + /* Sanity check: */ + assert(context != (SHA512_CTX*)0); + + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != (sha2_byte*)0) { + SHA512_Last(context); + + /* Save the hash data for output: */ +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + { + /* Convert TO host byte order */ + int j; + for (j = 0; j < 8; j++) { + REVERSE64(context->state[j],context->state[j]); + *d++ = context->state[j]; + } + } +#else + bcopy(context->state, d, SHA512_DIGEST_LENGTH); +#endif + } + + /* Zero out state data: the whole structure, not just sizeof(pointer) bytes */ + bzero(context, sizeof(*context)); +} + +char *SHA512_End(SHA512_CTX* context, char buffer[]) { + sha2_byte digest[SHA512_DIGEST_LENGTH], *d = digest; + int i; + + /* Sanity check: */ + assert(context != (SHA512_CTX*)0); + + if (buffer != (char*)0) { + SHA512_Final(digest, context); + + for (i = 0; i < SHA512_DIGEST_LENGTH; i++) { + buffer[2 * i] = sha2_hex_digits[(*d & 0xf0) >> 4]; /* indexed store keeps 'buffer' at the start */ + buffer[2 * i + 1] = sha2_hex_digits[*d & 0x0f]; + d++; + } + buffer[2 * SHA512_DIGEST_LENGTH] = (char)0; + } else { + bzero(context, sizeof(*context)); /* wipe the whole context, not sizeof(pointer) bytes */ + } + bzero(digest, SHA512_DIGEST_LENGTH); + return buffer; /* start of the hex string, or NULL if no buffer was given */ +} + +char* SHA512_Data(const sha2_byte* data, size_t len, char digest[SHA512_DIGEST_STRING_LENGTH]) { + SHA512_CTX context; + + SHA512_Init(&context); + SHA512_Update(&context, data, len); + return SHA512_End(&context, digest); +} + + +/*** 
SHA-384: *********************************************************/ +void SHA384_Init(SHA384_CTX* context) { + if (context == (SHA384_CTX*)0) { + return; + } + bcopy(sha384_initial_hash_value, context->state, SHA512_DIGEST_LENGTH); /* all 8 state words: same state size as SHA-512 */ + bzero(context->buffer, SHA384_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +void SHA384_Update(SHA384_CTX* context, const sha2_byte* data, size_t len) { + SHA512_Update((SHA512_CTX*)context, data, len); /* SHA-384 reuses the SHA-512 block processing */ +} + +void SHA384_Final(sha2_byte digest[], SHA384_CTX* context) { + sha2_word64 *d = (sha2_word64*)digest; + + /* Sanity check: */ + assert(context != (SHA384_CTX*)0); + + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != (sha2_byte*)0) { + SHA512_Last((SHA512_CTX*)context); + + /* Save the hash data for output (only the first 6 of the 8 state words): */ +#if SHA2_BYTE_ORDER == SHA2_LITTLE_ENDIAN + { + /* Convert TO host byte order */ + int j; + for (j = 0; j < 6; j++) { + REVERSE64(context->state[j],context->state[j]); + *d++ = context->state[j]; + } + } +#else + bcopy(context->state, d, SHA384_DIGEST_LENGTH); +#endif + } + + /* Zero out state data: the whole structure, not just sizeof(pointer) bytes */ + bzero(context, sizeof(*context)); +} + +char *SHA384_End(SHA384_CTX* context, char buffer[]) { + sha2_byte digest[SHA384_DIGEST_LENGTH], *d = digest; + int i; + + /* Sanity check: */ + assert(context != (SHA384_CTX*)0); + + if (buffer != (char*)0) { + SHA384_Final(digest, context); + + for (i = 0; i < SHA384_DIGEST_LENGTH; i++) { + buffer[2 * i] = sha2_hex_digits[(*d & 0xf0) >> 4]; /* indexed store keeps 'buffer' at the start */ + buffer[2 * i + 1] = sha2_hex_digits[*d & 0x0f]; + d++; + } + buffer[2 * SHA384_DIGEST_LENGTH] = (char)0; + } else { + bzero(context, sizeof(*context)); /* wipe the whole context, not sizeof(pointer) bytes */ + } + bzero(digest, SHA384_DIGEST_LENGTH); + return buffer; /* start of the hex string, or NULL if no buffer was given */ +} + +char* SHA384_Data(const sha2_byte* data, size_t len, char digest[SHA384_DIGEST_STRING_LENGTH]) { + SHA384_CTX context; + + SHA384_Init(&context); + SHA384_Update(&context, data, len); + return SHA384_End(&context, digest); +} + +#endif /* !NO_64BIT */ + +/* + * $PchId: sha2.c,v 1.1 2005/06/28 14:29:23 philip Exp $ 
+ */ diff --git a/servers/inet/sha2.h b/servers/inet/sha2.h new file mode 100644 index 000000000..85f8a2a82 --- /dev/null +++ b/servers/inet/sha2.h @@ -0,0 +1,168 @@ +/* $FreeBSD: src/sys/crypto/sha2/sha2.h,v 1.1.2.1 2001/07/03 11:01:36 ume Exp $ */ +/* $KAME: sha2.h,v 1.3 2001/03/12 08:27:48 itojun Exp $ */ + +/* + * sha2.h + * + * Version 1.0.0beta1 + * + * Written by Aaron D. Gifford + * + * Copyright 2000 Aaron D. Gifford. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ */
+
+#ifndef __SHA2_H__
+#define __SHA2_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*** SHA-256/384/512 Various Length Definitions ***********************/
+/* All lengths are in bytes.  The *_STRING_LENGTH values are the size of
+ * the hexadecimal text form: two digits per digest byte plus the
+ * terminating NUL.
+ */
+#define SHA256_BLOCK_LENGTH		64
+#define SHA256_DIGEST_LENGTH		32
+#define SHA256_DIGEST_STRING_LENGTH	(SHA256_DIGEST_LENGTH * 2 + 1)
+#define SHA384_BLOCK_LENGTH		128
+#define SHA384_DIGEST_LENGTH		48
+#define SHA384_DIGEST_STRING_LENGTH	(SHA384_DIGEST_LENGTH * 2 + 1)
+#define SHA512_BLOCK_LENGTH		128
+#define SHA512_DIGEST_LENGTH		64
+#define SHA512_DIGEST_STRING_LENGTH	(SHA512_DIGEST_LENGTH * 2 + 1)
+
+#ifdef __minix
+/* NOTE(review): the names of the four headers below are missing in this
+ * copy of the patch and must be restored from upstream.  The macros and
+ * typedefs that follow need at least the declarations of memmove/memset
+ * and of u8_t/u32_t/u64_t.
+ */
+#include
+#include
+#include
+#include
+
+/* Map the Minix integer types onto the u_intXX_t names used below. */
+typedef u8_t u_int8_t;		/* 1-byte  (8-bits)  */
+typedef u32_t u_int32_t;	/* 4-bytes (32-bits) */
+typedef u64_t u_int64_t;	/* 8-bytes (64-bits) */
+
+#ifndef __P
+#define __P(x) x
+#endif
+
+#define NO_64BIT	1
+#define MINIX_64BIT	1
+
+/* Byte-order sentinels.  SHA2_BYTE_ORDER is compared against
+ * SHA2_LITTLE_ENDIAN in sha2.c.  The big-endian value is the byte-wise
+ * reverse of the little-endian one (it used to read 0x01020204).
+ */
+#define SHA2_BYTE_ORDER		0x04030201
+#define SHA2_LITTLE_ENDIAN	0x04030201
+#define SHA2_BIG_ENDIAN		0x01020304
+/* BSD-style helpers expressed with the ISO C functions; note that bcopy
+ * takes (source, destination, length).
+ */
+#define bcopy(s,d,l)	(memmove((d),(s),(l)))
+#define bzero(d,l)	(memset((d),'\0',(l)))
+#endif
+
+/*** SHA-256/384/512 Context Structures *******************************/
+/* NOTE: If your architecture does not define either u_intXX_t types or
+ * uintXX_t (from inttypes.h), you may need to define things by hand
+ * for your system:
+ */
+#if 0
+typedef unsigned char u_int8_t;		/* 1-byte  (8-bits)  */
+typedef unsigned int u_int32_t;		/* 4-bytes (32-bits) */
+typedef unsigned long long u_int64_t;	/* 8-bytes (64-bits) */
+#endif
+/*
+ * Most BSD systems already define u_intXX_t types, as does Linux.
+ * Some systems, however, like Compaq's Tru64 Unix instead can use
+ * uintXX_t types defined by very recent ANSI C standards and included
+ * in the file:
+ *
+ *   #include <inttypes.h>
+ *
+ * If you choose to use <inttypes.h> then please define:
+ *
+ *   #define SHA2_USE_INTTYPES_H
+ *
+ * Or on the command line during compile:
+ *
+ *   cc -DSHA2_USE_INTTYPES_H ...
+ */ +#if 0 /*def SHA2_USE_INTTYPES_H*/ + +typedef struct _SHA256_CTX { + uint32_t state[8]; + uint64_t bitcount; + uint8_t buffer[SHA256_BLOCK_LENGTH]; +} SHA256_CTX; +typedef struct _SHA512_CTX { + uint64_t state[8]; + uint64_t bitcount[2]; + uint8_t buffer[SHA512_BLOCK_LENGTH]; +} SHA512_CTX; + +#else /* SHA2_USE_INTTYPES_H */ + +typedef struct _SHA256_CTX { + u_int32_t state[8]; + u_int64_t bitcount; + u_int8_t buffer[SHA256_BLOCK_LENGTH]; +} SHA256_CTX; +typedef struct _SHA512_CTX { + u_int64_t state[8]; + u_int64_t bitcount[2]; + u_int8_t buffer[SHA512_BLOCK_LENGTH]; +} SHA512_CTX; + +#endif /* SHA2_USE_INTTYPES_H */ + +typedef SHA512_CTX SHA384_CTX; + + +/*** SHA-256/384/512 Function Prototypes ******************************/ + +void SHA256_Init __P((SHA256_CTX *)); +void SHA256_Update __P((SHA256_CTX*, const u_int8_t*, size_t)); +void SHA256_Final __P((u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*)); +char* SHA256_End __P((SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH])); +char* SHA256_Data __P((const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH])); + +void SHA384_Init __P((SHA384_CTX*)); +void SHA384_Update __P((SHA384_CTX*, const u_int8_t*, size_t)); +void SHA384_Final __P((u_int8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*)); +char* SHA384_End __P((SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH])); +char* SHA384_Data __P((const u_int8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH])); + +void SHA512_Init __P((SHA512_CTX*)); +void SHA512_Update __P((SHA512_CTX*, const u_int8_t*, size_t)); +void SHA512_Final __P((u_int8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*)); +char* SHA512_End __P((SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH])); +char* SHA512_Data __P((const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH])); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __SHA2_H__ */ + + +/* + * $PchId: sha2.h,v 1.1 2005/06/28 14:29:33 philip Exp $ + */ diff --git a/servers/inet/sr.c b/servers/inet/sr.c index 8f9e3da20..4fdc0257c 100644 --- 
a/servers/inet/sr.c +++ b/servers/inet/sr.c @@ -41,82 +41,88 @@ #include "inet.h" +#ifndef __minix_vmd /* Minix 3 */ +#include +#endif +#include #include #include "mq.h" +#include "qp.h" #include "proto.h" #include "generic/type.h" #include "generic/assert.h" #include "generic/buf.h" +#include "generic/event.h" #include "generic/sr.h" +#include "sr_int.h" + +#ifndef __minix_vmd /* Minix 3 */ +#define DEV_CANCEL NW_CANCEL +#define DEVICE_REPLY REVIVE +#define DEV_IOCTL3 DEV_IOCTL +#define NDEV_BUFFER ADDRESS +#define NDEV_COUNT COUNT +#define NDEV_IOCTL REQUEST +#define NDEV_MINOR DEVICE +#define NDEV_PROC PROC_NR +#endif THIS_FILE -#define FD_NR (16*IP_PORT_MAX) +PUBLIC sr_fd_t sr_fd_table[FD_NR]; -typedef struct sr_fd -{ - int srf_flags; - int srf_fd; - int srf_port; - sr_open_t srf_open; - sr_close_t srf_close; - sr_write_t srf_write; - sr_read_t srf_read; - sr_ioctl_t srf_ioctl; - sr_cancel_t srf_cancel; - mq_t *srf_ioctl_q, *srf_ioctl_q_tail; - mq_t *srf_read_q, *srf_read_q_tail; - mq_t *srf_write_q, *srf_write_q_tail; -} sr_fd_t; - -#define SFF_FLAGS 0x0F -# define SFF_FREE 0x00 -# define SFF_MINOR 0x01 -# define SFF_INUSE 0x02 -# define SFF_BUSY 0x3C -# define SFF_IOCTL_IP 0x04 -# define SFF_READ_IP 0x08 -# define SFF_WRITE_IP 0x10 -# define SFF_PENDING_REQ 0x30 -# define SFF_SUSPENDED 0x1C0 -# define SFF_IOCTL_SUSP 0x40 -# define SFF_READ_SUSP 0x80 -# define SFF_WRITE_SUSP 0x100 +PRIVATE mq_t *repl_queue, *repl_queue_tail; +#ifdef __minix_vmd +PRIVATE cpvec_t cpvec[CPVEC_NR]; +#else /* Minix 3 */ +PRIVATE struct vir_cp_req vir_cp_req[CPVEC_NR]; +#endif FORWARD _PROTOTYPE ( int sr_open, (message *m) ); FORWARD _PROTOTYPE ( void sr_close, (message *m) ); FORWARD _PROTOTYPE ( int sr_rwio, (mq_t *m) ); +FORWARD _PROTOTYPE ( int sr_restart_read, (sr_fd_t *fdp) ); +FORWARD _PROTOTYPE ( int sr_restart_write, (sr_fd_t *fdp) ); +FORWARD _PROTOTYPE ( int sr_restart_ioctl, (sr_fd_t *fdp) ); FORWARD _PROTOTYPE ( int sr_cancel, (message *m) ); +#ifndef __minix_vmd /* 
Minix 3 */ +FORWARD _PROTOTYPE ( int sr_select, (message *m) ); +#endif FORWARD _PROTOTYPE ( void sr_reply, (mq_t *m, int reply, int can_enqueue) ); FORWARD _PROTOTYPE ( sr_fd_t *sr_getchannel, (int minor)); FORWARD _PROTOTYPE ( acc_t *sr_get_userdata, (int fd, vir_bytes offset, vir_bytes count, int for_ioctl) ); FORWARD _PROTOTYPE ( int sr_put_userdata, (int fd, vir_bytes offset, acc_t *data, int for_ioctl) ); +#ifdef __minix_vmd +#define sr_select_res 0 +#else /* Minix 3 */ +FORWARD _PROTOTYPE (void sr_select_res, (int fd, unsigned ops) ); +#endif FORWARD _PROTOTYPE ( int sr_repl_queue, (int proc, int ref, int operation) ); FORWARD _PROTOTYPE ( int walk_queue, (sr_fd_t *sr_fd, mq_t *q_head, mq_t **q_tail_ptr, int type, int proc_nr, int ref) ); FORWARD _PROTOTYPE ( void process_req_q, (mq_t *mq, mq_t *tail, mq_t **tail_ptr) ); +FORWARD _PROTOTYPE ( void sr_event, (event_t *evp, ev_arg_t arg) ); FORWARD _PROTOTYPE ( int cp_u2b, (int proc, char *src, acc_t **var_acc_ptr, int size) ); FORWARD _PROTOTYPE ( int cp_b2u, (acc_t *acc_ptr, int proc, char *dest) ); -PRIVATE sr_fd_t sr_fd_table[FD_NR]; -PRIVATE mq_t *repl_queue, *repl_queue_tail; -PRIVATE struct vir_cp_req vir_cp_req[CPVEC_NR]; - PUBLIC void sr_init() { -#if ZERO int i; for (i=0; imq_mess.m_type == NW_CANCEL) + if (m->mq_mess.m_type == DEV_CANCEL) { - result= sr_repl_queue(m->mq_mess.PROC_NR, 0, 0); +#ifdef __minix_vmd + result= sr_repl_queue(m->mq_mess.NDEV_PROC, + m->mq_mess.NDEV_REF, + m->mq_mess.NDEV_OPERATION); +#else /* Minix 3 */ + result= sr_repl_queue(m->mq_mess.PROC_NR, 0, 0); +#endif if (result) { mq_free(m); @@ -155,24 +167,33 @@ mq_t *m; break; case DEV_READ: case DEV_WRITE: - case DEV_IOCTL: + case DEV_IOCTL3: result= sr_rwio(m); assert(result == OK || result == SUSPEND); send_reply= (result == SUSPEND); free_mess= 0; break; - case CANCEL: + case DEV_CANCEL: result= sr_cancel(&m->mq_mess); assert(result == OK || result == EINTR); send_reply= (result == EINTR); free_mess= 1; +#ifdef __minix_vmd 
+ m->mq_mess.m_type= m->mq_mess.NDEV_OPERATION; +#else /* Minix 3 */ m->mq_mess.m_type= 0; +#endif break; -#if !CRAMPED +#ifndef __minix_vmd /* Minix 3 */ + case DEV_SELECT: + result= sr_select(&m->mq_mess); + send_reply= 1; + free_mess= 1; + break; +#endif default: ip_panic(("unknown message, from %d, type %d", m->mq_mess.m_source, m->mq_mess.m_type)); -#endif } if (send_reply) { @@ -183,7 +204,7 @@ mq_t *m; } PUBLIC void sr_add_minor(minor, port, openf, closef, readf, writef, - ioctlf, cancelf) + ioctlf, cancelf, selectf) int minor; int port; sr_open_t openf; @@ -192,6 +213,7 @@ sr_read_t readf; sr_write_t writef; sr_ioctl_t ioctlf; sr_cancel_t cancelf; +sr_select_t selectf; { sr_fd_t *sr_fd; @@ -209,6 +231,7 @@ sr_cancel_t cancelf; sr_fd->srf_read= readf; sr_fd->srf_ioctl= ioctlf; sr_fd->srf_cancel= cancelf; + sr_fd->srf_select= selectf; } PRIVATE int sr_open(m) @@ -216,7 +239,7 @@ message *m; { sr_fd_t *sr_fd; - int minor= m->DEVICE; + int minor= m->NDEV_MINOR; int i, fd; if (minor<0 || minor>FD_NR) @@ -241,7 +264,7 @@ message *m; *sr_fd= sr_fd_table[minor]; sr_fd->srf_flags= SFF_INUSE; fd= (*sr_fd->srf_open)(sr_fd->srf_port, i, sr_get_userdata, - sr_put_userdata, 0); + sr_put_userdata, 0 /* no put_pkt */, sr_select_res); if (fd<0) { sr_fd->srf_flags= SFF_FREE; @@ -257,10 +280,11 @@ message *m; { sr_fd_t *sr_fd; - sr_fd= sr_getchannel(m->DEVICE); + sr_fd= sr_getchannel(m->NDEV_MINOR); assert (sr_fd); - assert (!(sr_fd->srf_flags & SFF_BUSY)); + if (sr_fd->srf_flags & SFF_BUSY) + ip_panic(("close on busy channel")); assert (!(sr_fd->srf_flags & SFF_MINOR)); (*sr_fd->srf_close)(sr_fd->srf_fd); @@ -277,7 +301,7 @@ mq_t *m; ioreq_t request; size_t size; - sr_fd= sr_getchannel(m->mq_mess.DEVICE); + sr_fd= sr_getchannel(m->mq_mess.NDEV_MINOR); assert (sr_fd); switch(m->mq_mess.m_type) @@ -294,16 +318,14 @@ mq_t *m; ip_flag= SFF_WRITE_IP; susp_flag= SFF_WRITE_SUSP; break; - case DEV_IOCTL: + case DEV_IOCTL3: q_head_ptr= &sr_fd->srf_ioctl_q; q_tail_ptr= 
&sr_fd->srf_ioctl_q_tail; ip_flag= SFF_IOCTL_IP; susp_flag= SFF_IOCTL_SUSP; break; -#if !CRAMPED default: ip_panic(("illegal case entry")); -#endif } if (sr_fd->srf_flags & ip_flag) @@ -324,15 +346,26 @@ mq_t *m; { case DEV_READ: r= (*sr_fd->srf_read)(sr_fd->srf_fd, - m->mq_mess.COUNT); + m->mq_mess.NDEV_COUNT); break; case DEV_WRITE: r= (*sr_fd->srf_write)(sr_fd->srf_fd, - m->mq_mess.COUNT); + m->mq_mess.NDEV_COUNT); break; - case DEV_IOCTL: - request= m->mq_mess.REQUEST; -#ifdef _IOCPARM_MASK + case DEV_IOCTL3: + request= m->mq_mess.NDEV_IOCTL; + + /* There should be a better way to do this... */ + if (request == NWIOQUERYPARAM) + { + r= qp_query(m->mq_mess.NDEV_PROC, + (vir_bytes)m->mq_mess.NDEV_BUFFER); + r= sr_put_userdata(sr_fd-sr_fd_table, r, NULL, 1); + assert(r == OK); + return OK; + } + + /* And now, we continue with our regular program. */ size= (request >> 16) & _IOCPARM_MASK; if (size>MAX_IOCTL_S) { @@ -342,13 +375,10 @@ mq_t *m; assert(r == OK); return OK; } -#endif r= (*sr_fd->srf_ioctl)(sr_fd->srf_fd, request); break; -#if !CRAMPED default: ip_panic(("illegal case entry")); -#endif } assert(r == OK || r == SUSPEND || @@ -358,21 +388,106 @@ mq_t *m; return r; } +PRIVATE int sr_restart_read(sr_fd) +sr_fd_t *sr_fd; +{ + mq_t *mp; + int r; + + mp= sr_fd->srf_read_q; + assert(mp); + + if (sr_fd->srf_flags & SFF_READ_IP) + { + assert(sr_fd->srf_flags & SFF_READ_SUSP); + return SUSPEND; + } + sr_fd->srf_flags |= SFF_READ_IP; + + r= (*sr_fd->srf_read)(sr_fd->srf_fd, + mp->mq_mess.NDEV_COUNT); + + assert(r == OK || r == SUSPEND || + (printf("r= %d\n", r), 0)); + if (r == SUSPEND) + sr_fd->srf_flags |= SFF_READ_SUSP; + return r; +} + +PRIVATE int sr_restart_write(sr_fd) +sr_fd_t *sr_fd; +{ + mq_t *mp; + int r; + + mp= sr_fd->srf_write_q; + assert(mp); + + if (sr_fd->srf_flags & SFF_WRITE_IP) + { + assert(sr_fd->srf_flags & SFF_WRITE_SUSP); + return SUSPEND; + } + sr_fd->srf_flags |= SFF_WRITE_IP; + + r= (*sr_fd->srf_write)(sr_fd->srf_fd, + 
mp->mq_mess.NDEV_COUNT);
+
+	assert(r == OK || r == SUSPEND ||
+		(printf("r= %d\n", r), 0));
+	if (r == SUSPEND)
+		sr_fd->srf_flags |= SFF_WRITE_SUSP;
+	return r;
+}
+
+/*
+ * Restart the ioctl request at the head of sr_fd's ioctl queue (used by
+ * sr_event() once the previous ioctl has been answered).
+ *
+ * Returns SUSPEND without calling the handler when an ioctl is already
+ * in progress on this channel; otherwise marks the channel as having an
+ * ioctl in progress, calls the srf_ioctl handler and returns its result
+ * (OK or SUSPEND), recording a SUSPEND in srf_flags.
+ */
+PRIVATE int sr_restart_ioctl(sr_fd)
+sr_fd_t *sr_fd;
+{
+	mq_t *mp;
+	int r;
+
+	mp= sr_fd->srf_ioctl_q;
+	assert(mp);
+
+	if (sr_fd->srf_flags & SFF_IOCTL_IP)
+	{
+		assert(sr_fd->srf_flags & SFF_IOCTL_SUSP);
+		return SUSPEND;
+	}
+	sr_fd->srf_flags |= SFF_IOCTL_IP;
+
+	/* Hand the handler the ioctl request code, exactly as sr_rwio()
+	 * does for DEV_IOCTL3.  NDEV_COUNT is the byte count of a read or
+	 * write and is not the right field to name here.
+	 */
+	r= (*sr_fd->srf_ioctl)(sr_fd->srf_fd,
+		mp->mq_mess.NDEV_IOCTL);
+
+	assert(r == OK || r == SUSPEND ||
+		(printf("r= %d\n", r), 0));
+	if (r == SUSPEND)
+		sr_fd->srf_flags |= SFF_IOCTL_SUSP;
+	return r;
+}
+
 PRIVATE int sr_cancel(m)
 message *m;
 {
 	sr_fd_t *sr_fd;
-	int i, result;
-	mq_t *q_ptr, *q_ptr_prv;
+	int result;
 	int proc_nr, ref, operation;
 	result=EINTR;
-	proc_nr= m->PROC_NR;
+	proc_nr= m->NDEV_PROC;
+#ifdef __minix_vmd
+	ref= m->NDEV_REF;
+	operation= m->NDEV_OPERATION;
+#else /* Minix 3 */
 	ref= 0;
 	operation= 0;
-	sr_fd= sr_getchannel(m->DEVICE);
+#endif
+	sr_fd= sr_getchannel(m->NDEV_MINOR);
 	assert (sr_fd);
+#ifdef __minix_vmd
+	if (operation == CANCEL_ANY || operation == DEV_IOCTL3)
+#endif
 	{
 		result= walk_queue(sr_fd, sr_fd->srf_ioctl_q,
 			&sr_fd->srf_ioctl_q_tail, SR_CANCEL_IOCTL,
@@ -380,6 +495,9 @@ message *m;
 		if (result != EAGAIN)
 			return result;
 	}
+#ifdef __minix_vmd
+	if (operation == CANCEL_ANY || operation == DEV_READ)
+#endif
 	{
 		result= walk_queue(sr_fd, sr_fd->srf_read_q,
 			&sr_fd->srf_read_q_tail, SR_CANCEL_READ,
@@ -387,6 +505,9 @@ message *m;
 		if (result != EAGAIN)
 			return result;
 	}
+#ifdef __minix_vmd
+	if (operation == CANCEL_ANY || operation == DEV_WRITE)
+#endif
 	{
 		result= walk_queue(sr_fd, sr_fd->srf_write_q,
 			&sr_fd->srf_write_q_tail, SR_CANCEL_WRITE,
@@ -394,14 +515,56 @@ message *m;
 		if (result != EAGAIN)
 			return result;
 	}
-#if !CRAMPED
+#ifdef __minix_vmd
 	ip_panic((
-"request not found: from %d, type %d, MINOR= %d, PROC= %d, REF= %d OPERATION= %d",
-		m->m_source, m->m_type, m->DEVICE,
-		m->PROC_NR, 0, 0));
+"request not found: from %d, type %d, MINOR= %d, PROC= %d, REF= %d OPERATION= %ld", + m->m_source, m->m_type, m->NDEV_MINOR, + m->NDEV_PROC, m->NDEV_REF, m->NDEV_OPERATION)); +#else /* Minix 3 */ + ip_panic(( +"request not found: from %d, type %d, MINOR= %d, PROC= %d", + m->m_source, m->m_type, m->NDEV_MINOR, + m->NDEV_PROC)); #endif } +#ifndef __minix_vmd /* Minix 3 */ +PRIVATE int sr_select(m) +message *m; +{ + sr_fd_t *sr_fd; + mq_t **q_head_ptr, **q_tail_ptr; + int ip_flag, susp_flag; + int r, ops; + unsigned m_ops, i_ops; + ioreq_t request; + size_t size; + + sr_fd= sr_getchannel(m->NDEV_MINOR); + assert (sr_fd); + + sr_fd->srf_select_proc= m->m_source; + + m_ops= m->PROC_NR; + i_ops= 0; + if (m_ops & SEL_RD) i_ops |= SR_SELECT_READ; + if (m_ops & SEL_WR) i_ops |= SR_SELECT_WRITE; + if (m_ops & SEL_ERR) i_ops |= SR_SELECT_EXCEPTION; + if (!(m_ops & SEL_NOTIFY)) i_ops |= SR_SELECT_POLL; + + printf("should select 0%o on fd %d\n", i_ops, m->NDEV_MINOR); + r= (*sr_fd->srf_select)(sr_fd->srf_fd, i_ops); + if (r < 0) + return r; + m_ops= 0; + if (r & SR_SELECT_READ) m_ops |= SEL_RD; + if (r & SR_SELECT_WRITE) m_ops |= SEL_WR; + if (r & SR_SELECT_EXCEPTION) m_ops |= SEL_ERR; + + return m_ops; +} +#endif + PRIVATE int walk_queue(sr_fd, q_head, q_tail_ptr, type, proc_nr, ref) sr_fd_t *sr_fd; mq_t *q_head, **q_tail_ptr; @@ -415,8 +578,12 @@ int ref; for(q_ptr_prv= NULL, q_ptr= q_head; q_ptr; q_ptr_prv= q_ptr, q_ptr= q_ptr->mq_next) { - if (q_ptr->mq_mess.PROC_NR != proc_nr) + if (q_ptr->mq_mess.NDEV_PROC != proc_nr) continue; +#ifdef __minix_vmd + if (q_ptr->mq_mess.NDEV_REF != ref) + continue; +#endif if (!q_ptr_prv) { result= (*sr_fd->srf_cancel)(sr_fd->srf_fd, type); @@ -456,21 +623,31 @@ int can_enqueue; int result, proc, ref,operation; message reply, *mp; - proc= mq->mq_mess.PROC_NR; + proc= mq->mq_mess.NDEV_PROC; +#ifdef __minix_vmd + ref= mq->mq_mess.NDEV_REF; +#else /* Minix 3 */ ref= 0; +#endif operation= mq->mq_mess.m_type; + assert(operation != DEV_CANCEL); 
if (can_enqueue) mp= &mq->mq_mess; else mp= &reply; - mp->m_type= REVIVE; + mp->m_type= DEVICE_REPLY; mp->REP_PROC_NR= proc; mp->REP_STATUS= status; +#ifdef __minix_vmd + mp->REP_REF= ref; + mp->REP_OPERATION= operation; +#endif result= send(mq->mq_mess.m_source, mp); if (result == ELOCKED && can_enqueue) { + mq->mq_next= NULL; if (repl_queue) repl_queue_tail->mq_next= mq; else @@ -491,26 +668,28 @@ vir_bytes count; int for_ioctl; { sr_fd_t *loc_fd; - mq_t **head_ptr, **tail_ptr, *m, *tail, *mq; + mq_t **head_ptr, *m, *mq; int ip_flag, susp_flag; int result; int suspended; char *src; acc_t *acc; + event_t *evp; + ev_arg_t arg; loc_fd= &sr_fd_table[fd]; if (for_ioctl) { head_ptr= &loc_fd->srf_ioctl_q; - tail_ptr= &loc_fd->srf_ioctl_q_tail; + evp= &loc_fd->srf_ioctl_ev; ip_flag= SFF_IOCTL_IP; susp_flag= SFF_IOCTL_SUSP; } else { head_ptr= &loc_fd->srf_write_q; - tail_ptr= &loc_fd->srf_write_q_tail; + evp= &loc_fd->srf_write_ev; ip_flag= SFF_WRITE_IP; susp_flag= SFF_WRITE_SUSP; } @@ -520,27 +699,26 @@ assert (loc_fd->srf_flags & ip_flag); if (!count) { m= *head_ptr; - *head_ptr= NULL; - tail= *tail_ptr; -assert(m); mq= m->mq_next; + *head_ptr= mq; result= (int)offset; sr_reply (m, result, 1); suspended= (loc_fd->srf_flags & susp_flag); loc_fd->srf_flags &= ~(ip_flag|susp_flag); if (suspended) { - process_req_q(mq, tail, tail_ptr); - } - else - { -assert(!mq); + if (mq) + { + { where(); printf("sr_get_userdata: enqueuing event\n"); } + arg.ev_ptr= loc_fd; + ev_enqueue(evp, sr_event, arg); + } } return NULL; } - src= (*head_ptr)->mq_mess.ADDRESS + offset; - result= cp_u2b ((*head_ptr)->mq_mess.PROC_NR, src, &acc, count); + src= (*head_ptr)->mq_mess.NDEV_BUFFER + offset; + result= cp_u2b ((*head_ptr)->mq_mess.NDEV_PROC, src, &acc, count); return result<0 ? 
NULL : acc; } @@ -552,25 +730,27 @@ acc_t *data; int for_ioctl; { sr_fd_t *loc_fd; - mq_t **head_ptr, **tail_ptr, *m, *tail, *mq; + mq_t **head_ptr, *m, *mq; int ip_flag, susp_flag; int result; int suspended; char *dst; + event_t *evp; + ev_arg_t arg; loc_fd= &sr_fd_table[fd]; if (for_ioctl) { head_ptr= &loc_fd->srf_ioctl_q; - tail_ptr= &loc_fd->srf_ioctl_q_tail; + evp= &loc_fd->srf_ioctl_ev; ip_flag= SFF_IOCTL_IP; susp_flag= SFF_IOCTL_SUSP; } else { head_ptr= &loc_fd->srf_read_q; - tail_ptr= &loc_fd->srf_read_q_tail; + evp= &loc_fd->srf_read_ev; ip_flag= SFF_READ_IP; susp_flag= SFF_READ_SUSP; } @@ -580,30 +760,55 @@ int for_ioctl; if (!data) { m= *head_ptr; - assert(m); - - *head_ptr= NULL; - tail= *tail_ptr; mq= m->mq_next; + *head_ptr= mq; result= (int)offset; sr_reply (m, result, 1); suspended= (loc_fd->srf_flags & susp_flag); loc_fd->srf_flags &= ~(ip_flag|susp_flag); if (suspended) { - process_req_q(mq, tail, tail_ptr); - } - else - { - assert(!mq); + if (mq) + { + { where(); printf("sr_put_userdata: enqueuing event\n"); } + arg.ev_ptr= loc_fd; + ev_enqueue(evp, sr_event, arg); + } } return OK; } - dst= (*head_ptr)->mq_mess.ADDRESS + offset; - return cp_b2u (data, (*head_ptr)->mq_mess.PROC_NR, dst); + dst= (*head_ptr)->mq_mess.NDEV_BUFFER + offset; + return cp_b2u (data, (*head_ptr)->mq_mess.NDEV_PROC, dst); } +#ifndef __minix_vmd /* Minix 3 */ +PRIVATE void sr_select_res(fd, ops) +int fd; +unsigned ops; +{ + unsigned m_ops; + sr_fd_t *sr_fd; + message m; + + sr_fd= &sr_fd_table[fd]; + + m_ops= 0; + if (ops & SR_SELECT_READ) m_ops |= SEL_RD; + if (ops & SR_SELECT_WRITE) m_ops |= SEL_WR; + if (ops & SR_SELECT_EXCEPTION) m_ops |= SEL_ERR; + + m.NOTIFY_TYPE= DEV_SELECTED; + m.NOTIFY_ARG= fd; + m.NOTIFY_FLAGS= m_ops; + + printf("sr_select_res: notifying caller %d with ops 0%o\n", + sr_fd->srf_select_proc, m_ops); + + notify(sr_fd->srf_select_proc, &m); +} +#endif + PRIVATE void process_req_q(mq, tail, tail_ptr) mq_t *mq, *tail, **tail_ptr; { @@ -631,6 +836,47 @@ 
mq_t *mq, *tail, **tail_ptr; return; } +PRIVATE void sr_event(evp, arg) +event_t *evp; +ev_arg_t arg; +{ + sr_fd_t *sr_fd; + int r; + + sr_fd= arg.ev_ptr; + if (evp == &sr_fd->srf_write_ev) + { + while(sr_fd->srf_write_q) + { + r= sr_restart_write(sr_fd); + if (r == SUSPEND) + return; + } + return; + } + if (evp == &sr_fd->srf_read_ev) + { + while(sr_fd->srf_read_q) + { + r= sr_restart_read(sr_fd); + if (r == SUSPEND) + return; + } + return; + } + if (evp == &sr_fd->srf_ioctl_ev) + { + while(sr_fd->srf_ioctl_q) + { + r= sr_restart_ioctl(sr_fd); + if (r == SUSPEND) + return; + } + return; + } + ip_panic(("sr_event: unkown event\n")); +} + PRIVATE int cp_u2b (proc, src, var_acc_ptr, size) int proc; char *src; @@ -650,6 +896,11 @@ int size; { size= (vir_bytes)acc->acc_length; +#ifdef __minix_vmd + cpvec[i].cpv_src= (vir_bytes)src; + cpvec[i].cpv_dst= (vir_bytes)ptr2acc_data(acc); + cpvec[i].cpv_size= size; +#else /* Minix 3 */ vir_cp_req[i].count= size; vir_cp_req[i].src.proc_nr = proc; vir_cp_req[i].src.segment = D; @@ -657,6 +908,7 @@ int size; vir_cp_req[i].dst.proc_nr = this_proc; vir_cp_req[i].dst.segment = D; vir_cp_req[i].dst.offset = (vir_bytes) ptr2acc_data(acc); +#endif src += size; acc= acc->acc_next; @@ -664,9 +916,17 @@ int size; if (i == CPVEC_NR || acc == NULL) { +#ifdef __minix_vmd + mess.m_type= SYS_VCOPY; + mess.m1_i1= proc; + mess.m1_i2= this_proc; + mess.m1_i3= i; + mess.m1_p1= (char *)cpvec; +#else /* Minix 3 */ mess.m_type= SYS_VIRVCOPY; - mess.VCP_VEC_SIZE = i; - mess.VCP_VEC_ADDR = (char *) vir_cp_req; + mess.VCP_VEC_SIZE= i; + mess.VCP_VEC_ADDR= (char *)vir_cp_req; +#endif if (sendrec(SYSTASK, &mess) <0) ip_panic(("unable to sendrec")); if (mess.m_type <0) @@ -699,6 +959,11 @@ char *dest; if (size) { +#ifdef __minix_vmd + cpvec[i].cpv_src= (vir_bytes)ptr2acc_data(acc); + cpvec[i].cpv_dst= (vir_bytes)dest; + cpvec[i].cpv_size= size; +#else /* Minix 3 */ vir_cp_req[i].src.proc_nr = this_proc; vir_cp_req[i].src.segment = D; 
vir_cp_req[i].src.offset= (vir_bytes)ptr2acc_data(acc); @@ -706,6 +971,7 @@ char *dest; vir_cp_req[i].dst.segment = D; vir_cp_req[i].dst.offset= (vir_bytes)dest; vir_cp_req[i].count= size; +#endif i++; } @@ -714,9 +980,17 @@ char *dest; if (i == CPVEC_NR || acc == NULL) { +#ifdef __minix_vmd + mess.m_type= SYS_VCOPY; + mess.m1_i1= this_proc; + mess.m1_i2= proc; + mess.m1_i3= i; + mess.m1_p1= (char *)cpvec; +#else /* Minix 3 */ mess.m_type= SYS_VIRVCOPY; - mess.VCP_VEC_SIZE = i; - mess.VCP_VEC_ADDR = (char *) vir_cp_req; + mess.VCP_VEC_SIZE= i; + mess.VCP_VEC_ADDR= (char *) vir_cp_req; +#endif if (sendrec(SYSTASK, &mess) <0) ip_panic(("unable to sendrec")); if (mess.m_type <0) @@ -743,15 +1017,20 @@ int operation; for (m= repl_queue; m;) { +#ifdef __minix_vmd + if (m->mq_mess.REP_PROC_NR == proc && + m->mq_mess.REP_REF ==ref && + (m->mq_mess.REP_OPERATION == operation || + operation == CANCEL_ANY)) +#else /* Minix 3 */ if (m->mq_mess.REP_PROC_NR == proc) +#endif { assert(!m_cancel); m_cancel= m; m= m->mq_next; continue; } -assert(m->mq_mess.m_source != PM_PROC_NR); -assert(m->mq_mess.m_type == REVIVE); result= send(m->mq_mess.m_source, &m->mq_mess); if (result != OK) ip_panic(("unable to send: %d", result)); @@ -762,8 +1041,6 @@ assert(m->mq_mess.m_type == REVIVE); repl_queue= NULL; if (m_cancel) { -assert(m_cancel->mq_mess.m_source != PM_PROC_NR); -assert(m_cancel->mq_mess.m_type == REVIVE); result= send(m_cancel->mq_mess.m_source, &m_cancel->mq_mess); if (result != OK) ip_panic(("unable to send: %d", result)); @@ -774,5 +1051,5 @@ assert(m_cancel->mq_mess.m_type == REVIVE); } /* - * $PchId: sr.c,v 1.9 1996/05/07 21:11:14 philip Exp $ + * $PchId: sr.c,v 1.17 2005/06/28 14:26:16 philip Exp $ */ diff --git a/servers/inet/sr_int.h b/servers/inet/sr_int.h new file mode 100644 index 000000000..6c4eed600 --- /dev/null +++ b/servers/inet/sr_int.h @@ -0,0 +1,48 @@ +/* +inet/sr_int.h + +SR internals + +Created: Aug 2004 by Philip Homburg +*/ + +#define FD_NR 
(16*IP_PORT_MAX)	/* number of entries in sr_fd_table */
+/* One entry of sr_fd_table: the state sr.c keeps for a channel. */
+typedef struct sr_fd
+{
+	int srf_flags;		/* SFF_* bits defined below */
+	int srf_fd;		/* handle passed to the srf_* handlers */
+	int srf_port;		/* first argument given to srf_open */
+	int srf_select_proc;	/* m_source of the last DEV_SELECT; target of
+				 * the notify() in sr_select_res()
+				 */
+	sr_open_t srf_open;	/* handlers installed by sr_add_minor() */
+	sr_close_t srf_close;
+	sr_write_t srf_write;
+	sr_read_t srf_read;
+	sr_ioctl_t srf_ioctl;
+	sr_cancel_t srf_cancel;
+	sr_select_t srf_select;
+	/* Head and tail of the queued request messages, one list per kind
+	 * of operation.  The head is the request whose buffer the
+	 * sr_get_userdata()/sr_put_userdata() callbacks copy from and to.
+	 */
+	mq_t *srf_ioctl_q, *srf_ioctl_q_tail;
+	mq_t *srf_read_q, *srf_read_q_tail;
+	mq_t *srf_write_q, *srf_write_q_tail;
+	/* Events handed to ev_enqueue() with sr_event() as handler, to
+	 * restart the next queued request of the matching kind.
+	 */
+	event_t srf_ioctl_ev;
+	event_t srf_read_ev;
+	event_t srf_write_ev;
+} sr_fd_t;
+/* Values and bit masks for srf_flags. */
+# define SFF_FREE 0x00		/* no flags set; stored by sr_open() when the open fails */
+# define SFF_MINOR 0x01		/* sr_close() asserts this is clear; presumably marks a
+				 * template entry made by sr_add_minor() - TODO confirm
+				 */
+# define SFF_INUSE 0x02		/* set by sr_open() on the entry it hands out */
+#define SFF_BUSY 0x1C		/* mask: all three *_IP bits */
+# define SFF_IOCTL_IP 0x04	/* an ioctl request is in progress */
+# define SFF_READ_IP 0x08	/* a read request is in progress */
+# define SFF_WRITE_IP 0x10	/* a write request is in progress */
+#define SFF_SUSPENDED 0x1C0	/* mask: all three *_SUSP bits */
+# define SFF_IOCTL_SUSP 0x40	/* the ioctl handler returned SUSPEND */
+# define SFF_READ_SUSP 0x80	/* the read handler returned SUSPEND */
+# define SFF_WRITE_SUSP 0x100	/* the write handler returned SUSPEND */
+
+EXTERN sr_fd_t sr_fd_table[FD_NR];
+
+/*
+ * $PchId: sr_int.h,v 1.2 2005/06/28 14:28:17 philip Exp $
+ */
diff --git a/servers/inet/version.c b/servers/inet/version.c
index 951133d22..9b7ba36e6 100644
--- a/servers/inet/version.c
+++ b/servers/inet/version.c
@@ -2,8 +2,10 @@
 version.c
 */
-char version[]= "inet 0.35K, last compiled on " __DATE__ " " __TIME__;
+#include "inet.h"
+
+char version[]= "inet 0.79, last compiled on " __DATE__ " " __TIME__;
 /*
- * $PchId: version.c,v 1.9 1996/12/17 08:01:39 philip Exp philip $
+ * $PchId: version.c,v 1.54 2005/06/28 14:35:01 philip Exp $
 */