/*
generic/ip_eth.c

Ethernet specific part of the IP implementation

Created:	Apr 22, 1993 by Philip Homburg

Copyright 1995 Philip Homburg
*/

#include "inet.h"
#include "type.h"
#include "arp.h"
#include "assert.h"
#include "buf.h"
#include "clock.h"
#include "eth.h"
#include "event.h"
#include "ip.h"
#include "ip_int.h"

THIS_FILE

/* Bookkeeping that is stored in the room of the (not yet filled in)
 * ethernet header while a packet waits on the ARP queue.
 */
typedef struct xmit_hdr
{
	time_t xh_time;		/* time at which the packet was queued */
	ipaddr_t xh_ipaddr;	/* next hop IP address that ARP must resolve */
} xmit_hdr_t;

PRIVATE ether_addr_t broadcast_ethaddr=
{
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
PRIVATE ipaddr_t broadcast_ipaddr= 0xFFFFFFFFL;

FORWARD void do_eth_read ARGS(( ip_port_t *port ));
FORWARD acc_t *get_eth_data ARGS(( int fd, size_t offset,
	size_t count, int for_ioctl ));
FORWARD int put_eth_data ARGS(( int fd, size_t offset,
	acc_t *data, int for_ioctl ));
FORWARD void ipeth_main ARGS(( ip_port_t *port ));
FORWARD void ipeth_set_ipaddr ARGS(( ip_port_t *port ));
FORWARD void ipeth_restart_send ARGS(( ip_port_t *ip_port ));
FORWARD int ipeth_send ARGS(( struct ip_port *ip_port, ipaddr_t dest,
	acc_t *pack, int broadcast ));
FORWARD void ipeth_arp_reply ARGS(( int ip_port_nr, ipaddr_t ipaddr,
	ether_addr_t *dst_ether_ptr ));
FORWARD int ipeth_update_ttl ARGS(( time_t enq_time, time_t now,
	acc_t *eth_pack ));
FORWARD void ip_eth_arrived ARGS(( int port, acc_t *pack,
	size_t pack_size ));

/* Set up the ethernet data link part of an IP port: open the ethernet
 * port, reset the state, flags and queues, and install the device
 * specific callbacks and the mss.
 *
 * Returns 0 on success, or -1 if the ethernet port cannot be opened.
 */
PUBLIC int ipeth_init(ip_port)
ip_port_t *ip_port;
{
	/* Both headers are stored in a single buffer. */
	assert(BUF_S >= sizeof(xmit_hdr_t));
	assert(BUF_S >= sizeof(eth_hdr_t));

	ip_port->ip_dl.dl_eth.de_fd= eth_open(ip_port->
		ip_dl.dl_eth.de_port, ip_port->ip_port,
		get_eth_data, put_eth_data, ip_eth_arrived);
	if (ip_port->ip_dl.dl_eth.de_fd < 0)
	{
		DBLOCK(1, printf("ip.c: unable to open eth port\n"));
		return -1;
	}
	ip_port->ip_dl.dl_eth.de_state= IES_EMPTY;
	ip_port->ip_dl.dl_eth.de_flags= IEF_EMPTY;
	ip_port->ip_dl.dl_eth.de_q_head= NULL;
	ip_port->ip_dl.dl_eth.de_q_tail= NULL;
	ip_port->ip_dl.dl_eth.de_arp_head= NULL;
	ip_port->ip_dl.dl_eth.de_arp_tail= NULL;
	ip_port->ip_dev_main= ipeth_main;
	ip_port->ip_dev_set_ipaddr= ipeth_set_ipaddr;
	ip_port->ip_dev_send= ipeth_send;
	ip_port->ip_mss= ETH_MAX_PACK_SIZE-ETH_HDR_SIZE;
	return 0;
}

/* Drive the initialization state machine of the port:
 * IES_EMPTY -> IES_SETPROTO -> IES_GETIPADDR -> IES_MAIN.
 * The function returns early whenever a step cannot complete yet and is
 * called again (from get_eth_data or ipeth_set_ipaddr) to continue.
 */
PRIVATE void ipeth_main(ip_port)
ip_port_t *ip_port;
{
	int result, i;
	ip_fd_t *ip_fd;

	switch (ip_port->ip_dl.dl_eth.de_state)
	{
	case IES_EMPTY:
		ip_port->ip_dl.dl_eth.de_state= IES_SETPROTO;

		result= eth_ioctl(ip_port->ip_dl.dl_eth.de_fd, NWIOSETHOPT);
		if (result == NW_SUSPEND)
			ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND;
		if (result<0)
		{
			DBLOCK(1, printf("eth_ioctl(..,%lx)=%d\n",
				NWIOSETHOPT, result));
			return;
		}
		/* get_eth_data may have changed the state (IES_ERROR). */
		if (ip_port->ip_dl.dl_eth.de_state != IES_SETPROTO)
			return;
		/* drops through */
	case IES_SETPROTO:
		result= arp_set_cb(ip_port->ip_dl.dl_eth.de_port,
			ip_port->ip_port, ipeth_arp_reply);
		if (result != NW_OK)
		{
#if !CRAMPED
			printf("ipeth_main: arp_set_cb failed: %d\n",
				result);
#endif
			return;
		}

		/* Wait until the interface is configured up. */
		ip_port->ip_dl.dl_eth.de_state= IES_GETIPADDR;
		if (!(ip_port->ip_flags & IPF_IPADDRSET))
		{
			ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND;
			return;
		}

		/* fall through */
	case IES_GETIPADDR:
		ip_port->ip_dl.dl_eth.de_state= IES_MAIN;

		/* Restart the NWIOGIPCONF ioctls that are pending on this
		 * port.
		 *
		 * NOTE(review): the loop header and the start of the first
		 * test were lost in extraction ("; iif_flags & ..."); they
		 * are reconstructed here. Confirm the bound IP_FD_NR
		 * against the pristine source.
		 */
		for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
		{
			if (!(ip_fd->if_flags & IFF_INUSE))
			{
				continue;
			}
			if (ip_fd->if_port != ip_port)
			{
				continue;
			}
			if (ip_fd->if_flags & IFF_GIPCONF_IP)
			{
				ip_ioctl (i, NWIOGIPCONF);
			}
		}

		do_eth_read(ip_port);
		return;
#if !CRAMPED
	default:
		ip_panic(( "unknown state: %d",
			ip_port->ip_dl.dl_eth.de_state));
#endif
	}
}

/* Callback passed to eth_open: hand out data for an ioctl or a write.
 *
 * A call with count == 0 reports completion; offset then carries the
 * result code. Otherwise count bytes starting at offset are returned:
 * the ethernet options in state IES_SETPROTO, a piece of the frame that
 * is being written in state IES_MAIN.
 */
PRIVATE acc_t *get_eth_data (fd, offset, count, for_ioctl)
int fd;
size_t offset;
size_t count;
int for_ioctl;
{
	ip_port_t *ip_port;
	acc_t *data;
	int result;

	ip_port= &ip_port_table[fd];

	switch (ip_port->ip_dl.dl_eth.de_state)
	{
	case IES_SETPROTO:
		if (!count)
		{
			/* The ioctl is done, offset is the result. */
			result= (int)offset;
			if (result<0)
			{
				ip_port->ip_dl.dl_eth.de_state= IES_ERROR;
				break;
			}
			if (ip_port->ip_dl.dl_eth.de_flags & IEF_SUSPEND)
				ipeth_main(ip_port);
			return NW_OK;
		}
		assert ((!offset) && (count == sizeof(struct nwio_ethopt)));
		{
			struct nwio_ethopt *ethopt;
			acc_t *acc;

			acc= bf_memreq(sizeof(*ethopt));
			ethopt= (struct nwio_ethopt *)ptr2acc_data(acc);
			ethopt->nweo_flags= NWEO_COPY|NWEO_EN_BROAD|
				NWEO_EN_MULTI|NWEO_TYPESPEC;
			ethopt->nweo_type= HTONS(ETH_IP_PROTO);
			return acc;
		}

	case IES_MAIN:
		if (!count)
		{
			/* The write is done, offset is the result. Release
			 * the frame and continue with the transmit queue
			 * if the write was suspended.
			 */
			result= (int)offset;
			if (result<0)
				ip_warning(( "error on write: %d\n",
					result ));
			bf_afree (ip_port->ip_dl.dl_eth.de_frame);
			ip_port->ip_dl.dl_eth.de_frame= 0;

			if (ip_port->ip_dl.dl_eth.de_flags & IEF_WRITE_SP)
			{
				ip_port->ip_dl.dl_eth.de_flags &=
					~IEF_WRITE_SP;
				ipeth_restart_send(ip_port);
			}
			return NW_OK;
		}
		data= bf_cut (ip_port->ip_dl.dl_eth.de_frame, offset,
			count);
		assert (data);
		return data;
	default:
#if !CRAMPED
		printf(
		"get_eth_data(%d, 0x%d, 0x%d) called but ip_state=0x%x\n",
			fd, offset, count, ip_port->ip_dl.dl_eth.de_state);
#endif
		break;
	}
	return 0;
}

/* Callback passed to eth_open: accept data from a read.
 *
 * The assert(0) at the top shows that this function is not expected to
 * be called; incoming packets are delivered through ip_eth_arrived.
 *
 * NOTE(review): when CRAMPED is set, control can reach the end of this
 * non-void function without a return statement.
 */
PRIVATE int put_eth_data (port, offset, data, for_ioctl)
int port;
size_t offset;
acc_t *data;
int for_ioctl;
{
	ip_port_t *ip_port;
	int result;

	ip_port= &ip_port_table[port];

	assert(0);

	if (ip_port->ip_dl.dl_eth.de_flags & IEF_READ_IP)
	{
		if (!data)
		{
			/* The read is done, offset is the result. */
			result= (int)offset;
			if (result<0)
			{
				DBLOCK(1, printf(
				"ip.c: put_eth_data(..,%d,..)\n", result));
				return NW_OK;
			}
			if (ip_port->ip_dl.dl_eth.de_flags & IEF_READ_SP)
			{
				ip_port->ip_dl.dl_eth.de_flags &=
					~(IEF_READ_IP|IEF_READ_SP);
				do_eth_read(ip_port);
			}
			else
				ip_port->ip_dl.dl_eth.de_flags &=
					~IEF_READ_IP;
			return NW_OK;
		}
		assert (!offset);
		/* Warning: the above assertion is illegal; puts and
		   gets of data can be broken up in any piece the server
		   likes. However we assume that the server is eth.c and
		   it transfers only whole packets. */
		ip_eth_arrived(port, data, bf_bufsize(data));
		return NW_OK;
	}
#if !CRAMPED
	printf("ip_port->ip_dl.dl_eth.de_state= 0x%x",
		ip_port->ip_dl.dl_eth.de_state);
	ip_panic (( "strange status" ));
#endif
}

/* The IP address of the port has been set: pass it on to ARP and, if
 * the initialization was waiting for the address, continue it.
 */
PRIVATE void ipeth_set_ipaddr(ip_port)
ip_port_t *ip_port;
{
	arp_set_ipaddr (ip_port->ip_dl.dl_eth.de_port, ip_port->ip_ipaddr);
	if (ip_port->ip_dl.dl_eth.de_state == IES_GETIPADDR)
		ipeth_main(ip_port);
}

/* Send an IP packet to the next hop 'dest' over ethernet, or to the
 * ethernet broadcast address if 'broadcast' is set.
 *
 * The packet is sent immediately when possible; otherwise it is put on
 * the ARP queue (address resolution in progress) or on the transmit
 * queue (write in progress, or packet larger than the mss).
 *
 * Returns NW_OK when the packet was sent or queued, or EDSTNOTRCH when
 * ARP reports that the destination is unreachable (the packet is freed).
 */
PRIVATE int ipeth_send(ip_port, dest, pack, broadcast)
struct ip_port *ip_port;
ipaddr_t dest;
acc_t *pack;
int broadcast;
{
	int r;
	acc_t *eth_pack, *tail;
	size_t pack_size;
	eth_hdr_t *eth_hdr;
	xmit_hdr_t *xmit_hdr;
	ipaddr_t hostpart;
	time_t t;

	/* Start optimistic: the arp will succeed without blocking and the
	 * ethernet packet can be sent without blocking also. Start with
	 * the allocation of the ethernet header.
	 */
	eth_pack= bf_memreq(sizeof(*eth_hdr));
	assert(eth_pack->acc_next == NULL);
	eth_pack->acc_next= pack;
	pack_size= bf_bufsize(eth_pack);

	/* NOTE(review): the text between "pack_size" and
	 * "eh_dst= broadcast_ethaddr" was lost in extraction. The padding
	 * to the minimum frame size, the eth_hdr assignment and the
	 * broadcast test below are reconstructed from the surviving
	 * context (the 'tail' local, the later uses of eth_hdr and the
	 * dangling 'else'). Confirm against the pristine source.
	 */
	if (pack_size<ETH_MIN_PACK_SIZE)
	{
		tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
		eth_pack= bf_append(eth_pack, tail);
	}
	eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);

	/* Lookup the ethernet address */
	if (broadcast)
		eth_hdr->eh_dst= broadcast_ethaddr;
	else
	{
		if ((dest & ip_port->ip_subnetmask) !=
			(ip_port->ip_ipaddr & ip_port->ip_subnetmask))
		{
#if !CRAMPED
			ip_panic(( "invalid destination" ));
#endif
		}

		hostpart= (dest & ~ip_port->ip_subnetmask);
		assert(hostpart != 0);
		assert(dest != ip_port->ip_ipaddr);

		r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port,
			dest, &eth_hdr->eh_dst);
		if (r == NW_SUSPEND)
		{
			/* Unfortunately, the arp takes some time, use
			 * the ethernet header to store the next hop
			 * ip address and the current time.
			 */
			xmit_hdr= (xmit_hdr_t *)eth_hdr;
			xmit_hdr->xh_time= get_time();
			xmit_hdr->xh_ipaddr= dest;
			eth_pack->acc_ext_link= NULL;
			if (ip_port->ip_dl.dl_eth.de_arp_head == NULL)
				ip_port->ip_dl.dl_eth.de_arp_head= eth_pack;
			else
			{
				ip_port->ip_dl.dl_eth.de_arp_tail->
					acc_ext_link= eth_pack;
			}
			ip_port->ip_dl.dl_eth.de_arp_tail= eth_pack;
			return NW_OK;
		}
		if (r == EDSTNOTRCH)
		{
			bf_afree(eth_pack);
			return EDSTNOTRCH;
		}
		assert(r == NW_OK);
	}

	/* If we have no write in progress, we can try to send the ethernet
	 * packet using eth_send. If the IP packet is larger than mss,
	 * enqueue the packet and let ipeth_restart_send deal with it.
	 */
	pack_size= bf_bufsize(eth_pack);
	if (ip_port->ip_dl.dl_eth.de_frame == NULL &&
		pack_size <= ip_port->ip_mss + sizeof(*eth_hdr))
	{
		r= eth_send(ip_port->ip_dl.dl_eth.de_fd,
			eth_pack, pack_size);
		if (r == NW_OK)
			return NW_OK;

		/* A non-blocking send is not possible, start a regular
		 * send.
		 */
		assert(r == NW_WOULDBLOCK);
		ip_port->ip_dl.dl_eth.de_frame= eth_pack;
		r= eth_write(ip_port->ip_dl.dl_eth.de_fd, pack_size);
		if (r == NW_SUSPEND)
		{
			assert(!(ip_port->ip_dl.dl_eth.de_flags &
				IEF_WRITE_SP));
			ip_port->ip_dl.dl_eth.de_flags |= IEF_WRITE_SP;
		}
		assert(r == NW_OK || r == NW_SUSPEND);
		return NW_OK;
	}

	/* Enqueue the packet, and store the current time, in the
	 * room for the ethernet source address.
	 */
	t= get_time();
	assert(sizeof(t) <= sizeof(eth_hdr->eh_src));
	memcpy(&eth_hdr->eh_src, &t, sizeof(t));

	eth_pack->acc_ext_link= NULL;
	if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
		ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
	else
	{
		ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link= eth_pack;
	}
	ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
	if (ip_port->ip_dl.dl_eth.de_frame == NULL)
		ipeth_restart_send(ip_port);
	return NW_OK;
}

/* Send the packets on the transmit queue until the queue is empty or a
 * write gets suspended.
 *
 * Packets larger than the mss are split; the remainder is appended to
 * the transmit queue with a copy of the ethernet header. The ttl of a
 * packet that has been queued for more than HZ ticks is decremented,
 * and the packet is dropped when its ttl runs out.
 */
PRIVATE void ipeth_restart_send(ip_port)
ip_port_t *ip_port;
{
	time_t now, enq_time;
	int r;
	acc_t *eth_pack, *ip_pack, *next_eth_pack, *next_part, *tail;
	size_t pack_size;
	eth_hdr_t *eth_hdr, *next_eth_hdr;

	now= get_time();

	while (ip_port->ip_dl.dl_eth.de_q_head != NULL)
	{
		/* Take the first packet from the queue. */
		eth_pack= ip_port->ip_dl.dl_eth.de_q_head;
		ip_port->ip_dl.dl_eth.de_q_head= eth_pack->acc_ext_link;

		eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
		pack_size= bf_bufsize(eth_pack);

		if (pack_size > ip_port->ip_mss+sizeof(*eth_hdr))
		{
			/* Split the IP packet */
			ip_pack= eth_pack->acc_next;
			next_part= ip_pack;
			ip_pack= ip_split_pack(ip_port, &next_part,
				ip_port->ip_mss);
			if (ip_pack == NULL)
			{
				bf_afree(eth_pack);
				continue;
			}

			/* Allocate new ethernet header */
			next_eth_pack= bf_memreq(sizeof(*next_eth_hdr));
			next_eth_hdr= (eth_hdr_t *)
				ptr2acc_data(next_eth_pack);
			*next_eth_hdr= *eth_hdr;
			next_eth_pack->acc_next= next_part;

			/* Append the remainder to the transmit queue. */
			next_eth_pack->acc_ext_link= NULL;
			if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
				ip_port->ip_dl.dl_eth.de_q_head=
					next_eth_pack;
			else
				ip_port->ip_dl.dl_eth.de_q_tail->
					acc_ext_link= next_eth_pack;
			ip_port->ip_dl.dl_eth.de_q_tail= next_eth_pack;

			eth_pack->acc_next= ip_pack;
			pack_size= bf_bufsize(eth_pack);
		}

		/* The enqueue time is kept in the room for the ethernet
		 * source address.
		 */
		memcpy(&enq_time, &eth_hdr->eh_src, sizeof(enq_time));
		if (enq_time + HZ < now)
		{
			r= ipeth_update_ttl(enq_time, now, eth_pack);
			if (r == ETIMEDOUT)
			{
				ip_warning((
					"should send ICMP ttl exceded" ));
				bf_afree(eth_pack);
				continue;
			}
			assert(r == NW_OK);
		}

		/* NOTE(review): the text between "pack_size" and
		 * "ip_dl.dl_eth.de_frame == NULL);" was lost in
		 * extraction. The padding to the minimum frame size and
		 * the start of the assertion are reconstructed from the
		 * surviving context (the 'tail' local). Confirm against
		 * the pristine source.
		 */
		if (pack_size<ETH_MIN_PACK_SIZE)
		{
			tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
			eth_pack= bf_append(eth_pack, tail);
			pack_size= ETH_MIN_PACK_SIZE;
		}

		assert(ip_port->ip_dl.dl_eth.de_frame == NULL);

		r= eth_send(ip_port->ip_dl.dl_eth.de_fd,
			eth_pack, pack_size);
		if (r == NW_OK)
			continue;

		/* A non-blocking send is not possible, start a regular
		 * send.
		 */
		assert(r == NW_WOULDBLOCK);
		ip_port->ip_dl.dl_eth.de_frame= eth_pack;
		r= eth_write(ip_port->ip_dl.dl_eth.de_fd, pack_size);
		if (r == NW_SUSPEND)
		{
			assert(!(ip_port->ip_dl.dl_eth.de_flags &
				IEF_WRITE_SP));
			ip_port->ip_dl.dl_eth.de_flags |= IEF_WRITE_SP;
			return;
		}
		assert(r == NW_OK);
	}
}

/* Callback registered with arp_set_cb: ARP has an answer for 'ipaddr'.
 *
 * Every packet on the ARP queue that waits for 'ipaddr' is removed from
 * that queue. It is freed when eth_addr is NULL (destination
 * unreachable), otherwise the ethernet address is filled in and the
 * packet is moved to the transmit queue. After that, address resolution
 * is tried for the packets that remain on the ARP queue, and sending is
 * restarted if no write is in progress.
 */
PRIVATE void ipeth_arp_reply(ip_port_nr, ipaddr, eth_addr)
int ip_port_nr;
ipaddr_t ipaddr;
ether_addr_t *eth_addr;
{
	acc_t *prev, *eth_pack;
	int r;
	xmit_hdr_t *xmit_hdr;
	ip_port_t *ip_port;
	time_t t;
	eth_hdr_t *eth_hdr;
	ether_addr_t tmp_eth_addr;

	assert (ip_port_nr >= 0 && ip_port_nr < ip_conf_nr);
	ip_port= &ip_port_table[ip_port_nr];

	for (;;)
	{
		/* Look for the next packet that waits for ipaddr. */
		for (prev= 0, eth_pack= ip_port->ip_dl.dl_eth.de_arp_head;
			eth_pack;
			prev= eth_pack, eth_pack= eth_pack->acc_ext_link)
		{
			xmit_hdr= (xmit_hdr_t *)ptr2acc_data(eth_pack);
			if (xmit_hdr->xh_ipaddr == ipaddr)
				break;
		}

		if (eth_pack == NULL)
		{
			/* No packet found. */
			break;
		}

		/* Delete packet from the queue. */
		if (prev == NULL)
		{
			ip_port->ip_dl.dl_eth.de_arp_head=
				eth_pack->acc_ext_link;
		}
		else
		{
			prev->acc_ext_link= eth_pack->acc_ext_link;
			if (prev->acc_ext_link == NULL)
				ip_port->ip_dl.dl_eth.de_arp_tail= prev;
		}

		if (eth_addr == NULL)
		{
			/* Destination is unreachable, delete packet. */
			bf_afree(eth_pack);
			continue;
		}

		/* Fill in the ethernet address and put the packet on the
		 * transmit queue. The queue time is fetched first because
		 * xmit_hdr and eth_hdr refer to the same storage.
		 */
		t= xmit_hdr->xh_time;
		eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
		eth_hdr->eh_dst= *eth_addr;
		memcpy(&eth_hdr->eh_src, &t, sizeof(t));

		eth_pack->acc_ext_link= NULL;
		if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
			ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
		else
		{
			ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
				eth_pack;
		}
		ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
	}

	/* Try to get some more ARPs in progress. */
	while (ip_port->ip_dl.dl_eth.de_arp_head)
	{
		eth_pack= ip_port->ip_dl.dl_eth.de_arp_head;
		xmit_hdr= (xmit_hdr_t *)ptr2acc_data(eth_pack);
		r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port,
			xmit_hdr->xh_ipaddr, &tmp_eth_addr);
		if (r == NW_SUSPEND)
			break;	/* Normal case */

		/* Dequeue the packet */
		ip_port->ip_dl.dl_eth.de_arp_head= eth_pack->acc_ext_link;

		if (r == EDSTNOTRCH)
		{
			bf_afree(eth_pack);
			continue;
		}
		assert(r == NW_OK);

		/* Fill in the ethernet address and put the packet on the
		 * transmit queue.
		 */
		t= xmit_hdr->xh_time;
		eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
		eth_hdr->eh_dst= tmp_eth_addr;
		memcpy(&eth_hdr->eh_src, &t, sizeof(t));

		eth_pack->acc_ext_link= NULL;
		if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
			ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
		else
		{
			ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
				eth_pack;
		}
		ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
	}

	/* Restart sending ethernet packets. */
	if (ip_port->ip_dl.dl_eth.de_frame == NULL)
		ipeth_restart_send(ip_port);
}

/* Decrement the ttl of the IP packet behind the ethernet header by the
 * number of whole HZ periods between enq_time and now, and adjust the
 * header checksum accordingly.
 *
 * Returns NW_OK, or ETIMEDOUT (packet left unchanged) when the ttl
 * would not stay above zero.
 */
PRIVATE int ipeth_update_ttl(enq_time, now, eth_pack)
time_t enq_time;
time_t now;
acc_t *eth_pack;
{
	int ttl_diff;
	ip_hdr_t *ip_hdr;
	u32_t sum;
	u16_t word;
	acc_t *ip_pack;

	ttl_diff= (now-enq_time)/HZ;
	enq_time += ttl_diff*HZ;
	assert(enq_time <= now && enq_time + HZ > now);

	ip_pack= eth_pack->acc_next;
	assert(ip_pack->acc_length >= sizeof(*ip_hdr));
	/* The header is modified in place, it may not be shared. */
	assert(ip_pack->acc_linkC == 1 &&
		ip_pack->acc_buffer->buf_linkC == 1);

	ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_pack);
	if (ip_hdr->ih_ttl <= ttl_diff)
		return ETIMEDOUT;

	/* Take the 16-bit word that contains the ttl out of the one's
	 * complement sum ...
	 */
	sum= (u16_t)~ip_hdr->ih_hdr_chk;
	word= *(u16_t *)&ip_hdr->ih_ttl;
	if (word > sum)
		sum += 0xffff - word;
	else
		sum -= word;

	/* ... and add the word back with the new ttl. */
	ip_hdr->ih_ttl -= ttl_diff;
	word= *(u16_t *)&ip_hdr->ih_ttl;
	sum += word;
	if (sum > 0xffff)
		sum -= 0xffff;
	assert(!(sum & 0xffff0000));
	ip_hdr->ih_hdr_chk= ~sum;
	assert(ip_hdr->ih_ttl > 0);
	return NW_OK;
}

/* Keep issuing reads on the ethernet port until a read is suspended
 * (IEF_READ_SP is set and IEF_READ_IP stays set) or returns an error.
 */
PRIVATE void do_eth_read(ip_port)
ip_port_t *ip_port;
{
	int result;

	assert(!(ip_port->ip_dl.dl_eth.de_flags & IEF_READ_IP));

	for (;;)
	{
		ip_port->ip_dl.dl_eth.de_flags |= IEF_READ_IP;

		result= eth_read (ip_port->ip_dl.dl_eth.de_fd,
			ETH_MAX_PACK_SIZE);
		if (result == NW_SUSPEND)
		{
			assert(!(ip_port->ip_dl.dl_eth.de_flags &
				IEF_READ_SP));
			ip_port->ip_dl.dl_eth.de_flags |= IEF_READ_SP;
			return;
		}
		ip_port->ip_dl.dl_eth.de_flags &= ~IEF_READ_IP;
		if (result<0)
		{
			return;
		}
	}
}

/* Callback passed to eth_open: an ethernet packet has arrived. Strip
 * the ethernet header and pass the packet to IP, as a broadcast when
 * the low bit of the first byte of the packet (the start of the
 * destination address) is set.
 */
PRIVATE void ip_eth_arrived(port, pack, pack_size)
int port;
acc_t *pack;
size_t pack_size;
{
	int broadcast;
	ip_port_t *ip_port;

	ip_port= &ip_port_table[port];
	broadcast= (*(u8_t *)ptr2acc_data(pack) & 1);

	pack= bf_delhead(pack, ETH_HDR_SIZE);

	if (broadcast)
		ip_arrived_broadcast(ip_port, pack);
	else
		ip_arrived(ip_port, pack);
}

/*
 * $PchId: ip_eth.c,v 1.9 1996/12/17 07:55:21 philip Exp $
 */