diff -urN oldtree/Makefile newtree/Makefile --- oldtree/Makefile 2006-09-29 14:03:18.000000000 -0400 +++ newtree/Makefile 2006-09-30 05:21:41.000000000 -0400 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 18 -EXTRAVERSION = -git12 +EXTRAVERSION = -git13 NAME=Avast! A bilge rat! # *DOCUMENTATION* diff -urN oldtree/arch/sparc/lib/copy_user.S newtree/arch/sparc/lib/copy_user.S --- oldtree/arch/sparc/lib/copy_user.S 2006-09-29 13:50:42.000000000 -0400 +++ newtree/arch/sparc/lib/copy_user.S 2006-09-30 05:21:41.000000000 -0400 @@ -14,6 +14,7 @@ #include #include #include +#include /* Work around cpp -rob */ #define ALLOC #alloc @@ -366,6 +367,9 @@ blu 1f cmp %o1, %g1 bgeu 1f + ld [%g6 + TI_PREEMPT], %g1 + cmp %g1, 0 + bne 1f nop save %sp, -64, %sp mov %i0, %o0 diff -urN oldtree/arch/um/drivers/net_kern.c newtree/arch/um/drivers/net_kern.c --- oldtree/arch/um/drivers/net_kern.c 2006-09-29 14:03:19.000000000 -0400 +++ newtree/arch/um/drivers/net_kern.c 2006-09-30 05:21:41.000000000 -0400 @@ -310,7 +310,7 @@ return; random: - random_ether_addr(addr) + random_ether_addr(addr); } static DEFINE_SPINLOCK(devices_lock); diff -urN oldtree/drivers/infiniband/hw/amso1100/c2_ae.c newtree/drivers/infiniband/hw/amso1100/c2_ae.c --- oldtree/drivers/infiniband/hw/amso1100/c2_ae.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/amso1100/c2_ae.c 2006-09-30 05:21:41.000000000 -0400 @@ -197,7 +197,7 @@ "resource=%x, qp_state=%s\n", __FUNCTION__, to_event_str(event_id), - be64_to_cpu(wr->ae.ae_generic.user_context), + (unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context), be32_to_cpu(wr->ae.ae_generic.resource_type), be32_to_cpu(wr->ae.ae_generic.resource), to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state))); diff -urN oldtree/drivers/infiniband/hw/amso1100/c2_alloc.c newtree/drivers/infiniband/hw/amso1100/c2_alloc.c --- oldtree/drivers/infiniband/hw/amso1100/c2_alloc.c 2006-09-29 14:03:20.000000000 -0400 +++ 
newtree/drivers/infiniband/hw/amso1100/c2_alloc.c 2006-09-30 05:21:41.000000000 -0400 @@ -115,7 +115,7 @@ ((unsigned long) &(head->shared_ptr[mqsp]) - (unsigned long) head); pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__, - &(head->shared_ptr[mqsp]), (u64)*dma_addr); + &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr); return &(head->shared_ptr[mqsp]); } return NULL; diff -urN oldtree/drivers/infiniband/hw/amso1100/c2_provider.c newtree/drivers/infiniband/hw/amso1100/c2_provider.c --- oldtree/drivers/infiniband/hw/amso1100/c2_provider.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/amso1100/c2_provider.c 2006-09-30 05:21:44.000000000 -0400 @@ -390,14 +390,18 @@ } mr = kmalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) + if (!mr) { + vfree(page_list); return ERR_PTR(-ENOMEM); + } mr->pd = to_c2pd(ib_pd); pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " "*iova_start %llx, first pa %llx, last pa %llx\n", __FUNCTION__, page_shift, pbl_depth, total_len, - *iova_start, page_list[0], page_list[pbl_depth-1]); + (unsigned long long) *iova_start, + (unsigned long long) page_list[0], + (unsigned long long) page_list[pbl_depth-1]); err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, (1 << page_shift), pbl_depth, total_len, 0, iova_start, diff -urN oldtree/drivers/infiniband/hw/amso1100/c2_rnic.c newtree/drivers/infiniband/hw/amso1100/c2_rnic.c --- oldtree/drivers/infiniband/hw/amso1100/c2_rnic.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/amso1100/c2_rnic.c 2006-09-30 05:21:44.000000000 -0400 @@ -527,7 +527,7 @@ DMA_FROM_DEVICE); pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages, - (u64)c2dev->rep_vq.host_dma); + (unsigned long long) c2dev->rep_vq.host_dma); c2_mq_rep_init(&c2dev->rep_vq, 1, qsize, @@ -550,7 +550,7 @@ DMA_FROM_DEVICE); pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); pr_debug("%s 
aeq va %p dma %llx\n", __FUNCTION__, q1_pages, - (u64)c2dev->rep_vq.host_dma); + (unsigned long long) c2dev->aeq.host_dma); c2_mq_rep_init(&c2dev->aeq, 2, qsize, diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_common.h newtree/drivers/infiniband/hw/ipath/ipath_common.h --- oldtree/drivers/infiniband/hw/ipath/ipath_common.h 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_common.h 2006-09-30 05:21:44.000000000 -0400 @@ -141,8 +141,9 @@ * packets if ipath not configured, etc.) */ __u64 sps_krdrops; + __u64 sps_txeparity; /* PIO buffer parity error, recovered */ /* pad for future growth */ - __u64 __sps_pad[46]; + __u64 __sps_pad[45]; }; /* @@ -185,6 +186,9 @@ #define IPATH_RUNTIME_PCIE 0x2 #define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 #define IPATH_RUNTIME_RCVHDR_COPY 0x8 +#define IPATH_RUNTIME_MASTER 0x10 +#define IPATH_RUNTIME_PBC_REWRITE 0x20 +#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40 /* * This structure is returned by ipath_userinit() immediately after @@ -202,7 +206,8 @@ /* version of software, for feature checking. */ __u32 spi_sw_version; /* InfiniPath port assigned, goes into sent packets */ - __u32 spi_port; + __u16 spi_port; + __u16 spi_subport; /* * IB MTU, packets IB data must be less than this. * The MTU is in bytes, and will be a multiple of 4 bytes. @@ -218,7 +223,7 @@ __u32 spi_tidcnt; /* size of the TID Eager list in infinipath, in entries */ __u32 spi_tidegrcnt; - /* size of a single receive header queue entry. */ + /* size of a single receive header queue entry in words. */ __u32 spi_rcvhdrent_size; /* * Count of receive header queue entries allocated. @@ -310,6 +315,12 @@ __u32 spi_filler_for_align; /* address of readonly memory copy of the rcvhdrq tail register. 
*/ __u64 spi_rcvhdr_tailaddr; + + /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ + __u64 spi_subport_uregbase; + __u64 spi_subport_rcvegrbuf; + __u64 spi_subport_rcvhdr_base; + } __attribute__ ((aligned(8))); @@ -328,12 +339,12 @@ /* * Minor version differences are always compatible - * a within a major version, however if if user software is larger + * within a major version, however if user software is larger * than driver software, some new features and/or structure fields * may not be implemented; the user code must deal with this if it - * cares, or it must abort after initialization reports the difference + * cares, or it must abort after initialization reports the difference. */ -#define IPATH_USER_SWMINOR 2 +#define IPATH_USER_SWMINOR 3 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) @@ -379,7 +390,16 @@ */ __u32 spu_rcvhdrsize; - __u64 spu_unused; /* kept for compatible layout */ + /* + * If two or more processes wish to share a port, each process + * must set the spu_subport_cnt and spu_subport_id to the same + * values. The only restriction on the spu_subport_id is that + * it be unique for a given node. 
+ */ + __u16 spu_subport_cnt; + __u16 spu_subport_id; + + __u32 spu_unused; /* kept for compatible layout */ /* * address of struct base_info to write to @@ -392,19 +412,25 @@ #define IPATH_CMD_MIN 16 -#define IPATH_CMD_USER_INIT 16 /* set up userspace */ +#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */ #define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */ #define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */ #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ +#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ +#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ +#define IPATH_CMD_USER_INIT 24 /* set up userspace */ -#define IPATH_CMD_MAX 21 +#define IPATH_CMD_MAX 24 struct ipath_port_info { __u32 num_active; /* number of active units */ __u32 unit; /* unit (chip) assigned to caller */ - __u32 port; /* port on unit assigned to caller */ + __u16 port; /* port on unit assigned to caller */ + __u16 subport; /* subport on unit assigned to caller */ + __u16 num_ports; /* number of ports available on unit */ + __u16 num_subports; /* number of subport slaves opened on port */ }; struct ipath_tid_info { @@ -435,6 +461,8 @@ __u32 recv_ctrl; /* partition key to set */ __u16 part_key; + /* user address of __u32 bitmask of active slaves */ + __u64 slave_mask_addr; } cmd; }; @@ -596,6 +624,10 @@ /* K_PktFlags bits */ #define INFINIPATH_KPF_INTR 0x1 +#define INFINIPATH_KPF_SUBPORT_MASK 0x3 +#define INFINIPATH_KPF_SUBPORT_SHIFT 1 + +#define INFINIPATH_MAX_SUBPORT 4 /* SendPIO per-buffer control */ #define INFINIPATH_SP_TEST 0x40 @@ -610,7 +642,7 @@ /* * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, - * Port 3, TID 11, offset 14. + * Port 4, TID 11, offset 13. 
*/ __le32 ver_port_tid_offset; __le16 chksum; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_cq.c newtree/drivers/infiniband/hw/ipath/ipath_cq.c --- oldtree/drivers/infiniband/hw/ipath/ipath_cq.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_cq.c 2006-09-30 05:21:44.000000000 -0400 @@ -46,7 +46,7 @@ */ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) { - struct ipath_cq_wc *wc = cq->queue; + struct ipath_cq_wc *wc; unsigned long flags; u32 head; u32 next; @@ -57,6 +57,7 @@ * Note that the head pointer might be writable by user processes. * Take care to verify it is a sane value. */ + wc = cq->queue; head = wc->head; if (head >= (unsigned) cq->ibcq.cqe) { head = cq->ibcq.cqe; @@ -109,21 +110,27 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct ipath_cq *cq = to_icq(ibcq); - struct ipath_cq_wc *wc = cq->queue; + struct ipath_cq_wc *wc; unsigned long flags; int npolled; + u32 tail; spin_lock_irqsave(&cq->lock, flags); + wc = cq->queue; + tail = wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (wc->tail == wc->head) + if (tail == wc->head) break; - *entry = wc->queue[wc->tail]; - if (wc->tail >= cq->ibcq.cqe) - wc->tail = 0; + *entry = wc->queue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; else - wc->tail++; + tail++; } + wc->tail = tail; spin_unlock_irqrestore(&cq->lock, flags); @@ -177,11 +184,6 @@ goto done; } - if (dev->n_cqs_allocated == ib_ipath_max_cqs) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - /* Allocate the completion queue structure. 
*/ cq = kmalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { @@ -237,6 +239,16 @@ } else cq->ip = NULL; + spin_lock(&dev->n_cqs_lock); + if (dev->n_cqs_allocated == ib_ipath_max_cqs) { + spin_unlock(&dev->n_cqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + + dev->n_cqs_allocated++; + spin_unlock(&dev->n_cqs_lock); + /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return @@ -253,7 +265,6 @@ ret = &cq->ibcq; - dev->n_cqs_allocated++; goto done; bail_wc: @@ -280,7 +291,9 @@ struct ipath_cq *cq = to_icq(ibcq); tasklet_kill(&cq->comptask); + spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; + spin_unlock(&dev->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, ipath_release_mmap_info); else @@ -316,10 +329,16 @@ return 0; } +/** + * ipath_resize_cq - change the size of the CQ + * @ibcq: the completion queue + * + * Returns 0 for success. + */ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { struct ipath_cq *cq = to_icq(ibcq); - struct ipath_cq_wc *old_wc = cq->queue; + struct ipath_cq_wc *old_wc; struct ipath_cq_wc *wc; u32 head, tail, n; int ret; @@ -355,6 +374,7 @@ * Make sure head and tail are sane since they * might be user writable. */ + old_wc = cq->queue; head = old_wc->head; if (head > (u32) cq->ibcq.cqe) head = (u32) cq->ibcq.cqe; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_driver.c newtree/drivers/infiniband/hw/ipath/ipath_driver.c --- oldtree/drivers/infiniband/hw/ipath/ipath_driver.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_driver.c 2006-09-30 05:21:44.000000000 -0400 @@ -95,16 +95,6 @@ "RecovIdle", }; -/* - * These variables are initialized in the chip-specific files - * but are defined here. 
- */ -u16 ipath_gpio_sda_num, ipath_gpio_scl_num; -u64 ipath_gpio_sda, ipath_gpio_scl; -u64 infinipath_i_bitsextant; -ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; -u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; - static void __devexit ipath_remove_one(struct pci_dev *); static int __devinit ipath_init_one(struct pci_dev *, const struct pci_device_id *); @@ -527,28 +517,146 @@ return ret; } +static void __devexit cleanup_device(struct ipath_devdata *dd) +{ + int port; + + ipath_shutdown_device(dd); + + if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { + /* can't do anything more with chip; needs re-init */ + *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; + if (dd->ipath_kregbase) { + /* + * if we haven't already cleaned up before these are + * to ensure any register reads/writes "fail" until + * re-init + */ + dd->ipath_kregbase = NULL; + dd->ipath_uregbase = 0; + dd->ipath_sregbase = 0; + dd->ipath_cregbase = 0; + dd->ipath_kregsize = 0; + } + ipath_disable_wc(dd); + } + + if (dd->ipath_pioavailregs_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *) dd->ipath_pioavailregs_dma, + dd->ipath_pioavailregs_phys); + dd->ipath_pioavailregs_dma = NULL; + } + if (dd->ipath_dummy_hdrq) { + dma_free_coherent(&dd->pcidev->dev, + dd->ipath_pd[0]->port_rcvhdrq_size, + dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); + dd->ipath_dummy_hdrq = NULL; + } + + if (dd->ipath_pageshadow) { + struct page **tmpp = dd->ipath_pageshadow; + dma_addr_t *tmpd = dd->ipath_physshadow; + int i, cnt = 0; + + ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " + "locked\n"); + for (port = 0; port < dd->ipath_cfgports; port++) { + int port_tidbase = port * dd->ipath_rcvtidcnt; + int maxtid = port_tidbase + dd->ipath_rcvtidcnt; + for (i = port_tidbase; i < maxtid; i++) { + if (!tmpp[i]) + continue; + pci_unmap_page(dd->pcidev, tmpd[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + ipath_release_user_pages(&tmpp[i], 1); + tmpp[i] = NULL; + cnt++; + } + } 
+ if (cnt) { + ipath_stats.sps_pageunlocks += cnt; + ipath_cdbg(VERBOSE, "There were still %u expTID " + "entries locked\n", cnt); + } + if (ipath_stats.sps_pagelocks || + ipath_stats.sps_pageunlocks) + ipath_cdbg(VERBOSE, "%llu pages locked, %llu " + "unlocked via ipath_m{un}lock\n", + (unsigned long long) + ipath_stats.sps_pagelocks, + (unsigned long long) + ipath_stats.sps_pageunlocks); + + ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", + dd->ipath_pageshadow); + vfree(dd->ipath_pageshadow); + dd->ipath_pageshadow = NULL; + } + + /* + * free any resources still in use (usually just kernel ports) + * at unload; we do for portcnt, not cfgports, because cfgports + * could have changed while we were loaded. + */ + for (port = 0; port < dd->ipath_portcnt; port++) { + struct ipath_portdata *pd = dd->ipath_pd[port]; + dd->ipath_pd[port] = NULL; + ipath_free_pddata(dd, pd); + } + kfree(dd->ipath_pd); + /* + * debuggability, in case some cleanup path tries to use it + * after this + */ + dd->ipath_pd = NULL; +} + static void __devexit ipath_remove_one(struct pci_dev *pdev) { - struct ipath_devdata *dd; + struct ipath_devdata *dd = pci_get_drvdata(pdev); - ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev); - if (!pdev) - return; + ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); + + if (dd->verbs_dev) + ipath_unregister_ib_device(dd->verbs_dev); - dd = pci_get_drvdata(pdev); - ipath_unregister_ib_device(dd->verbs_dev); ipath_diag_remove(dd); ipath_user_remove(dd); ipathfs_remove_device(dd); ipath_device_remove_group(&pdev->dev, dd); + ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " "unit %u\n", dd, (u32) dd->ipath_unit); - if (dd->ipath_kregbase) { - ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", - dd->ipath_kregbase); - iounmap((volatile void __iomem *) dd->ipath_kregbase); - dd->ipath_kregbase = NULL; - } + + cleanup_device(dd); + + /* + * turn off rcv, send, and interrupts for all ports, all drivers + * should also hard reset the chip 
here? + * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs + * for all versions of the driver, if they were allocated + */ + if (pdev->irq) { + ipath_cdbg(VERBOSE, + "unit %u free_irq of irq %x\n", + dd->ipath_unit, pdev->irq); + free_irq(pdev->irq, dd); + } else + ipath_dbg("irq is 0, not doing free_irq " + "for unit %u\n", dd->ipath_unit); + /* + * we check for NULL here, because it's outside + * the kregbase check, and we need to call it + * after the free_irq. Thus it's possible that + * the function pointers were never initialized. + */ + if (dd->ipath_f_cleanup) + /* clean up chip-specific stuff */ + dd->ipath_f_cleanup(dd); + + ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); + iounmap((volatile void __iomem *) dd->ipath_kregbase); pci_release_regions(pdev); ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); pci_disable_device(pdev); @@ -760,8 +868,8 @@ static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum, int err) { - return dd->ipath_port0_skbs ? - (void *)dd->ipath_port0_skbs[bufnum]->data : NULL; + return dd->ipath_port0_skbinfo ? + (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; } /** @@ -783,31 +891,34 @@ */ /* - * We need 4 extra bytes for unaligned transfer copying + * We need 2 extra bytes for ipath_ether data sent in the + * key header. In order to keep everything dword aligned, + * we'll reserve 4 bytes. */ + len = dd->ipath_ibmaxlen + 4; + if (dd->ipath_flags & IPATH_4BYTE_TID) { - /* we need a 4KB multiple alignment, and there is no way + /* We need a 2KB multiple alignment, and there is no way * to do it except to allocate extra and then skb_reserve * enough to bring it up to the right alignment. 
*/ - len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1; + len += 2047; } - else - len = dd->ipath_ibmaxlen + 4; + skb = __dev_alloc_skb(len, gfp_mask); if (!skb) { ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", len); goto bail; } + + skb_reserve(skb, 4); + if (dd->ipath_flags & IPATH_4BYTE_TID) { - u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4); + u32 una = (unsigned long)skb->data & 2047; if (una) - skb_reserve(skb, 4 + (1 << 11) - una); - else - skb_reserve(skb, 4); - } else - skb_reserve(skb, 4); + skb_reserve(skb, 2048 - una); + } bail: return skb; @@ -1326,6 +1437,9 @@ "for port %u rcvhdrqtailaddr failed\n", pd->port_port); ret = -ENOMEM; + dma_free_coherent(&dd->pcidev->dev, amt, + pd->port_rcvhdrq, pd->port_rcvhdrq_phys); + pd->port_rcvhdrq = NULL; goto bail; } pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1347,12 +1461,13 @@ ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " "hdrtailaddr@%p %llx physical\n", pd->port_port, pd->port_rcvhdrq, - pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr, - (unsigned long long)pd->port_rcvhdrqtailaddr_phys); + (unsigned long long) pd->port_rcvhdrq_phys, + pd->port_rcvhdrtail_kvaddr, (unsigned long long) + pd->port_rcvhdrqtailaddr_phys); /* clear for security and sanity on each use */ memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); - memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); + memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); /* * tell chip each time we init it, even if we are re-using previous @@ -1805,7 +1920,7 @@ pd->port_rcvhdrq = NULL; if (pd->port_rcvhdrtail_kvaddr) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *)pd->port_rcvhdrtail_kvaddr, + pd->port_rcvhdrtail_kvaddr, pd->port_rcvhdrqtailaddr_phys); pd->port_rcvhdrtail_kvaddr = NULL; } @@ -1824,24 +1939,32 @@ dma_free_coherent(&dd->pcidev->dev, size, base, pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; - 
vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; pd->port_rcvegrbuf_chunks = 0; - } else if (pd->port_port == 0 && dd->ipath_port0_skbs) { + } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { unsigned e; - struct sk_buff **skbs = dd->ipath_port0_skbs; + struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; - dd->ipath_port0_skbs = NULL; - ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs " - "@ %p\n", pd->port_port, skbs); + dd->ipath_port0_skbinfo = NULL; + ipath_cdbg(VERBOSE, "free closed port %d " + "ipath_port0_skbinfo @ %p\n", pd->port_port, + skbinfo); for (e = 0; e < dd->ipath_rcvegrcnt; e++) - if (skbs[e]) - dev_kfree_skb(skbs[e]); - vfree(skbs); + if (skbinfo[e].skb) { + pci_unmap_single(dd->pcidev, skbinfo[e].phys, + dd->ipath_ibmaxlen, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(skbinfo[e].skb); + } + vfree(skbinfo); } kfree(pd->port_tid_pg_list); + vfree(pd->subport_uregbase); + vfree(pd->subport_rcvegrbuf); + vfree(pd->subport_rcvhdr_base); kfree(pd); } @@ -1907,150 +2030,12 @@ return ret; } -static void cleanup_device(struct ipath_devdata *dd) -{ - int port; - - ipath_shutdown_device(dd); - - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { - /* can't do anything more with chip; needs re-init */ - *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; - if (dd->ipath_kregbase) { - /* - * if we haven't already cleaned up before these are - * to ensure any register reads/writes "fail" until - * re-init - */ - dd->ipath_kregbase = NULL; - dd->ipath_uregbase = 0; - dd->ipath_sregbase = 0; - dd->ipath_cregbase = 0; - dd->ipath_kregsize = 0; - } - ipath_disable_wc(dd); - } - - if (dd->ipath_pioavailregs_dma) { - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *) dd->ipath_pioavailregs_dma, - dd->ipath_pioavailregs_phys); - dd->ipath_pioavailregs_dma = NULL; - } - if (dd->ipath_dummy_hdrq) { - dma_free_coherent(&dd->pcidev->dev, - dd->ipath_pd[0]->port_rcvhdrq_size, - 
dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); - dd->ipath_dummy_hdrq = NULL; - } - - if (dd->ipath_pageshadow) { - struct page **tmpp = dd->ipath_pageshadow; - int i, cnt = 0; - - ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " - "locked\n"); - for (port = 0; port < dd->ipath_cfgports; port++) { - int port_tidbase = port * dd->ipath_rcvtidcnt; - int maxtid = port_tidbase + dd->ipath_rcvtidcnt; - for (i = port_tidbase; i < maxtid; i++) { - if (!tmpp[i]) - continue; - ipath_release_user_pages(&tmpp[i], 1); - tmpp[i] = NULL; - cnt++; - } - } - if (cnt) { - ipath_stats.sps_pageunlocks += cnt; - ipath_cdbg(VERBOSE, "There were still %u expTID " - "entries locked\n", cnt); - } - if (ipath_stats.sps_pagelocks || - ipath_stats.sps_pageunlocks) - ipath_cdbg(VERBOSE, "%llu pages locked, %llu " - "unlocked via ipath_m{un}lock\n", - (unsigned long long) - ipath_stats.sps_pagelocks, - (unsigned long long) - ipath_stats.sps_pageunlocks); - - ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", - dd->ipath_pageshadow); - vfree(dd->ipath_pageshadow); - dd->ipath_pageshadow = NULL; - } - - /* - * free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, not cfgports, because cfgports - * could have changed while we were loaded. - */ - for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = dd->ipath_pd[port]; - dd->ipath_pd[port] = NULL; - ipath_free_pddata(dd, pd); - } - kfree(dd->ipath_pd); - /* - * debuggability, in case some cleanup path tries to use it - * after this - */ - dd->ipath_pd = NULL; -} - static void __exit infinipath_cleanup(void) { - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - ipath_diagpkt_remove(); - ipath_exit_ipathfs(); ipath_driver_remove_group(&ipath_driver.driver); - spin_lock_irqsave(&ipath_devs_lock, flags); - - /* - * turn off rcv, send, and interrupts for all ports, all drivers - * should also hard reset the chip here? 
- * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs - * for all versions of the driver, if they were allocated - */ - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - if (dd->ipath_kregbase) - cleanup_device(dd); - - if (dd->pcidev) { - if (dd->pcidev->irq) { - ipath_cdbg(VERBOSE, - "unit %u free_irq of irq %x\n", - dd->ipath_unit, dd->pcidev->irq); - free_irq(dd->pcidev->irq, dd); - } else - ipath_dbg("irq is 0, not doing free_irq " - "for unit %u\n", dd->ipath_unit); - - /* - * we check for NULL here, because it's outside - * the kregbase check, and we need to call it - * after the free_irq. Thus it's possible that - * the function pointers were never initialized. - */ - if (dd->ipath_f_cleanup) - /* clean up chip-specific stuff */ - dd->ipath_f_cleanup(dd); - - dd->pcidev = NULL; - } - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); pci_unregister_driver(&ipath_driver); diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_eeprom.c newtree/drivers/infiniband/hw/ipath/ipath_eeprom.c --- oldtree/drivers/infiniband/hw/ipath/ipath_eeprom.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_eeprom.c 2006-09-30 05:21:44.000000000 -0400 @@ -100,9 +100,9 @@ gpioval = &dd->ipath_gpio_out; read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); if (line == i2c_line_scl) - mask = ipath_gpio_scl; + mask = dd->ipath_gpio_scl; else - mask = ipath_gpio_sda; + mask = dd->ipath_gpio_sda; if (new_line_state == i2c_line_high) /* tri-state the output rather than force high */ @@ -119,12 +119,12 @@ write_val = 0x0UL; if (line == i2c_line_scl) { - write_val <<= ipath_gpio_scl_num; - *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num); + write_val <<= dd->ipath_gpio_scl_num; + *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num); *gpioval |= write_val; } 
else { - write_val <<= ipath_gpio_sda_num; - *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num); + write_val <<= dd->ipath_gpio_sda_num; + *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num); *gpioval |= write_val; } ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); @@ -157,9 +157,9 @@ read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); /* config line to be an input */ if (line == i2c_line_scl) - mask = ipath_gpio_scl; + mask = dd->ipath_gpio_scl; else - mask = ipath_gpio_sda; + mask = dd->ipath_gpio_sda; write_val = read_val & ~mask; ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); @@ -187,6 +187,7 @@ static void i2c_wait_for_writes(struct ipath_devdata *dd) { (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + rmb(); } static void scl_out(struct ipath_devdata *dd, u8 bit) diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_file_ops.c newtree/drivers/infiniband/hw/ipath/ipath_file_ops.c --- oldtree/drivers/infiniband/hw/ipath/ipath_file_ops.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_file_ops.c 2006-09-30 05:21:44.000000000 -0400 @@ -41,6 +41,12 @@ #include "ipath_kernel.h" #include "ipath_common.h" +/* + * mmap64 doesn't allow all 64 bits for 32-bit applications + * so only use the low 43 bits. 
+ */ +#define MMAP64_MASK 0x7FFFFFFFFFFUL + static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, @@ -57,18 +63,35 @@ .mmap = ipath_mmap }; -static int ipath_get_base_info(struct ipath_portdata *pd, +static int ipath_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { + struct ipath_portdata *pd = port_fp(fp); int ret = 0; struct ipath_base_info *kinfo = NULL; struct ipath_devdata *dd = pd->port_dd; + unsigned subport_cnt; + int shared, master; + size_t sz; + + subport_cnt = pd->port_subport_cnt; + if (!subport_cnt) { + shared = 0; + master = 0; + subport_cnt = 1; + } else { + shared = 1; + master = !subport_fp(fp); + } - if (ubase_size < sizeof(*kinfo)) { + sz = sizeof(*kinfo); + /* If port sharing is not requested, allow the old size structure */ + if (!shared) + sz -= 3 * sizeof(u64); + if (ubase_size < sz) { ipath_cdbg(PROC, - "Base size %lu, need %lu (version mismatch?)\n", - (unsigned long) ubase_size, - (unsigned long) sizeof(*kinfo)); + "Base size %zu, need %zu (version mismatch?)\n", + ubase_size, sz); ret = -EINVAL; goto bail; } @@ -95,7 +118,9 @@ kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / pd->port_rcvegrbuf_chunks; - kinfo->spi_tidcnt = dd->ipath_rcvtidcnt; + kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; + if (master) + kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; /* * for this use, may be ipath_cfgports summed over all chips that * are are configured and present @@ -118,31 +143,75 @@ * page_address() macro worked, but in 2.6.11, even that returns the * full 64 bit address (upper bits all 1's). 
So far, using the * physical addresses (or chip offsets, for chip mapping) works, but - * no doubt some future kernel release will chang that, and we'll be - * on to yet another method of dealing with this + * no doubt some future kernel release will change that, and we'll be + * on to yet another method of dealing with this. */ kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; - kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + (void *) dd->ipath_statusp - (void *) dd->ipath_pioavailregs_dma; - kinfo->spi_piobufbase = (u64) pd->port_piobufs; - kinfo->__spi_uregbase = - dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + if (!shared) { + kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_piobufbase = (u64) pd->port_piobufs; + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else if (master) { + kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + + (dd->ipath_pbufsport % subport_cnt); + /* Master's PIO buffers are after all the slave's */ + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * + (dd->ipath_pbufsport - kinfo->spi_piocnt); + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else { + unsigned slave = subport_fp(fp) - 1; + + kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * kinfo->spi_piocnt * slave; + kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + + PAGE_SIZE * slave) & MMAP64_MASK; + + kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * slave) & MMAP64_MASK; + kinfo->spi_rcvhdr_tailaddr = + (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; + kinfo->spi_rcv_egrbufs = ((u64) 
pd->subport_rcvegrbuf + + dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & + MMAP64_MASK; + } - kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1); - kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / + dd->ipath_palign; kinfo->spi_pioalign = dd->ipath_palign; kinfo->spi_qpair = IPATH_KD_QP; kinfo->spi_piosize = dd->ipath_ibmaxlen; kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ kinfo->spi_port = pd->port_port; + kinfo->spi_subport = subport_fp(fp); kinfo->spi_sw_version = IPATH_KERN_SWVERSION; kinfo->spi_hw_version = dd->ipath_revision; + if (master) { + kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; + kinfo->spi_subport_uregbase = + (u64) pd->subport_uregbase & MMAP64_MASK; + kinfo->spi_subport_rcvegrbuf = + (u64) pd->subport_rcvegrbuf & MMAP64_MASK; + kinfo->spi_subport_rcvhdr_base = + (u64) pd->subport_rcvhdr_base & MMAP64_MASK; + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, kinfo->spi_runtime_flags, + (unsigned long long) kinfo->spi_subport_uregbase, + (unsigned long long) kinfo->spi_subport_rcvegrbuf, + (unsigned long long) kinfo->spi_subport_rcvhdr_base); + } + if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) ret = -EFAULT; @@ -154,6 +223,7 @@ /** * ipath_tid_update - update a port TID * @pd: the port + * @fp: the ipath device file * @ti: the TID information * * The new implementation as of Oct 2004 is that the driver assigns @@ -176,11 +246,11 @@ * virtually contiguous pages, that should change to improve * performance. 
*/ -static int ipath_tid_update(struct ipath_portdata *pd, +static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, const struct ipath_tid_info *ti) { int ret = 0, ntids; - u32 tid, porttid, cnt, i, tidcnt; + u32 tid, porttid, cnt, i, tidcnt, tidoff; u16 *tidlist; struct ipath_devdata *dd = pd->port_dd; u64 physaddr; @@ -188,6 +258,7 @@ u64 __iomem *tidbase; unsigned long tidmap[8]; struct page **pagep = NULL; + unsigned subport = subport_fp(fp); if (!dd->ipath_pageshadow) { ret = -ENOMEM; @@ -204,20 +275,34 @@ ret = -EFAULT; goto done; } - tidcnt = dd->ipath_rcvtidcnt; - if (cnt >= tidcnt) { + porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) { + tidcnt = dd->ipath_rcvtidcnt; + tid = pd->port_tidcursor; + tidoff = 0; + } else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + tidoff = dd->ipath_rcvtidcnt - tidcnt; + porttid += tidoff; + tid = tidcursor_fp(fp); + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + tidoff = tidcnt * (subport - 1); + porttid += tidoff; + tid = tidcursor_fp(fp); + } + if (cnt > tidcnt) { /* make sure it all fits in port_tid_pg_list */ dev_info(&dd->pcidev->dev, "Process tried to allocate %u " "TIDs, only trying max (%u)\n", cnt, tidcnt); cnt = tidcnt; } - pagep = (struct page **)pd->port_tid_pg_list; - tidlist = (u16 *) (&pagep[cnt]); + pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; + tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; memset(tidmap, 0, sizeof(tidmap)); - tid = pd->port_tidcursor; /* before decrement; chip actual # */ - porttid = pd->port_port * tidcnt; ntids = tidcnt; tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + dd->ipath_rcvtidbase + @@ -274,16 +359,19 @@ ret = -ENOMEM; break; } - tidlist[i] = tid; + tidlist[i] = tid + tidoff; ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " - "vaddr %lx\n", i, tid, vaddr); + "vaddr %lx\n", i, tid + tidoff, vaddr); 
/* we "know" system pages and TID pages are same size */ dd->ipath_pageshadow[porttid + tid] = pagep[i]; + dd->ipath_physshadow[porttid + tid] = ipath_map_page( + dd->pcidev, pagep[i], 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); /* * don't need atomic or it's overhead */ __set_bit(tid, tidmap); - physaddr = page_to_phys(pagep[i]); + physaddr = dd->ipath_physshadow[porttid + tid]; ipath_stats.sps_pagelocks++; ipath_cdbg(VERBOSE, "TID %u, vaddr %lx, physaddr %llx pgp %p\n", @@ -317,6 +405,9 @@ tid); dd->ipath_f_put_tid(dd, &tidbase[tid], 1, dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); dd->ipath_pageshadow[porttid + tid] = NULL; ipath_stats.sps_pageunlocks++; } @@ -341,7 +432,10 @@ } if (tid == tidcnt) tid = 0; - pd->port_tidcursor = tid; + if (!pd->port_subport_cnt) + pd->port_tidcursor = tid; + else + tidcursor_fp(fp) = tid; } done: @@ -354,6 +448,7 @@ /** * ipath_tid_free - free a port TID * @pd: the port + * @subport: the subport * @ti: the TID info * * right now we are unlocking one page at a time, but since @@ -367,7 +462,7 @@ * they pass in to us. 
*/ -static int ipath_tid_free(struct ipath_portdata *pd, +static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, const struct ipath_tid_info *ti) { int ret = 0; @@ -388,11 +483,20 @@ } porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) + tidcnt = dd->ipath_rcvtidcnt; + else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + porttid += dd->ipath_rcvtidcnt - tidcnt; + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + porttid += tidcnt * (subport - 1); + } tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + dd->ipath_rcvtidbase + porttid * sizeof(*tidbase)); - tidcnt = dd->ipath_rcvtidcnt; limit = sizeof(tidmap) * BITS_PER_BYTE; if (limit > tidcnt) /* just in case size changes in future */ @@ -417,6 +521,9 @@ pd->port_pid, tid); dd->ipath_f_put_tid(dd, &tidbase[tid], 1, dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); ipath_release_user_pages( &dd->ipath_pageshadow[porttid + tid], 1); dd->ipath_pageshadow[porttid + tid] = NULL; @@ -581,20 +688,24 @@ /** * ipath_manage_rcvq - manage a port's receive queue * @pd: the port + * @subport: the subport * @start_stop: action to carry out * * start_stop == 0 disables receive on the port, for use in queue * overflow conditions. start_stop==1 re-enables, to be used to * re-init the software copy of the head register */ -static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) +static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, + int start_stop) { struct ipath_devdata *dd = pd->port_dd; u64 tval; - ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n", + ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", start_stop ? "en" : "dis", dd->ipath_unit, - pd->port_port); + pd->port_port, subport); + if (subport) + goto bail; /* atomically clear receive enable port. 
*/ if (start_stop) { /* @@ -609,7 +720,7 @@ * updated and correct itself, even in the face of software * bugs. */ - *pd->port_rcvhdrtail_kvaddr = 0; + *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0; set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, &dd->ipath_rcvctrl); } else @@ -630,6 +741,7 @@ tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); } /* always; new head should be equal to new tail; see above */ +bail: return 0; } @@ -687,6 +799,36 @@ } } +/* + * Initialize the port data with the receive buffer sizes + * so this can be done while the master port is locked. + * Otherwise, there is a race with a slave opening the port + * and seeing these fields uninitialized. + */ +static void init_user_egr_sizes(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned egrperchunk, egrcnt, size; + + /* + * to avoid wasting a lot of memory, we allocate 32KB chunks of + * physically contiguous memory, advance through it until used up + * and then allocate more. Of course, we need memory to store those + * extra pointers, now. Started out with 256KB, but under heavy + * memory pressure (creating large files and then copying them over + * NFS while doing lots of MPI jobs), we hit some allocation + * failures, even though we can sleep... (2.6.10) Still get + * failures at 64K. 32K is the lowest we can go without wasting + * additional memory. 
+ */ + size = 0x8000; + egrperchunk = size / dd->ipath_rcvegrbufsize; + egrcnt = dd->ipath_rcvegrcnt; + pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; + pd->port_rcvegrbufs_perchunk = egrperchunk; + pd->port_rcvegrbuf_size = size; +} + /** * ipath_create_user_egr - allocate eager TID buffers * @pd: the port to allocate TID buffers for @@ -702,7 +844,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) { struct ipath_devdata *dd = pd->port_dd; - unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff; + unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; int ret; gfp_t gfp_flags; @@ -722,31 +864,18 @@ ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); - /* - * to avoid wasting a lot of memory, we allocate 32KB chunks of - * physically contiguous memory, advance through it until used up - * and then allocate more. Of course, we need memory to store those - * extra pointers, now. Started out with 256KB, but under heavy - * memory pressure (creating large files and then copying them over - * NFS while doing lots of MPI jobs), we hit some allocation - * failures, even though we can sleep... (2.6.10) Still get - * failures at 64K. 32K is the lowest we can go without wasting - * additional memory. 
- */ - size = 0x8000; - alloced = ALIGN(egrsize * egrcnt, size); - egrperchunk = size / egrsize; - chunk = (egrcnt + egrperchunk - 1) / egrperchunk; - pd->port_rcvegrbuf_chunks = chunk; - pd->port_rcvegrbufs_perchunk = egrperchunk; - pd->port_rcvegrbuf_size = size; - pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0])); + chunk = pd->port_rcvegrbuf_chunks; + egrperchunk = pd->port_rcvegrbufs_perchunk; + size = pd->port_rcvegrbuf_size; + pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf) { ret = -ENOMEM; goto bail; } pd->port_rcvegrbuf_phys = - vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0])); + kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf_phys) { ret = -ENOMEM; goto bail_rcvegrbuf; @@ -791,105 +920,23 @@ pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; bail_rcvegrbuf: - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; bail: return ret; } -static int ipath_do_user_init(struct ipath_portdata *pd, - const struct ipath_user_info *uinfo) -{ - int ret = 0; - struct ipath_devdata *dd = pd->port_dd; - u32 head32; - - /* for now, if major version is different, bail */ - if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { - dev_info(&dd->pcidev->dev, - "User major version %d not same as driver " - "major %d\n", uinfo->spu_userversion >> 16, - IPATH_USER_SWMAJOR); - ret = -ENODEV; - goto done; - } - - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) - ipath_dbg("User minor version %d not same as driver " - "minor %d\n", uinfo->spu_userversion & 0xffff, - IPATH_USER_SWMINOR); - - if (uinfo->spu_rcvhdrsize) { - ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); - if (ret) - goto done; - } - - /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ - - /* for right now, kernel piobufs are at end, so port 1 is at 0 
*/ - pd->port_piobufs = dd->ipath_piobufbase + - dd->ipath_pbufsport * (pd->port_port - - 1) * dd->ipath_palign; - ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", - pd->port_port, pd->port_piobufs); - - /* - * Now allocate the rcvhdr Q and eager TIDs; skip the TID - * array for time being. If pd->port_port > chip-supported, - * we need to do extra stuff here to handle by handling overflow - * through port 0, someday - */ - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = ipath_create_user_egr(pd); - if (ret) - goto done; - - /* - * set the eager head register for this port to the current values - * of the tail pointers, since we don't know if they were - * updated on last use of the port. - */ - head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); - ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - dd->ipath_lastegrheads[pd->port_port] = -1; - dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; - ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", - pd->port_port, head32); - pd->port_tidcursor = 0; /* start at beginning after open */ - /* - * now enable the port; the tail registers will be written to memory - * by the chip as soon as it sees the write to - * dd->ipath_kregs->kr_rcvctrl. The update only happens on - * transition from 0 to 1, so clear it first, then set it as part of - * enabling the port. This will (very briefly) affect any other - * open ports, but it shouldn't be long enough to be an issue. - * We explictly set the in-memory copy to 0 beforehand, so we don't - * have to wait to be sure the DMA update has happened. 
- */ - *pd->port_rcvhdrtail_kvaddr = 0ULL; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, - &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); -done: - return ret; -} - /* common code for the mappings on dma_alloc_coherent mem */ static int ipath_mmap_mem(struct vm_area_struct *vma, - struct ipath_portdata *pd, unsigned len, - int write_ok, dma_addr_t addr, char *what) + struct ipath_portdata *pd, unsigned len, int write_ok, + void *kvaddr, char *what) { struct ipath_devdata *dd = pd->port_dd; - unsigned pfn = (unsigned long)addr >> PAGE_SHIFT; + unsigned long pfn; int ret; if ((vma->vm_end - vma->vm_start) > len) { @@ -912,17 +959,17 @@ vma->vm_flags &= ~VM_MAYWRITE; } + pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - dev_info(&dd->pcidev->dev, - "%s port%u mmap of %lx, %x bytes r%c failed: %d\n", - what, pd->port_port, (unsigned long)addr, len, - write_ok?'w':'o', ret); + dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x " + "bytes r%c failed: %d\n", what, pd->port_port, + pfn, len, write_ok?'w':'o', ret); else - ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n", - what, pd->port_port, (unsigned long)addr, len, - write_ok?'w':'o'); + ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes " + "r%c\n", what, pd->port_port, pfn, len, + write_ok?'w':'o'); bail: return ret; } @@ -957,7 +1004,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, struct ipath_devdata *dd, - struct ipath_portdata *pd) + struct ipath_portdata *pd, + unsigned piobufs, unsigned piocnt) { unsigned long phys; int ret; @@ -968,16 +1016,15 @@ * process data, and catches users who might try to read the i/o * space due to a bug. 
*/ - if ((vma->vm_end - vma->vm_start) > - (dd->ipath_pbufsport * dd->ipath_palign)) { + if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " "reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); - ret = -EFAULT; + ret = -EINVAL; goto bail; } - phys = dd->ipath_physaddr + pd->port_piobufs; + phys = dd->ipath_physaddr + piobufs; /* * Don't mark this as non-cached, or we don't get the @@ -1011,7 +1058,7 @@ struct ipath_devdata *dd = pd->port_dd; unsigned long start, size; size_t total_size, i; - dma_addr_t *phys; + unsigned long pfn; int ret; size = pd->port_rcvegrbuf_size; @@ -1021,7 +1068,7 @@ "reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); - ret = -EFAULT; + ret = -EINVAL; goto bail; } @@ -1035,11 +1082,11 @@ vma->vm_flags &= ~VM_MAYWRITE; start = vma->vm_start; - phys = pd->port_rcvegrbuf_phys; for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { - ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT, - size, vma->vm_page_prot); + pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, start, pfn, size, + vma->vm_page_prot); if (ret < 0) goto bail; } @@ -1049,6 +1096,122 @@ return ret; } +/* + * ipath_file_vma_nopage - handle a VMA page fault. + */ +static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *page = NOPAGE_SIGBUS; + void *pageptr; + + /* + * Convert the vmalloc address into a struct page. + */ + pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT)); + page = vmalloc_to_page(pageptr); + if (!page) + goto out; + + /* Increment the reference count. 
*/ + get_page(page); + if (type) + *type = VM_FAULT_MINOR; +out: + return page; +} + +static struct vm_operations_struct ipath_file_vm_ops = { + .nopage = ipath_file_vma_nopage, +}; + +static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, + struct ipath_portdata *pd, unsigned subport) +{ + unsigned long len; + struct ipath_devdata *dd; + void *addr; + size_t size; + int ret; + + /* If the port is not shared, all addresses should be physical */ + if (!pd->port_subport_cnt) { + ret = -EINVAL; + goto bail; + } + + dd = pd->port_dd; + size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; + + /* + * Master has all the slave uregbase, rcvhdrq, and + * rcvegrbufs mmapped. + */ + if (subport == 0) { + unsigned num_slaves = pd->port_subport_cnt - 1; + + if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & + MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf; + size *= num_slaves; + } else { + ret = -EINVAL; + goto bail; + } + } else if (pgaddr == (((u64) pd->subport_uregbase + + PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); + size = PAGE_SIZE; + } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1)) & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1); + size = pd->port_rcvhdrq_size; + } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + + size * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf + size * (subport - 1); + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* 
+ * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; + } else { + ret = -EINVAL; + goto bail; + } + len = vma->vm_end - vma->vm_start; + if (len > size) { + ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); + ret = -EINVAL; + goto bail; + } + + vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; + vma->vm_ops = &ipath_file_vm_ops; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + ret = 0; + +bail: + return ret; +} + /** * ipath_mmap - mmap various structures into user space * @fp: the file pointer @@ -1064,73 +1227,99 @@ struct ipath_portdata *pd; struct ipath_devdata *dd; u64 pgaddr, ureg; + unsigned piobufs, piocnt; int ret; pd = port_fp(fp); + if (!pd) { + ret = -EINVAL; + goto bail; + } dd = pd->port_dd; /* * This is the ipath_do_user_init() code, mapping the shared buffers * into the user process. The address referred to by vm_pgoff is the - * virtual, not physical, address; we only do one mmap for each - * space mapped. + * file offset passed via mmap(). For shared ports, this is the + * kernel vmalloc() address of the pages to share with the master. + * For non-shared or master ports, this is a physical address. + * We only do one mmap for each space mapped. */ pgaddr = vma->vm_pgoff << PAGE_SHIFT; /* - * Must fit in 40 bits for our hardware; some checked elsewhere, - * but we'll be paranoid. Check for 0 is mostly in case one of the - * allocations failed, but user called mmap anyway. We want to catch - * that before it can match. + * Check for 0 in case one of the allocations failed, but user + * called mmap anyway. 
*/ - if (!pgaddr || pgaddr >= (1ULL<<40)) { - ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n", - (unsigned long long)pgaddr, vma->vm_start, vma->vm_end); - return -EINVAL; + if (!pgaddr) { + ret = -EINVAL; + goto bail; } - /* just the offset of the port user registers, not physical addr */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; - - ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n", + ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", (unsigned long long) pgaddr, vma->vm_start, - vma->vm_end - vma->vm_start); + vma->vm_end - vma->vm_start, dd->ipath_unit, + pd->port_port, subport_fp(fp)); - if (vma->vm_start & (PAGE_SIZE-1)) { - ipath_dev_err(dd, - "vm_start not aligned: %lx, end=%lx phys %lx\n", - vma->vm_start, vma->vm_end, (unsigned long)pgaddr); - ret = -EINVAL; + /* + * Physical addresses must fit in 40 bits for our hardware. + * Check for kernel virtual addresses first, anything else must + * match a HW or memory address. 
+ */ + if (pgaddr >= (1ULL<<40)) { + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + goto bail; + } + + if (!pd->port_subport_cnt) { + /* port is not shared */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = dd->ipath_pbufsport; + piobufs = pd->port_piobufs; + } else if (!subport_fp(fp)) { + /* caller is the master */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + + (dd->ipath_pbufsport % pd->port_subport_cnt); + piobufs = pd->port_piobufs + + dd->ipath_palign * (dd->ipath_pbufsport - piocnt); + } else { + unsigned slave = subport_fp(fp) - 1; + + /* caller is a slave */ + ureg = 0; + piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; + piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } - else if (pgaddr == ureg) + + if (pgaddr == ureg) ret = mmap_ureg(vma, dd, ureg); - else if (pgaddr == pd->port_piobufs) - ret = mmap_piobufs(vma, dd, pd); - else if (pgaddr == (u64) pd->port_rcvegr_phys) + else if (pgaddr == piobufs) + ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); + else if (pgaddr == dd->ipath_pioavailregs_phys) + /* in-memory copy of pioavail registers */ + ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, + (void *) dd->ipath_pioavailregs_dma, + "pioavail registers"); + else if (subport_fp(fp)) + /* Subports don't mmap the physical receive buffers */ + ret = -EINVAL; + else if (pgaddr == pd->port_rcvegr_phys) ret = mmap_rcvegrbufs(vma, pd); - else if (pgaddr == (u64) pd->port_rcvhdrq_phys) { + else if (pgaddr == (u64) pd->port_rcvhdrq_phys) /* * The rcvhdrq itself; readonly except on HT (so have * to allow writable mapping), multiple pages, contiguous * from an i/o perspective. 
*/ - unsigned total_size = - ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize - * sizeof(u32), PAGE_SIZE); - ret = ipath_mmap_mem(vma, pd, total_size, 1, - pd->port_rcvhdrq_phys, + ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, + pd->port_rcvhdrq, "rcvhdrq"); - } - else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys) + else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) /* in-memory copy of rcvhdrq tail register */ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - pd->port_rcvhdrqtailaddr_phys, + pd->port_rcvhdrtail_kvaddr, "rcvhdrq tail"); - else if (pgaddr == dd->ipath_pioavailregs_phys) - /* in-memory copy of pioavail registers */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - dd->ipath_pioavailregs_phys, - "pioavail registers"); else ret = -EINVAL; @@ -1138,9 +1327,10 @@ if (ret < 0) dev_info(&dd->pcidev->dev, - "Failure %d on addr %lx, off %lx\n", - -ret, vma->vm_start, vma->vm_pgoff); - + "Failure %d on off %llx len %lx\n", + -ret, (unsigned long long)pgaddr, + vma->vm_end - vma->vm_start); +bail: return ret; } @@ -1154,6 +1344,8 @@ struct ipath_devdata *dd; pd = port_fp(fp); + if (!pd) + goto bail; dd = pd->port_dd; bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; @@ -1176,7 +1368,7 @@ if (tail == head) { set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ + if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ (void)ipath_write_ureg(dd, ur_rcvhdrhead, dd->ipath_rhdrhead_intr_off | head, pd->port_port); @@ -1200,18 +1392,80 @@ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); +bail: return pollflag; } +static int init_subports(struct ipath_devdata *dd, + struct ipath_portdata *pd, + const struct ipath_user_info *uinfo) +{ + int ret = 0; + unsigned num_slaves; + size_t size; + + /* Old user binaries don't know about subports */ + if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) + goto bail; + /* + * If the user is requesting zero or one port, + * skip 
the subport allocation. + */ + if (uinfo->spu_subport_cnt <= 1) + goto bail; + if (uinfo->spu_subport_cnt > 4) { + ret = -EINVAL; + goto bail; + } + + num_slaves = uinfo->spu_subport_cnt - 1; + pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); + if (!pd->subport_uregbase) { + ret = -ENOMEM; + goto bail; + } + /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ + size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * + sizeof(u32), PAGE_SIZE) * num_slaves; + pd->subport_rcvhdr_base = vmalloc(size); + if (!pd->subport_rcvhdr_base) { + ret = -ENOMEM; + goto bail_ureg; + } + + pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_slaves); + if (!pd->subport_rcvegrbuf) { + ret = -ENOMEM; + goto bail_rhdr; + } + + pd->port_subport_cnt = uinfo->spu_subport_cnt; + pd->port_subport_id = uinfo->spu_subport_id; + pd->active_slaves = 1; + goto bail; + +bail_rhdr: + vfree(pd->subport_rcvhdr_base); +bail_ureg: + vfree(pd->subport_uregbase); + pd->subport_uregbase = NULL; +bail: + return ret; +} + static int try_alloc_port(struct ipath_devdata *dd, int port, - struct file *fp) + struct file *fp, + const struct ipath_user_info *uinfo) { + struct ipath_portdata *pd; int ret; - if (!dd->ipath_pd[port]) { - void *p, *ptmp; + if (!(pd = dd->ipath_pd[port])) { + void *ptmp; - p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); + pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); /* * Allocate memory for use in ipath_tid_update() just once @@ -1221,34 +1475,36 @@ ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + dd->ipath_rcvtidcnt * sizeof(struct page **), GFP_KERNEL); - if (!p || !ptmp) { + if (!pd || !ptmp) { ipath_dev_err(dd, "Unable to allocate portdata " "memory, failing open\n"); ret = -ENOMEM; - kfree(p); + kfree(pd); kfree(ptmp); goto bail; } - dd->ipath_pd[port] = p; + dd->ipath_pd[port] = pd; dd->ipath_pd[port]->port_port = port; dd->ipath_pd[port]->port_dd = dd; dd->ipath_pd[port]->port_tid_pg_list = ptmp; 
init_waitqueue_head(&dd->ipath_pd[port]->port_wait); } - if (!dd->ipath_pd[port]->port_cnt) { - dd->ipath_pd[port]->port_cnt = 1; - fp->private_data = (void *) dd->ipath_pd[port]; + if (!pd->port_cnt) { + pd->userversion = uinfo->spu_userversion; + init_user_egr_sizes(pd); + if ((ret = init_subports(dd, pd, uinfo)) != 0) + goto bail; ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", current->comm, current->pid, dd->ipath_unit, port); - dd->ipath_pd[port]->port_pid = current->pid; - strncpy(dd->ipath_pd[port]->port_comm, current->comm, - sizeof(dd->ipath_pd[port]->port_comm)); + pd->port_cnt = 1; + port_fp(fp) = pd; + pd->port_pid = current->pid; + strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); ipath_stats.sps_ports++; ret = 0; - goto bail; - } - ret = -EBUSY; + } else + ret = -EBUSY; bail: return ret; @@ -1264,7 +1520,8 @@ | IPATH_LINKUNK)); } -static int find_free_port(int unit, struct file *fp) +static int find_free_port(int unit, struct file *fp, + const struct ipath_user_info *uinfo) { struct ipath_devdata *dd = ipath_lookup(unit); int ret, i; @@ -1279,8 +1536,8 @@ goto bail; } - for (i = 0; i < dd->ipath_cfgports; i++) { - ret = try_alloc_port(dd, i, fp); + for (i = 1; i < dd->ipath_cfgports; i++) { + ret = try_alloc_port(dd, i, fp, uinfo); if (ret != -EBUSY) goto bail; } @@ -1290,13 +1547,14 @@ return ret; } -static int find_best_unit(struct file *fp) +static int find_best_unit(struct file *fp, + const struct ipath_user_info *uinfo) { int ret = 0, i, prefunit = -1, devmax; int maxofallports, npresent, nup; int ndev; - (void) ipath_count_units(&npresent, &nup, &maxofallports); + devmax = ipath_count_units(&npresent, &nup, &maxofallports); /* * This code is present to allow a knowledgeable person to @@ -1343,8 +1601,6 @@ if (prefunit != -1) devmax = prefunit + 1; - else - devmax = ipath_count_units(NULL, NULL, NULL); recheck: for (i = 1; i < maxofallports; i++) { for (ndev = prefunit != -1 ? 
prefunit : 0; ndev < devmax; @@ -1359,7 +1615,7 @@ * next. */ continue; - ret = try_alloc_port(dd, i, fp); + ret = try_alloc_port(dd, i, fp, uinfo); if (!ret) goto done; } @@ -1395,22 +1651,183 @@ return ret; } +static int find_shared_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int devmax, ndev, i; + int ret = 0; + + devmax = ipath_count_units(NULL, NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { + struct ipath_devdata *dd = ipath_lookup(ndev); + + if (!dd) + continue; + for (i = 1; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + /* Skip ports which are not yet open */ + if (!pd || !pd->port_cnt) + continue; + /* Skip port if it doesn't match the requested one */ + if (pd->port_subport_id != uinfo->spu_subport_id) + continue; + /* Verify the sharing process matches the master */ + if (pd->port_subport_cnt != uinfo->spu_subport_cnt || + pd->userversion != uinfo->spu_userversion || + pd->port_cnt >= pd->port_subport_cnt) { + ret = -EINVAL; + goto done; + } + port_fp(fp) = pd; + subport_fp(fp) = pd->port_cnt++; + tidcursor_fp(fp) = 0; + pd->active_slaves |= 1 << subport_fp(fp); + ipath_cdbg(PROC, + "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", + current->comm, current->pid, + subport_fp(fp), + pd->port_comm, pd->port_pid, + dd->ipath_unit, pd->port_port); + ret = 1; + goto done; + } + } + +done: + return ret; +} + static int ipath_open(struct inode *in, struct file *fp) { - int ret, user_minor; + /* The real work is performed later in ipath_assign_port() */ + fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); + return fp->private_data ? 
0 : -ENOMEM; +} + + +/* Get port early, so can set affinity prior to memory allocation */ +static int ipath_assign_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + int i_minor; + unsigned swminor; + + /* Check to be sure we haven't already initialized this file */ + if (port_fp(fp)) { + ret = -EINVAL; + goto done; + } + + /* for now, if major version is different, bail */ + if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { + ipath_dbg("User major version %d not same as driver " + "major %d\n", uinfo->spu_userversion >> 16, + IPATH_USER_SWMAJOR); + ret = -ENODEV; + goto done; + } + + swminor = uinfo->spu_userversion & 0xffff; + if (swminor != IPATH_USER_SWMINOR) + ipath_dbg("User minor version %d not same as driver " + "minor %d\n", swminor, IPATH_USER_SWMINOR); mutex_lock(&ipath_mutex); - user_minor = iminor(in) - IPATH_USER_MINOR_BASE; + if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt && + (ret = find_shared_port(fp, uinfo))) { + mutex_unlock(&ipath_mutex); + if (ret > 0) + ret = 0; + goto done; + } + + i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE; ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)in->i_rdev, user_minor); + (long)fp->f_dentry->d_inode->i_rdev, i_minor); - if (user_minor) - ret = find_free_port(user_minor - 1, fp); + if (i_minor) + ret = find_free_port(i_minor - 1, fp, uinfo); else - ret = find_best_unit(fp); + ret = find_best_unit(fp, uinfo); mutex_unlock(&ipath_mutex); + +done: + return ret; +} + + +static int ipath_do_user_init(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + struct ipath_portdata *pd; + struct ipath_devdata *dd; + u32 head32; + + pd = port_fp(fp); + dd = pd->port_dd; + + if (uinfo->spu_rcvhdrsize) { + ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); + if (ret) + goto done; + } + + /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ + + /* for right now, kernel piobufs are at end, so port 1 is at 0 */ + 
pd->port_piobufs = dd->ipath_piobufbase + + dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; + ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", + pd->port_port, pd->port_piobufs); + + /* + * Now allocate the rcvhdr Q and eager TIDs; skip the TID + * array for time being. If pd->port_port > chip-supported, + * we need to do extra stuff here to handle by handling overflow + * through port 0, someday + */ + ret = ipath_create_rcvhdrq(dd, pd); + if (!ret) + ret = ipath_create_user_egr(pd); + if (ret) + goto done; + + /* + * set the eager head register for this port to the current values + * of the tail pointers, since we don't know if they were + * updated on last use of the port. + */ + head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); + ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); + dd->ipath_lastegrheads[pd->port_port] = -1; + dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; + ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", + pd->port_port, head32); + pd->port_tidcursor = 0; /* start at beginning after open */ + /* + * now enable the port; the tail registers will be written to memory + * by the chip as soon as it sees the write to + * dd->ipath_kregs->kr_rcvctrl. The update only happens on + * transition from 0 to 1, so clear it first, then set it as part of + * enabling the port. This will (very briefly) affect any other + * open ports, but it shouldn't be long enough to be an issue. + * We explicitly set the in-memory copy to 0 beforehand, so we don't + * have to wait to be sure the DMA update has happened. 
+ */ + *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL; + set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + &dd->ipath_rcvctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); +done: return ret; } @@ -1433,6 +1850,8 @@ if (!dd->ipath_pageshadow[i]) continue; + pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i], 1); dd->ipath_pageshadow[i] = NULL; @@ -1453,6 +1872,7 @@ static int ipath_close(struct inode *in, struct file *fp) { int ret = 0; + struct ipath_filedata *fd; struct ipath_portdata *pd; struct ipath_devdata *dd; unsigned port; @@ -1462,9 +1882,24 @@ mutex_lock(&ipath_mutex); - pd = port_fp(fp); - port = pd->port_port; + fd = (struct ipath_filedata *) fp->private_data; fp->private_data = NULL; + pd = fd->pd; + if (!pd) { + mutex_unlock(&ipath_mutex); + goto bail; + } + if (--pd->port_cnt) { + /* + * XXX If the master closes the port before the slave(s), + * revoke the mmap for the eager receive queue so + * the slave(s) don't wait for receive data forever. + */ + pd->active_slaves &= ~(1 << fd->subport); + mutex_unlock(&ipath_mutex); + goto bail; + } + port = pd->port_port; dd = pd->port_dd; if (pd->port_hdrqfull) { @@ -1503,8 +1938,6 @@ /* clean up the pkeys for this port user */ ipath_clean_part_key(pd, dd); - - /* * be paranoid, and never write 0's to these, just use an * unused part of the port 0 tail page. 
Of course, @@ -1523,39 +1956,49 @@ i = dd->ipath_pbufsport * (port - 1); ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); + dd->ipath_f_clear_tids(dd, pd->port_port); + if (dd->ipath_pageshadow) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", pd->port_comm, pd->port_pid, dd->ipath_unit, port); - - dd->ipath_f_clear_tids(dd, pd->port_port); } - pd->port_cnt = 0; pd->port_pid = 0; - dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ +bail: + kfree(fd); return ret; } -static int ipath_port_info(struct ipath_portdata *pd, +static int ipath_port_info(struct ipath_portdata *pd, u16 subport, struct ipath_port_info __user *uinfo) { struct ipath_port_info info; int nup; int ret; + size_t sz; (void) ipath_count_units(NULL, &nup, NULL); info.num_active = nup; info.unit = pd->port_dd->ipath_unit; info.port = pd->port_port; + info.subport = subport; + /* Don't return new fields if old library opened the port. */ + if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) { + /* Number of user ports available for this device. 
*/ + info.num_ports = pd->port_dd->ipath_cfgports - 1; + info.num_subports = pd->port_subport_cnt; + sz = sizeof(info); + } else + sz = sizeof(info) - 2 * sizeof(u16); - if (copy_to_user(uinfo, &info, sizeof(info))) { + if (copy_to_user(uinfo, &info, sz)) { ret = -EFAULT; goto bail; } @@ -1565,6 +2008,16 @@ return ret; } +static int ipath_get_slave_info(struct ipath_portdata *pd, + void __user *slave_mask_addr) +{ + int ret = 0; + + if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32))) + ret = -EFAULT; + return ret; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -1591,6 +2044,8 @@ consumed = sizeof(cmd.type); switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + case __IPATH_CMD_USER_INIT: case IPATH_CMD_USER_INIT: copy = sizeof(cmd.cmd.user_info); dest = &cmd.cmd.user_info; @@ -1617,6 +2072,11 @@ dest = &cmd.cmd.part_key; src = &ucmd->cmd.part_key; break; + case IPATH_CMD_SLAVE_INFO: + copy = sizeof(cmd.cmd.slave_mask_addr); + dest = &cmd.cmd.slave_mask_addr; + src = &ucmd->cmd.slave_mask_addr; + break; default: ret = -EINVAL; goto bail; @@ -1634,34 +2094,55 @@ consumed += copy; pd = port_fp(fp); + if (!pd && cmd.type != __IPATH_CMD_USER_INIT && + cmd.type != IPATH_CMD_ASSIGN_PORT) { + ret = -EINVAL; + goto bail; + } switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + break; + case __IPATH_CMD_USER_INIT: + /* backwards compatibility, get port first */ + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + /* and fall through to current version. 
*/ case IPATH_CMD_USER_INIT: - ret = ipath_do_user_init(pd, &cmd.cmd.user_info); - if (ret < 0) + ret = ipath_do_user_init(fp, &cmd.cmd.user_info); + if (ret) goto bail; ret = ipath_get_base_info( - pd, (void __user *) (unsigned long) + fp, (void __user *) (unsigned long) cmd.cmd.user_info.spu_base_info, cmd.cmd.user_info.spu_base_info_size); break; case IPATH_CMD_RECV_CTRL: - ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl); + ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl); break; case IPATH_CMD_PORT_INFO: - ret = ipath_port_info(pd, + ret = ipath_port_info(pd, subport_fp(fp), (struct ipath_port_info __user *) (unsigned long) cmd.cmd.port_info); break; case IPATH_CMD_TID_UPDATE: - ret = ipath_tid_update(pd, &cmd.cmd.tid_info); + ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info); break; case IPATH_CMD_TID_FREE: - ret = ipath_tid_free(pd, &cmd.cmd.tid_info); + ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info); break; case IPATH_CMD_SET_PART_KEY: ret = ipath_set_part_key(pd, cmd.cmd.part_key); break; + case IPATH_CMD_SLAVE_INFO: + ret = ipath_get_slave_info(pd, + (void __user *) (unsigned long) + cmd.cmd.slave_mask_addr); + break; } if (ret >= 0) @@ -1858,4 +2339,3 @@ bail: return; } - diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_fs.c newtree/drivers/infiniband/hw/ipath/ipath_fs.c --- oldtree/drivers/infiniband/hw/ipath/ipath_fs.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_fs.c 2006-09-30 05:21:44.000000000 -0400 @@ -356,19 +356,16 @@ pos = *ppos; - if ( pos < 0) { + if (pos != 0) { ret = -EINVAL; goto bail; } - if (pos >= sizeof(struct ipath_flash)) { - ret = 0; + if (count != sizeof(struct ipath_flash)) { + ret = -EINVAL; goto bail; } - if (count > sizeof(struct ipath_flash) - pos) - count = sizeof(struct ipath_flash) - pos; - tmp = kmalloc(count, GFP_KERNEL); if (!tmp) { ret = -ENOMEM; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_iba6110.c 
newtree/drivers/infiniband/hw/ipath/ipath_iba6110.c --- oldtree/drivers/infiniband/hw/ipath/ipath_iba6110.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_iba6110.c 2006-09-30 05:21:44.000000000 -0400 @@ -252,8 +252,8 @@ }; /* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK 0x1FF -#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF +#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1) +#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1) /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ #define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0 @@ -338,7 +338,7 @@ if (crcbits) { u16 ctrl0, ctrl1; snprintf(bitsmsg, sizeof bitsmsg, - "[HT%s lane %s CRC (%llx); ignore till reload]", + "[HT%s lane %s CRC (%llx); powercycle to completely clear]", !(crcbits & _IPATH_HTLINK1_CRCBITS) ? "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS) ? "1 (B)" : "0+1 (A+B)"), @@ -389,17 +389,28 @@ _IPATH_HTLINK1_CRCBITS))); } +/* 6110 specific hardware errors... */ +static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"), + INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"), + INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"), + INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"), + INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), + INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), +}; + /** - * ipath_ht_handle_hwerrors - display hardware errors + * ipath_ht_handle_hwerrors - display hardware errors. * @dd: the infinipath device * @msg: the output buffer * @msgl: the size of the output buffer * - * Use same msg buffer as regular errors to avoid - * excessive stack use. Most hardware errors are catastrophic, but for - * right now, we'll print them and continue. - * We reuse the same message buffer as ipath_handle_errors() to avoid - * excessive stack usage. 
+ * Use same msg buffer as regular errors to avoid excessive stack + * use. Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. We reuse the same message buffer as + * ipath_handle_errors() to avoid excessive stack usage. */ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, size_t msgl) @@ -440,19 +451,49 @@ * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~dd->ipath_lasthwerror) || + if ((hwerrs & ~(dd->ipath_lasthwerror | + ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; - if (hwerrs & ~infinipath_hwe_bitsextant) + if (hwerrs & ~dd->ipath_hwe_bitsextant) ipath_dev_err(dd, "hwerror interrupt with unknown errors " "%llx set\n", (unsigned long long) - (hwerrs & ~infinipath_hwe_bitsextant)); + (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if (ctrl & INFINIPATH_C_FREEZEMODE) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. 
+ */ + if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { + ipath_stats.sps_txeparity++; + ipath_dbg("Recovering from TXE parity error (%llu), " + "hwerrstatus=%llx\n", + (unsigned long long) ipath_stats.sps_txeparity, + (unsigned long long) hwerrs); + ipath_disarm_senderrbufs(dd); + hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); + if (!hwerrs) { /* else leave in freeze mode */ + ipath_write_kreg(dd, + dd->ipath_kregs->kr_control, + dd->ipath_control); + return; + } + } if (hwerrs) { /* * if any set that we aren't ignoring; only @@ -499,44 +540,16 @@ bits); strlcat(msg, bitsmsg, msgl); } - if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_RXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_TXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) - strlcat(msg, "[IB2IPATH Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) - strlcat(msg, "[IPATH2IB Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR) - strlcat(msg, "[HTC Ireq Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR) - strlcat(msg, "[HTC Treq Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR) - strlcat(msg, "[HTC Tresp Parity]", msgl); + + ipath_format_hwerrors(hwerrs, + ipath_6110_hwerror_msgs, + sizeof(ipath_6110_hwerror_msgs) / + 
sizeof(ipath_6110_hwerror_msgs[0]), + msg, msgl); if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) hwerr_crcbits(dd, hwerrs, msg, msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR5) - strlcat(msg, "[HT core Misc5]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR6) - strlcat(msg, "[HT core Misc6]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR7) - strlcat(msg, "[HT core Misc7]", msgl); if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); @@ -573,11 +586,6 @@ dd->ipath_hwerrmask); } - if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) - strlcat(msg, "[Rx Dsync]", msgl); - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) - strlcat(msg, "[SerDes PLL]", msgl); - ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) /* @@ -1080,21 +1088,21 @@ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); } -static void ipath_init_ht_variables(void) +static void ipath_init_ht_variables(struct ipath_devdata *dd) { - ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - ipath_gpio_sda = IPATH_GPIO_SDA; - ipath_gpio_scl = IPATH_GPIO_SCL; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; + dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; + dd->ipath_gpio_sda = IPATH_GPIO_SDA; + dd->ipath_gpio_scl = IPATH_GPIO_SCL; - infinipath_i_bitsextant = + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << INFINIPATH_I_RCVAVAIL_SHIFT) | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - infinipath_e_bitsextant = + dd->ipath_e_bitsextant = INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | @@ -1112,7 +1120,7 @@ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE; - infinipath_hwe_bitsextant = + dd->ipath_hwe_bitsextant = 
(INFINIPATH_HWE_HTCMEMPARITYERR_MASK << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) | (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << @@ -1141,8 +1149,8 @@ INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; + dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; } /** @@ -1607,5 +1615,5 @@ * do very early init that is needed before ipath_f_bus is * called */ - ipath_init_ht_variables(); + ipath_init_ht_variables(dd); } diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_iba6120.c newtree/drivers/infiniband/hw/ipath/ipath_iba6120.c --- oldtree/drivers/infiniband/hw/ipath/ipath_iba6120.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_iba6120.c 2006-09-30 05:21:44.000000000 -0400 @@ -263,8 +263,8 @@ }; /* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK 0x1F -#define INFINIPATH_I_RCVAVAIL_MASK 0x1F +#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1) +#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1) /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ #define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL @@ -294,6 +294,33 @@ #define IPATH_GPIO_SCL (1ULL << \ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) +/* + * Rev2 silicon allows suppressing check for ArmLaunch errors. + * this can speed up short packet sends on systems that do + * not guarantee write-order. + */ +#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63) + +/* 6120 specific hardware errors...
*/ +static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), + INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"), + /* + * In practice, it's unlikely that we'll see PCIe PLL, or bus + * parity or memory parity error failures, because most likely we + * won't be able to talk to the core of the chip. Nonetheless, we + * might see them, if they are in parts of the PCIe core that aren't + * essential. + */ + INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"), + INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"), + INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), + INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), +}; + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -343,19 +370,49 @@ * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~dd->ipath_lasthwerror) || + if ((hwerrs & ~(dd->ipath_lasthwerror | + ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; - if (hwerrs & ~infinipath_hwe_bitsextant) + if (hwerrs & ~dd->ipath_hwe_bitsextant) ipath_dev_err(dd, "hwerror interrupt with unknown errors " "%llx set\n", (unsigned long long) - (hwerrs & ~infinipath_hwe_bitsextant)); + (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if (ctrl & INFINIPATH_C_FREEZEMODE) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count
the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. + */ + if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { + ipath_stats.sps_txeparity++; + ipath_dbg("Recovering from TXE parity error (%llu), " + "hwerrstatus=%llx\n", + (unsigned long long) ipath_stats.sps_txeparity, + (unsigned long long) hwerrs); + ipath_disarm_senderrbufs(dd); + hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); + if (!hwerrs) { /* else leave in freeze mode */ + ipath_write_kreg(dd, + dd->ipath_kregs->kr_control, + dd->ipath_control); + return; + } + } if (hwerrs) { /* * if any set that we aren't ignoring only make the @@ -379,9 +436,8 @@ } else { ipath_dbg("Clearing freezemode on ignored hardware " "error\n"); - ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - ctrl); + dd->ipath_control); } } @@ -396,24 +452,13 @@ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, dd->ipath_hwerrmask); } - if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_RXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_TXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } + + ipath_format_hwerrors(hwerrs, + ipath_6120_hwerror_msgs, + sizeof(ipath_6120_hwerror_msgs)/ + 
sizeof(ipath_6120_hwerror_msgs[0]), + msg, msgl); + if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) { bits = (u32) ((hwerrs >> @@ -423,10 +468,6 @@ "[PCIe Mem Parity Errs %x] ", bits); strlcat(msg, bitsmsg, msgl); } - if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) - strlcat(msg, "[IB2IPATH Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) - strlcat(msg, "[IPATH2IB Parity]", msgl); #define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ INFINIPATH_HWE_COREPLL_RFSLIP ) @@ -452,34 +493,6 @@ dd->ipath_hwerrmask); } - if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP) - strlcat(msg, "[PCIe Poisoned TLP]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT) - strlcat(msg, "[PCIe completion timeout]", msgl); - - /* - * In practice, it's unlikely wthat we'll see PCIe PLL, or bus - * parity or memory parity error failures, because most likely we - * won't be able to talk to the core of the chip. Nonetheless, we - * might see them, if they are in parts of the PCIe core that aren't - * essential. 
- */ - if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED) - strlcat(msg, "[PCIePLL1]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED) - strlcat(msg, "[PCIePLL0]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH) - strlcat(msg, "[PCIe XTLH core parity]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM) - strlcat(msg, "[PCIe ADM TX core parity]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM) - strlcat(msg, "[PCIe ADM RX core parity]", msgl); - - if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) - strlcat(msg, "[Rx Dsync]", msgl); - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) - strlcat(msg, "[SerDes PLL]", msgl); - ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { /* @@ -525,6 +538,9 @@ case 5: n = "InfiniPath_QMH7140"; break; + case 6: + n = "InfiniPath_QLE7142"; + break; default: ipath_dev_err(dd, "Don't yet know about board with ID %u\n", @@ -571,9 +587,12 @@ if (!dd->ipath_boardrev) // no PLL for Emulator val &= ~INFINIPATH_HWE_SERDESPLLFAILED; - /* workaround bug 9460 in internal interface bus parity checking */ - val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; - + if (dd->ipath_minrev < 2) { + /* workaround bug 9460 in internal interface bus parity + * checking. Fixed (HW bug 9490) in Rev2. 
+ */ + val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; + } dd->ipath_hwerrmask = val; } @@ -583,8 +602,8 @@ */ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) { - u64 val, tmp, config1; - int ret = 0, change = 0; + u64 val, tmp, config1, prev_val; + int ret = 0; ipath_dbg("Trying to bringup serdes\n"); @@ -641,6 +660,7 @@ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + prev_val = val; if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { val &= @@ -648,11 +668,9 @@ INFINIPATH_XGXS_MDIOADDR_SHIFT); /* MDIO address 3 */ val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; - change = 1; } if (val & INFINIPATH_XGXS_RESET) { val &= ~INFINIPATH_XGXS_RESET; - change = 1; } if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { @@ -661,9 +679,19 @@ INFINIPATH_XGXS_RX_POL_SHIFT); val |= dd->ipath_rx_pol_inv << INFINIPATH_XGXS_RX_POL_SHIFT; - change = 1; } - if (change) + if (dd->ipath_minrev >= 2) { + /* Rev 2. can tolerate multiple writes to PBC, and + * allowing them can provide lower latency on some + * CPUs, but this feature is off by default, only + * turned on by setting D63 of XGXSconfig reg. + * May want to make this conditional more + * fine-grained in future. This is not exactly + * related to XGXS, but where the bit ended up. + */ + val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR; + } + if (val != prev_val) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); @@ -717,9 +745,25 @@ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); } -/* this is not yet needed on this chip, so just return 0. */ static int ipath_pe_intconfig(struct ipath_devdata *dd) { + u64 val; + u32 chiprev; + + /* + * If the chip supports added error indication via GPIO pins, + * enable interrupts on those bits so the interrupt routine + * can count the events. 
Also set flag so interrupt routine + * can know they are expected. + */ + chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT; + if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) { + /* Rev2+ reports extra errors via internal GPIO pins */ + dd->ipath_flags |= IPATH_GPIO_ERRINTRS; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val |= IPATH_GPIO_ERRINTR_MASK; + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + } return 0; } @@ -853,21 +897,23 @@ return 0; } -static void ipath_init_pe_variables(void) +static void ipath_init_pe_variables(struct ipath_devdata *dd) { /* * bits for selecting i2c direction and values, * used for I2C serial flash */ - ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - ipath_gpio_sda = IPATH_GPIO_SDA; - ipath_gpio_scl = IPATH_GPIO_SCL; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; + dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; + dd->ipath_gpio_sda = IPATH_GPIO_SDA; + dd->ipath_gpio_scl = IPATH_GPIO_SCL; /* variables for sanity checking interrupt and errors */ - infinipath_hwe_bitsextant = + dd->ipath_hwe_bitsextant = (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | + (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) | INFINIPATH_HWE_PCIE1PLLFAILED | @@ -883,13 +929,13 @@ INFINIPATH_HWE_SERDESPLLFAILED | INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - infinipath_i_bitsextant = + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << INFINIPATH_I_RCVAVAIL_SHIFT) | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - infinipath_e_bitsextant = + dd->ipath_e_bitsextant = INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | INFINIPATH_E_RMAXPKTLEN | 
INFINIPATH_E_RLONGPKTLEN | @@ -907,8 +953,8 @@ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE; - infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; + dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; } /* setup the MSI stuff again after a reset. I'd like to just call @@ -1082,6 +1128,45 @@ mmiowb(); spin_unlock_irqrestore(&dd->ipath_tid_lock, flags); } +/** + * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher + * @dd: the infinipath device + * @tidptr: pointer to the expected TID (in chip) to update + * @type: 0 for eager, 1 for expected + * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing + * + * This exists as a separate routine to allow for selection of the + * appropriate "flavor". The static calls in cleanup just use the + * revision-agnostic form, as they are not performance critical. + */ +static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + u32 __iomem *tidp32 = (u32 __iomem *)tidptr; + + if (pa != dd->ipath_tidinvalid) { + if (pa & ((1U << 11) - 1)) { + dev_info(&dd->pcidev->dev, "BUG: physaddr %lx " + "not 2KB aligned!\n", pa); + return; + } + pa >>= 11; + /* paranoia check */ + if (pa & (7<<29)) + ipath_dev_err(dd, + "BUG: Physical page address 0x%lx " + "has bits set in 31-29\n", pa); + + if (type == 0) + pa |= dd->ipath_tidtemplate; + else /* for now, always full 4KB page */ + pa |= 2 << 29; + } + if (dd->ipath_kregbase) + writel(pa, tidp32); + mmiowb(); +} + /** * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager @@ -1203,7 +1288,7 @@ /** * ipath_init_pe_get_base_info - set chip-specific flags for user code - * @dd: the infinipath device + * @pd: the infinipath port * @kbase: ipath_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs @@ -1212,6 +1297,7 @@
static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) { struct ipath_base_info *kinfo = kbase; + struct ipath_devdata *dd; if (ipath_unordered_wc()) { kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER; @@ -1220,8 +1306,20 @@ else ipath_cdbg(PROC, "Not Intel processor, WC ordered\n"); - kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; + if (pd == NULL) + goto done; + dd = pd->port_dd; + + if (dd != NULL && dd->ipath_minrev >= 2) { + ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n"); + kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE; + ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n"); + kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN; + } + +done: + kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; return 0; } @@ -1244,7 +1342,10 @@ dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes; dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; dd->ipath_f_clear_tids = ipath_pe_clear_tids; - dd->ipath_f_put_tid = ipath_pe_put_tid; + if (dd->ipath_minrev >= 2) + dd->ipath_f_put_tid = ipath_pe_put_tid_2; + else + dd->ipath_f_put_tid = ipath_pe_put_tid; dd->ipath_f_cleanup = ipath_setup_pe_cleanup; dd->ipath_f_setextled = ipath_setup_pe_setextled; dd->ipath_f_get_base_info = ipath_pe_get_base_info; @@ -1259,6 +1360,6 @@ dd->ipath_kregs = &ipath_pe_kregs; dd->ipath_cregs = &ipath_pe_cregs; - ipath_init_pe_variables(); + ipath_init_pe_variables(dd); } diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_init_chip.c newtree/drivers/infiniband/hw/ipath/ipath_init_chip.c --- oldtree/drivers/infiniband/hw/ipath/ipath_init_chip.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_init_chip.c 2006-09-30 05:21:44.000000000 -0400 @@ -88,13 +88,13 @@ static int create_port0_egr(struct ipath_devdata *dd) { unsigned e, egrcnt; - struct sk_buff **skbs; + struct ipath_skbinfo *skbinfo; int ret; egrcnt = dd->ipath_rcvegrcnt; - skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt); - if (skbs == NULL) { 
+ skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); + if (skbinfo == NULL) { ipath_dev_err(dd, "allocation error for eager TID " "skb array\n"); ret = -ENOMEM; @@ -109,13 +109,13 @@ * 4 bytes so that the data buffer stays word aligned. * See ipath_kreceive() for more details. */ - skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL); - if (!skbs[e]) { + skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL); + if (!skbinfo[e].skb) { ipath_dev_err(dd, "SKB allocation error for " "eager TID %u\n", e); while (e != 0) - dev_kfree_skb(skbs[--e]); - vfree(skbs); + dev_kfree_skb(skbinfo[--e].skb); + vfree(skbinfo); ret = -ENOMEM; goto bail; } @@ -124,14 +124,17 @@ * After loop above, so we can test non-NULL to see if ready * to use at receive, etc. */ - dd->ipath_port0_skbs = skbs; + dd->ipath_port0_skbinfo = skbinfo; for (e = 0; e < egrcnt; e++) { - unsigned long phys = - virt_to_phys(dd->ipath_port0_skbs[e]->data); + dd->ipath_port0_skbinfo[e].phys = + ipath_map_single(dd->pcidev, + dd->ipath_port0_skbinfo[e].skb->data, + dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); dd->ipath_f_put_tid(dd, e + (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), 0, phys); + dd->ipath_rcvegrbase), 0, + dd->ipath_port0_skbinfo[e].phys); } ret = 0; @@ -432,16 +435,33 @@ */ static void init_shadow_tids(struct ipath_devdata *dd) { - dd->ipath_pageshadow = (struct page **) - vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + struct page **pages; + dma_addr_t *addrs; + + pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * sizeof(struct page *)); - if (!dd->ipath_pageshadow) + if (!pages) { ipath_dev_err(dd, "failed to allocate shadow page * " "array, no expected sends!\n"); - else - memset(dd->ipath_pageshadow, 0, - dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(struct page *)); + dd->ipath_pageshadow = NULL; + return; + } + + addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(dma_addr_t)); + if (!addrs) { + ipath_dev_err(dd, "failed to allocate 
shadow dma handle " + "array, no expected sends!\n"); + vfree(dd->ipath_pageshadow); + dd->ipath_pageshadow = NULL; + return; + } + + memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(struct page *)); + + dd->ipath_pageshadow = pages; + dd->ipath_physshadow = addrs; } static void enable_chip(struct ipath_devdata *dd, diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_intr.c newtree/drivers/infiniband/hw/ipath/ipath_intr.c --- oldtree/drivers/infiniband/hw/ipath/ipath_intr.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_intr.c 2006-09-30 05:21:44.000000000 -0400 @@ -37,6 +37,50 @@ #include "ipath_verbs.h" #include "ipath_common.h" +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used. + */ +void ipath_disarm_senderrbufs(struct ipath_devdata *dd) +{ + u32 piobcnt; + unsigned long sbuf[4]; + /* + * it's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling + */ + piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + /* read these before writing errorclear */ + sbuf[0] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror); + sbuf[1] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 1); + if (piobcnt > 128) { + sbuf[2] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 2); + sbuf[3] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 3); + } + + if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { + int i; + if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) { + __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, + "SendbufErrs %lx %lx", sbuf[0], + sbuf[1]); + if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) + printk(" %lx %lx ", sbuf[2], sbuf[3]); + printk("\n"); + } + + for (i = 0; i < piobcnt; i++) + if (test_bit(i, sbuf)) + ipath_disarm_piobufs(dd, i, 1); + dd->ipath_lastcancel = 
jiffies+3; /* no armlaunch for a bit */ + } +} + + /* These are all rcv-related errors which we want to count for stats */ #define E_SUM_PKTERRS \ (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ @@ -68,53 +112,9 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { - unsigned long sbuf[4]; u64 ignore_this_time = 0; - u32 piobcnt; - /* if possible that sendbuffererror could be valid */ - piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - /* read these before writing errorclear */ - sbuf[0] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror); - sbuf[1] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 1); - if (piobcnt > 128) { - sbuf[2] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 2); - sbuf[3] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 3); - } - - if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { - int i; - - ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]); - if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) - printk("%lx %lx ", sbuf[2], sbuf[3]); - for (i = 0; i < piobcnt; i++) { - if (test_bit(i, sbuf)) { - u32 __iomem *piobuf; - if (i < dd->ipath_piobcnt2k) - piobuf = (u32 __iomem *) - (dd->ipath_pio2kbase + - i * dd->ipath_palign); - else - piobuf = (u32 __iomem *) - (dd->ipath_pio4kbase + - (i - dd->ipath_piobcnt2k) * - dd->ipath_4kalign); - - ipath_cdbg(PKT, - "PIObuf[%u] @%p pbc is %x; ", - i, piobuf, readl(piobuf)); - - ipath_disarm_piobufs(dd, i, 1); - } - } - if (ipath_debug & __IPATH_PKTDBG) - printk("\n"); - } + ipath_disarm_senderrbufs(dd); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -132,6 +132,82 @@ return ignore_this_time; } +/* generic hw error messages... 
*/ +#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \ + .msg = "TXE " #a " Memory Parity" \ + } +#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \ + .msg = "RXE " #a " Memory Parity" \ + } + +static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"), + INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"), + + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO), + + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO), +}; + +/** + * ipath_format_hwmsg - format a single hwerror message + * @msg message buffer + * @msgl length of message buffer + * @hwmsg message to add to message buffer + */ +static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) +{ + strlcat(msg, "[", msgl); + strlcat(msg, hwmsg, msgl); + strlcat(msg, "]", msgl); +} + +/** + * ipath_format_hwerrors - format hardware error messages for display + * @hwerrs hardware errors bit vector + * @hwerrmsgs hardware error descriptions + * @nhwerrmsgs number of hwerrmsgs + * @msg message buffer + * @msgl message buffer length + */ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t msgl) +{ + int i; + const int glen = + sizeof(ipath_generic_hwerror_msgs) / + sizeof(ipath_generic_hwerror_msgs[0]); + + for (i=0; iipath_f_handle_hwerrors(dd, msg, sizeof msg); } - if (!noprint && (errs & 
~infinipath_e_bitsextant)) + if (!noprint && (errs & ~dd->ipath_e_bitsextant)) ipath_dev_err(dd, "error interrupt with unknown errors " "%llx set\n", (unsigned long long) - (errs & ~infinipath_e_bitsextant)); + (errs & ~dd->ipath_e_bitsextant)); if (errs & E_SUM_ERRS) ignore_this_time = handle_e_sum_errs(dd, errs); @@ -478,6 +554,14 @@ ~(INFINIPATH_E_HARDWARE | INFINIPATH_E_IBSTATUSCHANGED); } + + /* likely due to cancel, so suppress */ + if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && + dd->ipath_lastcancel > jiffies) { + ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n"); + errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); + } + if (!errs) return 0; @@ -529,7 +613,7 @@ * don't report same point multiple times, * except kernel */ - tl = (u32) * pd->port_rcvhdrtail_kvaddr; + tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; if (tl == dd->ipath_lastrcvhdrqtails[i]) continue; hd = ipath_read_ureg32(dd, ur_rcvhdrhead, @@ -729,9 +813,9 @@ int rcvdint = 0; portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & - infinipath_i_rcvavail_mask) + dd->ipath_i_rcvavail_mask) | ((istat >> INFINIPATH_I_RCVURG_SHIFT) & - infinipath_i_rcvurg_mask); + dd->ipath_i_rcvurg_mask); for (i = 1; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; if (portr & (1 << i) && pd && pd->port_cnt && @@ -808,7 +892,7 @@ if (oldhead != curtail) { if (dd->ipath_flags & IPATH_GPIO_INTR) { ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) (1 << 2)); + (u64) (1 << IPATH_GPIO_PORT0_BIT)); istat = port0rbits | INFINIPATH_I_GPIO; } else @@ -838,10 +922,10 @@ if (unexpected) unexpected = 0; - if (unlikely(istat & ~infinipath_i_bitsextant)) + if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, "interrupt with unknown interrupts %x set\n", - istat & (u32) ~ infinipath_i_bitsextant); + istat & (u32) ~ dd->ipath_i_bitsextant); else ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); @@ -867,26 +951,80 @@ if (istat & 
INFINIPATH_I_GPIO) { /* - * Packets are available in the port 0 rcv queue. - * Eventually this needs to be generalized to check - * IPATH_GPIO_INTR, and the specific GPIO bit, if - * GPIO interrupts are used for anything else. - */ - if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) { - u32 gpiostatus; - gpiostatus = ipath_read_kreg32( - dd, dd->ipath_kregs->kr_gpio_status); - ipath_dbg("Unexpected GPIO interrupt bits %x\n", - gpiostatus); - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - gpiostatus); + * GPIO interrupts fall in two broad classes: + * GPIO_2 indicates (on some HT4xx boards) that a packet + * has arrived for Port 0. Checking for this + * is controlled by flag IPATH_GPIO_INTR. + * GPIO_3..5 on IBA6120 Rev2 chips indicate errors + * that we need to count. Checking for this + * is controlled by flag IPATH_GPIO_ERRINTRS. + */ + u32 gpiostatus; + u32 to_clear = 0; + + gpiostatus = ipath_read_kreg32( + dd, dd->ipath_kregs->kr_gpio_status); + /* First the error-counter case. + */ + if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) && + (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) { + /* want to clear the bits we see asserted. */ + to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK); + + /* + * Count appropriately, clear bits out of our copy, + * as they have been "handled". 
+ */ + if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) { + ipath_dbg("FlowCtl on UnsupVL\n"); + dd->ipath_rxfc_unsupvl_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) { + ipath_dbg("Overrun Threshold exceeded\n"); + dd->ipath_overrun_thresh_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) { + ipath_dbg("Local Link Integrity error\n"); + dd->ipath_lli_errs++; + } + gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK; } - else { - /* Clear GPIO status bit 2 */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) (1 << 2)); + /* Now the Port0 Receive case */ + if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) && + (dd->ipath_flags & IPATH_GPIO_INTR)) { + /* + * GPIO status bit 2 is set, and we expected it. + * clear it and indicate in p0bits. + * This probably only happens if a Port0 pkt + * arrives at _just_ the wrong time, and we + * handle that by setting chk0rcv; + */ + to_clear |= (1 << IPATH_GPIO_PORT0_BIT); + gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); chk0rcv = 1; } + if (unlikely(gpiostatus)) { + /* + * Some unexpected bits remain. If they could have + * caused the interrupt, complain and clear. + * MEA: this is almost certainly non-ideal. + * we should look into auto-disable of unexpected + * GPIO interrupts, possibly on a "three strikes" + * basis.
+ */ + u32 mask; + mask = ipath_read_kreg32( + dd, dd->ipath_kregs->kr_gpio_mask); + if (mask & gpiostatus) { + ipath_dbg("Unexpected GPIO IRQ bits %x\n", + gpiostatus & mask); + to_clear |= (gpiostatus & mask); + } + } + if (to_clear) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, + (u64) to_clear); + } } chk0rcv |= istat & port0rbits; @@ -911,9 +1049,9 @@ istat &= ~port0rbits; } - if (istat & ((infinipath_i_rcvavail_mask << + if (istat & ((dd->ipath_i_rcvavail_mask << INFINIPATH_I_RCVAVAIL_SHIFT) - | (infinipath_i_rcvurg_mask << + | (dd->ipath_i_rcvurg_mask << INFINIPATH_I_RCVURG_SHIFT))) handle_urcv(dd, istat); diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_kernel.h newtree/drivers/infiniband/hw/ipath/ipath_kernel.h --- oldtree/drivers/infiniband/hw/ipath/ipath_kernel.h 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_kernel.h 2006-09-30 05:21:44.000000000 -0400 @@ -39,6 +39,8 @@ */ #include +#include +#include #include #include "ipath_common.h" @@ -62,7 +64,7 @@ /* rcvhdrq base, needs mmap before useful */ void *port_rcvhdrq; /* kernel virtual address where hdrqtail is updated */ - volatile __le64 *port_rcvhdrtail_kvaddr; + void *port_rcvhdrtail_kvaddr; /* * temp buffer for expected send setup, allocated at open, instead * of each setup call @@ -79,8 +81,8 @@ dma_addr_t port_rcvhdrq_phys; dma_addr_t port_rcvhdrqtailaddr_phys; /* - * number of opens on this instance (0 or 1; ignoring forks, dup, - * etc. for now) + * number of opens (including slave subports) on this instance + * (ignoring forks, dup, etc. for now) */ int port_cnt; /* @@ -89,6 +91,10 @@ */ /* instead of calculating it */ unsigned port_port; + /* non-zero if port is being shared. */ + u16 port_subport_cnt; + /* non-zero if port is being shared. 
*/ + u16 port_subport_id; /* chip offset of PIO buffers for this port */ u32 port_piobufs; /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ @@ -121,6 +127,16 @@ u16 port_pkeys[4]; /* so file ops can get at unit */ struct ipath_devdata *port_dd; + /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ + void *subport_uregbase; + /* An array of pages for the eager receive buffers * N */ + void *subport_rcvegrbuf; + /* An array of pages for the eager header queue entries * N */ + void *subport_rcvhdr_base; + /* The version of the library which opened this port */ + u32 userversion; + /* Bitmask of active slaves */ + u32 active_slaves; }; struct sk_buff; @@ -132,6 +148,11 @@ void *l_arg; }; +struct ipath_skbinfo { + struct sk_buff *skb; + dma_addr_t phys; +}; + struct ipath_devdata { struct list_head ipath_list; @@ -154,7 +175,7 @@ /* ipath_cfgports pointers */ struct ipath_portdata **ipath_pd; /* sk_buffs used by port 0 eager receive queue */ - struct sk_buff **ipath_port0_skbs; + struct ipath_skbinfo *ipath_port0_skbinfo; /* kvirt address of 1st 2k pio buffer */ void __iomem *ipath_pio2kbase; /* kvirt address of 1st 4k pio buffer */ @@ -315,12 +336,16 @@ u8 ipath_ht_slave_off; /* for write combining settings */ unsigned long ipath_wc_cookie; + unsigned long ipath_wc_base; + unsigned long ipath_wc_len; /* ref count for each pkey */ atomic_t ipath_pkeyrefs[4]; /* shadow copy of all exptids physaddr; used only by funcsim */ u64 *ipath_tidsimshadow; /* shadow copy of struct page *'s for exp tid pages */ struct page **ipath_pageshadow; + /* shadow copy of dma handles for exp tid pages */ + dma_addr_t *ipath_physshadow; /* lock to workaround chip bug 9437 */ spinlock_t ipath_tid_lock; @@ -402,6 +427,9 @@ unsigned long ipath_rcvctrl; /* shadow kr_sendctrl */ unsigned long ipath_sendctrl; + /* ports waiting for PIOavail intr */ + unsigned long ipath_portpiowait; + unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */ /* value we put 
in kr_rcvhdrcnt */ u32 ipath_rcvhdrcnt; @@ -465,8 +493,6 @@ u32 ipath_htwidth; /* HT speed (200,400,800,1000) from HT config */ u32 ipath_htspeed; - /* ports waiting for PIOavail intr */ - unsigned long ipath_portpiowait; /* * number of sequential ibcstatus change for polling active/quiet * (i.e., link not coming up). @@ -510,8 +536,47 @@ u32 ipath_lli_counter; /* local link integrity errors */ u32 ipath_lli_errors; + /* + * Above counts only cases where _successive_ LocalLinkIntegrity + * errors were seen in the receive headers of kern-packets. + * Below are the three (monotonically increasing) counters + * maintained via GPIO interrupts on iba6120-rev2. + */ + u32 ipath_rxfc_unsupvl_errs; + u32 ipath_overrun_thresh_errs; + u32 ipath_lli_errs; + + /* + * Not all devices managed by a driver instance are the same + * type, so these fields must be per-device. + */ + u64 ipath_i_bitsextant; + ipath_err_t ipath_e_bitsextant; + ipath_err_t ipath_hwe_bitsextant; + + /* + * Below should be computable from number of ports, + * since they are never modified. + */ + u32 ipath_i_rcvavail_mask; + u32 ipath_i_rcvurg_mask; + + /* + * Register bits for selecting i2c direction and values, used for + * I2C serial flash. 
+ */ + u16 ipath_gpio_sda_num; + u16 ipath_gpio_scl_num; + u64 ipath_gpio_sda; + u64 ipath_gpio_scl; }; +/* Private data for file operations */ +struct ipath_filedata { + struct ipath_portdata *pd; + unsigned subport; + unsigned tidcursor; +}; extern struct list_head ipath_dev_list; extern spinlock_t ipath_devs_lock; extern struct ipath_devdata *ipath_lookup(int unit); @@ -521,6 +586,7 @@ void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); +void ipath_disarm_senderrbufs(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, struct file_operations *fops, @@ -572,7 +638,11 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* for use in system calls, where we want to know device type, etc. */ -#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) +#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd +#define subport_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->subport +#define tidcursor_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->tidcursor /* * values for ipath_flags @@ -612,6 +682,15 @@ /* can miss port0 rx interrupts */ #define IPATH_POLL_RX_INTR 0x40000 #define IPATH_DISABLED 0x80000 /* administratively disabled */ + /* Use GPIO interrupts for new counters */ +#define IPATH_GPIO_ERRINTRS 0x100000 + +/* Bits in GPIO for the added interrupts */ +#define IPATH_GPIO_PORT0_BIT 2 +#define IPATH_GPIO_RXUVL_BIT 3 +#define IPATH_GPIO_OVRUN_BIT 4 +#define IPATH_GPIO_LLI_BIT 5 +#define IPATH_GPIO_ERRINTR_MASK 0x38 /* portdata flag bit offsets */ /* waiting for a packet to arrive */ @@ -799,6 +878,13 @@ int ipathfs_remove_device(struct ipath_devdata *); /* + * dma_addr wrappers - all 0's invalid for hw + */ +dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long, + size_t, int); +dma_addr_t ipath_map_single(struct pci_dev *, void *, 
size_t, int); + +/* * Flush write combining store buffers (if present) and perform a write * barrier. */ @@ -855,4 +941,20 @@ #endif /* _IPATH_DEBUGGING */ +/* + * this is used for formatting hw error messages... + */ +struct ipath_hwerror_msgs { + u64 mask; + const char *msg; +}; + +#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b } + +/* in ipath_intr.c... */ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t lmsg); + #endif /* _IPATH_KERNEL_H */ diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_keys.c newtree/drivers/infiniband/hw/ipath/ipath_keys.c --- oldtree/drivers/infiniband/hw/ipath/ipath_keys.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_keys.c 2006-09-30 05:21:44.000000000 -0400 @@ -118,9 +118,10 @@ * Check the IB SGE for validity and initialize our internal version * of it. */ -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ib_sge *sge, int acc) { + struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; struct ipath_mregion *mr; unsigned n, m; size_t off; @@ -140,7 +141,8 @@ goto bail; } mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey)) { + if (unlikely(mr == NULL || mr->lkey != sge->lkey || + qp->ibqp.pd != mr->pd)) { ret = 0; goto bail; } @@ -188,9 +190,10 @@ * * Return 1 if successful, otherwise 0. 
*/ -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, u32 len, u64 vaddr, u32 rkey, int acc) { + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_lkey_table *rkt = &dev->lk_table; struct ipath_sge *sge = &ss->sge; struct ipath_mregion *mr; @@ -214,7 +217,8 @@ } mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey)) { + if (unlikely(mr == NULL || mr->lkey != rkey || + qp->ibqp.pd != mr->pd)) { ret = 0; goto bail; } diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_mad.c newtree/drivers/infiniband/hw/ipath/ipath_mad.c --- oldtree/drivers/infiniband/hw/ipath/ipath_mad.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_mad.c 2006-09-30 05:21:44.000000000 -0400 @@ -87,7 +87,8 @@ struct ipath_devdata *dd = to_idev(ibdev)->dd; u32 vendor, majrev, minrev; - if (smp->attr_mod) + /* GUID 0 is illegal */ + if (smp->attr_mod || (dd->ipath_guid == 0)) smp->status |= IB_SMP_INVALID_FIELD; nip->base_version = 1; @@ -131,10 +132,15 @@ * We only support one GUID for now. If this changes, the * portinfo.guid_cap field needs to be updated too. */ - if (startgx == 0) - /* The first is a copy of the read-only HW GUID. */ - *p = to_idev(ibdev)->dd->ipath_guid; - else + if (startgx == 0) { + __be64 g = to_idev(ibdev)->dd->ipath_guid; + if (g == 0) + /* GUID 0 is illegal */ + smp->status |= IB_SMP_INVALID_FIELD; + else + /* The first is a copy of the read-only HW GUID. 
*/ + *p = g; + } else smp->status |= IB_SMP_INVALID_FIELD; return reply(smp); diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_mr.c newtree/drivers/infiniband/hw/ipath/ipath_mr.c --- oldtree/drivers/infiniband/hw/ipath/ipath_mr.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_mr.c 2006-09-30 05:21:44.000000000 -0400 @@ -138,6 +138,7 @@ goto bail; } + mr->mr.pd = pd; mr->mr.user_base = *iova_start; mr->mr.iova = *iova_start; mr->mr.length = 0; @@ -197,6 +198,7 @@ goto bail; } + mr->mr.pd = pd; mr->mr.user_base = region->user_base; mr->mr.iova = region->virt_base; mr->mr.length = region->length; @@ -289,6 +291,7 @@ * Resources are allocated but no valid mapping (RKEY can't be * used). */ + fmr->mr.pd = pd; fmr->mr.user_base = 0; fmr->mr.iova = 0; fmr->mr.length = 0; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_qp.c newtree/drivers/infiniband/hw/ipath/ipath_qp.c --- oldtree/drivers/infiniband/hw/ipath/ipath_qp.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_qp.c 2006-09-30 05:21:44.000000000 -0400 @@ -335,6 +335,7 @@ qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; + qp->r_wrid_valid = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -342,6 +343,7 @@ qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; + qp->s_wait_credit = 0; if (qp->r_rq.wq) { qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; @@ -352,12 +354,13 @@ /** * ipath_error_qp - put a QP into an error state * @qp: the QP to put into an error state + * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. * QP s_lock should be held and interrupts disabled. 
*/ -void ipath_error_qp(struct ipath_qp *qp) +void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; @@ -373,7 +376,6 @@ list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.status = IB_WC_WR_FLUSH_ERR; wc.vendor_err = 0; wc.byte_len = 0; wc.imm_data = 0; @@ -385,6 +387,12 @@ wc.sl = 0; wc.dlid_path_bits = 0; wc.port_num = 0; + if (qp->r_wrid_valid) { + qp->r_wrid_valid = 0; + wc.status = err; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; while (qp->s_last != qp->s_head) { struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); @@ -501,7 +509,7 @@ break; case IB_QPS_ERR: - ipath_error_qp(qp); + ipath_error_qp(qp, IB_WC_GENERAL_ERR); break; default: @@ -516,7 +524,7 @@ qp->remote_qpn = attr->dest_qp_num; if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn; + qp->s_psn = qp->s_next_psn = attr->sq_psn; qp->s_last_psn = qp->s_next_psn - 1; } diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_rc.c newtree/drivers/infiniband/hw/ipath/ipath_rc.c --- oldtree/drivers/infiniband/hw/ipath/ipath_rc.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_rc.c 2006-09-30 05:21:44.000000000 -0400 @@ -201,6 +201,18 @@ qp->s_rnr_timeout) goto done; + /* Limit the number of packets sent without an ACK. */ + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { + qp->s_wait_credit = 1; + dev->n_rc_stalls++; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); + goto done; + } + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; bth0 = 0; @@ -221,7 +233,7 @@ /* Check if send work queue is empty. 
*/ if (qp->s_tail == qp->s_head) goto done; - qp->s_psn = wqe->psn = qp->s_next_psn; + wqe->psn = qp->s_next_psn; newreq = 1; } /* @@ -393,12 +405,6 @@ ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -435,12 +441,6 @@ ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -498,6 +498,8 @@ */ goto done; } + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) + bth2 |= 1 << 31; /* Request ACK. */ qp->s_len -= len; qp->s_hdrwords = hwords; qp->s_cur_sge = ss; @@ -737,6 +739,15 @@ return; } +static inline void update_last_psn(struct ipath_qp *qp, u32 psn) +{ + if (qp->s_wait_credit) { + qp->s_wait_credit = 0; + tasklet_hi_schedule(&qp->s_task); + } + qp->s_last_psn = psn; +} + /** * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on @@ -805,7 +816,7 @@ * The last valid PSN seen is the previous * request's. */ - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); /* Retry this request. */ ipath_restart_rc(qp, wqe->psn, &wc); /* @@ -864,7 +875,7 @@ ipath_get_credit(qp, aeth); qp->s_rnr_retry = qp->s_rnr_retry_cnt; qp->s_retry = qp->s_retry_cnt; - qp->s_last_psn = psn; + update_last_psn(qp, psn); ret = 1; goto bail; @@ -883,7 +894,7 @@ goto bail; /* The last valid PSN is the previous PSN. */ - qp->s_last_psn = psn - 1; + update_last_psn(qp, psn - 1); dev->n_rc_resends += (int)qp->s_psn - (int)psn; @@ -898,7 +909,7 @@ case 3: /* NAK */ /* The last valid PSN seen is the previous request's. 
*/ if (qp->s_last != qp->s_tail) - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ @@ -1071,7 +1082,7 @@ * since we don't want s_sge modified. */ qp->s_len -= pmtu; - qp->s_last_psn = psn; + update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); ipath_copy_sge(&qp->s_sge, data, pmtu); goto bail; @@ -1223,7 +1234,7 @@ * Address range must be a subset of the original * request and start on pmtu boundaries. */ - ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, + ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, qp->s_rdma_len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) { @@ -1282,6 +1293,14 @@ return 1; } +static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) +{ + spin_lock_irq(&qp->s_lock); + qp->state = IB_QPS_ERR; + ipath_error_qp(qp, err); + spin_unlock_irq(&qp->s_lock); +} + /** * ipath_rc_rcv - process an incoming RC packet * @dev: the device this packet came in on @@ -1309,6 +1328,10 @@ struct ib_reth *reth; int header_in_data; + /* Validate the SLID. See Ch. 
9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + /* Check for GRH */ if (!has_grh) { ohdr = &hdr->u.oth; @@ -1370,8 +1393,7 @@ */ if (qp->r_ack_state >= OP(COMPARE_SWAP)) goto send_ack; - /* XXX Flush WQEs */ - qp->state = IB_QPS_ERR; + ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); qp->r_ack_state = OP(SEND_ONLY); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; @@ -1477,9 +1499,9 @@ goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_ONLY)) + if (!qp->r_wrid_valid) break; + qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; @@ -1517,7 +1539,7 @@ int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(dev, &qp->r_sge, + ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) @@ -1559,7 +1581,7 @@ int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, + ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, qp->s_rdma_len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) { @@ -1618,7 +1640,7 @@ goto nack_inv; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; @@ -1670,8 +1692,7 @@ * is pending though. 
*/ if (qp->r_ack_state < OP(COMPARE_SWAP)) { - /* XXX Flush WQEs */ - qp->state = IB_QPS_ERR; + ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); qp->r_ack_state = OP(RDMA_WRITE_ONLY); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_registers.h newtree/drivers/infiniband/hw/ipath/ipath_registers.h --- oldtree/drivers/infiniband/hw/ipath/ipath_registers.h 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_registers.h 2006-09-30 05:21:44.000000000 -0400 @@ -134,10 +134,24 @@ #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 #define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL #define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44 -#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL -#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL #define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL #define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL +/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL +/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL +/* waldo specific -- find the rest in ipath_6110.c */ +#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL +/* monty specific -- find the rest in ipath_6120.c */ +#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL /* kr_hwdiagctrl bits */ #define 
INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL @@ -209,9 +223,9 @@ /* combination link status states that we use with some frequency */ #define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \ - << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ + << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \ (INFINIPATH_IBCS_LINKSTATE_MASK \ - <ibqp.device); int user = to_ipd(qp->ibqp.pd)->user; int i, j, ret; struct ib_wc wc; @@ -119,8 +118,7 @@ continue; /* Check LKEY */ if ((user && wqe->sg_list[i].lkey == 0) || - !ipath_lkey_ok(&dev->lk_table, - &qp->r_sg_list[j], &wqe->sg_list[i], + !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; @@ -231,6 +229,7 @@ } } spin_unlock_irqrestore(&rq->lock, flags); + qp->r_wrid_valid = 1; bail: return ret; @@ -326,7 +325,7 @@ case IB_WR_RDMA_WRITE: if (wqe->length == 0) break; - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_WRITE))) { @@ -350,7 +349,7 @@ break; case IB_WR_RDMA_READ: - if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) @@ -365,7 +364,7 @@ case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64), + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_ATOMIC))) @@ -575,8 +574,7 @@ } if (wr->sg_list[i].length == 0) continue; - if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table, - &wqe->sg_list[j], &wr->sg_list[i], + if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i], acc)) { spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EINVAL; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_srq.c 
newtree/drivers/infiniband/hw/ipath/ipath_srq.c --- oldtree/drivers/infiniband/hw/ipath/ipath_srq.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_srq.c 2006-09-30 05:21:44.000000000 -0400 @@ -104,11 +104,6 @@ u32 sz; struct ib_srq *ret; - if (dev->n_srqs_allocated == ib_ipath_max_srqs) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); goto done; @@ -180,10 +175,17 @@ spin_lock_init(&srq->rq.lock); srq->rq.wq->head = 0; srq->rq.wq->tail = 0; - srq->rq.max_sge = srq_init_attr->attr.max_sge; srq->limit = srq_init_attr->attr.srq_limit; - dev->n_srqs_allocated++; + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == ib_ipath_max_srqs) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); ret = &srq->ibsrq; goto done; @@ -351,8 +353,13 @@ struct ipath_srq *srq = to_isrq(ibsrq); struct ipath_ibdev *dev = to_idev(ibsrq->device); + spin_lock(&dev->n_srqs_lock); dev->n_srqs_allocated--; - vfree(srq->rq.wq); + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, ipath_release_mmap_info); + else + vfree(srq->rq.wq); kfree(srq); return 0; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_sysfs.c newtree/drivers/infiniband/hw/ipath/ipath_sysfs.c --- oldtree/drivers/infiniband/hw/ipath/ipath_sysfs.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_sysfs.c 2006-09-30 05:21:44.000000000 -0400 @@ -257,7 +257,7 @@ struct ipath_devdata *dd = dev_get_drvdata(dev); ssize_t ret; unsigned short guid[8]; - __be64 nguid; + __be64 new_guid; u8 *ng; int i; @@ -266,7 +266,7 @@ &guid[4], &guid[5], &guid[6], &guid[7]) != 8) goto invalid; - ng = (u8 *) &nguid; + ng = (u8 *) &new_guid; for (i = 0; i < 8; i++) { if (guid[i] > 0xff) @@ -274,7 +274,10 @@ ng[i] = guid[i]; } - dd->ipath_guid = nguid; + if (new_guid == 0) + goto invalid; + + dd->ipath_guid 
= new_guid; dd->ipath_nguid = 1; ret = strlen(buf); @@ -297,6 +300,16 @@ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid); } +static ssize_t show_nports(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + /* Return the number of user ports available. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1); +} + static ssize_t show_serial(struct device *dev, struct device_attribute *attr, char *buf) @@ -608,6 +621,7 @@ static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu); static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled); static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL); +static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset); static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); @@ -623,6 +637,7 @@ &dev_attr_mlid.attr, &dev_attr_mtu.attr, &dev_attr_nguid.attr, + &dev_attr_nports.attr, &dev_attr_serial.attr, &dev_attr_status.attr, &dev_attr_status_str.attr, diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_uc.c newtree/drivers/infiniband/hw/ipath/ipath_uc.c --- oldtree/drivers/infiniband/hw/ipath/ipath_uc.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_uc.c 2006-09-30 05:21:44.000000000 -0400 @@ -246,6 +246,10 @@ struct ib_reth *reth; int header_in_data; + /* Validate the SLID. See Ch. 
9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + /* Check for GRH */ if (!has_grh) { ohdr = &hdr->u.oth; @@ -440,7 +444,7 @@ int ok; /* Check rkey */ - ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len, + ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) { diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_ud.c newtree/drivers/infiniband/hw/ipath/ipath_ud.c --- oldtree/drivers/infiniband/hw/ipath/ipath_ud.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_ud.c 2006-09-30 05:21:44.000000000 -0400 @@ -39,7 +39,6 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, u32 *lengthp, struct ipath_sge_state *ss) { - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int user = to_ipd(qp->ibqp.pd)->user; int i, j, ret; struct ib_wc wc; @@ -50,8 +49,7 @@ continue; /* Check LKEY */ if ((user && wqe->sg_list[i].lkey == 0) || - !ipath_lkey_ok(&dev->lk_table, - j ? &ss->sg_list[j - 1] : &ss->sge, + !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; *lengthp += wqe->sg_list[i].length; @@ -343,7 +341,7 @@ if (wr->sg_list[i].length == 0) continue; - if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ? + if (!ipath_lkey_ok(qp, ss.num_sge ? sg_list + ss.num_sge - 1 : &ss.sge, &wr->sg_list[i], 0)) { ret = -EINVAL; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_user_pages.c newtree/drivers/infiniband/hw/ipath/ipath_user_pages.c --- oldtree/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-09-30 05:21:44.000000000 -0400 @@ -90,6 +90,62 @@ } /** + * ipath_map_page - a safety wrapper around pci_map_page() + * + * A dma_addr of all 0's is interpreted by the chip as "disabled". + * Unfortunately, it can also be a valid dma_addr returned on some + * architectures. 
+ * + * The powerpc iommu assigns dma_addrs in ascending order, so we don't + * have to bother with retries or mapping a dummy page to insure we + * don't just get the same mapping again. + * + * I'm sure we won't be so lucky with other iommu's, so FIXME. + */ +dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + dma_addr_t phys; + + phys = pci_map_page(hwdev, page, offset, size, direction); + + if (phys == 0) { + pci_unmap_page(hwdev, phys, size, direction); + phys = pci_map_page(hwdev, page, offset, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. + */ + } + + return phys; +} + +/** + * ipath_map_single - a safety wrapper around pci_map_single() + * + * Same idea as ipath_map_page(). + */ +dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size, + int direction) +{ + dma_addr_t phys; + + phys = pci_map_single(hwdev, ptr, size, direction); + + if (phys == 0) { + pci_unmap_single(hwdev, phys, size, direction); + phys = pci_map_single(hwdev, ptr, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. 
+ */ + } + + return phys; +} + +/** * ipath_get_user_pages - lock user pages into memory * @start_page: the start page * @num_pages: the number of pages diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_verbs.c newtree/drivers/infiniband/hw/ipath/ipath_verbs.c --- oldtree/drivers/infiniband/hw/ipath/ipath_verbs.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_verbs.c 2006-09-30 05:21:44.000000000 -0400 @@ -898,7 +898,8 @@ ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); + ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) + + dd->ipath_rxfc_unsupvl_errs; cntrs->port_rcv_remphys_errors = ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); cntrs->port_xmit_discards = @@ -911,8 +912,10 @@ ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); cntrs->port_rcv_packets = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - cntrs->local_link_integrity_errors = dd->ipath_lli_errors; - cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ + cntrs->local_link_integrity_errors = + (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors; + cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs; ret = 0; @@ -1199,6 +1202,7 @@ struct ipath_ah *ah; struct ib_ah *ret; struct ipath_ibdev *dev = to_idev(pd->device); + unsigned long flags; /* A multicast address requires a GRH (see ch. 8.4.1). 
*/ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && @@ -1225,16 +1229,16 @@ goto bail; } - spin_lock(&dev->n_ahs_lock); + spin_lock_irqsave(&dev->n_ahs_lock, flags); if (dev->n_ahs_allocated == ib_ipath_max_ahs) { - spin_unlock(&dev->n_ahs_lock); + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); kfree(ah); ret = ERR_PTR(-ENOMEM); goto bail; } dev->n_ahs_allocated++; - spin_unlock(&dev->n_ahs_lock); + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); /* ib_create_ah() will initialize ah->ibah. */ ah->attr = *ah_attr; @@ -1255,10 +1259,11 @@ { struct ipath_ibdev *dev = to_idev(ibah->device); struct ipath_ah *ah = to_iah(ibah); + unsigned long flags; - spin_lock(&dev->n_ahs_lock); + spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; - spin_unlock(&dev->n_ahs_lock); + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); kfree(ah); @@ -1380,11 +1385,13 @@ * processing. */ if (dd->ipath_flags & IPATH_GPIO_INTR) { + u64 val; ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, 0x2074076542310ULL); /* Enable GPIO bit 2 interrupt */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - (u64) (1 << 2)); + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val |= (u64) (1 << IPATH_GPIO_PORT0_BIT); + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); } init_timer(&dd->verbs_timer); @@ -1399,8 +1406,17 @@ static int disable_timer(struct ipath_devdata *dd) { /* Disable GPIO bit 2 interrupt */ - if (dd->ipath_flags & IPATH_GPIO_INTR) - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); + if (dd->ipath_flags & IPATH_GPIO_INTR) { + u64 val; + /* Disable GPIO bit 2 interrupt */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + /* + * We might want to undo changes to debugportselect, + * but how? 
+ */ + } del_timer_sync(&dd->verbs_timer); @@ -1683,6 +1699,7 @@ "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" + "RC stalls %d\n" "piobuf wait %d\n" "no piobuf %d\n" "PKT drops %d\n" @@ -1690,7 +1707,7 @@ dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_piowait, + dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_verbs.h newtree/drivers/infiniband/hw/ipath/ipath_verbs.h --- oldtree/drivers/infiniband/hw/ipath/ipath_verbs.h 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_verbs.h 2006-09-30 05:21:44.000000000 -0400 @@ -220,6 +220,7 @@ }; struct ipath_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ u64 user_base; /* User's address for this region */ u64 iova; /* IB start address of this region */ size_t length; @@ -364,12 +365,14 @@ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 r_reuse_sge; /* for UC receive errors */ u8 r_sge_inx; /* current index into sg_list */ + u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ u8 qp_access_flags; u8 s_max_sge; /* size of s_wq->sg_list */ u8 s_retry_cnt; /* number of times to retry */ u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; @@ -393,6 +396,8 @@ #define IPATH_S_BUSY 0 #define IPATH_S_SIGNAL_REQ_WR 1 +#define IPATH_PSN_CREDIT 2048 + /* * Since struct ipath_swqe is not a fixed size, we can't simply index into * struct ipath_qp.s_wq. This function does the array index computation. 
@@ -521,6 +526,7 @@ u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; + u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; @@ -634,6 +640,8 @@ int ipath_destroy_qp(struct ib_qp *ibqp); +void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err); + int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); @@ -653,12 +661,6 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, - u32 len, u64 vaddr, u32 rkey, int acc); - -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, - struct ib_sge *sge, int acc); - void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length); void ipath_skip_sge(struct ipath_sge_state *ss, u32 length); @@ -683,10 +685,10 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey); -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ib_sge *sge, int acc); -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, u32 len, u64 vaddr, u32 rkey, int acc); int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c newtree/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c --- oldtree/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c 2006-09-30 05:21:44.000000000 -0400 @@ -38,13 +38,23 @@ #include "ipath_kernel.h" /** - * ipath_unordered_wc - indicate whether write combining is ordered + * ipath_enable_wc - enable write combining for MMIO writes to the device + * @dd: infinipath device * - * PowerPC systems (at least those in the 970 processor family) - * write partially filled store buffers in address order, but will write - * 
completely filled store buffers in "random" order, and therefore must - * have serialization for correctness with current InfiniPath chips. + * Nothing to do on PowerPC, so just return without error. + */ +int ipath_enable_wc(struct ipath_devdata *dd) +{ + return 0; +} + +/** + * ipath_unordered_wc - indicate whether write combining is unordered * + * Because our performance depends on our ability to do write + * combining mmio writes in the most efficient way, we need to + * know if we are on a processor that may reorder stores when + * write combining. */ int ipath_unordered_wc(void) { diff -urN oldtree/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c newtree/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c --- oldtree/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c 2006-09-30 05:21:44.000000000 -0400 @@ -123,6 +123,8 @@ ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " "cookie is %d\n", cookie); dd->ipath_wc_cookie = cookie; + dd->ipath_wc_base = (unsigned long) pioaddr; + dd->ipath_wc_len = (unsigned long) piolen; } } @@ -136,9 +138,16 @@ void ipath_disable_wc(struct ipath_devdata *dd) { if (dd->ipath_wc_cookie) { + int r; ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); - mtrr_del(dd->ipath_wc_cookie, 0, 0); - dd->ipath_wc_cookie = 0; + r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base, + dd->ipath_wc_len); + if (r < 0) + dev_info(&dd->pcidev->dev, + "mtrr_del(%lx, %lx, %lx) failed: %d\n", + dd->ipath_wc_cookie, dd->ipath_wc_base, + dd->ipath_wc_len, r); + dd->ipath_wc_cookie = 0; /* even on failure */ } } diff -urN oldtree/drivers/infiniband/ulp/iser/Kconfig newtree/drivers/infiniband/ulp/iser/Kconfig --- oldtree/drivers/infiniband/ulp/iser/Kconfig 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/ulp/iser/Kconfig 2006-09-30 05:21:44.000000000 -0400 @@ -1,11 +1,12 @@ config INFINIBAND_ISER - tristate "ISCSI RDMA Protocol" + tristate "iSCSI Extensions 
for RDMA (iSER)" depends on INFINIBAND && SCSI && INET select SCSI_ISCSI_ATTRS ---help--- - Support for the ISCSI RDMA Protocol over InfiniBand. This - allows you to access storage devices that speak ISER/ISCSI - over InfiniBand. + Support for the iSCSI Extensions for RDMA (iSER) Protocol + over InfiniBand. This allows you to access storage devices + that speak iSCSI over iSER over InfiniBand. - The ISER protocol is defined by IETF. - See . + The iSER protocol is defined by IETF. + See + and diff -urN oldtree/drivers/infiniband/ulp/iser/iscsi_iser.c newtree/drivers/infiniband/ulp/iser/iscsi_iser.c --- oldtree/drivers/infiniband/ulp/iser/iscsi_iser.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/ulp/iser/iscsi_iser.c 2006-09-30 05:21:44.000000000 -0400 @@ -317,6 +317,8 @@ struct iscsi_iser_conn *iser_conn = conn->dd_data; iscsi_conn_teardown(cls_conn); + if (iser_conn->ib_conn) + iser_conn->ib_conn->iser_conn = NULL; kfree(iser_conn); } diff -urN oldtree/drivers/infiniband/ulp/iser/iscsi_iser.h newtree/drivers/infiniband/ulp/iser/iscsi_iser.h --- oldtree/drivers/infiniband/ulp/iser/iscsi_iser.h 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/ulp/iser/iscsi_iser.h 2006-09-30 05:21:44.000000000 -0400 @@ -192,7 +192,7 @@ struct iser_dto { struct iscsi_iser_cmd_task *ctask; - struct iscsi_iser_conn *conn; + struct iser_conn *ib_conn; int notify_enable; /* vector of registered buffers */ @@ -355,4 +355,11 @@ int iser_conn_state_comp(struct iser_conn *ib_conn, enum iser_ib_conn_state comp); + +int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir); + +void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask); #endif diff -urN oldtree/drivers/infiniband/ulp/iser/iser_initiator.c newtree/drivers/infiniband/ulp/iser/iser_initiator.c --- oldtree/drivers/infiniband/ulp/iser/iser_initiator.c 2006-09-29 13:50:42.000000000 
-0400 +++ newtree/drivers/infiniband/ulp/iser/iser_initiator.c 2006-09-30 05:21:44.000000000 -0400 @@ -66,42 +66,6 @@ dto->regd_vector_len++; } -static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir) -{ - struct device *dma_device; - - iser_ctask->dir[iser_dir] = 1; - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - - data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); - if (data->dma_nents == 0) { - iser_err("dma_map_sg failed!!!\n"); - return -EINVAL; - } - return 0; -} - -static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) -{ - struct device *dma_device; - struct iser_data_buf *data; - - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - - if (iser_ctask->dir[ISER_DIR_IN]) { - data = &iser_ctask->data[ISER_DIR_IN]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); - } - - if (iser_ctask->dir[ISER_DIR_OUT]) { - data = &iser_ctask->data[ISER_DIR_OUT]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); - } -} - /* Register user buffer memory and initialize passive rdma * dto descriptor. 
Total data size is stored in * iser_ctask->data[ISER_DIR_IN].data_len @@ -249,7 +213,7 @@ } recv_dto = &rx_desc->dto; - recv_dto->conn = iser_conn; + recv_dto->ib_conn = iser_conn->ib_conn; recv_dto->regd_vector_len = 0; regd_hdr = &rx_desc->hdr_regd_buf; @@ -296,7 +260,7 @@ regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; - send_dto->conn = iser_conn; + send_dto->ib_conn = iser_conn->ib_conn; send_dto->notify_enable = 1; send_dto->regd_vector_len = 0; @@ -588,7 +552,7 @@ unsigned long dto_xfer_len) { struct iser_dto *dto = &rx_desc->dto; - struct iscsi_iser_conn *conn = dto->conn; + struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; struct iscsi_session *session = conn->iscsi_conn->session; struct iscsi_cmd_task *ctask; struct iscsi_iser_cmd_task *iser_ctask; @@ -641,7 +605,8 @@ void iser_snd_completion(struct iser_desc *tx_desc) { struct iser_dto *dto = &tx_desc->dto; - struct iscsi_iser_conn *iser_conn = dto->conn; + struct iser_conn *ib_conn = dto->ib_conn; + struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_mgmt_task *mtask; @@ -652,7 +617,7 @@ if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); - atomic_dec(&iser_conn->ib_conn->post_send_buf_count); + atomic_dec(&ib_conn->post_send_buf_count); write_lock(conn->recv_lock); if (conn->suspend_tx) { @@ -698,14 +663,19 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) { int deferred; + int is_rdma_aligned = 1; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ - if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) + if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) { + is_rdma_aligned = 0; iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); - if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) + } + if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) { + 
is_rdma_aligned = 0; iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); + } if (iser_ctask->dir[ISER_DIR_IN]) { deferred = iser_regd_buff_release @@ -725,7 +695,9 @@ } } - iser_dma_unmap_task_data(iser_ctask); + /* if the data was unaligned, it was already unmapped and then copied */ + if (is_rdma_aligned) + iser_dma_unmap_task_data(iser_ctask); } void iser_dto_buffs_release(struct iser_dto *dto) diff -urN oldtree/drivers/infiniband/ulp/iser/iser_memory.c newtree/drivers/infiniband/ulp/iser/iser_memory.c --- oldtree/drivers/infiniband/ulp/iser/iser_memory.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/ulp/iser/iser_memory.c 2006-09-30 05:21:44.000000000 -0400 @@ -369,6 +369,44 @@ } } +int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) +{ + struct device *dma_device; + + iser_ctask->dir[iser_dir] = 1; + dma_device = + iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); + if (data->dma_nents == 0) { + iser_err("dma_map_sg failed!!!\n"); + return -EINVAL; + } + return 0; +} + +void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) +{ + struct device *dma_device; + struct iser_data_buf *data; + + dma_device = + iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + if (iser_ctask->dir[ISER_DIR_IN]) { + data = &iser_ctask->data[ISER_DIR_IN]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); + } + + if (iser_ctask->dir[ISER_DIR_OUT]) { + data = &iser_ctask->data[ISER_DIR_OUT]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); + } +} + /** * iser_reg_rdma_mem - Registers memory intended for RDMA, * obtaining rkey and va @@ -394,6 +432,10 @@ iser_err("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem); + + /* unmap the command data before 
accessing it */ + iser_dma_unmap_task_data(iser_ctask); + /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) diff -urN oldtree/drivers/infiniband/ulp/iser/iser_verbs.c newtree/drivers/infiniband/ulp/iser/iser_verbs.c --- oldtree/drivers/infiniband/ulp/iser/iser_verbs.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/infiniband/ulp/iser/iser_verbs.c 2006-09-30 05:21:44.000000000 -0400 @@ -571,6 +571,8 @@ /* on EVENT_ADDR_ERROR there's no device yet for this conn */ if (device != NULL) iser_device_try_release(device); + if (ib_conn->iser_conn) + ib_conn->iser_conn->ib_conn = NULL; kfree(ib_conn); } @@ -694,7 +696,7 @@ struct iser_dto *recv_dto = &rx_desc->dto; /* Retrieve conn */ - ib_conn = recv_dto->conn->ib_conn; + ib_conn = recv_dto->ib_conn; iser_dto_to_iov(recv_dto, iov, 2); @@ -727,7 +729,7 @@ struct iser_conn *ib_conn; struct iser_dto *dto = &tx_desc->dto; - ib_conn = dto->conn->ib_conn; + ib_conn = dto->ib_conn; iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); @@ -774,7 +776,7 @@ static void iser_handle_comp_error(struct iser_desc *desc) { struct iser_dto *dto = &desc->dto; - struct iser_conn *ib_conn = dto->conn->ib_conn; + struct iser_conn *ib_conn = dto->ib_conn; iser_dto_buffs_release(dto); diff -urN oldtree/drivers/net/bnx2.c newtree/drivers/net/bnx2.c --- oldtree/drivers/net/bnx2.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/net/bnx2.c 2006-09-30 05:21:44.000000000 -0400 @@ -56,8 +56,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.4.44" -#define DRV_MODULE_RELDATE "August 10, 2006" +#define DRV_MODULE_VERSION "1.4.45" +#define DRV_MODULE_RELDATE "September 29, 2006" #define RUN_AT(x) (jiffies + (x)) @@ -5805,6 +5805,34 @@ bp->cmd_ticks_int = bp->cmd_ticks; } + /* Disable MSI on 5706 if AMD 8132 bridge is found. + * + * MSI is defined to be 32-bit write. 
The 5706 does 64-bit MSI writes + * with byte enables disabled on the unused 32-bit word. This is legal + * but causes problems on the AMD 8132 which will eventually stop + * responding after a while. + * + * AMD believes this incompatibility is unique to the 5706, and + * prefers to locally disable MSI rather than globally disabling it + * using pci_msi_quirk. + */ + if (CHIP_NUM(bp) == CHIP_NUM_5706 && disable_msi == 0) { + struct pci_dev *amd_8132 = NULL; + + while ((amd_8132 = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_8132_BRIDGE, + amd_8132))) { + u8 rev; + + pci_read_config_byte(amd_8132, PCI_REVISION_ID, &rev); + if (rev >= 0x10 && rev <= 0x13) { + disable_msi = 1; + pci_dev_put(amd_8132); + break; + } + } + } + bp->autoneg = AUTONEG_SPEED | AUTONEG_FLOW_CTRL; bp->req_line_speed = 0; if (bp->phy_flags & PHY_SERDES_FLAG) { diff -urN oldtree/drivers/serial/sunzilog.c newtree/drivers/serial/sunzilog.c --- oldtree/drivers/serial/sunzilog.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/drivers/serial/sunzilog.c 2006-09-30 05:21:44.000000000 -0400 @@ -1270,7 +1270,7 @@ } #endif -static void __init sunzilog_init_hw(struct uart_sunzilog_port *up) +static void __devinit sunzilog_init_hw(struct uart_sunzilog_port *up) { struct zilog_channel __iomem *channel; unsigned long flags; diff -urN oldtree/include/linux/atmlec.h newtree/include/linux/atmlec.h --- oldtree/include/linux/atmlec.h 2006-09-29 13:50:42.000000000 -0400 +++ newtree/include/linux/atmlec.h 2006-09-30 05:21:44.000000000 -0400 @@ -1,9 +1,7 @@ /* - * - * ATM Lan Emulation Daemon vs. 
driver interface - * - * mkiiskila@yahoo.com + * ATM Lan Emulation Daemon driver interface * + * Marko Kiiskila */ #ifndef _ATMLEC_H_ @@ -13,76 +11,87 @@ #include #include #include + /* ATM lec daemon control socket */ -#define ATMLEC_CTRL _IO('a',ATMIOC_LANE) -#define ATMLEC_DATA _IO('a',ATMIOC_LANE+1) -#define ATMLEC_MCAST _IO('a',ATMIOC_LANE+2) +#define ATMLEC_CTRL _IO('a', ATMIOC_LANE) +#define ATMLEC_DATA _IO('a', ATMIOC_LANE+1) +#define ATMLEC_MCAST _IO('a', ATMIOC_LANE+2) /* Maximum number of LEC interfaces (tweakable) */ #define MAX_LEC_ITF 48 -/* From the total of MAX_LEC_ITF, last NUM_TR_DEVS are reserved for Token Ring. +/* + * From the total of MAX_LEC_ITF, last NUM_TR_DEVS are reserved for Token Ring. * E.g. if MAX_LEC_ITF = 48 and NUM_TR_DEVS = 8, then lec0-lec39 are for * Ethernet ELANs and lec40-lec47 are for Token Ring ELANS. */ #define NUM_TR_DEVS 8 -typedef enum { - l_set_mac_addr, l_del_mac_addr, - l_svc_setup, - l_addr_delete, l_topology_change, - l_flush_complete, l_arp_update, - l_narp_req, /* LANE2 mandates the use of this */ - l_config, l_flush_tran_id, - l_set_lecid, l_arp_xmt, - l_rdesc_arp_xmt, - l_associate_req, - l_should_bridge /* should we bridge this MAC? */ +typedef enum { + l_set_mac_addr, + l_del_mac_addr, + l_svc_setup, + l_addr_delete, + l_topology_change, + l_flush_complete, + l_arp_update, + l_narp_req, /* LANE2 mandates the use of this */ + l_config, + l_flush_tran_id, + l_set_lecid, + l_arp_xmt, + l_rdesc_arp_xmt, + l_associate_req, + l_should_bridge /* should we bridge this MAC? 
*/ } atmlec_msg_type; #define ATMLEC_MSG_TYPE_MAX l_should_bridge struct atmlec_config_msg { - unsigned int maximum_unknown_frame_count; - unsigned int max_unknown_frame_time; - unsigned short max_retry_count; - unsigned int aging_time; - unsigned int forward_delay_time; - unsigned int arp_response_time; - unsigned int flush_timeout; - unsigned int path_switching_delay; - unsigned int lane_version; /* LANE2: 1 for LANEv1, 2 for LANEv2 */ - int mtu; - int is_proxy; + unsigned int maximum_unknown_frame_count; + unsigned int max_unknown_frame_time; + unsigned short max_retry_count; + unsigned int aging_time; + unsigned int forward_delay_time; + unsigned int arp_response_time; + unsigned int flush_timeout; + unsigned int path_switching_delay; + unsigned int lane_version; /* LANE2: 1 for LANEv1, 2 for LANEv2 */ + int mtu; + int is_proxy; }; - + struct atmlec_msg { - atmlec_msg_type type; - int sizeoftlvs; /* LANE2: if != 0, tlvs follow */ - union { - struct { - unsigned char mac_addr[ETH_ALEN]; - unsigned char atm_addr[ATM_ESA_LEN]; - unsigned int flag;/* Topology_change flag, - remoteflag, permanent flag, - lecid, transaction id */ - unsigned int targetless_le_arp; /* LANE2 */ - unsigned int no_source_le_narp; /* LANE2 */ - } normal; - struct atmlec_config_msg config; - struct { - uint16_t lec_id; /* requestor lec_id */ - uint32_t tran_id; /* transaction id */ - unsigned char mac_addr[ETH_ALEN]; /* dst mac addr */ - unsigned char atm_addr[ATM_ESA_LEN]; /* reqestor ATM addr */ - } proxy; - /* For mapping LE_ARP requests to responses. Filled by */ - } content; /* zeppelin, returned by kernel. 
Used only when proxying */ + atmlec_msg_type type; + int sizeoftlvs; /* LANE2: if != 0, tlvs follow */ + union { + struct { + unsigned char mac_addr[ETH_ALEN]; + unsigned char atm_addr[ATM_ESA_LEN]; + unsigned int flag; /* + * Topology_change flag, + * remoteflag, permanent flag, + * lecid, transaction id + */ + unsigned int targetless_le_arp; /* LANE2 */ + unsigned int no_source_le_narp; /* LANE2 */ + } normal; + struct atmlec_config_msg config; + struct { + uint16_t lec_id; /* requestor lec_id */ + uint32_t tran_id; /* transaction id */ + unsigned char mac_addr[ETH_ALEN]; /* dst mac addr */ + unsigned char atm_addr[ATM_ESA_LEN]; /* reqestor ATM addr */ + } proxy; /* + * For mapping LE_ARP requests to responses. Filled by + * zeppelin, returned by kernel. Used only when proxying + */ + } content; } __ATM_API_ALIGN; struct atmlec_ioc { - int dev_num; - unsigned char atm_addr[ATM_ESA_LEN]; - unsigned char receive; /* 1= receive vcc, 0 = send vcc */ + int dev_num; + unsigned char atm_addr[ATM_ESA_LEN]; + unsigned char receive; /* 1= receive vcc, 0 = send vcc */ }; #endif /* _ATMLEC_H_ */ diff -urN oldtree/include/linux/audit.h newtree/include/linux/audit.h --- oldtree/include/linux/audit.h 2006-09-29 14:03:22.000000000 -0400 +++ newtree/include/linux/audit.h 2006-09-30 05:21:44.000000000 -0400 @@ -95,12 +95,11 @@ #define AUDIT_MAC_POLICY_LOAD 1403 /* Policy file load */ #define AUDIT_MAC_STATUS 1404 /* Changed enforcing,permissive,off */ #define AUDIT_MAC_CONFIG_CHANGE 1405 /* Changes to booleans */ -#define AUDIT_MAC_UNLBL_ACCEPT 1406 /* NetLabel: allow unlabeled traffic */ -#define AUDIT_MAC_UNLBL_DENY 1407 /* NetLabel: deny unlabeled traffic */ -#define AUDIT_MAC_CIPSOV4_ADD 1408 /* NetLabel: add CIPSOv4 DOI entry */ -#define AUDIT_MAC_CIPSOV4_DEL 1409 /* NetLabel: del CIPSOv4 DOI entry */ -#define AUDIT_MAC_MAP_ADD 1410 /* NetLabel: add LSM domain mapping */ -#define AUDIT_MAC_MAP_DEL 1411 /* NetLabel: del LSM domain mapping */ +#define AUDIT_MAC_UNLBL_ALLOW 1406 
/* NetLabel: allow unlabeled traffic */ +#define AUDIT_MAC_CIPSOV4_ADD 1407 /* NetLabel: add CIPSOv4 DOI entry */ +#define AUDIT_MAC_CIPSOV4_DEL 1408 /* NetLabel: del CIPSOv4 DOI entry */ +#define AUDIT_MAC_MAP_ADD 1409 /* NetLabel: add LSM domain mapping */ +#define AUDIT_MAC_MAP_DEL 1410 /* NetLabel: del LSM domain mapping */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 diff -urN oldtree/include/linux/pci_ids.h newtree/include/linux/pci_ids.h --- oldtree/include/linux/pci_ids.h 2006-09-29 15:59:29.000000000 -0400 +++ newtree/include/linux/pci_ids.h 2006-09-30 05:21:44.000000000 -0400 @@ -506,6 +506,7 @@ #define PCI_DEVICE_ID_AMD_8151_0 0x7454 #define PCI_DEVICE_ID_AMD_8131_BRIDGE 0x7450 #define PCI_DEVICE_ID_AMD_8131_APIC 0x7451 +#define PCI_DEVICE_ID_AMD_8132_BRIDGE 0x7458 #define PCI_DEVICE_ID_AMD_CS5536_ISA 0x2090 #define PCI_DEVICE_ID_AMD_CS5536_FLASH 0x2091 #define PCI_DEVICE_ID_AMD_CS5536_AUDIO 0x2093 diff -urN oldtree/include/net/cipso_ipv4.h newtree/include/net/cipso_ipv4.h --- oldtree/include/net/cipso_ipv4.h 2006-09-29 14:03:22.000000000 -0400 +++ newtree/include/net/cipso_ipv4.h 2006-09-30 05:21:44.000000000 -0400 @@ -129,7 +129,7 @@ #ifdef CONFIG_NETLABEL int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); int cipso_v4_doi_walk(u32 *skip_cnt, @@ -145,7 +145,7 @@ } static inline int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)) { return 0; diff -urN oldtree/include/net/netlabel.h newtree/include/net/netlabel.h --- oldtree/include/net/netlabel.h 2006-09-29 14:03:22.000000000 -0400 +++ newtree/include/net/netlabel.h 2006-09-30 05:21:44.000000000 -0400 @@ -92,11 +92,17 @@ * */ +/* NetLabel audit information */ +struct netlbl_audit { + u32 secid; + uid_t loginuid; +}; 
+ /* Domain mapping definition struct */ struct netlbl_dom_map; /* Domain mapping operations */ -int netlbl_domhsh_remove(const char *domain, u32 audit_secid); +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); /* LSM security attributes */ struct netlbl_lsm_cache { diff -urN oldtree/net/atm/lec.c newtree/net/atm/lec.c --- oldtree/net/atm/lec.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/net/atm/lec.c 2006-09-30 05:21:44.000000000 -0400 @@ -1,7 +1,7 @@ /* * lec.c: Lan Emulation driver - * Marko Kiiskila mkiiskila@yahoo.com * + * Marko Kiiskila */ #include @@ -38,7 +38,7 @@ #include #include "../bridge/br_private.h" -static unsigned char bridge_ula_lec[] = {0x01, 0x80, 0xc2, 0x00, 0x00}; +static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; #endif /* Modular too */ @@ -55,38 +55,41 @@ #define DPRINTK(format,args...) #endif -#define DUMP_PACKETS 0 /* 0 = None, - * 1 = 30 first bytes - * 2 = Whole packet - */ - -#define LEC_UNRES_QUE_LEN 8 /* number of tx packets to queue for a - single destination while waiting for SVC */ +#define DUMP_PACKETS 0 /* + * 0 = None, + * 1 = 30 first bytes + * 2 = Whole packet + */ + +#define LEC_UNRES_QUE_LEN 8 /* + * number of tx packets to queue for a + * single destination while waiting for SVC + */ static int lec_open(struct net_device *dev); static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev); static int lec_close(struct net_device *dev); static struct net_device_stats *lec_get_stats(struct net_device *dev); static void lec_init(struct net_device *dev); -static struct lec_arp_table* lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr); +static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, + unsigned char *mac_addr); static int lec_arp_remove(struct lec_priv *priv, - struct lec_arp_table *to_remove); + struct lec_arp_table *to_remove); /* LANE2 functions */ -static void lane2_associate_ind (struct net_device *dev, u8 *mac_address, - u8 
*tlvs, u32 sizeoftlvs); +static void lane2_associate_ind(struct net_device *dev, u8 *mac_address, + u8 *tlvs, u32 sizeoftlvs); static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs); -static int lane2_associate_req (struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); + u8 **tlvs, u32 *sizeoftlvs); +static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, + u8 *tlvs, u32 sizeoftlvs); -static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, +static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, unsigned long permanent); static void lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb); static void lec_arp_destroy(struct lec_priv *priv); static void lec_arp_init(struct lec_priv *priv); -static struct atm_vcc* lec_arp_resolve(struct lec_priv *priv, +static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, struct lec_arp_table **ret_entry); @@ -100,16 +103,30 @@ unsigned long tran_id); static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, - void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb)); + void (*old_push) (struct atm_vcc *vcc, + struct sk_buff *skb)); static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc); +/* must be done under lec_arp_lock */ +static inline void lec_arp_hold(struct lec_arp_table *entry) +{ + atomic_inc(&entry->usage); +} + +static inline void lec_arp_put(struct lec_arp_table *entry) +{ + if (atomic_dec_and_test(&entry->usage)) + kfree(entry); +} + + static struct lane2_ops lane2_ops = { - lane2_resolve, /* resolve, spec 3.1.3 */ - lane2_associate_req, /* associate_req, spec 3.1.4 */ - NULL /* associate indicator, spec 3.1.5 */ + lane2_resolve, /* resolve, spec 3.1.3 */ + lane2_associate_req, /* associate_req, spec 3.1.4 */ + NULL /* associate indicator, spec 3.1.5 */ }; -static unsigned 
char bus_mac[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; +static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; @@ -117,36 +134,39 @@ #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) { - struct ethhdr *eth; - char *buff; - struct lec_priv *priv; - - /* Check if this is a BPDU. If so, ask zeppelin to send - * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit - * as the Config BPDU has */ - eth = (struct ethhdr *)skb->data; - buff = skb->data + skb->dev->hard_header_len; - if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) { + struct ethhdr *eth; + char *buff; + struct lec_priv *priv; + + /* + * Check if this is a BPDU. If so, ask zeppelin to send + * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit + * as the Config BPDU has + */ + eth = (struct ethhdr *)skb->data; + buff = skb->data + skb->dev->hard_header_len; + if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) { struct sock *sk; - struct sk_buff *skb2; - struct atmlec_msg *mesg; + struct sk_buff *skb2; + struct atmlec_msg *mesg; - skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); - if (skb2 == NULL) return; - skb2->len = sizeof(struct atmlec_msg); - mesg = (struct atmlec_msg *)skb2->data; - mesg->type = l_topology_change; - buff += 4; - mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */ + skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); + if (skb2 == NULL) + return; + skb2->len = sizeof(struct atmlec_msg); + mesg = (struct atmlec_msg *)skb2->data; + mesg->type = l_topology_change; + buff += 4; + mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */ - priv = (struct lec_priv *)dev->priv; - atm_force_charge(priv->lecd, skb2->truesize); + priv = (struct lec_priv *)dev->priv; + atm_force_charge(priv->lecd, skb2->truesize); sk = 
sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk, skb2->len); - } + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk, skb2->len); + } - return; + return; } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ @@ -162,36 +182,35 @@ #ifdef CONFIG_TR static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) { - struct trh_hdr *trh; - int riflen, num_rdsc; - - trh = (struct trh_hdr *)packet; - if (trh->daddr[0] & (uint8_t)0x80) - return bus_mac; /* multicast */ - - if (trh->saddr[0] & TR_RII) { - riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8; - if ((ntohs(trh->rcf) >> 13) != 0) - return bus_mac; /* ARE or STE */ - } - else - return trh->daddr; /* not source routed */ - - if (riflen < 6) - return trh->daddr; /* last hop, source routed */ - - /* riflen is 6 or more, packet has more than one route descriptor */ - num_rdsc = (riflen/2) - 1; - memset(rdesc, 0, ETH_ALEN); - /* offset 4 comes from LAN destination field in LE control frames */ - if (trh->rcf & htons((uint16_t)TR_RCF_DIR_BIT)) - memcpy(&rdesc[4], &trh->rseg[num_rdsc-2], sizeof(uint16_t)); - else { - memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t)); - rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0)); - } + struct trh_hdr *trh; + int riflen, num_rdsc; + + trh = (struct trh_hdr *)packet; + if (trh->daddr[0] & (uint8_t) 0x80) + return bus_mac; /* multicast */ + + if (trh->saddr[0] & TR_RII) { + riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8; + if ((ntohs(trh->rcf) >> 13) != 0) + return bus_mac; /* ARE or STE */ + } else + return trh->daddr; /* not source routed */ + + if (riflen < 6) + return trh->daddr; /* last hop, source routed */ + + /* riflen is 6 or more, packet has more than one route descriptor */ + num_rdsc = (riflen / 2) - 1; + memset(rdesc, 0, ETH_ALEN); + /* offset 4 comes from LAN destination field in LE control frames */ + if (trh->rcf & htons((uint16_t) TR_RCF_DIR_BIT)) + 
memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(uint16_t)); + else { + memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t)); + rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0)); + } - return NULL; + return NULL; } #endif /* CONFIG_TR */ @@ -204,15 +223,14 @@ * there is non-reboot way to recover if something goes wrong. */ -static int -lec_open(struct net_device *dev) +static int lec_open(struct net_device *dev) { - struct lec_priv *priv = (struct lec_priv *)dev->priv; - + struct lec_priv *priv = (struct lec_priv *)dev->priv; + netif_start_queue(dev); - memset(&priv->stats,0,sizeof(struct net_device_stats)); - - return 0; + memset(&priv->stats, 0, sizeof(struct net_device_stats)); + + return 0; } static __inline__ void @@ -231,160 +249,166 @@ priv->stats.tx_bytes += skb->len; } -static void -lec_tx_timeout(struct net_device *dev) +static void lec_tx_timeout(struct net_device *dev) { printk(KERN_INFO "%s: tx timeout\n", dev->name); dev->trans_start = jiffies; netif_wake_queue(dev); } -static int -lec_start_xmit(struct sk_buff *skb, struct net_device *dev) +static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct sk_buff *skb2; - struct lec_priv *priv = (struct lec_priv *)dev->priv; - struct lecdatahdr_8023 *lec_h; - struct atm_vcc *vcc; + struct sk_buff *skb2; + struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lecdatahdr_8023 *lec_h; + struct atm_vcc *vcc; struct lec_arp_table *entry; - unsigned char *dst; + unsigned char *dst; int min_frame_size; #ifdef CONFIG_TR - unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */ + unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */ #endif - int is_rdesc; + int is_rdesc; #if DUMP_PACKETS > 0 - char buf[300]; - int i=0; + char buf[300]; + int i = 0; #endif /* DUMP_PACKETS >0 */ - - DPRINTK("lec_start_xmit called\n"); - if (!priv->lecd) { - printk("%s:No lecd attached\n",dev->name); - priv->stats.tx_errors++; - netif_stop_queue(dev); - return -EUNATCH; 
- } - - DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", - (long)skb->head, (long)skb->data, (long)skb->tail, - (long)skb->end); + + DPRINTK("lec_start_xmit called\n"); + if (!priv->lecd) { + printk("%s:No lecd attached\n", dev->name); + priv->stats.tx_errors++; + netif_stop_queue(dev); + return -EUNATCH; + } + + DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", + (long)skb->head, (long)skb->data, (long)skb->tail, + (long)skb->end); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) - lec_handle_bridge(skb, dev); + if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) + lec_handle_bridge(skb, dev); #endif - /* Make sure we have room for lec_id */ - if (skb_headroom(skb) < 2) { + /* Make sure we have room for lec_id */ + if (skb_headroom(skb) < 2) { - DPRINTK("lec_start_xmit: reallocating skb\n"); - skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); - kfree_skb(skb); - if (skb2 == NULL) return 0; - skb = skb2; - } - skb_push(skb, 2); - - /* Put le header to place, works for TokenRing too */ - lec_h = (struct lecdatahdr_8023*)skb->data; - lec_h->le_header = htons(priv->lecid); + DPRINTK("lec_start_xmit: reallocating skb\n"); + skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); + kfree_skb(skb); + if (skb2 == NULL) + return 0; + skb = skb2; + } + skb_push(skb, 2); + + /* Put le header to place, works for TokenRing too */ + lec_h = (struct lecdatahdr_8023 *)skb->data; + lec_h->le_header = htons(priv->lecid); #ifdef CONFIG_TR - /* Ugly. Use this to realign Token Ring packets for - * e.g. PCA-200E driver. */ - if (priv->is_trdev) { - skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); - kfree_skb(skb); - if (skb2 == NULL) return 0; - skb = skb2; - } + /* + * Ugly. Use this to realign Token Ring packets for + * e.g. PCA-200E driver. 
+ */ + if (priv->is_trdev) { + skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); + kfree_skb(skb); + if (skb2 == NULL) + return 0; + skb = skb2; + } #endif #if DUMP_PACKETS > 0 - printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name, - skb->len, priv->lecid); + printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name, + skb->len, priv->lecid); #if DUMP_PACKETS >= 2 - for(i=0;ilen && i <99;i++) { - sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 99; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #elif DUMP_PACKETS >= 1 - for(i=0;ilen && i < 30;i++) { - sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 30; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #endif /* DUMP_PACKETS >= 1 */ - if (i==skb->len) - printk("%s\n",buf); - else - printk("%s...\n",buf); + if (i == skb->len) + printk("%s\n", buf); + else + printk("%s...\n", buf); #endif /* DUMP_PACKETS > 0 */ - /* Minimum ethernet-frame size */ + /* Minimum ethernet-frame size */ #ifdef CONFIG_TR - if (priv->is_trdev) - min_frame_size = LEC_MINIMUM_8025_SIZE; + if (priv->is_trdev) + min_frame_size = LEC_MINIMUM_8025_SIZE; else #endif - min_frame_size = LEC_MINIMUM_8023_SIZE; - if (skb->len < min_frame_size) { - if ((skb->len + skb_tailroom(skb)) < min_frame_size) { - skb2 = skb_copy_expand(skb, 0, - min_frame_size - skb->truesize, GFP_ATOMIC); - dev_kfree_skb(skb); - if (skb2 == NULL) { - priv->stats.tx_dropped++; - return 0; - } - skb = skb2; - } + min_frame_size = LEC_MINIMUM_8023_SIZE; + if (skb->len < min_frame_size) { + if ((skb->len + skb_tailroom(skb)) < min_frame_size) { + skb2 = skb_copy_expand(skb, 0, + min_frame_size - skb->truesize, + GFP_ATOMIC); + dev_kfree_skb(skb); + if (skb2 == NULL) { + priv->stats.tx_dropped++; + return 0; + } + skb = skb2; + } skb_put(skb, min_frame_size - skb->len); - } - - /* Send to right vcc */ - is_rdesc = 0; - dst = lec_h->h_dest; + } + + /* Send to right vcc */ + 
is_rdesc = 0; + dst = lec_h->h_dest; #ifdef CONFIG_TR - if (priv->is_trdev) { - dst = get_tr_dst(skb->data+2, rdesc); - if (dst == NULL) { - dst = rdesc; - is_rdesc = 1; - } - } -#endif - entry = NULL; - vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); - DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, - vcc, vcc?vcc->flags:0, entry); - if (!vcc || !test_bit(ATM_VF_READY,&vcc->flags)) { - if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { - DPRINTK("%s:lec_start_xmit: queuing packet, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); - skb_queue_tail(&entry->tx_wait, skb); - } else { - DPRINTK("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); - priv->stats.tx_dropped++; - dev_kfree_skb(skb); - } - return 0; - } - -#if DUMP_PACKETS > 0 - printk("%s:sending to vpi:%d vci:%d\n", dev->name, - vcc->vpi, vcc->vci); + if (priv->is_trdev) { + dst = get_tr_dst(skb->data + 2, rdesc); + if (dst == NULL) { + dst = rdesc; + is_rdesc = 1; + } + } +#endif + entry = NULL; + vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); + DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, + vcc, vcc ? 
vcc->flags : 0, entry); + if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) { + if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { + DPRINTK("%s:lec_start_xmit: queuing packet, ", + dev->name); + DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + lec_h->h_dest[0], lec_h->h_dest[1], + lec_h->h_dest[2], lec_h->h_dest[3], + lec_h->h_dest[4], lec_h->h_dest[5]); + skb_queue_tail(&entry->tx_wait, skb); + } else { + DPRINTK + ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", + dev->name); + DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + lec_h->h_dest[0], lec_h->h_dest[1], + lec_h->h_dest[2], lec_h->h_dest[3], + lec_h->h_dest[4], lec_h->h_dest[5]); + priv->stats.tx_dropped++; + dev_kfree_skb(skb); + } + goto out; + } +#if DUMP_PACKETS > 0 + printk("%s:sending to vpi:%d vci:%d\n", dev->name, vcc->vpi, vcc->vci); #endif /* DUMP_PACKETS > 0 */ - - while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { - DPRINTK("lec.c: emptying tx queue, "); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); + + while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { + DPRINTK("lec.c: emptying tx queue, "); + DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], + lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); lec_send(vcc, skb2, priv); - } + } lec_send(vcc, skb, priv); @@ -404,210 +428,219 @@ netif_wake_queue(dev); } +out: + if (entry) + lec_arp_put(entry); dev->trans_start = jiffies; - return 0; + return 0; } /* The inverse routine to net_open(). */ -static int -lec_close(struct net_device *dev) +static int lec_close(struct net_device *dev) { - netif_stop_queue(dev); - return 0; + netif_stop_queue(dev); + return 0; } /* * Get the current statistics. * This may be called with the card open or closed. 
*/ -static struct net_device_stats * -lec_get_stats(struct net_device *dev) +static struct net_device_stats *lec_get_stats(struct net_device *dev) { - return &((struct lec_priv *)dev->priv)->stats; + return &((struct lec_priv *)dev->priv)->stats; } -static int -lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) +static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct net_device *dev = (struct net_device*)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv*)dev->priv; - struct atmlec_msg *mesg; - struct lec_arp_table *entry; - int i; - char *tmp; /* FIXME */ + struct net_device *dev = (struct net_device *)vcc->proto_data; + struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct atmlec_msg *mesg; + struct lec_arp_table *entry; + int i; + char *tmp; /* FIXME */ atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - mesg = (struct atmlec_msg *)skb->data; - tmp = skb->data; - tmp += sizeof(struct atmlec_msg); - DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type); - switch(mesg->type) { - case l_set_mac_addr: - for (i=0;i<6;i++) { - dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; - } - break; - case l_del_mac_addr: - for(i=0;i<6;i++) { - dev->dev_addr[i] = 0; - } - break; - case l_addr_delete: - lec_addr_delete(priv, mesg->content.normal.atm_addr, - mesg->content.normal.flag); - break; - case l_topology_change: - priv->topology_change = mesg->content.normal.flag; - break; - case l_flush_complete: - lec_flush_complete(priv, mesg->content.normal.flag); - break; - case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */ + mesg = (struct atmlec_msg *)skb->data; + tmp = skb->data; + tmp += sizeof(struct atmlec_msg); + DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type); + switch (mesg->type) { + case l_set_mac_addr: + for (i = 0; i < 6; i++) { + dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; + } + break; + case l_del_mac_addr: + for (i = 0; i < 6; i++) { + dev->dev_addr[i] = 0; + } 
+ break; + case l_addr_delete: + lec_addr_delete(priv, mesg->content.normal.atm_addr, + mesg->content.normal.flag); + break; + case l_topology_change: + priv->topology_change = mesg->content.normal.flag; + break; + case l_flush_complete: + lec_flush_complete(priv, mesg->content.normal.flag); + break; + case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */ spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mesg->content.normal.mac_addr); - lec_arp_remove(priv, entry); + entry = lec_arp_find(priv, mesg->content.normal.mac_addr); + lec_arp_remove(priv, entry); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - if (mesg->content.normal.no_source_le_narp) - break; - /* FALL THROUGH */ - case l_arp_update: - lec_arp_update(priv, mesg->content.normal.mac_addr, - mesg->content.normal.atm_addr, - mesg->content.normal.flag, - mesg->content.normal.targetless_le_arp); - DPRINTK("lec: in l_arp_update\n"); - if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ - DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", mesg->sizeoftlvs); - lane2_associate_ind(dev, - mesg->content.normal.mac_addr, - tmp, mesg->sizeoftlvs); - } - break; - case l_config: - priv->maximum_unknown_frame_count = - mesg->content.config.maximum_unknown_frame_count; - priv->max_unknown_frame_time = - (mesg->content.config.max_unknown_frame_time*HZ); - priv->max_retry_count = - mesg->content.config.max_retry_count; - priv->aging_time = (mesg->content.config.aging_time*HZ); - priv->forward_delay_time = - (mesg->content.config.forward_delay_time*HZ); - priv->arp_response_time = - (mesg->content.config.arp_response_time*HZ); - priv->flush_timeout = (mesg->content.config.flush_timeout*HZ); - priv->path_switching_delay = - (mesg->content.config.path_switching_delay*HZ); - priv->lane_version = mesg->content.config.lane_version; /* LANE2 */ + if (mesg->content.normal.no_source_le_narp) + break; + /* FALL THROUGH */ + case l_arp_update: + lec_arp_update(priv, mesg->content.normal.mac_addr, + 
mesg->content.normal.atm_addr, + mesg->content.normal.flag, + mesg->content.normal.targetless_le_arp); + DPRINTK("lec: in l_arp_update\n"); + if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ + DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", + mesg->sizeoftlvs); + lane2_associate_ind(dev, mesg->content.normal.mac_addr, + tmp, mesg->sizeoftlvs); + } + break; + case l_config: + priv->maximum_unknown_frame_count = + mesg->content.config.maximum_unknown_frame_count; + priv->max_unknown_frame_time = + (mesg->content.config.max_unknown_frame_time * HZ); + priv->max_retry_count = mesg->content.config.max_retry_count; + priv->aging_time = (mesg->content.config.aging_time * HZ); + priv->forward_delay_time = + (mesg->content.config.forward_delay_time * HZ); + priv->arp_response_time = + (mesg->content.config.arp_response_time * HZ); + priv->flush_timeout = (mesg->content.config.flush_timeout * HZ); + priv->path_switching_delay = + (mesg->content.config.path_switching_delay * HZ); + priv->lane_version = mesg->content.config.lane_version; /* LANE2 */ priv->lane2_ops = NULL; if (priv->lane_version > 1) priv->lane2_ops = &lane2_ops; if (dev->change_mtu(dev, mesg->content.config.mtu)) printk("%s: change_mtu to %d failed\n", dev->name, - mesg->content.config.mtu); + mesg->content.config.mtu); priv->is_proxy = mesg->content.config.is_proxy; - break; - case l_flush_tran_id: - lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr, - mesg->content.normal.flag); - break; - case l_set_lecid: - priv->lecid=(unsigned short)(0xffff&mesg->content.normal.flag); - break; - case l_should_bridge: { + break; + case l_flush_tran_id: + lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr, + mesg->content.normal.flag); + break; + case l_set_lecid: + priv->lecid = + (unsigned short)(0xffff & mesg->content.normal.flag); + break; + case l_should_bridge: #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - struct net_bridge_fdb_entry *f; + { + struct net_bridge_fdb_entry *f; - 
DPRINTK("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - dev->name, - mesg->content.proxy.mac_addr[0], mesg->content.proxy.mac_addr[1], - mesg->content.proxy.mac_addr[2], mesg->content.proxy.mac_addr[3], - mesg->content.proxy.mac_addr[4], mesg->content.proxy.mac_addr[5]); - - if (br_fdb_get_hook == NULL || dev->br_port == NULL) - break; - - f = br_fdb_get_hook(dev->br_port->br, mesg->content.proxy.mac_addr); - if (f != NULL && - f->dst->dev != dev && - f->dst->state == BR_STATE_FORWARDING) { - /* hit from bridge table, send LE_ARP_RESPONSE */ - struct sk_buff *skb2; - struct sock *sk; - - DPRINTK("%s: entry found, responding to zeppelin\n", dev->name); - skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); - if (skb2 == NULL) { - br_fdb_put_hook(f); - break; - } - skb2->len = sizeof(struct atmlec_msg); - memcpy(skb2->data, mesg, sizeof(struct atmlec_msg)); - atm_force_charge(priv->lecd, skb2->truesize); - sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk, skb2->len); - } - if (f != NULL) br_fdb_put_hook(f); + DPRINTK + ("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + dev->name, mesg->content.proxy.mac_addr[0], + mesg->content.proxy.mac_addr[1], + mesg->content.proxy.mac_addr[2], + mesg->content.proxy.mac_addr[3], + mesg->content.proxy.mac_addr[4], + mesg->content.proxy.mac_addr[5]); + + if (br_fdb_get_hook == NULL || dev->br_port == NULL) + break; + + f = br_fdb_get_hook(dev->br_port->br, + mesg->content.proxy.mac_addr); + if (f != NULL && f->dst->dev != dev + && f->dst->state == BR_STATE_FORWARDING) { + /* hit from bridge table, send LE_ARP_RESPONSE */ + struct sk_buff *skb2; + struct sock *sk; + + DPRINTK + ("%s: entry found, responding to zeppelin\n", + dev->name); + skb2 = + alloc_skb(sizeof(struct atmlec_msg), + GFP_ATOMIC); + if (skb2 == NULL) { + br_fdb_put_hook(f); + break; + } + skb2->len = sizeof(struct atmlec_msg); + memcpy(skb2->data, mesg, + sizeof(struct 
atmlec_msg)); + atm_force_charge(priv->lecd, skb2->truesize); + sk = sk_atm(priv->lecd); + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk, skb2->len); + } + if (f != NULL) + br_fdb_put_hook(f); + } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ - } - break; - default: - printk("%s: Unknown message type %d\n", dev->name, mesg->type); - dev_kfree_skb(skb); - return -EINVAL; - } - dev_kfree_skb(skb); - return 0; -} - -static void -lec_atm_close(struct atm_vcc *vcc) -{ - struct sk_buff *skb; - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + break; + default: + printk("%s: Unknown message type %d\n", dev->name, mesg->type); + dev_kfree_skb(skb); + return -EINVAL; + } + dev_kfree_skb(skb); + return 0; +} - priv->lecd = NULL; - /* Do something needful? */ +static void lec_atm_close(struct atm_vcc *vcc) +{ + struct sk_buff *skb; + struct net_device *dev = (struct net_device *)vcc->proto_data; + struct lec_priv *priv = (struct lec_priv *)dev->priv; - netif_stop_queue(dev); - lec_arp_destroy(priv); + priv->lecd = NULL; + /* Do something needful? 
*/ - if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) + netif_stop_queue(dev); + lec_arp_destroy(priv); + + if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) printk("%s lec_atm_close: closing with messages pending\n", - dev->name); - while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) { - atm_return(vcc, skb->truesize); + dev->name); + while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) { + atm_return(vcc, skb->truesize); dev_kfree_skb(skb); - } - + } + printk("%s: Shut down!\n", dev->name); - module_put(THIS_MODULE); + module_put(THIS_MODULE); } static struct atmdev_ops lecdev_ops = { - .close = lec_atm_close, - .send = lec_atm_send + .close = lec_atm_close, + .send = lec_atm_send }; static struct atm_dev lecatm_dev = { - .ops = &lecdev_ops, - .type = "lec", - .number = 999, /* dummy device number */ - .lock = SPIN_LOCK_UNLOCKED + .ops = &lecdev_ops, + .type = "lec", + .number = 999, /* dummy device number */ + .lock = SPIN_LOCK_UNLOCKED }; /* * LANE2: new argument struct sk_buff *data contains * the LE_ARP based TLVs introduced in the LANE2 spec */ -static int -send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, - unsigned char *mac_addr, unsigned char *atm_addr, - struct sk_buff *data) +static int +send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, + unsigned char *mac_addr, unsigned char *atm_addr, + struct sk_buff *data) { struct sock *sk; struct sk_buff *skb; @@ -621,187 +654,193 @@ return -1; skb->len = sizeof(struct atmlec_msg); mesg = (struct atmlec_msg *)skb->data; - memset(mesg, 0, sizeof(struct atmlec_msg)); + memset(mesg, 0, sizeof(struct atmlec_msg)); mesg->type = type; - if (data != NULL) - mesg->sizeoftlvs = data->len; + if (data != NULL) + mesg->sizeoftlvs = data->len; if (mac_addr) memcpy(&mesg->content.normal.mac_addr, mac_addr, ETH_ALEN); - else - mesg->content.normal.targetless_le_arp = 1; + else + mesg->content.normal.targetless_le_arp = 1; if (atm_addr) memcpy(&mesg->content.normal.atm_addr, atm_addr, 
ATM_ESA_LEN); - atm_force_charge(priv->lecd, skb->truesize); + atm_force_charge(priv->lecd, skb->truesize); sk = sk_atm(priv->lecd); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk, skb->len); - if (data != NULL) { - DPRINTK("lec: about to send %d bytes of data\n", data->len); - atm_force_charge(priv->lecd, data->truesize); - skb_queue_tail(&sk->sk_receive_queue, data); - sk->sk_data_ready(sk, skb->len); - } + if (data != NULL) { + DPRINTK("lec: about to send %d bytes of data\n", data->len); + atm_force_charge(priv->lecd, data->truesize); + skb_queue_tail(&sk->sk_receive_queue, data); + sk->sk_data_ready(sk, skb->len); + } - return 0; + return 0; } /* shamelessly stolen from drivers/net/net_init.c */ static int lec_change_mtu(struct net_device *dev, int new_mtu) { - if ((new_mtu < 68) || (new_mtu > 18190)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; + if ((new_mtu < 68) || (new_mtu > 18190)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; } static void lec_set_multicast_list(struct net_device *dev) { - /* by default, all multicast frames arrive over the bus. - * eventually support selective multicast service - */ - return; + /* + * by default, all multicast frames arrive over the bus. 
+ * eventually support selective multicast service + */ + return; } -static void -lec_init(struct net_device *dev) -{ - dev->change_mtu = lec_change_mtu; - dev->open = lec_open; - dev->stop = lec_close; - dev->hard_start_xmit = lec_start_xmit; +static void lec_init(struct net_device *dev) +{ + dev->change_mtu = lec_change_mtu; + dev->open = lec_open; + dev->stop = lec_close; + dev->hard_start_xmit = lec_start_xmit; dev->tx_timeout = lec_tx_timeout; - dev->get_stats = lec_get_stats; - dev->set_multicast_list = lec_set_multicast_list; - dev->do_ioctl = NULL; - printk("%s: Initialized!\n",dev->name); - return; + dev->get_stats = lec_get_stats; + dev->set_multicast_list = lec_set_multicast_list; + dev->do_ioctl = NULL; + printk("%s: Initialized!\n", dev->name); + return; } static unsigned char lec_ctrl_magic[] = { - 0xff, - 0x00, - 0x01, - 0x01 }; + 0xff, + 0x00, + 0x01, + 0x01 +}; #define LEC_DATA_DIRECT_8023 2 #define LEC_DATA_DIRECT_8025 3 static int lec_is_data_direct(struct atm_vcc *vcc) -{ +{ return ((vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8023) || (vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8025)); -} +} -static void -lec_push(struct atm_vcc *vcc, struct sk_buff *skb) +static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct net_device *dev = (struct net_device *)vcc->proto_data; + struct lec_priv *priv = (struct lec_priv *)dev->priv; #if DUMP_PACKETS >0 - int i=0; - char buf[300]; + int i = 0; + char buf[300]; - printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name, - vcc->vpi, vcc->vci); + printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name, + vcc->vpi, vcc->vci); #endif - if (!skb) { - DPRINTK("%s: null skb\n",dev->name); - lec_vcc_close(priv, vcc); - return; - } + if (!skb) { + DPRINTK("%s: null skb\n", dev->name); + lec_vcc_close(priv, vcc); + return; + } #if DUMP_PACKETS > 0 
- printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name, - skb->len, priv->lecid); + printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name, + skb->len, priv->lecid); #if DUMP_PACKETS >= 2 - for(i=0;ilen && i <99;i++) { - sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 99; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #elif DUMP_PACKETS >= 1 - for(i=0;ilen && i < 30;i++) { - sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 30; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #endif /* DUMP_PACKETS >= 1 */ - if (i==skb->len) - printk("%s\n",buf); - else - printk("%s...\n",buf); + if (i == skb->len) + printk("%s\n", buf); + else + printk("%s...\n", buf); #endif /* DUMP_PACKETS > 0 */ - if (memcmp(skb->data, lec_ctrl_magic, 4) ==0) { /* Control frame, to daemon*/ + if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { /* Control frame, to daemon */ struct sock *sk = sk_atm(vcc); - DPRINTK("%s: To daemon\n",dev->name); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } else { /* Data frame, queue to protocol handlers */ + DPRINTK("%s: To daemon\n", dev->name); + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); + } else { /* Data frame, queue to protocol handlers */ struct lec_arp_table *entry; - unsigned char *src, *dst; + unsigned char *src, *dst; - atm_return(vcc,skb->truesize); - if (*(uint16_t *)skb->data == htons(priv->lecid) || - !priv->lecd || - !(dev->flags & IFF_UP)) { - /* Probably looping back, or if lecd is missing, - lecd has gone down */ - DPRINTK("Ignoring frame...\n"); - dev_kfree_skb(skb); - return; - } + atm_return(vcc, skb->truesize); + if (*(uint16_t *) skb->data == htons(priv->lecid) || + !priv->lecd || !(dev->flags & IFF_UP)) { + /* + * Probably looping back, or if lecd is missing, + * lecd has gone down + */ + DPRINTK("Ignoring frame...\n"); + dev_kfree_skb(skb); + return; + } #ifdef 
CONFIG_TR - if (priv->is_trdev) - dst = ((struct lecdatahdr_8025 *) skb->data)->h_dest; - else + if (priv->is_trdev) + dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest; + else #endif - dst = ((struct lecdatahdr_8023 *) skb->data)->h_dest; + dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest; - /* If this is a Data Direct VCC, and the VCC does not match + /* + * If this is a Data Direct VCC, and the VCC does not match * the LE_ARP cache entry, delete the LE_ARP cache entry. */ spin_lock_irqsave(&priv->lec_arp_lock, flags); if (lec_is_data_direct(vcc)) { #ifdef CONFIG_TR if (priv->is_trdev) - src = ((struct lecdatahdr_8025 *) skb->data)->h_source; + src = + ((struct lecdatahdr_8025 *)skb->data)-> + h_source; else #endif - src = ((struct lecdatahdr_8023 *) skb->data)->h_source; + src = + ((struct lecdatahdr_8023 *)skb->data)-> + h_source; entry = lec_arp_find(priv, src); if (entry && entry->vcc != vcc) { lec_arp_remove(priv, entry); - kfree(entry); + lec_arp_put(entry); } } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - if (!(dst[0]&0x01) && /* Never filter Multi/Broadcast */ - !priv->is_proxy && /* Proxy wants all the packets */ + if (!(dst[0] & 0x01) && /* Never filter Multi/Broadcast */ + !priv->is_proxy && /* Proxy wants all the packets */ memcmp(dst, dev->dev_addr, dev->addr_len)) { - dev_kfree_skb(skb); - return; - } - if (priv->lec_arp_empty_ones) { - lec_arp_check_empties(priv, vcc, skb); - } - skb->dev = dev; - skb_pull(skb, 2); /* skip lec_id */ + dev_kfree_skb(skb); + return; + } + if (!hlist_empty(&priv->lec_arp_empty_ones)) { + lec_arp_check_empties(priv, vcc, skb); + } + skb->dev = dev; + skb_pull(skb, 2); /* skip lec_id */ #ifdef CONFIG_TR - if (priv->is_trdev) skb->protocol = tr_type_trans(skb, dev); - else + if (priv->is_trdev) + skb->protocol = tr_type_trans(skb, dev); + else #endif - skb->protocol = eth_type_trans(skb, dev); - priv->stats.rx_packets++; - priv->stats.rx_bytes += skb->len; - memset(ATM_SKB(skb), 0, sizeof(struct 
atm_skb_data)); - netif_rx(skb); - } + skb->protocol = eth_type_trans(skb, dev); + priv->stats.rx_packets++; + priv->stats.rx_bytes += skb->len; + memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); + netif_rx(skb); + } } -static void -lec_pop(struct atm_vcc *vcc, struct sk_buff *skb) +static void lec_pop(struct atm_vcc *vcc, struct sk_buff *skb) { struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); struct net_device *dev = skb->dev; @@ -820,123 +859,121 @@ } } -static int -lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) +static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) { struct lec_vcc_priv *vpriv; - int bytes_left; - struct atmlec_ioc ioc_data; + int bytes_left; + struct atmlec_ioc ioc_data; - /* Lecd must be up in this case */ - bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); - if (bytes_left != 0) { - printk("lec: lec_vcc_attach, copy from user failed for %d bytes\n", - bytes_left); - } - if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || - !dev_lec[ioc_data.dev_num]) - return -EINVAL; + /* Lecd must be up in this case */ + bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); + if (bytes_left != 0) { + printk + ("lec: lec_vcc_attach, copy from user failed for %d bytes\n", + bytes_left); + } + if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || + !dev_lec[ioc_data.dev_num]) + return -EINVAL; if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL))) return -ENOMEM; vpriv->xoff = 0; vpriv->old_pop = vcc->pop; vcc->user_back = vpriv; vcc->pop = lec_pop; - lec_vcc_added(dev_lec[ioc_data.dev_num]->priv, - &ioc_data, vcc, vcc->push); - vcc->proto_data = dev_lec[ioc_data.dev_num]; - vcc->push = lec_push; - return 0; + lec_vcc_added(dev_lec[ioc_data.dev_num]->priv, + &ioc_data, vcc, vcc->push); + vcc->proto_data = dev_lec[ioc_data.dev_num]; + vcc->push = lec_push; + return 0; } -static int -lec_mcast_attach(struct atm_vcc *vcc, int arg) -{ - if (arg <0 || arg >= MAX_LEC_ITF || 
!dev_lec[arg]) - return -EINVAL; - vcc->proto_data = dev_lec[arg]; - return (lec_mcast_make((struct lec_priv*)dev_lec[arg]->priv, vcc)); +static int lec_mcast_attach(struct atm_vcc *vcc, int arg) +{ + if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + return -EINVAL; + vcc->proto_data = dev_lec[arg]; + return (lec_mcast_make((struct lec_priv *)dev_lec[arg]->priv, vcc)); } /* Initialize device. */ -static int -lecd_attach(struct atm_vcc *vcc, int arg) -{ - int i; - struct lec_priv *priv; - - if (arg<0) - i = 0; - else - i = arg; +static int lecd_attach(struct atm_vcc *vcc, int arg) +{ + int i; + struct lec_priv *priv; + + if (arg < 0) + i = 0; + else + i = arg; #ifdef CONFIG_TR - if (arg >= MAX_LEC_ITF) - return -EINVAL; -#else /* Reserve the top NUM_TR_DEVS for TR */ - if (arg >= (MAX_LEC_ITF-NUM_TR_DEVS)) - return -EINVAL; -#endif - if (!dev_lec[i]) { - int is_trdev, size; - - is_trdev = 0; - if (i >= (MAX_LEC_ITF - NUM_TR_DEVS)) - is_trdev = 1; + if (arg >= MAX_LEC_ITF) + return -EINVAL; +#else /* Reserve the top NUM_TR_DEVS for TR */ + if (arg >= (MAX_LEC_ITF - NUM_TR_DEVS)) + return -EINVAL; +#endif + if (!dev_lec[i]) { + int is_trdev, size; + + is_trdev = 0; + if (i >= (MAX_LEC_ITF - NUM_TR_DEVS)) + is_trdev = 1; - size = sizeof(struct lec_priv); + size = sizeof(struct lec_priv); #ifdef CONFIG_TR - if (is_trdev) - dev_lec[i] = alloc_trdev(size); - else -#endif - dev_lec[i] = alloc_etherdev(size); - if (!dev_lec[i]) - return -ENOMEM; - snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); - if (register_netdev(dev_lec[i])) { - free_netdev(dev_lec[i]); - return -EINVAL; - } - - priv = dev_lec[i]->priv; - priv->is_trdev = is_trdev; - lec_init(dev_lec[i]); - } else { - priv = dev_lec[i]->priv; - if (priv->lecd) - return -EADDRINUSE; - } - lec_arp_init(priv); - priv->itfnum = i; /* LANE2 addition */ - priv->lecd = vcc; - vcc->dev = &lecatm_dev; - vcc_insert_socket(sk_atm(vcc)); - - vcc->proto_data = dev_lec[i]; - set_bit(ATM_VF_META,&vcc->flags); - 
set_bit(ATM_VF_READY,&vcc->flags); - - /* Set default values to these variables */ - priv->maximum_unknown_frame_count = 1; - priv->max_unknown_frame_time = (1*HZ); - priv->vcc_timeout_period = (1200*HZ); - priv->max_retry_count = 1; - priv->aging_time = (300*HZ); - priv->forward_delay_time = (15*HZ); - priv->topology_change = 0; - priv->arp_response_time = (1*HZ); - priv->flush_timeout = (4*HZ); - priv->path_switching_delay = (6*HZ); - - if (dev_lec[i]->flags & IFF_UP) { - netif_start_queue(dev_lec[i]); - } - __module_get(THIS_MODULE); - return i; + if (is_trdev) + dev_lec[i] = alloc_trdev(size); + else +#endif + dev_lec[i] = alloc_etherdev(size); + if (!dev_lec[i]) + return -ENOMEM; + snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); + if (register_netdev(dev_lec[i])) { + free_netdev(dev_lec[i]); + return -EINVAL; + } + + priv = dev_lec[i]->priv; + priv->is_trdev = is_trdev; + lec_init(dev_lec[i]); + } else { + priv = dev_lec[i]->priv; + if (priv->lecd) + return -EADDRINUSE; + } + lec_arp_init(priv); + priv->itfnum = i; /* LANE2 addition */ + priv->lecd = vcc; + vcc->dev = &lecatm_dev; + vcc_insert_socket(sk_atm(vcc)); + + vcc->proto_data = dev_lec[i]; + set_bit(ATM_VF_META, &vcc->flags); + set_bit(ATM_VF_READY, &vcc->flags); + + /* Set default values to these variables */ + priv->maximum_unknown_frame_count = 1; + priv->max_unknown_frame_time = (1 * HZ); + priv->vcc_timeout_period = (1200 * HZ); + priv->max_retry_count = 1; + priv->aging_time = (300 * HZ); + priv->forward_delay_time = (15 * HZ); + priv->topology_change = 0; + priv->arp_response_time = (1 * HZ); + priv->flush_timeout = (4 * HZ); + priv->path_switching_delay = (6 * HZ); + + if (dev_lec[i]->flags & IFF_UP) { + netif_start_queue(dev_lec[i]); + } + __module_get(THIS_MODULE); + return i; } #ifdef CONFIG_PROC_FS -static char* lec_arp_get_status_string(unsigned char status) +static char *lec_arp_get_status_string(unsigned char status) { static char *lec_arp_status_string[] = { "ESI_UNKNOWN ", @@ -966,52 
+1003,54 @@ if (entry->vcc) seq_printf(seq, "%3d %3d ", entry->vcc->vpi, entry->vcc->vci); else - seq_printf(seq, " "); + seq_printf(seq, " "); if (entry->recv_vcc) { seq_printf(seq, " %3d %3d", entry->recv_vcc->vpi, entry->recv_vcc->vci); - } - seq_putc(seq, '\n'); + } + seq_putc(seq, '\n'); } - struct lec_state { unsigned long flags; struct lec_priv *locked; - struct lec_arp_table *entry; + struct hlist_node *node; struct net_device *dev; int itf; int arp_table; int misc_table; }; -static void *lec_tbl_walk(struct lec_state *state, struct lec_arp_table *tbl, +static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl, loff_t *l) { - struct lec_arp_table *e = state->entry; + struct hlist_node *e = state->node; + struct lec_arp_table *tmp; if (!e) - e = tbl; + e = tbl->first; if (e == (void *)1) { - e = tbl; + e = tbl->first; --*l; } - for (; e; e = e->next) { + + hlist_for_each_entry_from(tmp, e, next) { if (--*l < 0) break; } - state->entry = e; + state->node = e; + return (*l < 0) ? 
state : NULL; } static void *lec_arp_walk(struct lec_state *state, loff_t *l, - struct lec_priv *priv) + struct lec_priv *priv) { void *v = NULL; int p; for (p = state->arp_table; p < LEC_ARP_TABLE_SIZE; p++) { - v = lec_tbl_walk(state, priv->lec_arp_tables[p], l); + v = lec_tbl_walk(state, &priv->lec_arp_tables[p], l); if (v) break; } @@ -1022,10 +1061,10 @@ static void *lec_misc_walk(struct lec_state *state, loff_t *l, struct lec_priv *priv) { - struct lec_arp_table *lec_misc_tables[] = { - priv->lec_arp_empty_ones, - priv->lec_no_forward, - priv->mcast_fwds + struct hlist_head *lec_misc_tables[] = { + &priv->lec_arp_empty_ones, + &priv->lec_no_forward, + &priv->mcast_fwds }; void *v = NULL; int q; @@ -1046,8 +1085,7 @@ state->locked = priv; spin_lock_irqsave(&priv->lec_arp_lock, state->flags); } - if (!lec_arp_walk(state, l, priv) && - !lec_misc_walk(state, l, priv)) { + if (!lec_arp_walk(state, l, priv) && !lec_misc_walk(state, l, priv)) { spin_unlock_irqrestore(&priv->lec_arp_lock, state->flags); state->locked = NULL; /* Partial state reset for the next time we get called */ @@ -1081,7 +1119,7 @@ if (v) break; } - return v; + return v; } static void *lec_seq_start(struct seq_file *seq, loff_t *pos) @@ -1093,9 +1131,9 @@ state->locked = NULL; state->arp_table = 0; state->misc_table = 0; - state->entry = (void *)1; + state->node = (void *)1; - return *pos ? lec_get_idx(state, *pos) : (void*)1; + return *pos ? 
lec_get_idx(state, *pos) : (void *)1; } static void lec_seq_stop(struct seq_file *seq, void *v) @@ -1120,27 +1158,28 @@ static int lec_seq_show(struct seq_file *seq, void *v) { - static char lec_banner[] = "Itf MAC ATM destination" - " Status Flags " - "VPI/VCI Recv VPI/VCI\n"; + static char lec_banner[] = "Itf MAC ATM destination" + " Status Flags " + "VPI/VCI Recv VPI/VCI\n"; if (v == (void *)1) seq_puts(seq, lec_banner); else { struct lec_state *state = seq->private; - struct net_device *dev = state->dev; + struct net_device *dev = state->dev; + struct lec_arp_table *entry = hlist_entry(state->node, struct lec_arp_table, next); seq_printf(seq, "%s ", dev->name); - lec_info(seq, state->entry); + lec_info(seq, entry); } return 0; } static struct seq_operations lec_seq_ops = { - .start = lec_seq_start, - .next = lec_seq_next, - .stop = lec_seq_stop, - .show = lec_seq_show, + .start = lec_seq_start, + .next = lec_seq_next, + .stop = lec_seq_stop, + .show = lec_seq_show, }; static int lec_seq_open(struct inode *inode, struct file *file) @@ -1174,11 +1213,11 @@ } static struct file_operations lec_seq_fops = { - .owner = THIS_MODULE, - .open = lec_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = lec_seq_release, + .owner = THIS_MODULE, + .open = lec_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = lec_seq_release, }; #endif @@ -1186,38 +1225,38 @@ { struct atm_vcc *vcc = ATM_SD(sock); int err = 0; - + switch (cmd) { - case ATMLEC_CTRL: - case ATMLEC_MCAST: - case ATMLEC_DATA: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; - default: - return -ENOIOCTLCMD; + case ATMLEC_CTRL: + case ATMLEC_MCAST: + case ATMLEC_DATA: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + break; + default: + return -ENOIOCTLCMD; } switch (cmd) { - case ATMLEC_CTRL: - err = lecd_attach(vcc, (int) arg); - if (err >= 0) - sock->state = SS_CONNECTED; - break; - case ATMLEC_MCAST: - err = lec_mcast_attach(vcc, (int) arg); - break; - case ATMLEC_DATA: - 
err = lec_vcc_attach(vcc, (void __user *) arg); - break; + case ATMLEC_CTRL: + err = lecd_attach(vcc, (int)arg); + if (err >= 0) + sock->state = SS_CONNECTED; + break; + case ATMLEC_MCAST: + err = lec_mcast_attach(vcc, (int)arg); + break; + case ATMLEC_DATA: + err = lec_vcc_attach(vcc, (void __user *)arg); + break; } return err; } static struct atm_ioctl lane_ioctl_ops = { - .owner = THIS_MODULE, - .ioctl = lane_ioctl, + .owner = THIS_MODULE, + .ioctl = lane_ioctl, }; static int __init lane_module_init(void) @@ -1231,29 +1270,29 @@ #endif register_atm_ioctl(&lane_ioctl_ops); - printk("lec.c: " __DATE__ " " __TIME__ " initialized\n"); - return 0; + printk("lec.c: " __DATE__ " " __TIME__ " initialized\n"); + return 0; } static void __exit lane_module_cleanup(void) { - int i; - struct lec_priv *priv; + int i; + struct lec_priv *priv; remove_proc_entry("lec", atm_proc_root); deregister_atm_ioctl(&lane_ioctl_ops); - for (i = 0; i < MAX_LEC_ITF; i++) { - if (dev_lec[i] != NULL) { - priv = (struct lec_priv *)dev_lec[i]->priv; + for (i = 0; i < MAX_LEC_ITF; i++) { + if (dev_lec[i] != NULL) { + priv = (struct lec_priv *)dev_lec[i]->priv; unregister_netdev(dev_lec[i]); - free_netdev(dev_lec[i]); - dev_lec[i] = NULL; - } - } + free_netdev(dev_lec[i]); + dev_lec[i] = NULL; + } + } - return; + return; } module_init(lane_module_init); @@ -1267,34 +1306,34 @@ * If dst_mac == NULL, targetless LE_ARP will be sent */ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs) + u8 **tlvs, u32 *sizeoftlvs) { unsigned long flags; - struct lec_priv *priv = (struct lec_priv *)dev->priv; - struct lec_arp_table *table; - struct sk_buff *skb; - int retval; + struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_arp_table *table; + struct sk_buff *skb; + int retval; - if (force == 0) { + if (force == 0) { spin_lock_irqsave(&priv->lec_arp_lock, flags); - table = lec_arp_find(priv, dst_mac); + table = lec_arp_find(priv, dst_mac); 
spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - if(table == NULL) - return -1; - - *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC); - if (*tlvs == NULL) - return -1; - - memcpy(*tlvs, table->tlvs, table->sizeoftlvs); - *sizeoftlvs = table->sizeoftlvs; - - return 0; - } + if (table == NULL) + return -1; + + *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC); + if (*tlvs == NULL) + return -1; + + memcpy(*tlvs, table->tlvs, table->sizeoftlvs); + *sizeoftlvs = table->sizeoftlvs; + + return 0; + } if (sizeoftlvs == NULL) retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, NULL); - + else { skb = alloc_skb(*sizeoftlvs, GFP_ATOMIC); if (skb == NULL) @@ -1303,9 +1342,8 @@ memcpy(skb->data, *tlvs, *sizeoftlvs); retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb); } - return retval; -} - + return retval; +} /* * LANE2: 3.1.4, LE_ASSOCIATE.request @@ -1314,80 +1352,85 @@ * Returns 1 for success, 0 for failure (out of memory) * */ -static int lane2_associate_req (struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs) +static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, + u8 *tlvs, u32 sizeoftlvs) { - int retval; - struct sk_buff *skb; - struct lec_priv *priv = (struct lec_priv*)dev->priv; - - if (compare_ether_addr(lan_dst, dev->dev_addr)) - return (0); /* not our mac address */ - - kfree(priv->tlvs); /* NULL if there was no previous association */ - - priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); - if (priv->tlvs == NULL) - return (0); - priv->sizeoftlvs = sizeoftlvs; - memcpy(priv->tlvs, tlvs, sizeoftlvs); - - skb = alloc_skb(sizeoftlvs, GFP_ATOMIC); - if (skb == NULL) - return 0; - skb->len = sizeoftlvs; - memcpy(skb->data, tlvs, sizeoftlvs); - retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb); - if (retval != 0) - printk("lec.c: lane2_associate_req() failed\n"); - /* If the previous association has changed we must - * somehow notify other LANE entities about the change - */ - return (1); + int retval; + struct sk_buff *skb; + 
struct lec_priv *priv = (struct lec_priv *)dev->priv; + + if (compare_ether_addr(lan_dst, dev->dev_addr)) + return (0); /* not our mac address */ + + kfree(priv->tlvs); /* NULL if there was no previous association */ + + priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); + if (priv->tlvs == NULL) + return (0); + priv->sizeoftlvs = sizeoftlvs; + memcpy(priv->tlvs, tlvs, sizeoftlvs); + + skb = alloc_skb(sizeoftlvs, GFP_ATOMIC); + if (skb == NULL) + return 0; + skb->len = sizeoftlvs; + memcpy(skb->data, tlvs, sizeoftlvs); + retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb); + if (retval != 0) + printk("lec.c: lane2_associate_req() failed\n"); + /* + * If the previous association has changed we must + * somehow notify other LANE entities about the change + */ + return (1); } /* * LANE2: 3.1.5, LE_ASSOCIATE.indication * */ -static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs) +static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, + u8 *tlvs, u32 sizeoftlvs) { #if 0 - int i = 0; + int i = 0; #endif struct lec_priv *priv = (struct lec_priv *)dev->priv; -#if 0 /* Why have the TLVs in LE_ARP entries since we do not use them? When you - uncomment this code, make sure the TLVs get freed when entry is killed */ - struct lec_arp_table *entry = lec_arp_find(priv, mac_addr); +#if 0 /* + * Why have the TLVs in LE_ARP entries + * since we do not use them? 
When you + * uncomment this code, make sure the + * TLVs get freed when entry is killed + */ + struct lec_arp_table *entry = lec_arp_find(priv, mac_addr); - if (entry == NULL) - return; /* should not happen */ + if (entry == NULL) + return; /* should not happen */ - kfree(entry->tlvs); + kfree(entry->tlvs); - entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); - if (entry->tlvs == NULL) - return; + entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); + if (entry->tlvs == NULL) + return; - entry->sizeoftlvs = sizeoftlvs; - memcpy(entry->tlvs, tlvs, sizeoftlvs); + entry->sizeoftlvs = sizeoftlvs; + memcpy(entry->tlvs, tlvs, sizeoftlvs); #endif #if 0 - printk("lec.c: lane2_associate_ind()\n"); - printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs); - while (i < sizeoftlvs) - printk("%02x ", tlvs[i++]); - - printk("\n"); + printk("lec.c: lane2_associate_ind()\n"); + printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs); + while (i < sizeoftlvs) + printk("%02x ", tlvs[i++]); + + printk("\n"); #endif - /* tell MPOA about the TLVs we saw */ - if (priv->lane2_ops && priv->lane2_ops->associate_indicator) { - priv->lane2_ops->associate_indicator(dev, mac_addr, - tlvs, sizeoftlvs); - } - return; + /* tell MPOA about the TLVs we saw */ + if (priv->lane2_ops && priv->lane2_ops->associate_indicator) { + priv->lane2_ops->associate_indicator(dev, mac_addr, + tlvs, sizeoftlvs); + } + return; } /* @@ -1395,7 +1438,6 @@ * * lec_arpc.c was added here when making * lane client modular. October 1997 - * */ #include @@ -1406,7 +1448,6 @@ #include #include - #if 0 #define DPRINTK(format,args...) 
/* @@ -1417,7 +1458,7 @@ #define LEC_ARP_REFRESH_INTERVAL (3*HZ) -static void lec_arp_check_expire(unsigned long data); +static void lec_arp_check_expire(void *data); static void lec_arp_expire_arp(unsigned long data); /* @@ -1429,474 +1470,397 @@ /* * Initialization of arp-cache */ -static void -lec_arp_init(struct lec_priv *priv) +static void lec_arp_init(struct lec_priv *priv) { - unsigned short i; + unsigned short i; - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - priv->lec_arp_tables[i] = NULL; - } + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); + } + INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); + INIT_HLIST_HEAD(&priv->lec_no_forward); + INIT_HLIST_HEAD(&priv->mcast_fwds); spin_lock_init(&priv->lec_arp_lock); - init_timer(&priv->lec_arp_timer); - priv->lec_arp_timer.expires = jiffies + LEC_ARP_REFRESH_INTERVAL; - priv->lec_arp_timer.data = (unsigned long)priv; - priv->lec_arp_timer.function = lec_arp_check_expire; - add_timer(&priv->lec_arp_timer); + INIT_WORK(&priv->lec_arp_work, lec_arp_check_expire, priv); + schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); } -static void -lec_arp_clear_vccs(struct lec_arp_table *entry) +static void lec_arp_clear_vccs(struct lec_arp_table *entry) { - if (entry->vcc) { + if (entry->vcc) { struct atm_vcc *vcc = entry->vcc; struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - struct net_device *dev = (struct net_device*) vcc->proto_data; + struct net_device *dev = (struct net_device *)vcc->proto_data; - vcc->pop = vpriv->old_pop; + vcc->pop = vpriv->old_pop; if (vpriv->xoff) netif_wake_queue(dev); kfree(vpriv); vcc->user_back = NULL; - vcc->push = entry->old_push; + vcc->push = entry->old_push; vcc_release_async(vcc, -EPIPE); - vcc = NULL; - } - if (entry->recv_vcc) { - entry->recv_vcc->push = entry->old_recv_push; + entry->vcc = NULL; + } + if (entry->recv_vcc) { + entry->recv_vcc->push = entry->old_recv_push; vcc_release_async(entry->recv_vcc, -EPIPE); - 
entry->recv_vcc = NULL; - } + entry->recv_vcc = NULL; + } } /* * Insert entry to lec_arp_table * LANE2: Add to the end of the list to satisfy 8.1.13 */ -static inline void -lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add) +static inline void +lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) { - unsigned short place; - struct lec_arp_table *tmp; + struct hlist_head *tmp; + + tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])]; + hlist_add_head(&entry->next, tmp); - place = HASH(to_add->mac_addr[ETH_ALEN-1]); - tmp = priv->lec_arp_tables[place]; - to_add->next = NULL; - if (tmp == NULL) - priv->lec_arp_tables[place] = to_add; - - else { /* add to the end */ - while (tmp->next) - tmp = tmp->next; - tmp->next = to_add; - } - - DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - 0xff&to_add->mac_addr[0], 0xff&to_add->mac_addr[1], - 0xff&to_add->mac_addr[2], 0xff&to_add->mac_addr[3], - 0xff&to_add->mac_addr[4], 0xff&to_add->mac_addr[5]); + DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + 0xff & entry->mac_addr[0], 0xff & entry->mac_addr[1], + 0xff & entry->mac_addr[2], 0xff & entry->mac_addr[3], + 0xff & entry->mac_addr[4], 0xff & entry->mac_addr[5]); } /* * Remove entry from lec_arp_table */ -static int -lec_arp_remove(struct lec_priv *priv, - struct lec_arp_table *to_remove) -{ - unsigned short place; - struct lec_arp_table *tmp; - int remove_vcc=1; - - if (!to_remove) { - return -1; - } - place = HASH(to_remove->mac_addr[ETH_ALEN-1]); - tmp = priv->lec_arp_tables[place]; - if (tmp == to_remove) { - priv->lec_arp_tables[place] = tmp->next; - } else { - while(tmp && tmp->next != to_remove) { - tmp = tmp->next; - } - if (!tmp) {/* Entry was not found */ - return -1; - } - } - tmp->next = to_remove->next; - del_timer(&to_remove->timer); - - /* If this is the only MAC connected to this VCC, also tear down - the VCC */ - if (to_remove->status >= ESI_FLUSH_PENDING) { - /* - * 
ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT - */ - for(place = 0; place < LEC_ARP_TABLE_SIZE; place++) { - for(tmp = priv->lec_arp_tables[place]; tmp != NULL; tmp = tmp->next) { - if (memcmp(tmp->atm_addr, to_remove->atm_addr, - ATM_ESA_LEN)==0) { - remove_vcc=0; - break; - } - } - } - if (remove_vcc) - lec_arp_clear_vccs(to_remove); - } - skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - - DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - 0xff&to_remove->mac_addr[0], 0xff&to_remove->mac_addr[1], - 0xff&to_remove->mac_addr[2], 0xff&to_remove->mac_addr[3], - 0xff&to_remove->mac_addr[4], 0xff&to_remove->mac_addr[5]); - return 0; +static int +lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) +{ + struct hlist_node *node; + struct lec_arp_table *entry; + int i, remove_vcc = 1; + + if (!to_remove) { + return -1; + } + + hlist_del(&to_remove->next); + del_timer(&to_remove->timer); + + /* If this is the only MAC connected to this VCC, also tear down the VCC */ + if (to_remove->status >= ESI_FLUSH_PENDING) { + /* + * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT + */ + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (memcmp(to_remove->atm_addr, + entry->atm_addr, ATM_ESA_LEN) == 0) { + remove_vcc = 0; + break; + } + } + } + if (remove_vcc) + lec_arp_clear_vccs(to_remove); + } + skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? 
*/ + + DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + 0xff & to_remove->mac_addr[0], 0xff & to_remove->mac_addr[1], + 0xff & to_remove->mac_addr[2], 0xff & to_remove->mac_addr[3], + 0xff & to_remove->mac_addr[4], 0xff & to_remove->mac_addr[5]); + return 0; } #if DEBUG_ARP_TABLE -static char* -get_status_string(unsigned char st) +static char *get_status_string(unsigned char st) { - switch(st) { - case ESI_UNKNOWN: - return "ESI_UNKNOWN"; - case ESI_ARP_PENDING: - return "ESI_ARP_PENDING"; - case ESI_VC_PENDING: - return "ESI_VC_PENDING"; - case ESI_FLUSH_PENDING: - return "ESI_FLUSH_PENDING"; - case ESI_FORWARD_DIRECT: - return "ESI_FORWARD_DIRECT"; - default: - return ""; - } + switch (st) { + case ESI_UNKNOWN: + return "ESI_UNKNOWN"; + case ESI_ARP_PENDING: + return "ESI_ARP_PENDING"; + case ESI_VC_PENDING: + return "ESI_VC_PENDING"; + case ESI_FLUSH_PENDING: + return "ESI_FLUSH_PENDING"; + case ESI_FORWARD_DIRECT: + return "ESI_FORWARD_DIRECT"; + default: + return ""; + } } -#endif -static void -dump_arp_table(struct lec_priv *priv) -{ -#if DEBUG_ARP_TABLE - int i,j, offset; - struct lec_arp_table *rulla; - char buf[1024]; - struct lec_arp_table **lec_arp_tables = - (struct lec_arp_table **)priv->lec_arp_tables; - struct lec_arp_table *lec_arp_empty_ones = - (struct lec_arp_table *)priv->lec_arp_empty_ones; - struct lec_arp_table *lec_no_forward = - (struct lec_arp_table *)priv->lec_no_forward; - struct lec_arp_table *mcast_fwds = priv->mcast_fwds; - - - printk("Dump %p:\n",priv); - for (i=0;imac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;jatm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, 
- "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%p\n",rulla->next); - rulla = rulla->next; - } - printk("%s",buf); - } - rulla = lec_no_forward; - if (rulla) - printk("No forward\n"); - while(rulla) { - offset=0; - offset += sprintf(buf+offset,"Mac:"); - for(j=0;jmac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;jatm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next); - rulla = rulla->next; - printk("%s",buf); - } - rulla = lec_arp_empty_ones; - if (rulla) - printk("Empty ones\n"); - while(rulla) { - offset=0; - offset += sprintf(buf+offset,"Mac:"); - for(j=0;jmac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;jatm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next); - rulla = rulla->next; - printk("%s",buf); - } - - rulla = mcast_fwds; - if (rulla) - printk("Multicast Forward VCCs\n"); - while(rulla) { - offset=0; - offset 
+= sprintf(buf+offset,"Mac:"); - for(j=0;jmac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;jatm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next); - rulla = rulla->next; - printk("%s",buf); - } +static void dump_arp_table(struct lec_priv *priv) +{ + struct hlist_node *node; + struct lec_arp_table *rulla; + char buf[256]; + int i, j, offset; + + printk("Dump %p:\n", priv); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(rulla, node, &priv->lec_arp_tables[i], next) { + offset = 0; + offset += sprintf(buf, "%d: %p\n", i, rulla); + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, + "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, + "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? rulla->recv_vcc-> + vpi : 0, + rulla->recv_vcc ? 
rulla->recv_vcc-> + vci : 0, rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += + sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s\n", buf); + } + } + + if (!hlist_empty(&priv->lec_no_forward)) + printk("No forward\n"); + hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) { + offset = 0; + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, + rulla->recv_vcc ? rulla->recv_vcc->vci : 0, + rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s\n", buf); + } + + if (!hlist_empty(&priv->lec_arp_empty_ones)) + printk("Empty ones\n"); + hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) { + offset = 0; + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? 
rulla->recv_vcc->vpi : 0, + rulla->recv_vcc ? rulla->recv_vcc->vci : 0, + rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s", buf); + } + + if (!hlist_empty(&priv->mcast_fwds)) + printk("Multicast Forward VCCs\n"); + hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) { + offset = 0; + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, + rulla->recv_vcc ? 
rulla->recv_vcc->vci : 0, + rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s\n", buf); + } -#endif } +#else +#define dump_arp_table(priv) do { } while (0) +#endif /* * Destruction of arp-cache */ -static void -lec_arp_destroy(struct lec_priv *priv) +static void lec_arp_destroy(struct lec_priv *priv) { unsigned long flags; - struct lec_arp_table *entry, *next; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + int i; + + cancel_rearming_delayed_work(&priv->lec_arp_work); - del_timer_sync(&priv->lec_arp_timer); - - /* - * Remove all entries - */ + /* + * Remove all entries + */ spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(entry = priv->lec_arp_tables[i]; entry != NULL; entry=next) { - next = entry->next; - lec_arp_remove(priv, entry); - kfree(entry); - } - } - entry = priv->lec_arp_empty_ones; - while(entry) { - next = entry->next; - del_timer_sync(&entry->timer); - lec_arp_clear_vccs(entry); - kfree(entry); - entry = next; - } - priv->lec_arp_empty_ones = NULL; - entry = priv->lec_no_forward; - while(entry) { - next = entry->next; - del_timer_sync(&entry->timer); - lec_arp_clear_vccs(entry); - kfree(entry); - entry = next; - } - priv->lec_no_forward = NULL; - entry = priv->mcast_fwds; - while(entry) { - next = entry->next; - /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ - lec_arp_clear_vccs(entry); - kfree(entry); - entry = next; - } - priv->mcast_fwds = NULL; - priv->mcast_vcc = NULL; - memset(priv->lec_arp_tables, 0, - sizeof(struct lec_arp_table *) * LEC_ARP_TABLE_SIZE); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + lec_arp_remove(priv, entry); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); + } + + 
hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + del_timer_sync(&entry->timer); + lec_arp_clear_vccs(entry); + hlist_del(&entry->next); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); + + hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) { + del_timer_sync(&entry->timer); + lec_arp_clear_vccs(entry); + hlist_del(&entry->next); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->lec_no_forward); + + hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ + lec_arp_clear_vccs(entry); + hlist_del(&entry->next); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->mcast_fwds); + priv->mcast_vcc = NULL; spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } - /* * Find entry by mac_address */ -static struct lec_arp_table* -lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr) -{ - unsigned short place; - struct lec_arp_table *to_return; - - DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - mac_addr[0]&0xff, mac_addr[1]&0xff, mac_addr[2]&0xff, - mac_addr[3]&0xff, mac_addr[4]&0xff, mac_addr[5]&0xff); - place = HASH(mac_addr[ETH_ALEN-1]); - - to_return = priv->lec_arp_tables[place]; - while(to_return) { - if (!compare_ether_addr(mac_addr, to_return->mac_addr)) { - return to_return; - } - to_return = to_return->next; - } - return NULL; -} - -static struct lec_arp_table* -make_entry(struct lec_priv *priv, unsigned char *mac_addr) -{ - struct lec_arp_table *to_return; - - to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC); - if (!to_return) { - printk("LEC: Arp entry kmalloc failed\n"); - return NULL; - } - memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); - init_timer(&to_return->timer); - to_return->timer.function = lec_arp_expire_arp; - to_return->timer.data = (unsigned long) to_return; - to_return->last_used = jiffies; - to_return->priv = priv; - skb_queue_head_init(&to_return->tx_wait); - 
return to_return; +static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, + unsigned char *mac_addr) +{ + struct hlist_node *node; + struct hlist_head *head; + struct lec_arp_table *entry; + + DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + mac_addr[0] & 0xff, mac_addr[1] & 0xff, mac_addr[2] & 0xff, + mac_addr[3] & 0xff, mac_addr[4] & 0xff, mac_addr[5] & 0xff); + + head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; + hlist_for_each_entry(entry, node, head, next) { + if (!compare_ether_addr(mac_addr, entry->mac_addr)) { + return entry; + } + } + return NULL; } -/* - * - * Arp sent timer expired - * - */ -static void -lec_arp_expire_arp(unsigned long data) +static struct lec_arp_table *make_entry(struct lec_priv *priv, + unsigned char *mac_addr) +{ + struct lec_arp_table *to_return; + + to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC); + if (!to_return) { + printk("LEC: Arp entry kmalloc failed\n"); + return NULL; + } + memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); + INIT_HLIST_NODE(&to_return->next); + init_timer(&to_return->timer); + to_return->timer.function = lec_arp_expire_arp; + to_return->timer.data = (unsigned long)to_return; + to_return->last_used = jiffies; + to_return->priv = priv; + skb_queue_head_init(&to_return->tx_wait); + atomic_set(&to_return->usage, 1); + return to_return; +} + +/* Arp sent timer expired */ +static void lec_arp_expire_arp(unsigned long data) { - struct lec_arp_table *entry; + struct lec_arp_table *entry; - entry = (struct lec_arp_table *)data; + entry = (struct lec_arp_table *)data; - DPRINTK("lec_arp_expire_arp\n"); - if (entry->status == ESI_ARP_PENDING) { - if (entry->no_tries <= entry->priv->max_retry_count) { - if (entry->is_rdesc) - send_to_lecd(entry->priv, l_rdesc_arp_xmt, entry->mac_addr, NULL, NULL); - else - send_to_lecd(entry->priv, l_arp_xmt, entry->mac_addr, NULL, NULL); - entry->no_tries++; - } - mod_timer(&entry->timer, jiffies + (1*HZ)); - } + 
DPRINTK("lec_arp_expire_arp\n"); + if (entry->status == ESI_ARP_PENDING) { + if (entry->no_tries <= entry->priv->max_retry_count) { + if (entry->is_rdesc) + send_to_lecd(entry->priv, l_rdesc_arp_xmt, + entry->mac_addr, NULL, NULL); + else + send_to_lecd(entry->priv, l_arp_xmt, + entry->mac_addr, NULL, NULL); + entry->no_tries++; + } + mod_timer(&entry->timer, jiffies + (1 * HZ)); + } } -/* - * - * Unknown/unused vcc expire, remove associated entry - * - */ -static void -lec_arp_expire_vcc(unsigned long data) +/* Unknown/unused vcc expire, remove associated entry */ +static void lec_arp_expire_vcc(unsigned long data) { unsigned long flags; - struct lec_arp_table *to_remove = (struct lec_arp_table*)data; - struct lec_priv *priv = (struct lec_priv *)to_remove->priv; - struct lec_arp_table *entry = NULL; - - del_timer(&to_remove->timer); - - DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", - to_remove, priv, - to_remove->vcc?to_remove->recv_vcc->vpi:0, - to_remove->vcc?to_remove->recv_vcc->vci:0); - DPRINTK("eo:%p nf:%p\n",priv->lec_arp_empty_ones,priv->lec_no_forward); + struct lec_arp_table *to_remove = (struct lec_arp_table *)data; + struct lec_priv *priv = (struct lec_priv *)to_remove->priv; + + del_timer(&to_remove->timer); + + DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", + to_remove, priv, + to_remove->vcc ? to_remove->recv_vcc->vpi : 0, + to_remove->vcc ? 
to_remove->recv_vcc->vci : 0); spin_lock_irqsave(&priv->lec_arp_lock, flags); - if (to_remove == priv->lec_arp_empty_ones) - priv->lec_arp_empty_ones = to_remove->next; - else { - entry = priv->lec_arp_empty_ones; - while (entry && entry->next != to_remove) - entry = entry->next; - if (entry) - entry->next = to_remove->next; - } - if (!entry) { - if (to_remove == priv->lec_no_forward) { - priv->lec_no_forward = to_remove->next; - } else { - entry = priv->lec_no_forward; - while (entry && entry->next != to_remove) - entry = entry->next; - if (entry) - entry->next = to_remove->next; - } - } + hlist_del(&to_remove->next); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - lec_arp_clear_vccs(to_remove); - kfree(to_remove); + lec_arp_clear_vccs(to_remove); + lec_arp_put(to_remove); } /* @@ -1915,158 +1879,171 @@ * to ESI_FORWARD_DIRECT. This causes the flush period to end * regardless of the progress of the flush protocol. */ -static void -lec_arp_check_expire(unsigned long data) +static void lec_arp_check_expire(void *data) { unsigned long flags; - struct lec_priv *priv = (struct lec_priv *)data; - struct lec_arp_table *entry, *next; - unsigned long now; - unsigned long time_to_check; - int i; - - DPRINTK("lec_arp_check_expire %p\n",priv); - DPRINTK("expire: eo:%p nf:%p\n",priv->lec_arp_empty_ones, - priv->lec_no_forward); + struct lec_priv *priv = data; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + unsigned long now; + unsigned long time_to_check; + int i; + + DPRINTK("lec_arp_check_expire %p\n", priv); now = jiffies; +restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); - for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(entry = priv->lec_arp_tables[i]; entry != NULL; ) { - if ((entry->flags) & LEC_REMOTE_FLAG && + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + if ((entry->flags) & LEC_REMOTE_FLAG && priv->topology_change) time_to_check = priv->forward_delay_time; 
else time_to_check = priv->aging_time; DPRINTK("About to expire: %lx - %lx > %lx\n", - now,entry->last_used, time_to_check); - if( time_after(now, entry->last_used+ - time_to_check) && - !(entry->flags & LEC_PERMANENT_FLAG) && - !(entry->mac_addr[0] & 0x01) ) { /* LANE2: 7.1.20 */ + now, entry->last_used, time_to_check); + if (time_after(now, entry->last_used + time_to_check) + && !(entry->flags & LEC_PERMANENT_FLAG) + && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ /* Remove entry */ DPRINTK("LEC:Entry timed out\n"); - next = entry->next; lec_arp_remove(priv, entry); - kfree(entry); - entry = next; + lec_arp_put(entry); } else { /* Something else */ if ((entry->status == ESI_VC_PENDING || - entry->status == ESI_ARP_PENDING) + entry->status == ESI_ARP_PENDING) && time_after_eq(now, - entry->timestamp + - priv->max_unknown_frame_time)) { + entry->timestamp + + priv-> + max_unknown_frame_time)) { entry->timestamp = jiffies; entry->packets_flooded = 0; if (entry->status == ESI_VC_PENDING) - send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL); + send_to_lecd(priv, l_svc_setup, + entry->mac_addr, + entry->atm_addr, + NULL); } - if (entry->status == ESI_FLUSH_PENDING - && - time_after_eq(now, entry->timestamp+ - priv->path_switching_delay)) { + if (entry->status == ESI_FLUSH_PENDING + && + time_after_eq(now, entry->timestamp + + priv->path_switching_delay)) { struct sk_buff *skb; + struct atm_vcc *vcc = entry->vcc; + lec_arp_hold(entry); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(entry->vcc, skb, entry->priv); + lec_send(vcc, skb, entry->priv); entry->last_used = jiffies; - entry->status = - ESI_FORWARD_DIRECT; + entry->status = ESI_FORWARD_DIRECT; + lec_arp_put(entry); + goto restart; } - entry = entry->next; } } } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - mod_timer(&priv->lec_arp_timer, jiffies + LEC_ARP_REFRESH_INTERVAL); + 
schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); } + /* * Try to find vcc where mac_address is attached. * */ -static struct atm_vcc* -lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, - int is_rdesc, struct lec_arp_table **ret_entry) +static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, + unsigned char *mac_to_find, int is_rdesc, + struct lec_arp_table **ret_entry) { unsigned long flags; - struct lec_arp_table *entry; + struct lec_arp_table *entry; struct atm_vcc *found; - if (mac_to_find[0] & 0x01) { - switch (priv->lane_version) { - case 1: - return priv->mcast_vcc; - break; - case 2: /* LANE2 wants arp for multicast addresses */ - if (!compare_ether_addr(mac_to_find, bus_mac)) - return priv->mcast_vcc; - break; - default: - break; - } - } + if (mac_to_find[0] & 0x01) { + switch (priv->lane_version) { + case 1: + return priv->mcast_vcc; + break; + case 2: /* LANE2 wants arp for multicast addresses */ + if (!compare_ether_addr(mac_to_find, bus_mac)) + return priv->mcast_vcc; + break; + default: + break; + } + } spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mac_to_find); - - if (entry) { - if (entry->status == ESI_FORWARD_DIRECT) { - /* Connection Ok */ - entry->last_used = jiffies; - *ret_entry = entry; - found = entry->vcc; + entry = lec_arp_find(priv, mac_to_find); + + if (entry) { + if (entry->status == ESI_FORWARD_DIRECT) { + /* Connection Ok */ + entry->last_used = jiffies; + lec_arp_hold(entry); + *ret_entry = entry; + found = entry->vcc; goto out; - } - /* If the LE_ARP cache entry is still pending, reset count to 0 + } + /* + * If the LE_ARP cache entry is still pending, reset count to 0 * so another LE_ARP request can be made for this frame. */ if (entry->status == ESI_ARP_PENDING) { entry->no_tries = 0; } - /* Data direct VC not yet set up, check to see if the unknown - frame count is greater than the limit. 
If the limit has - not been reached, allow the caller to send packet to - BUS. */ - if (entry->status != ESI_FLUSH_PENDING && - entry->packets_floodedmaximum_unknown_frame_count) { - entry->packets_flooded++; - DPRINTK("LEC_ARP: Flooding..\n"); - found = priv->mcast_vcc; + /* + * Data direct VC not yet set up, check to see if the unknown + * frame count is greater than the limit. If the limit has + * not been reached, allow the caller to send packet to + * BUS. + */ + if (entry->status != ESI_FLUSH_PENDING && + entry->packets_flooded < + priv->maximum_unknown_frame_count) { + entry->packets_flooded++; + DPRINTK("LEC_ARP: Flooding..\n"); + found = priv->mcast_vcc; goto out; - } - /* We got here because entry->status == ESI_FLUSH_PENDING + } + /* + * We got here because entry->status == ESI_FLUSH_PENDING * or BUS flood limit was reached for an entry which is * in ESI_ARP_PENDING or ESI_VC_PENDING state. */ - *ret_entry = entry; - DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc); - found = NULL; - } else { - /* No matching entry was found */ - entry = make_entry(priv, mac_to_find); - DPRINTK("LEC_ARP: Making entry\n"); - if (!entry) { - found = priv->mcast_vcc; + lec_arp_hold(entry); + *ret_entry = entry; + DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, + entry->vcc); + found = NULL; + } else { + /* No matching entry was found */ + entry = make_entry(priv, mac_to_find); + DPRINTK("LEC_ARP: Making entry\n"); + if (!entry) { + found = priv->mcast_vcc; goto out; - } - lec_arp_add(priv, entry); - /* We want arp-request(s) to be sent */ - entry->packets_flooded =1; - entry->status = ESI_ARP_PENDING; - entry->no_tries = 1; - entry->last_used = entry->timestamp = jiffies; - entry->is_rdesc = is_rdesc; - if (entry->is_rdesc) - send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL, NULL); - else - send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); - entry->timer.expires = jiffies + (1*HZ); - entry->timer.function = 
lec_arp_expire_arp; - add_timer(&entry->timer); - found = priv->mcast_vcc; - } + } + lec_arp_add(priv, entry); + /* We want arp-request(s) to be sent */ + entry->packets_flooded = 1; + entry->status = ESI_ARP_PENDING; + entry->no_tries = 1; + entry->last_used = entry->timestamp = jiffies; + entry->is_rdesc = is_rdesc; + if (entry->is_rdesc) + send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL, + NULL); + else + send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); + entry->timer.expires = jiffies + (1 * HZ); + entry->timer.function = lec_arp_expire_arp; + add_timer(&entry->timer); + found = priv->mcast_vcc; + } out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2074,30 +2051,30 @@ } static int -lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, - unsigned long permanent) +lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, + unsigned long permanent) { unsigned long flags; - struct lec_arp_table *entry, *next; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + int i; - DPRINTK("lec_addr_delete\n"); + DPRINTK("lec_addr_delete\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); - for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(entry = priv->lec_arp_tables[i]; entry != NULL; entry = next) { - next = entry->next; - if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) - && (permanent || - !(entry->flags & LEC_PERMANENT_FLAG))) { + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) + && (permanent || + !(entry->flags & LEC_PERMANENT_FLAG))) { lec_arp_remove(priv, entry); - kfree(entry); - } + lec_arp_put(entry); + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return 0; - } - } + return 0; + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return -1; + return -1; } /* @@ -2105,109 +2082,98 @@ */ static void lec_arp_update(struct lec_priv *priv, unsigned char 
*mac_addr, - unsigned char *atm_addr, unsigned long remoteflag, - unsigned int targetless_le_arp) + unsigned char *atm_addr, unsigned long remoteflag, + unsigned int targetless_le_arp) { unsigned long flags; - struct lec_arp_table *entry, *tmp; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry, *tmp; + int i; - DPRINTK("lec:%s", (targetless_le_arp) ? "targetless ": " "); - DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - mac_addr[0],mac_addr[1],mac_addr[2],mac_addr[3], - mac_addr[4],mac_addr[5]); + DPRINTK("lec:%s", (targetless_le_arp) ? "targetless " : " "); + DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], + mac_addr[4], mac_addr[5]); spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mac_addr); - if (entry == NULL && targetless_le_arp) - goto out; /* LANE2: ignore targetless LE_ARPs for which - * we have no entry in the cache. 7.1.30 - */ - if (priv->lec_arp_empty_ones) { - entry = priv->lec_arp_empty_ones; - if (!memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN)) { - priv->lec_arp_empty_ones = entry->next; - } else { - while(entry->next && memcmp(entry->next->atm_addr, - atm_addr, ATM_ESA_LEN)) - entry = entry->next; - if (entry->next) { - tmp = entry; - entry = entry->next; - tmp->next = entry->next; - } else - entry = NULL; - - } - if (entry) { - del_timer(&entry->timer); - tmp = lec_arp_find(priv, mac_addr); - if (tmp) { - del_timer(&tmp->timer); - tmp->status = ESI_FORWARD_DIRECT; - memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN); - tmp->vcc = entry->vcc; - tmp->old_push = entry->old_push; - tmp->last_used = jiffies; - del_timer(&entry->timer); - kfree(entry); - entry=tmp; - } else { - entry->status = ESI_FORWARD_DIRECT; - memcpy(entry->mac_addr, mac_addr, ETH_ALEN); - entry->last_used = jiffies; - lec_arp_add(priv, entry); - } - if (remoteflag) - entry->flags|=LEC_REMOTE_FLAG; - else - entry->flags&=~LEC_REMOTE_FLAG; - 
DPRINTK("After update\n"); - dump_arp_table(priv); - goto out; - } - } - entry = lec_arp_find(priv, mac_addr); - if (!entry) { - entry = make_entry(priv, mac_addr); - if (!entry) + entry = lec_arp_find(priv, mac_addr); + if (entry == NULL && targetless_le_arp) + goto out; /* + * LANE2: ignore targetless LE_ARPs for which + * we have no entry in the cache. 7.1.30 + */ + if (!hlist_empty(&priv->lec_arp_empty_ones)) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { + hlist_del(&entry->next); + del_timer(&entry->timer); + tmp = lec_arp_find(priv, mac_addr); + if (tmp) { + del_timer(&tmp->timer); + tmp->status = ESI_FORWARD_DIRECT; + memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN); + tmp->vcc = entry->vcc; + tmp->old_push = entry->old_push; + tmp->last_used = jiffies; + del_timer(&entry->timer); + lec_arp_put(entry); + entry = tmp; + } else { + entry->status = ESI_FORWARD_DIRECT; + memcpy(entry->mac_addr, mac_addr, ETH_ALEN); + entry->last_used = jiffies; + lec_arp_add(priv, entry); + } + if (remoteflag) + entry->flags |= LEC_REMOTE_FLAG; + else + entry->flags &= ~LEC_REMOTE_FLAG; + DPRINTK("After update\n"); + dump_arp_table(priv); + goto out; + } + } + } + + entry = lec_arp_find(priv, mac_addr); + if (!entry) { + entry = make_entry(priv, mac_addr); + if (!entry) goto out; - entry->status = ESI_UNKNOWN; - lec_arp_add(priv, entry); - /* Temporary, changes before end of function */ - } - memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); - del_timer(&entry->timer); - for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(tmp = priv->lec_arp_tables[i]; tmp; tmp=tmp->next) { - if (entry != tmp && - !memcmp(tmp->atm_addr, atm_addr, - ATM_ESA_LEN)) { - /* Vcc to this host exists */ - if (tmp->status > ESI_VC_PENDING) { - /* - * ESI_FLUSH_PENDING, - * ESI_FORWARD_DIRECT - */ - entry->vcc = tmp->vcc; - entry->old_push=tmp->old_push; - } - entry->status=tmp->status; - break; - } - } - } - if 
(remoteflag) - entry->flags|=LEC_REMOTE_FLAG; - else - entry->flags&=~LEC_REMOTE_FLAG; - if (entry->status == ESI_ARP_PENDING || - entry->status == ESI_UNKNOWN) { - entry->status = ESI_VC_PENDING; - send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); - } - DPRINTK("After update2\n"); - dump_arp_table(priv); + entry->status = ESI_UNKNOWN; + lec_arp_add(priv, entry); + /* Temporary, changes before end of function */ + } + memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); + del_timer(&entry->timer); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(tmp, node, &priv->lec_arp_tables[i], next) { + if (entry != tmp && + !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { + /* Vcc to this host exists */ + if (tmp->status > ESI_VC_PENDING) { + /* + * ESI_FLUSH_PENDING, + * ESI_FORWARD_DIRECT + */ + entry->vcc = tmp->vcc; + entry->old_push = tmp->old_push; + } + entry->status = tmp->status; + break; + } + } + } + if (remoteflag) + entry->flags |= LEC_REMOTE_FLAG; + else + entry->flags &= ~LEC_REMOTE_FLAG; + if (entry->status == ESI_ARP_PENDING || entry->status == ESI_UNKNOWN) { + entry->status = ESI_VC_PENDING; + send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); + } + DPRINTK("After update2\n"); + dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } @@ -2217,299 +2183,299 @@ */ static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, - struct atm_vcc *vcc, - void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb)) + struct atm_vcc *vcc, + void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { unsigned long flags; - struct lec_arp_table *entry; - int i, found_entry=0; + struct hlist_node *node; + struct lec_arp_table *entry; + int i, found_entry = 0; spin_lock_irqsave(&priv->lec_arp_lock, flags); - if (ioc_data->receive == 2) { - /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ + if (ioc_data->receive == 2) { + /* Vcc for Multicast Forward. 
No timer, LANEv2 7.1.20 and 2.3.5.3 */ - DPRINTK("LEC_ARP: Attaching mcast forward\n"); + DPRINTK("LEC_ARP: Attaching mcast forward\n"); #if 0 - entry = lec_arp_find(priv, bus_mac); - if (!entry) { - printk("LEC_ARP: Multicast entry not found!\n"); + entry = lec_arp_find(priv, bus_mac); + if (!entry) { + printk("LEC_ARP: Multicast entry not found!\n"); goto out; - } - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; + } + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + entry->recv_vcc = vcc; + entry->old_recv_push = old_push; #endif - entry = make_entry(priv, bus_mac); - if (entry == NULL) + entry = make_entry(priv, bus_mac); + if (entry == NULL) goto out; - del_timer(&entry->timer); - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; - entry->next = priv->mcast_fwds; - priv->mcast_fwds = entry; - goto out; - } else if (ioc_data->receive == 1) { - /* Vcc which we don't want to make default vcc, attach it - anyway. 
*/ - DPRINTK("LEC_ARP:Attaching data direct, not default :%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - ioc_data->atm_addr[0],ioc_data->atm_addr[1], - ioc_data->atm_addr[2],ioc_data->atm_addr[3], - ioc_data->atm_addr[4],ioc_data->atm_addr[5], - ioc_data->atm_addr[6],ioc_data->atm_addr[7], - ioc_data->atm_addr[8],ioc_data->atm_addr[9], - ioc_data->atm_addr[10],ioc_data->atm_addr[11], - ioc_data->atm_addr[12],ioc_data->atm_addr[13], - ioc_data->atm_addr[14],ioc_data->atm_addr[15], - ioc_data->atm_addr[16],ioc_data->atm_addr[17], - ioc_data->atm_addr[18],ioc_data->atm_addr[19]); - entry = make_entry(priv, bus_mac); - if (entry == NULL) + del_timer(&entry->timer); + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + entry->recv_vcc = vcc; + entry->old_recv_push = old_push; + hlist_add_head(&entry->next, &priv->mcast_fwds); + goto out; + } else if (ioc_data->receive == 1) { + /* + * Vcc which we don't want to make default vcc, + * attach it anyway. 
+ */ + DPRINTK + ("LEC_ARP:Attaching data direct, not default: " + "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + ioc_data->atm_addr[0], ioc_data->atm_addr[1], + ioc_data->atm_addr[2], ioc_data->atm_addr[3], + ioc_data->atm_addr[4], ioc_data->atm_addr[5], + ioc_data->atm_addr[6], ioc_data->atm_addr[7], + ioc_data->atm_addr[8], ioc_data->atm_addr[9], + ioc_data->atm_addr[10], ioc_data->atm_addr[11], + ioc_data->atm_addr[12], ioc_data->atm_addr[13], + ioc_data->atm_addr[14], ioc_data->atm_addr[15], + ioc_data->atm_addr[16], ioc_data->atm_addr[17], + ioc_data->atm_addr[18], ioc_data->atm_addr[19]); + entry = make_entry(priv, bus_mac); + if (entry == NULL) goto out; - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; - entry->status = ESI_UNKNOWN; - entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; - add_timer(&entry->timer); - entry->next = priv->lec_no_forward; - priv->lec_no_forward = entry; + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + memset(entry->mac_addr, 0, ETH_ALEN); + entry->recv_vcc = vcc; + entry->old_recv_push = old_push; + entry->status = ESI_UNKNOWN; + entry->timer.expires = jiffies + priv->vcc_timeout_period; + entry->timer.function = lec_arp_expire_vcc; + hlist_add_head(&entry->next, &priv->lec_no_forward); + add_timer(&entry->timer); dump_arp_table(priv); goto out; - } - DPRINTK("LEC_ARP:Attaching data direct, default:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - ioc_data->atm_addr[0],ioc_data->atm_addr[1], - ioc_data->atm_addr[2],ioc_data->atm_addr[3], - ioc_data->atm_addr[4],ioc_data->atm_addr[5], - ioc_data->atm_addr[6],ioc_data->atm_addr[7], - ioc_data->atm_addr[8],ioc_data->atm_addr[9], - ioc_data->atm_addr[10],ioc_data->atm_addr[11], - 
ioc_data->atm_addr[12],ioc_data->atm_addr[13], - ioc_data->atm_addr[14],ioc_data->atm_addr[15], - ioc_data->atm_addr[16],ioc_data->atm_addr[17], - ioc_data->atm_addr[18],ioc_data->atm_addr[19]); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) { - if (memcmp(ioc_data->atm_addr, entry->atm_addr, - ATM_ESA_LEN)==0) { - DPRINTK("LEC_ARP: Attaching data direct\n"); - DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n", - entry->vcc?entry->vcc->vci:0, - entry->recv_vcc?entry->recv_vcc->vci:0); - found_entry=1; - del_timer(&entry->timer); - entry->vcc = vcc; - entry->old_push = old_push; - if (entry->status == ESI_VC_PENDING) { - if(priv->maximum_unknown_frame_count - ==0) - entry->status = - ESI_FORWARD_DIRECT; - else { - entry->timestamp = jiffies; - entry->status = - ESI_FLUSH_PENDING; + } + DPRINTK + ("LEC_ARP:Attaching data direct, default: " + "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + ioc_data->atm_addr[0], ioc_data->atm_addr[1], + ioc_data->atm_addr[2], ioc_data->atm_addr[3], + ioc_data->atm_addr[4], ioc_data->atm_addr[5], + ioc_data->atm_addr[6], ioc_data->atm_addr[7], + ioc_data->atm_addr[8], ioc_data->atm_addr[9], + ioc_data->atm_addr[10], ioc_data->atm_addr[11], + ioc_data->atm_addr[12], ioc_data->atm_addr[13], + ioc_data->atm_addr[14], ioc_data->atm_addr[15], + ioc_data->atm_addr[16], ioc_data->atm_addr[17], + ioc_data->atm_addr[18], ioc_data->atm_addr[19]); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (memcmp + (ioc_data->atm_addr, entry->atm_addr, + ATM_ESA_LEN) == 0) { + DPRINTK("LEC_ARP: Attaching data direct\n"); + DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n", + entry->vcc ? entry->vcc->vci : 0, + entry->recv_vcc ? 
entry->recv_vcc-> + vci : 0); + found_entry = 1; + del_timer(&entry->timer); + entry->vcc = vcc; + entry->old_push = old_push; + if (entry->status == ESI_VC_PENDING) { + if (priv->maximum_unknown_frame_count + == 0) + entry->status = + ESI_FORWARD_DIRECT; + else { + entry->timestamp = jiffies; + entry->status = + ESI_FLUSH_PENDING; #if 0 - send_to_lecd(priv,l_flush_xmt, - NULL, - entry->atm_addr, - NULL); -#endif - } - } else { - /* They were forming a connection - to us, and we to them. Our - ATM address is numerically lower - than theirs, so we make connection - we formed into default VCC (8.1.11). - Connection they made gets torn - down. This might confuse some - clients. Can be changed if - someone reports trouble... */ - ; - } - } - } - } - if (found_entry) { - DPRINTK("After vcc was added\n"); - dump_arp_table(priv); + send_to_lecd(priv, l_flush_xmt, + NULL, + entry->atm_addr, + NULL); +#endif + } + } else { + /* + * They were forming a connection + * to us, and we to them. Our + * ATM address is numerically lower + * than theirs, so we make connection + * we formed into default VCC (8.1.11). + * Connection they made gets torn + * down. This might confuse some + * clients. Can be changed if + * someone reports trouble... 
+ */ + ; + } + } + } + } + if (found_entry) { + DPRINTK("After vcc was added\n"); + dump_arp_table(priv); goto out; - } - /* Not found, snatch address from first data packet that arrives from - this vcc */ - entry = make_entry(priv, bus_mac); - if (!entry) + } + /* + * Not found, snatch address from first data packet that arrives + * from this vcc + */ + entry = make_entry(priv, bus_mac); + if (!entry) goto out; - entry->vcc = vcc; - entry->old_push = old_push; - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); - entry->status = ESI_UNKNOWN; - entry->next = priv->lec_arp_empty_ones; - priv->lec_arp_empty_ones = entry; - entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; - add_timer(&entry->timer); - DPRINTK("After vcc was added\n"); + entry->vcc = vcc; + entry->old_push = old_push; + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + memset(entry->mac_addr, 0, ETH_ALEN); + entry->status = ESI_UNKNOWN; + hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); + entry->timer.expires = jiffies + priv->vcc_timeout_period; + entry->timer.function = lec_arp_expire_vcc; + add_timer(&entry->timer); + DPRINTK("After vcc was added\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } -static void -lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) +static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) { unsigned long flags; - struct lec_arp_table *entry; - int i; - - DPRINTK("LEC:lec_flush_complete %lx\n",tran_id); + struct hlist_node *node; + struct lec_arp_table *entry; + int i; + + DPRINTK("LEC:lec_flush_complete %lx\n", tran_id); +restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) { - if (entry->flush_tran_id == tran_id && - entry->status == ESI_FLUSH_PENDING) { - struct sk_buff 
*skb; - - while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(entry->vcc, skb, entry->priv); - entry->status = ESI_FORWARD_DIRECT; - DPRINTK("LEC_ARP: Flushed\n"); - } - } - } + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (entry->flush_tran_id == tran_id + && entry->status == ESI_FLUSH_PENDING) { + struct sk_buff *skb; + struct atm_vcc *vcc = entry->vcc; + + lec_arp_hold(entry); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) + lec_send(vcc, skb, entry->priv); + entry->last_used = jiffies; + entry->status = ESI_FORWARD_DIRECT; + lec_arp_put(entry); + DPRINTK("LEC_ARP: Flushed\n"); + goto restart; + } + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - dump_arp_table(priv); + dump_arp_table(priv); } static void lec_set_flush_tran_id(struct lec_priv *priv, - unsigned char *atm_addr, unsigned long tran_id) + unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; - struct lec_arp_table *entry; - int i; + struct hlist_node *node; + struct lec_arp_table *entry; + int i; spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - for(entry = priv->lec_arp_tables[i]; entry; entry=entry->next) - if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { - entry->flush_tran_id = tran_id; - DPRINTK("Set flush transaction id to %lx for %p\n",tran_id,entry); - } + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { + entry->flush_tran_id = tran_id; + DPRINTK("Set flush transaction id to %lx for %p\n", + tran_id, entry); + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } -static int -lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) +static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - unsigned char 
mac_addr[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - struct lec_arp_table *to_add; + unsigned char mac_addr[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + struct lec_arp_table *to_add; struct lec_vcc_priv *vpriv; int err = 0; - + if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL))) return -ENOMEM; vpriv->xoff = 0; vpriv->old_pop = vcc->pop; vcc->user_back = vpriv; - vcc->pop = lec_pop; + vcc->pop = lec_pop; spin_lock_irqsave(&priv->lec_arp_lock, flags); - to_add = make_entry(priv, mac_addr); - if (!to_add) { + to_add = make_entry(priv, mac_addr); + if (!to_add) { vcc->pop = vpriv->old_pop; kfree(vpriv); - err = -ENOMEM; + err = -ENOMEM; goto out; - } - memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN); - to_add->status = ESI_FORWARD_DIRECT; - to_add->flags |= LEC_PERMANENT_FLAG; - to_add->vcc = vcc; - to_add->old_push = vcc->push; - vcc->push = lec_push; - priv->mcast_vcc = vcc; - lec_arp_add(priv, to_add); + } + memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN); + to_add->status = ESI_FORWARD_DIRECT; + to_add->flags |= LEC_PERMANENT_FLAG; + to_add->vcc = vcc; + to_add->old_push = vcc->push; + vcc->push = lec_push; + priv->mcast_vcc = vcc; + lec_arp_add(priv, to_add); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return err; + return err; } -static void -lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) +static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - struct lec_arp_table *entry, *next; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + int i; + + DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); + dump_arp_table(priv); - DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n",vcc->vpi,vcc->vci); - dump_arp_table(priv); spin_lock_irqsave(&priv->lec_arp_lock, flags); - for(i=0;ilec_arp_tables[i];entry; entry=next) { - next = entry->next; - if (vcc == entry->vcc) { - lec_arp_remove(priv, entry); - kfree(entry); - if 
(priv->mcast_vcc == vcc) { - priv->mcast_vcc = NULL; - } - } - } - } - - entry = priv->lec_arp_empty_ones; - priv->lec_arp_empty_ones = NULL; - while (entry != NULL) { - next = entry->next; - if (entry->vcc == vcc) { /* leave it out from the list */ - lec_arp_clear_vccs(entry); - del_timer(&entry->timer); - kfree(entry); - } - else { /* put it back to the list */ - entry->next = priv->lec_arp_empty_ones; - priv->lec_arp_empty_ones = entry; - } - entry = next; - } - - entry = priv->lec_no_forward; - priv->lec_no_forward = NULL; - while (entry != NULL) { - next = entry->next; - if (entry->recv_vcc == vcc) { - lec_arp_clear_vccs(entry); - del_timer(&entry->timer); - kfree(entry); - } - else { - entry->next = priv->lec_no_forward; - priv->lec_no_forward = entry; - } - entry = next; - } - - entry = priv->mcast_fwds; - priv->mcast_fwds = NULL; - while (entry != NULL) { - next = entry->next; - if (entry->recv_vcc == vcc) { - lec_arp_clear_vccs(entry); - /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ - kfree(entry); - } - else { - entry->next = priv->mcast_fwds; - priv->mcast_fwds = entry; - } - entry = next; - } + + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + if (vcc == entry->vcc) { + lec_arp_remove(priv, entry); + lec_arp_put(entry); + if (priv->mcast_vcc == vcc) { + priv->mcast_vcc = NULL; + } + } + } + } + + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + if (entry->vcc == vcc) { + lec_arp_clear_vccs(entry); + del_timer(&entry->timer); + hlist_del(&entry->next); + lec_arp_put(entry); + } + } + + hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) { + if (entry->recv_vcc == vcc) { + lec_arp_clear_vccs(entry); + del_timer(&entry->timer); + hlist_del(&entry->next); + lec_arp_put(entry); + } + } + + hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + if (entry->recv_vcc == vcc) { + lec_arp_clear_vccs(entry); + /* No 
timer, LANEv2 7.1.20 and 2.3.5.3 */ + hlist_del(&entry->next); + lec_arp_put(entry); + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); dump_arp_table(priv); @@ -2517,57 +2483,42 @@ static void lec_arp_check_empties(struct lec_priv *priv, - struct atm_vcc *vcc, struct sk_buff *skb) + struct atm_vcc *vcc, struct sk_buff *skb) { - unsigned long flags; - struct lec_arp_table *entry, *prev; - struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; - unsigned char *src; + unsigned long flags; + struct hlist_node *node, *next; + struct lec_arp_table *entry, *tmp; + struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; + unsigned char *src; #ifdef CONFIG_TR - struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data; + struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data; - if (priv->is_trdev) src = tr_hdr->h_source; - else + if (priv->is_trdev) + src = tr_hdr->h_source; + else #endif - src = hdr->h_source; + src = hdr->h_source; spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = priv->lec_arp_empty_ones; - if (vcc == entry->vcc) { - del_timer(&entry->timer); - memcpy(entry->mac_addr, src, ETH_ALEN); - entry->status = ESI_FORWARD_DIRECT; - entry->last_used = jiffies; - priv->lec_arp_empty_ones = entry->next; - /* We might have got an entry */ - if ((prev = lec_arp_find(priv,src))) { - lec_arp_remove(priv, prev); - kfree(prev); - } - lec_arp_add(priv, entry); - goto out; - } - prev = entry; - entry = entry->next; - while (entry && entry->vcc != vcc) { - prev= entry; - entry = entry->next; - } - if (!entry) { - DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); - goto out; - } - del_timer(&entry->timer); - memcpy(entry->mac_addr, src, ETH_ALEN); - entry->status = ESI_FORWARD_DIRECT; - entry->last_used = jiffies; - prev->next = entry->next; - if ((prev = lec_arp_find(priv, src))) { - lec_arp_remove(priv, prev); - kfree(prev); - } - lec_arp_add(priv, entry); + hlist_for_each_entry_safe(entry, node, 
next, &priv->lec_arp_empty_ones, next) { + if (vcc == entry->vcc) { + del_timer(&entry->timer); + memcpy(entry->mac_addr, src, ETH_ALEN); + entry->status = ESI_FORWARD_DIRECT; + entry->last_used = jiffies; + /* We might have got an entry */ + if ((tmp = lec_arp_find(priv, src))) { + lec_arp_remove(priv, tmp); + lec_arp_put(tmp); + } + hlist_del(&entry->next); + lec_arp_add(priv, entry); + goto out; + } + } + DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } + MODULE_LICENSE("GPL"); diff -urN oldtree/net/atm/lec.h newtree/net/atm/lec.h --- oldtree/net/atm/lec.h 2006-09-29 13:50:42.000000000 -0400 +++ newtree/net/atm/lec.h 2006-09-30 05:21:44.000000000 -0400 @@ -1,14 +1,13 @@ /* - * * Lan Emulation client header file * - * Marko Kiiskila mkiiskila@yahoo.com - * + * Marko Kiiskila */ #ifndef _LEC_H_ #define _LEC_H_ +#include #include #include #include @@ -16,18 +15,18 @@ #define LEC_HEADER_LEN 16 struct lecdatahdr_8023 { - unsigned short le_header; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - unsigned short h_type; + unsigned short le_header; + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_type; }; struct lecdatahdr_8025 { - unsigned short le_header; - unsigned char ac_pad; - unsigned char fc; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + unsigned short le_header; + unsigned char ac_pad; + unsigned char fc; + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; }; #define LEC_MINIMUM_8023_SIZE 62 @@ -44,17 +43,18 @@ * */ struct lane2_ops { - int (*resolve)(struct net_device *dev, u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs); - int (*associate_req)(struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); - void (*associate_indicator)(struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs); + int (*resolve) (struct net_device *dev, u8 *dst_mac, int force, + u8 **tlvs, 
u32 *sizeoftlvs); + int (*associate_req) (struct net_device *dev, u8 *lan_dst, + u8 *tlvs, u32 sizeoftlvs); + void (*associate_indicator) (struct net_device *dev, u8 *mac_addr, + u8 *tlvs, u32 sizeoftlvs); }; /* * ATM LAN Emulation supports both LLC & Dix Ethernet EtherType * frames. + * * 1. Dix Ethernet EtherType frames encoded by placing EtherType * field in h_type field. Data follows immediatelly after header. * 2. LLC Data frames whose total length, including LLC field and data, @@ -70,72 +70,88 @@ #define LEC_ARP_TABLE_SIZE 16 struct lec_priv { - struct net_device_stats stats; - unsigned short lecid; /* Lecid of this client */ - struct lec_arp_table *lec_arp_empty_ones; - /* Used for storing VCC's that don't have a MAC address attached yet */ - struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE]; - /* Actual LE ARP table */ - struct lec_arp_table *lec_no_forward; - /* Used for storing VCC's (and forward packets from) which are to - age out by not using them to forward packets. - This is because to some LE clients there will be 2 VCCs. Only - one of them gets used. */ - struct lec_arp_table *mcast_fwds; - /* With LANEv2 it is possible that BUS (or a special multicast server) - establishes multiple Multicast Forward VCCs to us. This list - collects all those VCCs. LANEv1 client has only one item in this - list. These entries are not aged out. */ - spinlock_t lec_arp_lock; - struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ - struct atm_vcc *lecd; - struct timer_list lec_arp_timer; - /* C10 */ - unsigned int maximum_unknown_frame_count; -/* Within the period of time defined by this variable, the client will send - no more than C10 frames to BUS for a given unicast destination. 
(C11) */ - unsigned long max_unknown_frame_time; -/* If no traffic has been sent in this vcc for this period of time, - vcc will be torn down (C12)*/ - unsigned long vcc_timeout_period; -/* An LE Client MUST not retry an LE_ARP_REQUEST for a - given frame's LAN Destination more than maximum retry count times, - after the first LEC_ARP_REQUEST (C13)*/ - unsigned short max_retry_count; -/* Max time the client will maintain an entry in its arp cache in - absence of a verification of that relationship (C17)*/ - unsigned long aging_time; -/* Max time the client will maintain an entry in cache when - topology change flag is true (C18) */ - unsigned long forward_delay_time; -/* Topology change flag (C19)*/ - int topology_change; -/* Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE - cycle to take (C20)*/ - unsigned long arp_response_time; -/* Time limit ot wait to receive an LE_FLUSH_RESPONSE after the - LE_FLUSH_REQUEST has been sent before taking recover action. (C21)*/ - unsigned long flush_timeout; -/* The time since sending a frame to the bus after which the - LE Client may assume that the frame has been either discarded or - delivered to the recipient (C22) */ - unsigned long path_switching_delay; - - u8 *tlvs; /* LANE2: TLVs are new */ - u32 sizeoftlvs; /* The size of the tlv array in bytes */ - int lane_version; /* LANE2 */ - int itfnum; /* e.g. 
2 for lec2, 5 for lec5 */ - struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */ - int is_proxy; /* bridge between ATM and Ethernet */ - int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */ + struct net_device_stats stats; + unsigned short lecid; /* Lecid of this client */ + struct hlist_head lec_arp_empty_ones; + /* Used for storing VCC's that don't have a MAC address attached yet */ + struct hlist_head lec_arp_tables[LEC_ARP_TABLE_SIZE]; + /* Actual LE ARP table */ + struct hlist_head lec_no_forward; + /* + * Used for storing VCC's (and forward packets from) which are to + * age out by not using them to forward packets. + * This is because to some LE clients there will be 2 VCCs. Only + * one of them gets used. + */ + struct hlist_head mcast_fwds; + /* + * With LANEv2 it is possible that BUS (or a special multicast server) + * establishes multiple Multicast Forward VCCs to us. This list + * collects all those VCCs. LANEv1 client has only one item in this + * list. These entries are not aged out. + */ + spinlock_t lec_arp_lock; + struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ + struct atm_vcc *lecd; + struct work_struct lec_arp_work; /* C10 */ + unsigned int maximum_unknown_frame_count; + /* + * Within the period of time defined by this variable, the client will send + * no more than C10 frames to BUS for a given unicast destination. 
(C11) + */ + unsigned long max_unknown_frame_time; + /* + * If no traffic has been sent in this vcc for this period of time, + * vcc will be torn down (C12) + */ + unsigned long vcc_timeout_period; + /* + * An LE Client MUST not retry an LE_ARP_REQUEST for a + * given frame's LAN Destination more than maximum retry count times, + * after the first LEC_ARP_REQUEST (C13) + */ + unsigned short max_retry_count; + /* + * Max time the client will maintain an entry in its arp cache in + * absence of a verification of that relationship (C17) + */ + unsigned long aging_time; + /* + * Max time the client will maintain an entry in cache when + * topology change flag is true (C18) + */ + unsigned long forward_delay_time; /* Topology change flag (C19) */ + int topology_change; + /* + * Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE + * cycle to take (C20) + */ + unsigned long arp_response_time; + /* + * Time limit to wait to receive an LE_FLUSH_RESPONSE after the + * LE_FLUSH_REQUEST has been sent before taking recover action. (C21) + */ + unsigned long flush_timeout; + /* The time since sending a frame to the bus after which the + * LE Client may assume that the frame has been either discarded or + * delivered to the recipient (C22) + */ + unsigned long path_switching_delay; + + u8 *tlvs; /* LANE2: TLVs are new */ + u32 sizeoftlvs; /* The size of the tlv array in bytes */ + int lane_version; /* LANE2 */ + int itfnum; /* e.g.
2 for lec2, 5 for lec5 */ + struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */ + int is_proxy; /* bridge between ATM and Ethernet */ + int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */ }; struct lec_vcc_priv { - void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); + void (*old_pop) (struct atm_vcc *vcc, struct sk_buff *skb); int xoff; }; #define LEC_VCC_PRIV(vcc) ((struct lec_vcc_priv *)((vcc)->user_back)) -#endif /* _LEC_H_ */ - +#endif /* _LEC_H_ */ diff -urN oldtree/net/atm/lec_arpc.h newtree/net/atm/lec_arpc.h --- oldtree/net/atm/lec_arpc.h 2006-09-29 13:50:42.000000000 -0400 +++ newtree/net/atm/lec_arpc.h 2006-09-30 05:21:44.000000000 -0400 @@ -1,92 +1,96 @@ /* * Lec arp cache - * Marko Kiiskila mkiiskila@yahoo.com * + * Marko Kiiskila */ -#ifndef _LEC_ARP_H -#define _LEC_ARP_H +#ifndef _LEC_ARP_H_ +#define _LEC_ARP_H_ #include #include #include #include struct lec_arp_table { - struct lec_arp_table *next; /* Linked entry list */ - unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */ - unsigned char mac_addr[ETH_ALEN]; /* Mac address */ - int is_rdesc; /* Mac address is a route descriptor */ - struct atm_vcc *vcc; /* Vcc this entry is attached */ - struct atm_vcc *recv_vcc; /* Vcc we receive data from */ - void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); - /* Push that leads to daemon */ - void (*old_recv_push)(struct atm_vcc *vcc, struct sk_buff *skb); - /* Push that leads to daemon */ - void (*old_close)(struct atm_vcc *vcc); - /* We want to see when this - * vcc gets closed */ - unsigned long last_used; /* For expiry */ - unsigned long timestamp; /* Used for various timestamping - * things: - * 1. FLUSH started - * (status=ESI_FLUSH_PENDING) - * 2. 
Counting to - * max_unknown_frame_time - * (status=ESI_ARP_PENDING|| - * status=ESI_VC_PENDING) - */ - unsigned char no_tries; /* No of times arp retry has been - tried */ - unsigned char status; /* Status of this entry */ - unsigned short flags; /* Flags for this entry */ - unsigned short packets_flooded; /* Data packets flooded */ - unsigned long flush_tran_id; /* Transaction id in flush protocol */ - struct timer_list timer; /* Arping timer */ - struct lec_priv *priv; /* Pointer back */ - - u8 *tlvs; /* LANE2: Each MAC address can have TLVs */ - u32 sizeoftlvs; /* associated with it. sizeoftlvs tells the */ - /* the length of the tlvs array */ - struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ + struct hlist_node next; /* Linked entry list */ + unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */ + unsigned char mac_addr[ETH_ALEN]; /* Mac address */ + int is_rdesc; /* Mac address is a route descriptor */ + struct atm_vcc *vcc; /* Vcc this entry is attached */ + struct atm_vcc *recv_vcc; /* Vcc we receive data from */ + + void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb); + /* Push that leads to daemon */ + + void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb); + /* Push that leads to daemon */ + + unsigned long last_used; /* For expiry */ + unsigned long timestamp; /* Used for various timestamping things: + * 1. FLUSH started + * (status=ESI_FLUSH_PENDING) + * 2. 
Counting to + * max_unknown_frame_time + * (status=ESI_ARP_PENDING|| + * status=ESI_VC_PENDING) + */ + unsigned char no_tries; /* No of times arp retry has been tried */ + unsigned char status; /* Status of this entry */ + unsigned short flags; /* Flags for this entry */ + unsigned short packets_flooded; /* Data packets flooded */ + unsigned long flush_tran_id; /* Transaction id in flush protocol */ + struct timer_list timer; /* Arping timer */ + struct lec_priv *priv; /* Pointer back */ + u8 *tlvs; + u32 sizeoftlvs; /* + * LANE2: Each MAC address can have TLVs + * associated with it. sizeoftlvs tells the + * length of the tlvs array + */ + struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ + atomic_t usage; /* usage count */ }; -struct tlv { /* LANE2: Template tlv struct for accessing */ - /* the tlvs in the lec_arp_table->tlvs array*/ - u32 type; - u8 length; - u8 value[255]; +/* + * LANE2: Template tlv struct for accessing + * the tlvs in the lec_arp_table->tlvs array + */ +struct tlv { + u32 type; + u8 length; + u8 value[255]; }; /* Status fields */ -#define ESI_UNKNOWN 0 /* - * Next packet sent to this mac address - * causes ARP-request to be sent - */ -#define ESI_ARP_PENDING 1 /* - * There is no ATM address associated with this - * 48-bit address. The LE-ARP protocol is in - * progress. - */ -#define ESI_VC_PENDING 2 /* - * There is a valid ATM address associated with - * this 48-bit address but there is no VC set - * up to that ATM address. The signaling - * protocol is in process. - */ -#define ESI_FLUSH_PENDING 4 /* - * The LEC has been notified of the FLUSH_START - * status and it is assumed that the flush - * protocol is in process. - */ -#define ESI_FORWARD_DIRECT 5 /* - * Either the Path Switching Delay (C22) has - * elapsed or the LEC has notified the Mapping - * that the flush protocol has completed. In - * either case, it is safe to forward packets - * to this address via the data direct VC.
- */ +#define ESI_UNKNOWN 0 /* + * Next packet sent to this mac address + * causes ARP-request to be sent + */ +#define ESI_ARP_PENDING 1 /* + * There is no ATM address associated with this + * 48-bit address. The LE-ARP protocol is in + * progress. + */ +#define ESI_VC_PENDING 2 /* + * There is a valid ATM address associated with + * this 48-bit address but there is no VC set + * up to that ATM address. The signaling + * protocol is in process. + */ +#define ESI_FLUSH_PENDING 4 /* + * The LEC has been notified of the FLUSH_START + * status and it is assumed that the flush + * protocol is in process. + */ +#define ESI_FORWARD_DIRECT 5 /* + * Either the Path Switching Delay (C22) has + * elapsed or the LEC has notified the Mapping + * that the flush protocol has completed. In + * either case, it is safe to forward packets + * to this address via the data direct VC. + */ /* Flag values */ #define LEC_REMOTE_FLAG 0x0001 #define LEC_PERMANENT_FLAG 0x0002 -#endif +#endif /* _LEC_ARP_H_ */ diff -urN oldtree/net/ipv4/cipso_ipv4.c newtree/net/ipv4/cipso_ipv4.c --- oldtree/net/ipv4/cipso_ipv4.c 2006-09-29 14:03:22.000000000 -0400 +++ newtree/net/ipv4/cipso_ipv4.c 2006-09-30 05:21:44.000000000 -0400 @@ -485,7 +485,7 @@ * */ int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)) { struct cipso_v4_doi *doi_def; @@ -506,7 +506,7 @@ list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) if (dom_iter->valid) netlbl_domhsh_remove(dom_iter->domain, - audit_secid); + audit_info); cipso_v4_cache_invalidate(); rcu_read_unlock(); diff -urN oldtree/net/netlabel/netlabel_cipso_v4.c newtree/net/netlabel/netlabel_cipso_v4.c --- oldtree/net/netlabel/netlabel_cipso_v4.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/netlabel/netlabel_cipso_v4.c 2006-09-30 05:21:44.000000000 -0400 @@ -384,11 +384,15 @@ u32 doi; const char *type_str = "(unknown)"; struct audit_buffer *audit_buf; + struct netlbl_audit 
audit_info; if (!info->attrs[NLBL_CIPSOV4_A_DOI] || !info->attrs[NLBL_CIPSOV4_A_MTYPE]) return -EINVAL; + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (type) { case CIPSO_V4_MAP_STD: @@ -401,13 +405,14 @@ break; } - if (ret_val == 0) { - doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, - NETLINK_CB(skb).sid); - audit_log_format(audit_buf, " doi=%u type=%s", doi, type_str); - audit_log_end(audit_buf); - } + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi, + type_str, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); return ret_val; } @@ -668,20 +673,25 @@ int ret_val = -EINVAL; u32 doi = 0; struct audit_buffer *audit_buf; + struct netlbl_audit audit_info; - if (info->attrs[NLBL_CIPSOV4_A_DOI]) { - doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - ret_val = cipso_v4_doi_remove(doi, - NETLINK_CB(skb).sid, - netlbl_cipsov4_doi_free); - } + if (!info->attrs[NLBL_CIPSOV4_A_DOI]) + return -EINVAL; - if (ret_val == 0) { - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, - NETLINK_CB(skb).sid); - audit_log_format(audit_buf, " doi=%u", doi); - audit_log_end(audit_buf); - } + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = cipso_v4_doi_remove(doi, + &audit_info, + netlbl_cipsov4_doi_free); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u res=%u", + doi, + ret_val == 0 ? 
1 : 0); + audit_log_end(audit_buf); return ret_val; } diff -urN oldtree/net/netlabel/netlabel_domainhash.c newtree/net/netlabel/netlabel_domainhash.c --- oldtree/net/netlabel/netlabel_domainhash.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/netlabel/netlabel_domainhash.c 2006-09-30 05:21:44.000000000 -0400 @@ -188,7 +188,7 @@ /** * netlbl_domhsh_add - Adds a entry to the domain hash table * @entry: the entry to add - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Adds a new entry to the domain hash table and handles any updates to the @@ -196,7 +196,8 @@ * negative on failure. * */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { int ret_val; u32 bkt; @@ -241,26 +242,26 @@ spin_unlock(&netlbl_domhsh_def_lock); } else ret_val = -EINVAL; - if (ret_val == 0) { - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, - audit_secid); - audit_log_format(audit_buf, " domain=%s", audit_domain); - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - audit_log_format(audit_buf, " protocol=unlbl"); - break; - case NETLBL_NLTYPE_CIPSOV4: - audit_log_format(audit_buf, - " protocol=cipsov4 doi=%u", - entry->type_def.cipsov4->doi); - break; - } - audit_log_end(audit_buf); + + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); + audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain); + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + entry->type_def.cipsov4->doi); + break; } + 
audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + rcu_read_unlock(); if (ret_val != 0) { @@ -279,7 +280,7 @@ /** * netlbl_domhsh_add_default - Adds the default entry to the domain hash table * @entry: the entry to add - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Adds a new default entry to the domain hash table and handles any updates @@ -287,15 +288,16 @@ * negative on failure. * */ -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid) +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { - return netlbl_domhsh_add(entry, audit_secid); + return netlbl_domhsh_add(entry, audit_info); } /** * netlbl_domhsh_remove - Removes an entry from the domain hash table * @domain: the domain to remove - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes an entry from the domain hash table and handles any updates to the @@ -303,7 +305,7 @@ * negative on failure. * */ -int netlbl_domhsh_remove(const char *domain, u32 audit_secid) +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) { int ret_val = -ENOENT; struct netlbl_dom_map *entry; @@ -345,18 +347,20 @@ ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_def_lock); } - if (ret_val == 0) { - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, - audit_secid); - audit_log_format(audit_buf, " domain=%s", audit_domain); - audit_log_end(audit_buf); + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); + audit_log_format(audit_buf, + " nlbl_domain=%s res=%u", + audit_domain, + ret_val == 0 ? 
1 : 0); + audit_log_end(audit_buf); + + if (ret_val == 0) call_rcu(&entry->rcu, netlbl_domhsh_free_entry); - } remove_return: rcu_read_unlock(); @@ -365,7 +369,7 @@ /** * netlbl_domhsh_remove_default - Removes the default entry from the table - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes/resets the default entry for the domain hash table and handles any @@ -373,9 +377,9 @@ * success, non-zero on failure. * */ -int netlbl_domhsh_remove_default(u32 audit_secid) +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info) { - return netlbl_domhsh_remove(NULL, audit_secid); + return netlbl_domhsh_remove(NULL, audit_info); } /** diff -urN oldtree/net/netlabel/netlabel_domainhash.h newtree/net/netlabel/netlabel_domainhash.h --- oldtree/net/netlabel/netlabel_domainhash.h 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/netlabel/netlabel_domainhash.h 2006-09-30 05:21:44.000000000 -0400 @@ -57,9 +57,11 @@ int netlbl_domhsh_init(u32 size); /* Manipulate the domain hash table */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid); -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid); -int netlbl_domhsh_remove_default(u32 audit_secid); +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, diff -urN oldtree/net/netlabel/netlabel_mgmt.c newtree/net/netlabel/netlabel_mgmt.c --- oldtree/net/netlabel/netlabel_mgmt.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/netlabel/netlabel_mgmt.c 2006-09-30 05:21:44.000000000 -0400 @@ -87,11 +87,14 @@ struct netlbl_dom_map *entry = NULL; size_t tmp_size; u32 tmp_val; + struct 
netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_DOMAIN] || !info->attrs[NLBL_MGMT_A_PROTOCOL]) goto add_failure; + netlbl_netlink_auditinfo(skb, &audit_info); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; @@ -108,7 +111,7 @@ switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -125,7 +128,7 @@ rcu_read_unlock(); goto add_failure; } - ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add(entry, &audit_info); rcu_read_unlock(); break; default: @@ -156,12 +159,15 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) { char *domain; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_DOMAIN]) return -EINVAL; + netlbl_netlink_auditinfo(skb, &audit_info); + domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); - return netlbl_domhsh_remove(domain, NETLINK_CB(skb).sid); + return netlbl_domhsh_remove(domain, &audit_info); } /** @@ -264,10 +270,13 @@ int ret_val = -EINVAL; struct netlbl_dom_map *entry = NULL; u32 tmp_val; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) goto adddef_failure; + netlbl_netlink_auditinfo(skb, &audit_info); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; @@ -277,8 +286,7 @@ switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry, - NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -295,8 +303,7 @@ rcu_read_unlock(); goto adddef_failure; } - ret_val = netlbl_domhsh_add_default(entry, - NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); rcu_read_unlock(); break; default: @@ -324,7 +331,11 @@ */ static int 
netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { - return netlbl_domhsh_remove_default(NETLINK_CB(skb).sid); + struct netlbl_audit audit_info; + + netlbl_netlink_auditinfo(skb, &audit_info); + + return netlbl_domhsh_remove_default(&audit_info); } /** diff -urN oldtree/net/netlabel/netlabel_unlabeled.c newtree/net/netlabel/netlabel_unlabeled.c --- oldtree/net/netlabel/netlabel_unlabeled.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/netlabel/netlabel_unlabeled.c 2006-09-30 05:21:44.000000000 -0400 @@ -70,18 +70,25 @@ /** * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag * @value: desired value - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Set the value of the unlabeled accept flag to @value. * */ -static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid) +static void netlbl_unlabel_acceptflg_set(u8 value, + struct netlbl_audit *audit_info) { + struct audit_buffer *audit_buf; + u8 old_val; + + old_val = atomic_read(&netlabel_unlabel_accept_flg); atomic_set(&netlabel_unlabel_accept_flg, value); - netlbl_audit_nomsg((value ? 
- AUDIT_MAC_UNLBL_ACCEPT : AUDIT_MAC_UNLBL_DENY), - audit_secid); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, + audit_info); + audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); + audit_log_end(audit_buf); } /* @@ -101,12 +108,13 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { u8 value; + struct netlbl_audit audit_info; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { - netlbl_unlabel_acceptflg_set(value, - NETLINK_CB(skb).sid); + netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_unlabel_acceptflg_set(value, &audit_info); return 0; } } @@ -250,19 +258,23 @@ { int ret_val; struct netlbl_dom_map *entry; - u32 secid; + struct netlbl_audit audit_info; - security_task_getsecid(current, &secid); + /* Only the kernel is allowed to call this function and the only time + * it is called is at bootup before the audit subsystem is reporting + * messages so don't worry too much about these values.
*/ + security_task_getsecid(current, &audit_info.secid); + audit_info.loginuid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; entry->type = NETLBL_NLTYPE_UNLABELED; - ret_val = netlbl_domhsh_add_default(entry, secid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; - netlbl_unlabel_acceptflg_set(1, secid); + netlbl_unlabel_acceptflg_set(1, &audit_info); return 0; } diff -urN oldtree/net/netlabel/netlabel_user.c newtree/net/netlabel/netlabel_user.c --- oldtree/net/netlabel/netlabel_user.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/netlabel/netlabel_user.c 2006-09-30 05:21:44.000000000 -0400 @@ -85,7 +85,7 @@ /** * netlbl_audit_start_common - Start an audit message * @type: audit message type - * @secid: LSM context ID + * @audit_info: NetLabel audit information * * Description: * Start an audit message using the type specified in @type and fill the audit @@ -93,14 +93,11 @@ * a pointer to the audit buffer on success, NULL on failure. 
* */ -struct audit_buffer *netlbl_audit_start_common(int type, u32 secid) +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info) { struct audit_context *audit_ctx = current->audit_context; struct audit_buffer *audit_buf; - uid_t audit_loginuid; - const char *audit_tty; - char audit_comm[sizeof(current->comm)]; - struct vm_area_struct *vma; char *secctx; u32 secctx_len; @@ -108,60 +105,13 @@ if (audit_buf == NULL) return NULL; - audit_loginuid = audit_get_loginuid(audit_ctx); - if (current->signal && - current->signal->tty && - current->signal->tty->name) - audit_tty = current->signal->tty->name; - else - audit_tty = "(none)"; - get_task_comm(audit_comm, current); + audit_log_format(audit_buf, "netlabel: auid=%u", audit_info->loginuid); - audit_log_format(audit_buf, - "netlabel: auid=%u uid=%u tty=%s pid=%d", - audit_loginuid, - current->uid, - audit_tty, - current->pid); - audit_log_format(audit_buf, " comm="); - audit_log_untrustedstring(audit_buf, audit_comm); - if (current->mm) { - down_read(&current->mm->mmap_sem); - vma = current->mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && - vma->vm_file) { - audit_log_d_path(audit_buf, - " exe=", - vma->vm_file->f_dentry, - vma->vm_file->f_vfsmnt); - break; - } - vma = vma->vm_next; - } - up_read(&current->mm->mmap_sem); - } - - if (secid != 0 && - security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) + if (audit_info->secid != 0 && + security_secid_to_secctx(audit_info->secid, + &secctx, + &secctx_len) == 0) audit_log_format(audit_buf, " subj=%s", secctx); return audit_buf; } - -/** - * netlbl_audit_nomsg - Send an audit message without additional text - * @type: audit message type - * @secid: LSM context ID - * - * Description: - * Send an audit message with only the common NetLabel audit fields.
- * - */ -void netlbl_audit_nomsg(int type, u32 secid) -{ - struct audit_buffer *audit_buf; - - audit_buf = netlbl_audit_start_common(type, secid); - audit_log_end(audit_buf); -} diff -urN oldtree/net/netlabel/netlabel_user.h newtree/net/netlabel/netlabel_user.h --- oldtree/net/netlabel/netlabel_user.h 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/netlabel/netlabel_user.h 2006-09-30 05:21:44.000000000 -0400 @@ -72,13 +72,25 @@ NETLBL_PROTO_VERSION); } +/** + * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg + * @skb: the packet + * @audit_info: NetLabel audit information + */ +static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, + struct netlbl_audit *audit_info) +{ + audit_info->secid = NETLINK_CB(skb).sid; + audit_info->loginuid = NETLINK_CB(skb).loginuid; +} + /* NetLabel NETLINK I/O functions */ int netlbl_netlink_init(void); /* NetLabel Audit Functions */ -struct audit_buffer *netlbl_audit_start_common(int type, u32 secid); -void netlbl_audit_nomsg(int type, u32 secid); +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info); #endif diff -urN oldtree/net/sctp/input.c newtree/net/sctp/input.c --- oldtree/net/sctp/input.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/sctp/input.c 2006-09-30 05:21:44.000000000 -0400 @@ -218,12 +218,6 @@ } } - /* SCTP seems to always need a timestamp right now (FIXME) */ - if (skb->tstamp.off_sec == 0) { - __net_timestamp(skb); - sock_enable_timestamp(sk); - } - if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; nf_reset(skb); @@ -388,7 +382,7 @@ * pmtu discovery on this transport. 
*/ t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - t->param_flags = (t->param_flags & ~SPP_HB) | + t->param_flags = (t->param_flags & ~SPP_PMTUD) | SPP_PMTUD_DISABLE; } else { t->pathmtu = pmtu; diff -urN oldtree/net/sctp/output.c newtree/net/sctp/output.c --- oldtree/net/sctp/output.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/net/sctp/output.c 2006-09-30 05:21:44.000000000 -0400 @@ -633,7 +633,7 @@ * data will fit or delay in hopes of bundling a full * sized packet. */ - if (len < asoc->pathmtu - packet->overhead) { + if (len < asoc->frag_point) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } @@ -645,7 +645,13 @@ /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; - /* Update our view of the receiver's rwnd. */ + /* Update our view of the receiver's rwnd. Include sk_buff overhead + * while updating peer.rwnd so that it reduces the chances of a + * receiver running out of receive buffer space even when receive + * window is still open. This can happen when a sender is + * sending small messages. + */ + datasize += sizeof(struct sk_buff); if (datasize < rwnd) rwnd -= datasize; else diff -urN oldtree/net/sctp/outqueue.c newtree/net/sctp/outqueue.c --- oldtree/net/sctp/outqueue.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/sctp/outqueue.c 2006-09-30 05:21:44.000000000 -0400 @@ -416,7 +416,8 @@ * (Section 7.2.4)), add the data size of those * chunks to the rwnd. */ - q->asoc->peer.rwnd += sctp_data_size(chunk); + q->asoc->peer.rwnd += (sctp_data_size(chunk) + + sizeof(struct sk_buff)); q->outstanding_bytes -= sctp_data_size(chunk); transport->flight_size -= sctp_data_size(chunk); diff -urN oldtree/net/sctp/sm_make_chunk.c newtree/net/sctp/sm_make_chunk.c --- oldtree/net/sctp/sm_make_chunk.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/sctp/sm_make_chunk.c 2006-09-30 05:21:44.000000000 -0400 @@ -1447,8 +1447,16 @@ /* Check to see if the cookie is stale.
If there is already * an association, there is no need to check cookie's expiration * for init collision case of lost COOKIE ACK. + * If skb has been timestamped, then use the stamp, otherwise + * use current time. This introduces a small possibility that + * a cookie may be considered expired, but this would only slow + * down the new association establishment instead of every packet. */ - skb_get_timestamp(skb, &tv); + if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) + skb_get_timestamp(skb, &tv); + else + do_gettimeofday(&tv); + if (!asoc && tv_lt(bear_cookie->expiration, tv)) { __u16 len; /* diff -urN oldtree/net/sctp/socket.c newtree/net/sctp/socket.c --- oldtree/net/sctp/socket.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/sctp/socket.c 2006-09-30 05:21:44.000000000 -0400 @@ -3084,8 +3084,8 @@ */ sp->disable_fragments = 0; - /* Turn on/off any Nagle-like algorithm. */ - sp->nodelay = 1; + /* Enable Nagle algorithm by default. */ + sp->nodelay = 0; /* Enable by default. */ sp->v4mapped = 1; diff -urN oldtree/security/selinux/hooks.c newtree/security/selinux/hooks.c --- oldtree/security/selinux/hooks.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/security/selinux/hooks.c 2006-09-30 05:21:44.000000000 -0400 @@ -3619,7 +3619,9 @@ struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; struct sk_security_struct *sksec = sk->sk_security; - isec->sid = sksec->sid; + if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || + sk->sk_family == PF_UNIX) + isec->sid = sksec->sid; selinux_netlbl_sock_graft(sk, parent); }