/******************************************************************************* Intel PRO/1000 Linux driver Copyright(c) 1999 - 2008 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, version 2, as published by the Free Software Foundation. This program is distributed in the hope it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in the file called "COPYING". Contact Information: Linux NICS e1000-devel Mailing List Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 *******************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000.h" #define DRV_VERSION "0.3.3.3-k2" char e1000e_driver_name[] = "e1000e"; const char e1000e_driver_version[] = DRV_VERSION; static const struct e1000_info *e1000_info_tbl[] = { [board_82571] = &e1000_82571_info, [board_82572] = &e1000_82572_info, [board_82573] = &e1000_82573_info, [board_80003es2lan] = &e1000_es2_info, [board_ich8lan] = &e1000_ich8_info, [board_ich9lan] = &e1000_ich9_info, }; #ifdef DEBUG /** * e1000_get_hw_dev_name - return device name string * used by hardware layer to print debugging information **/ char *e1000e_get_hw_dev_name(struct e1000_hw *hw) { return hw->adapter->netdev->name; } #endif /** * e1000_desc_unused - calculate if we have unused descriptors **/ static int e1000_desc_unused(struct 
e1000_ring *ring) { if (ring->next_to_clean > ring->next_to_use) return ring->next_to_clean - ring->next_to_use - 1; return ring->count + ring->next_to_clean - ring->next_to_use - 1; } /** * e1000_receive_skb - helper function to handle Rx indications * @adapter: board private structure * @status: descriptor status field as written by hardware * @vlan: descriptor vlan field as written by hardware (no le/be conversion) * @skb: pointer to sk_buff to be indicated to stack **/ static void e1000_receive_skb(struct e1000_adapter *adapter, struct net_device *netdev, struct sk_buff *skb, u8 status, __le16 vlan) { skb->protocol = eth_type_trans(skb, netdev); if (adapter->vlgrp && (status & E1000_RXD_STAT_VP)) vlan_hwaccel_receive_skb(skb, adapter->vlgrp, le16_to_cpu(vlan)); else netif_receive_skb(skb); netdev->last_rx = jiffies; } /** * e1000_rx_checksum - Receive Checksum Offload for 82543 * @adapter: board private structure * @status_err: receive descriptor status and error fields * @csum: receive descriptor csum field * @sk_buff: socket buffer with received data **/ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, u32 csum, struct sk_buff *skb) { u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); skb->ip_summed = CHECKSUM_NONE; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) return; /* TCP/UDP checksum error bit is set */ if (errors & E1000_RXD_ERR_TCPE) { /* let the stack verify checksum errors */ adapter->hw_csum_err++; return; } /* TCP/UDP Checksum has not been calculated */ if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) return; /* It must be a TCP or UDP packet with a valid checksum */ if (status & E1000_RXD_STAT_TCPCS) { /* TCP checksum is good */ skb->ip_summed = CHECKSUM_UNNECESSARY; } else { /* * IP fragment with UDP payload * Hardware complements the payload checksum, so we undo it * and then put the value in host order for further stack use. 
*/ __sum16 sum = (__force __sum16)htons(csum); skb->csum = csum_unfold(~sum); skb->ip_summed = CHECKSUM_COMPLETE; } adapter->hw_csum_good++; } /** * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended * @adapter: address of board private structure **/ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, int cleaned_count) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct e1000_ring *rx_ring = adapter->rx_ring; struct e1000_rx_desc *rx_desc; struct e1000_buffer *buffer_info; struct sk_buff *skb; unsigned int i; unsigned int bufsz = adapter->rx_buffer_len + NET_IP_ALIGN; i = rx_ring->next_to_use; buffer_info = &rx_ring->buffer_info[i]; while (cleaned_count--) { skb = buffer_info->skb; if (skb) { skb_trim(skb, 0); goto map_skb; } skb = netdev_alloc_skb(netdev, bufsz); if (!skb) { /* Better luck next round */ adapter->alloc_rx_buff_failed++; break; } /* * Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after * the 14 byte MAC header is removed */ skb_reserve(skb, NET_IP_ALIGN); buffer_info->skb = skb; map_skb: buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); if (pci_dma_mapping_error(pdev, buffer_info->dma)) { dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; break; } rx_desc = E1000_RX_DESC(*rx_ring, i); rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); i++; if (i == rx_ring->count) i = 0; buffer_info = &rx_ring->buffer_info[i]; } if (rx_ring->next_to_use != i) { rx_ring->next_to_use = i; if (i-- == 0) i = (rx_ring->count - 1); /* * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). 
*/ wmb(); writel(i, adapter->hw.hw_addr + rx_ring->tail); } } /** * e1000_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @adapter: address of board private structure **/ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, int cleaned_count) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; union e1000_rx_desc_packet_split *rx_desc; struct e1000_ring *rx_ring = adapter->rx_ring; struct e1000_buffer *buffer_info; struct e1000_ps_page *ps_page; struct sk_buff *skb; unsigned int i, j; i = rx_ring->next_to_use; buffer_info = &rx_ring->buffer_info[i]; while (cleaned_count--) { rx_desc = E1000_RX_DESC_PS(*rx_ring, i); for (j = 0; j < PS_PAGE_BUFFERS; j++) { ps_page = &buffer_info->ps_pages[j]; if (j >= adapter->rx_ps_pages) { /* all unused desc entries get hw null ptr */ rx_desc->read.buffer_addr[j+1] = ~cpu_to_le64(0); continue; } if (!ps_page->page) { ps_page->page = alloc_page(GFP_ATOMIC); if (!ps_page->page) { adapter->alloc_rx_buff_failed++; goto no_buffers; } ps_page->dma = pci_map_page(pdev, ps_page->page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); if (pci_dma_mapping_error(pdev, ps_page->dma)) { dev_err(&adapter->pdev->dev, "RX DMA page map failed\n"); adapter->rx_dma_failed++; goto no_buffers; } } /* * Refresh the desc even if buffer_addrs * didn't change because each write-back * erases this info. 
*/ rx_desc->read.buffer_addr[j+1] = cpu_to_le64(ps_page->dma); } skb = netdev_alloc_skb(netdev, adapter->rx_ps_bsize0 + NET_IP_ALIGN); if (!skb) { adapter->alloc_rx_buff_failed++; break; } /* * Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after * the 14 byte MAC header is removed */ skb_reserve(skb, NET_IP_ALIGN); buffer_info->skb = skb; buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_ps_bsize0, PCI_DMA_FROMDEVICE); if (pci_dma_mapping_error(pdev, buffer_info->dma)) { dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; /* cleanup skb */ dev_kfree_skb_any(skb); buffer_info->skb = NULL; break; } rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma); i++; if (i == rx_ring->count) i = 0; buffer_info = &rx_ring->buffer_info[i]; } no_buffers: if (rx_ring->next_to_use != i) { rx_ring->next_to_use = i; if (!(i--)) i = (rx_ring->count - 1); /* * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). */ wmb(); /* * Hardware increments by 16 bytes, but packet split * descriptors are 32 bytes...so we increment tail * twice as much. 
*/ writel(i<<1, adapter->hw.hw_addr + rx_ring->tail); } } /** * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers * @adapter: address of board private structure * @rx_ring: pointer to receive ring structure * @cleaned_count: number of buffers to allocate this pass **/ static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, int cleaned_count) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct e1000_rx_desc *rx_desc; struct e1000_ring *rx_ring = adapter->rx_ring; struct e1000_buffer *buffer_info; struct sk_buff *skb; unsigned int i; unsigned int bufsz = 256 - 16 /* for skb_reserve */ - NET_IP_ALIGN; i = rx_ring->next_to_use; buffer_info = &rx_ring->buffer_info[i]; while (cleaned_count--) { skb = buffer_info->skb; if (skb) { skb_trim(skb, 0); goto check_page; } skb = netdev_alloc_skb(netdev, bufsz); if (unlikely(!skb)) { /* Better luck next round */ adapter->alloc_rx_buff_failed++; break; } /* Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after * the 14 byte MAC header is removed */ skb_reserve(skb, NET_IP_ALIGN); buffer_info->skb = skb; check_page: /* allocate a new page if necessary */ if (!buffer_info->page) { buffer_info->page = alloc_page(GFP_ATOMIC); if (unlikely(!buffer_info->page)) { adapter->alloc_rx_buff_failed++; break; } } if (!buffer_info->dma) buffer_info->dma = pci_map_page(pdev, buffer_info->page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); rx_desc = E1000_RX_DESC(*rx_ring, i); rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); if (unlikely(++i == rx_ring->count)) i = 0; buffer_info = &rx_ring->buffer_info[i]; } if (likely(rx_ring->next_to_use != i)) { rx_ring->next_to_use = i; if (unlikely(i-- == 0)) i = (rx_ring->count - 1); /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). 
*/ wmb(); writel(i, adapter->hw.hw_addr + rx_ring->tail); } } /** * e1000_clean_rx_irq - Send received data up the network stack; legacy * @adapter: board private structure * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned **/ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, int *work_done, int work_to_do) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct e1000_ring *rx_ring = adapter->rx_ring; struct e1000_rx_desc *rx_desc, *next_rxd; struct e1000_buffer *buffer_info, *next_buffer; u32 length; unsigned int i; int cleaned_count = 0; bool cleaned = 0; unsigned int total_rx_bytes = 0, total_rx_packets = 0; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC(*rx_ring, i); buffer_info = &rx_ring->buffer_info[i]; while (rx_desc->status & E1000_RXD_STAT_DD) { struct sk_buff *skb; u8 status; if (*work_done >= work_to_do) break; (*work_done)++; status = rx_desc->status; skb = buffer_info->skb; buffer_info->skb = NULL; prefetch(skb->data - NET_IP_ALIGN); i++; if (i == rx_ring->count) i = 0; next_rxd = E1000_RX_DESC(*rx_ring, i); prefetch(next_rxd); next_buffer = &rx_ring->buffer_info[i]; cleaned = 1; cleaned_count++; pci_unmap_single(pdev, buffer_info->dma, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); buffer_info->dma = 0; length = le16_to_cpu(rx_desc->length); /* !EOP means multiple descriptors were used to store a single * packet, also make sure the frame isn't just CRC only */ if (!(status & E1000_RXD_STAT_EOP) || (length <= 4)) { /* All receives must fit into a single buffer */ ndev_dbg(netdev, "%s: Receive packet consumed " "multiple buffers\n", netdev->name); /* recycle */ buffer_info->skb = skb; goto next_desc; } if (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) { /* recycle */ buffer_info->skb = skb; goto next_desc; } total_rx_bytes += length; total_rx_packets++; /* * code added for copybreak, this should improve * performance for small packets 
with large amounts * of reassembly being done in the stack */ if (length < copybreak) { struct sk_buff *new_skb = netdev_alloc_skb(netdev, length + NET_IP_ALIGN); if (new_skb) { skb_reserve(new_skb, NET_IP_ALIGN); memcpy(new_skb->data - NET_IP_ALIGN, skb->data - NET_IP_ALIGN, length + NET_IP_ALIGN); /* save the skb in buffer_info as good */ buffer_info->skb = skb; skb = new_skb; } /* else just continue with the old one */ } /* end copybreak code */ skb_put(skb, length); /* Receive Checksum Offload */ e1000_rx_checksum(adapter, (u32)(status) | ((u32)(rx_desc->errors) << 24), le16_to_cpu(rx_desc->csum), skb); e1000_receive_skb(adapter, netdev, skb,status,rx_desc->special); next_desc: rx_desc->status = 0; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= E1000_RX_BUFFER_WRITE) { adapter->alloc_rx_buf(adapter, cleaned_count); cleaned_count = 0; } /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; } rx_ring->next_to_clean = i; cleaned_count = e1000_desc_unused(rx_ring); if (cleaned_count) adapter->alloc_rx_buf(adapter, cleaned_count); adapter->total_rx_bytes += total_rx_bytes; adapter->total_rx_packets += total_rx_packets; adapter->net_stats.rx_bytes += total_rx_bytes; adapter->net_stats.rx_packets += total_rx_packets; return cleaned; } static void e1000_put_txbuf(struct e1000_adapter *adapter, struct e1000_buffer *buffer_info) { if (buffer_info->dma) { pci_unmap_page(adapter->pdev, buffer_info->dma, buffer_info->length, PCI_DMA_TODEVICE); buffer_info->dma = 0; } if (buffer_info->skb) { dev_kfree_skb_any(buffer_info->skb); buffer_info->skb = NULL; } } static void e1000_print_tx_hang(struct e1000_adapter *adapter) { struct e1000_ring *tx_ring = adapter->tx_ring; unsigned int i = tx_ring->next_to_clean; unsigned int eop = tx_ring->buffer_info[i].next_to_watch; struct e1000_tx_desc *eop_desc = E1000_TX_DESC(*tx_ring, eop); struct net_device *netdev = adapter->netdev; /* detected Tx unit hang */ ndev_err(netdev, 
"Detected Tx Unit Hang:\n" " TDH <%x>\n" " TDT <%x>\n" " next_to_use <%x>\n" " next_to_clean <%x>\n" "buffer_info[next_to_clean]:\n" " time_stamp <%lx>\n" " next_to_watch <%x>\n" " jiffies <%lx>\n" " next_to_watch.status <%x>\n", readl(adapter->hw.hw_addr + tx_ring->head), readl(adapter->hw.hw_addr + tx_ring->tail), tx_ring->next_to_use, tx_ring->next_to_clean, tx_ring->buffer_info[eop].time_stamp, eop, jiffies, eop_desc->upper.fields.status); } /** * e1000_clean_tx_irq - Reclaim resources after transmit completes * @adapter: board private structure * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned **/ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct e1000_hw *hw = &adapter->hw; struct e1000_ring *tx_ring = adapter->tx_ring; struct e1000_tx_desc *tx_desc, *eop_desc; struct e1000_buffer *buffer_info; unsigned int i, eop; unsigned int count = 0; bool cleaned = 0; unsigned int total_tx_bytes = 0, total_tx_packets = 0; i = tx_ring->next_to_clean; eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC(*tx_ring, eop); while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { for (cleaned = 0; !cleaned; ) { tx_desc = E1000_TX_DESC(*tx_ring, i); buffer_info = &tx_ring->buffer_info[i]; cleaned = (i == eop); if (cleaned) { struct sk_buff *skb = buffer_info->skb; unsigned int segs, bytecount; segs = skb_shinfo(skb)->gso_segs ?: 1; /* multiply data chunks by size of headers */ bytecount = ((segs - 1) * skb_headlen(skb)) + skb->len; total_tx_packets += segs; total_tx_bytes += bytecount; } e1000_put_txbuf(adapter, buffer_info); tx_desc->upper.data = 0; i++; if (i == tx_ring->count) i = 0; } eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC(*tx_ring, eop); #define E1000_TX_WEIGHT 64 /* weight of a sort for tx, to avoid endless transmit cleanup */ if (count++ == E1000_TX_WEIGHT) break; } 
tx_ring->next_to_clean = i; #define TX_WAKE_THRESHOLD 32 if (cleaned && netif_carrier_ok(netdev) && e1000_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ smp_mb(); if (netif_queue_stopped(netdev) && !(test_bit(__E1000_DOWN, &adapter->state))) { netif_wake_queue(netdev); ++adapter->restart_queue; } } if (adapter->detect_tx_hung) { /* * Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ adapter->detect_tx_hung = 0; if (tx_ring->buffer_info[eop].dma && time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + (adapter->tx_timeout_factor * HZ)) && !(er32(STATUS) & E1000_STATUS_TXOFF)) { e1000_print_tx_hang(adapter); netif_stop_queue(netdev); } } adapter->total_tx_bytes += total_tx_bytes; adapter->total_tx_packets += total_tx_packets; adapter->net_stats.tx_bytes += total_tx_bytes; adapter->net_stats.tx_packets += total_tx_packets; return cleaned; } /** * e1000_clean_rx_irq_ps - Send received data up the network stack; packet split * @adapter: board private structure * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned **/ static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, int *work_done, int work_to_do) { union e1000_rx_desc_packet_split *rx_desc, *next_rxd; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct e1000_ring *rx_ring = adapter->rx_ring; struct e1000_buffer *buffer_info, *next_buffer; struct e1000_ps_page *ps_page; struct sk_buff *skb; unsigned int i, j; u32 length, staterr; int cleaned_count = 0; bool cleaned = 0; unsigned int total_rx_bytes = 0, total_rx_packets = 0; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC_PS(*rx_ring, i); staterr = le32_to_cpu(rx_desc->wb.middle.status_error); buffer_info = &rx_ring->buffer_info[i]; while (staterr & E1000_RXD_STAT_DD) { if (*work_done >= 
work_to_do) break; (*work_done)++; skb = buffer_info->skb; /* in the packet split case this is header only */ prefetch(skb->data - NET_IP_ALIGN); i++; if (i == rx_ring->count) i = 0; next_rxd = E1000_RX_DESC_PS(*rx_ring, i); prefetch(next_rxd); next_buffer = &rx_ring->buffer_info[i]; cleaned = 1; cleaned_count++; pci_unmap_single(pdev, buffer_info->dma, adapter->rx_ps_bsize0, PCI_DMA_FROMDEVICE); buffer_info->dma = 0; if (!(staterr & E1000_RXD_STAT_EOP)) { ndev_dbg(netdev, "%s: Packet Split buffers didn't pick " "up the full packet\n", netdev->name); dev_kfree_skb_irq(skb); goto next_desc; } if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { dev_kfree_skb_irq(skb); goto next_desc; } length = le16_to_cpu(rx_desc->wb.middle.length0); if (!length) { ndev_dbg(netdev, "%s: Last part of the packet spanning" " multiple descriptors\n", netdev->name); dev_kfree_skb_irq(skb); goto next_desc; } /* Good Receive */ skb_put(skb, length); { /* * this looks ugly, but it seems compiler issues make it * more efficient than reusing j */ int l1 = le16_to_cpu(rx_desc->wb.upper.length[0]); /* * page alloc/put takes too long and effects small packet * throughput, so unsplit small packets and save the alloc/put * only valid in softirq (napi) context to call kmap_* */ if (l1 && (l1 <= copybreak) && ((length + l1) <= adapter->rx_ps_bsize0)) { u8 *vaddr; ps_page = &buffer_info->ps_pages[0]; /* * there is no documentation about how to call * kmap_atomic, so we can't hold the mapping * very long */ pci_dma_sync_single_for_cpu(pdev, ps_page->dma, PAGE_SIZE, PCI_DMA_FROMDEVICE); vaddr = kmap_atomic(ps_page->page, KM_SKB_DATA_SOFTIRQ); memcpy(skb_tail_pointer(skb), vaddr, l1); kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); pci_dma_sync_single_for_device(pdev, ps_page->dma, PAGE_SIZE, PCI_DMA_FROMDEVICE); skb_put(skb, l1); goto copydone; } /* if */ } for (j = 0; j < PS_PAGE_BUFFERS; j++) { length = le16_to_cpu(rx_desc->wb.upper.length[j]); if (!length) break; ps_page = &buffer_info->ps_pages[j]; 
pci_unmap_page(pdev, ps_page->dma, PAGE_SIZE, PCI_DMA_FROMDEVICE); ps_page->dma = 0; skb_fill_page_desc(skb, j, ps_page->page, 0, length); ps_page->page = NULL; skb->len += length; skb->data_len += length; skb->truesize += length; } copydone: total_rx_bytes += skb->len; total_rx_packets++; e1000_rx_checksum(adapter, staterr, le16_to_cpu( rx_desc->wb.lower.hi_dword.csum_ip.csum), skb); if (rx_desc->wb.upper.header_status & cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP)) adapter->rx_hdr_split++; e1000_receive_skb(adapter, netdev, skb, staterr, rx_desc->wb.middle.vlan); next_desc: rx_desc->wb.middle.status_error &= cpu_to_le32(~0xFF); buffer_info->skb = NULL; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= E1000_RX_BUFFER_WRITE) { adapter->alloc_rx_buf(adapter, cleaned_count); cleaned_count = 0; } /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; staterr = le32_to_cpu(rx_desc->wb.middle.status_error); } rx_ring->next_to_clean = i; cleaned_count = e1000_desc_unused(rx_ring); if (cleaned_count) adapter->alloc_rx_buf(adapter, cleaned_count); adapter->total_rx_bytes += total_rx_bytes; adapter->total_rx_packets += total_rx_packets; adapter->net_stats.rx_bytes += total_rx_bytes; adapter->net_stats.rx_packets += total_rx_packets; return cleaned; } /** * e1000_consume_page - helper function **/ static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb, u16 length) { bi->page = NULL; skb->len += length; skb->data_len += length; skb->truesize += length; } /** * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy * @adapter: board private structure * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned **/ static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, int *work_done, int work_to_do) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct e1000_ring *rx_ring = 
adapter->rx_ring; struct e1000_rx_desc *rx_desc, *next_rxd; struct e1000_buffer *buffer_info, *next_buffer; u32 length; unsigned int i; int cleaned_count = 0; bool cleaned = false; unsigned int total_rx_bytes=0, total_rx_packets=0; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC(*rx_ring, i); buffer_info = &rx_ring->buffer_info[i]; while (rx_desc->status & E1000_RXD_STAT_DD) { struct sk_buff *skb; u8 status; if (*work_done >= work_to_do) break; (*work_done)++; status = rx_desc->status; skb = buffer_info->skb; buffer_info->skb = NULL; ++i; if (i == rx_ring->count) i = 0; next_rxd = E1000_RX_DESC(*rx_ring, i); prefetch(next_rxd); next_buffer = &rx_ring->buffer_info[i]; cleaned = true; cleaned_count++; pci_unmap_page(pdev, buffer_info->dma, PAGE_SIZE, PCI_DMA_FROMDEVICE); buffer_info->dma = 0; length = le16_to_cpu(rx_desc->length); /* errors is only valid for DD + EOP descriptors */ if (unlikely((status & E1000_RXD_STAT_EOP) && (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) { /* recycle both page and skb */ buffer_info->skb = skb; /* an error means any chain goes out the window * too */ if (rx_ring->rx_skb_top) dev_kfree_skb(rx_ring->rx_skb_top); rx_ring->rx_skb_top = NULL; goto next_desc; } #define rxtop rx_ring->rx_skb_top if (!(status & E1000_RXD_STAT_EOP)) { /* this descriptor is only the beginning (or middle) */ if (!rxtop) { /* this is the beginning of a chain */ rxtop = skb; skb_fill_page_desc(rxtop, 0, buffer_info->page, 0, length); } else { /* this is the middle of a chain */ skb_fill_page_desc(rxtop, skb_shinfo(rxtop)->nr_frags, buffer_info->page, 0, length); /* re-use the skb, only consumed the page */ buffer_info->skb = skb; } e1000_consume_page(buffer_info, rxtop, length); goto next_desc; } else { if (rxtop) { /* end of the chain */ skb_fill_page_desc(rxtop, skb_shinfo(rxtop)->nr_frags, buffer_info->page, 0, length); /* re-use the current skb, we only consumed the * page */ buffer_info->skb = skb; skb = rxtop; rxtop = NULL; 
e1000_consume_page(buffer_info, skb, length); } else { /* no chain, got EOP, this buf is the packet * copybreak to save the put_page/alloc_page */ if (length <= copybreak && skb_tailroom(skb) >= length) { u8 *vaddr; vaddr = kmap_atomic(buffer_info->page, KM_SKB_DATA_SOFTIRQ); memcpy(skb_tail_pointer(skb), vaddr, length); kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); /* re-use the page, so don't erase * buffer_info->page */ skb_put(skb, length); } else { skb_fill_page_desc(skb, 0, buffer_info->page, 0, length); e1000_consume_page(buffer_info, skb, length); } } } /* Receive Checksum Offload XXX recompute due to CRC strip? */ e1000_rx_checksum(adapter, (u32)(status) | ((u32)(rx_desc->errors) << 24), le16_to_cpu(rx_desc->csum), skb); /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; total_rx_packets++; /* eth type trans needs skb->data to point to something */ if (!pskb_may_pull(skb, ETH_HLEN)) { ndev_err(netdev, "pskb_may_pull failed.\n"); dev_kfree_skb(skb); goto next_desc; } e1000_receive_skb(adapter, netdev, skb, status, rx_desc->special); next_desc: rx_desc->status = 0; /* return some buffers to hardware, one at a time is too slow */ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { adapter->alloc_rx_buf(adapter, cleaned_count); cleaned_count = 0; } /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; } rx_ring->next_to_clean = i; cleaned_count = e1000_desc_unused(rx_ring); if (cleaned_count) adapter->alloc_rx_buf(adapter, cleaned_count); adapter->total_rx_bytes += total_rx_bytes; adapter->total_rx_packets += total_rx_packets; adapter->net_stats.rx_bytes += total_rx_bytes; adapter->net_stats.rx_packets += total_rx_packets; return cleaned; } /** * e1000_clean_rx_ring - Free Rx Buffers per Queue * @adapter: board private structure **/ static void e1000_clean_rx_ring(struct e1000_adapter *adapter) { struct e1000_ring *rx_ring = adapter->rx_ring; struct e1000_buffer *buffer_info; struct e1000_ps_page *ps_page; 
struct pci_dev *pdev = adapter->pdev; unsigned int i, j; /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; if (buffer_info->dma) { if (adapter->clean_rx == e1000_clean_rx_irq) pci_unmap_single(pdev, buffer_info->dma, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq) pci_unmap_page(pdev, buffer_info->dma, PAGE_SIZE, PCI_DMA_FROMDEVICE); else if (adapter->clean_rx == e1000_clean_rx_irq_ps) pci_unmap_single(pdev, buffer_info->dma, adapter->rx_ps_bsize0, PCI_DMA_FROMDEVICE); buffer_info->dma = 0; } if (buffer_info->page) { put_page(buffer_info->page); buffer_info->page = NULL; } if (buffer_info->skb) { dev_kfree_skb(buffer_info->skb); buffer_info->skb = NULL; } for (j = 0; j < PS_PAGE_BUFFERS; j++) { ps_page = &buffer_info->ps_pages[j]; if (!ps_page->page) break; pci_unmap_page(pdev, ps_page->dma, PAGE_SIZE, PCI_DMA_FROMDEVICE); ps_page->dma = 0; put_page(ps_page->page); ps_page->page = NULL; } } /* there also may be some cached data from a chained receive */ if (rx_ring->rx_skb_top) { dev_kfree_skb(rx_ring->rx_skb_top); rx_ring->rx_skb_top = NULL; } /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; writel(0, adapter->hw.hw_addr + rx_ring->head); writel(0, adapter->hw.hw_addr + rx_ring->tail); } /** * e1000_intr_msi - Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure **/ static irqreturn_t e1000_intr_msi(int irq, void *data) { struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 icr = er32(ICR); /* * read ICR disables interrupts using IAM */ if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { hw->mac.get_link_status = 1; /* * ICH8 workaround-- Call gig speed drop workaround on cable * disconnect (LSC) before accessing any PHY registers */ if ((adapter->flags & 
FLAG_LSC_GIG_SPEED_DROP) && (!(er32(STATUS) & E1000_STATUS_LU))) e1000e_gig_downshift_workaround_ich8lan(hw); /* * 80003ES2LAN workaround-- For packet buffer work-around on * link down event; disable receives here in the ISR and reset * adapter in watchdog */ if (netif_carrier_ok(netdev) && adapter->flags & FLAG_RX_NEEDS_RESTART) { /* disable receives */ u32 rctl = er32(RCTL); ew32(RCTL, rctl & ~E1000_RCTL_EN); adapter->flags |= FLAG_RX_RESTART_NOW; } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); } if (netif_rx_schedule_prep(netdev, &adapter->napi)) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; adapter->total_rx_bytes = 0; adapter->total_rx_packets = 0; __netif_rx_schedule(netdev, &adapter->napi); } return IRQ_HANDLED; } /** * e1000_intr - Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure **/ static irqreturn_t e1000_intr(int irq, void *data) { struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 rctl, icr = er32(ICR); if (!icr) return IRQ_NONE; /* Not our interrupt */ /* * IMS will not auto-mask if INT_ASSERTED is not set, and if it is * not set, then the adapter didn't send an interrupt */ if (!(icr & E1000_ICR_INT_ASSERTED)) return IRQ_NONE; /* * Interrupt Auto-Mask...upon reading ICR, * interrupts are masked. 
No need for the * IMC write */ if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { hw->mac.get_link_status = 1; /* * ICH8 workaround-- Call gig speed drop workaround on cable * disconnect (LSC) before accessing any PHY registers */ if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) && (!(er32(STATUS) & E1000_STATUS_LU))) e1000e_gig_downshift_workaround_ich8lan(hw); /* * 80003ES2LAN workaround-- * For packet buffer work-around on link down event; * disable receives here in the ISR and * reset adapter in watchdog */ if (netif_carrier_ok(netdev) && (adapter->flags & FLAG_RX_NEEDS_RESTART)) { /* disable receives */ rctl = er32(RCTL); ew32(RCTL, rctl & ~E1000_RCTL_EN); adapter->flags |= FLAG_RX_RESTART_NOW; } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); } if (netif_rx_schedule_prep(netdev, &adapter->napi)) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; adapter->total_rx_bytes = 0; adapter->total_rx_packets = 0; __netif_rx_schedule(netdev, &adapter->napi); } return IRQ_HANDLED; } static int e1000_request_irq(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; irq_handler_t handler = e1000_intr; int irq_flags = IRQF_SHARED; int err; if (!pci_enable_msi(adapter->pdev)) { adapter->flags |= FLAG_MSI_ENABLED; handler = e1000_intr_msi; irq_flags = 0; } err = request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name, netdev); if (err) { ndev_err(netdev, "Unable to allocate %s interrupt (return: %d)\n", adapter->flags & FLAG_MSI_ENABLED ? 
"MSI":"INTx", err); if (adapter->flags & FLAG_MSI_ENABLED) pci_disable_msi(adapter->pdev); } return err; } static void e1000_free_irq(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; free_irq(adapter->pdev->irq, netdev); if (adapter->flags & FLAG_MSI_ENABLED) { pci_disable_msi(adapter->pdev); adapter->flags &= ~FLAG_MSI_ENABLED; } } /** * e1000_irq_disable - Mask off interrupt generation on the NIC **/ static void e1000_irq_disable(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; ew32(IMC, ~0); e1e_flush(); synchronize_irq(adapter->pdev->irq); } /** * e1000_irq_enable - Enable default interrupt generation settings **/ static void e1000_irq_enable(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; ew32(IMS, IMS_ENABLE_MASK); e1e_flush(); } /** * e1000_get_hw_control - get control of the h/w from f/w * @adapter: address of board private structure * * e1000_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is loaded. For AMT version (only with 82573) * of the f/w this means that the network i/f is open. **/ static void e1000_get_hw_control(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ctrl_ext; u32 swsm; /* Let firmware know the driver has taken over */ if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) { swsm = er32(SWSM); ew32(SWSM, swsm | E1000_SWSM_DRV_LOAD); } else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) { ctrl_ext = er32(CTRL_EXT); ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } } /** * e1000_release_hw_control - release control of the h/w to f/w * @adapter: address of board private structure * * e1000_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that the * driver is no longer loaded. For AMT version (only with 82573) i * of the f/w this means that the network i/f is closed. 
*
 **/
static void e1000_release_hw_control(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;
	u32 swsm;

	/* Let firmware take over control of h/w */
	if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) {
		swsm = er32(SWSM);
		ew32(SWSM, swsm & ~E1000_SWSM_DRV_LOAD);
	} else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) {
		ctrl_ext = er32(CTRL_EXT);
		ew32(CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
	}
}

/**
 * e1000_alloc_ring_dma - allocate coherent DMA memory for a descriptor ring
 * @adapter: board private structure
 * @ring: ring to allocate for; ring->size must already be set
 *
 * On success ring->desc and ring->dma describe the allocation.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 **/
static int e1000_alloc_ring_dma(struct e1000_adapter *adapter,
				struct e1000_ring *ring)
{
	struct pci_dev *pdev = adapter->pdev;

	ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
					GFP_KERNEL);
	if (!ring->desc)
		return -ENOMEM;

	return 0;
}

/**
 * e1000e_setup_tx_resources - allocate Tx resources (Descriptors)
 * @adapter: board private structure
 *
 * Allocates the zeroed per-descriptor buffer_info array and the descriptor
 * ring itself, then resets the ring indices.
 *
 * Return 0 on success, negative on failure
 **/
int e1000e_setup_tx_resources(struct e1000_adapter *adapter)
{
	struct e1000_ring *tx_ring = adapter->tx_ring;
	int err = -ENOMEM, size;

	size = sizeof(struct e1000_buffer) * tx_ring->count;
	tx_ring->buffer_info = vmalloc(size);
	if (!tx_ring->buffer_info)
		goto err;
	memset(tx_ring->buffer_info, 0, size);

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
	tx_ring->size = ALIGN(tx_ring->size, 4096);

	err = e1000_alloc_ring_dma(adapter, tx_ring);
	if (err)
		goto err;

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	spin_lock_init(&adapter->tx_queue_lock);

	return 0;
err:
	/* buffer_info is either NULL or the vmalloc'ed array at this point */
	vfree(tx_ring->buffer_info);
	ndev_err(adapter->netdev,
	"Unable to allocate memory for the transmit descriptor ring\n");
	return err;
}

/**
 * e1000e_setup_rx_resources - allocate Rx resources (Descriptors)
 * @adapter: board private structure
 *
 * Returns 0 on success, negative on failure
 **/
int e1000e_setup_rx_resources(struct e1000_adapter *adapter)
{
	struct e1000_ring *rx_ring = adapter->rx_ring;
	struct e1000_buffer *buffer_info;
	int i, size, desc_len, err = -ENOMEM;

	size =
sizeof(struct e1000_buffer) * rx_ring->count; rx_ring->buffer_info = vmalloc(size); if (!rx_ring->buffer_info) goto err; memset(rx_ring->buffer_info, 0, size); for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; buffer_info->ps_pages = kcalloc(PS_PAGE_BUFFERS, sizeof(struct e1000_ps_page), GFP_KERNEL); if (!buffer_info->ps_pages) goto err_pages; } desc_len = sizeof(union e1000_rx_desc_packet_split); /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * desc_len; rx_ring->size = ALIGN(rx_ring->size, 4096); err = e1000_alloc_ring_dma(adapter, rx_ring); if (err) goto err_pages; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; rx_ring->rx_skb_top = NULL; return 0; err_pages: for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; kfree(buffer_info->ps_pages); } err: vfree(rx_ring->buffer_info); ndev_err(adapter->netdev, "Unable to allocate memory for the transmit descriptor ring\n"); return err; } /** * e1000_clean_tx_ring - Free Tx Buffers * @adapter: board private structure **/ static void e1000_clean_tx_ring(struct e1000_adapter *adapter) { struct e1000_ring *tx_ring = adapter->tx_ring; struct e1000_buffer *buffer_info; unsigned long size; unsigned int i; for (i = 0; i < tx_ring->count; i++) { buffer_info = &tx_ring->buffer_info[i]; e1000_put_txbuf(adapter, buffer_info); } size = sizeof(struct e1000_buffer) * tx_ring->count; memset(tx_ring->buffer_info, 0, size); memset(tx_ring->desc, 0, tx_ring->size); tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; writel(0, adapter->hw.hw_addr + tx_ring->head); writel(0, adapter->hw.hw_addr + tx_ring->tail); } /** * e1000e_free_tx_resources - Free Tx Resources per Queue * @adapter: board private structure * * Free all transmit software resources **/ void e1000e_free_tx_resources(struct e1000_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct e1000_ring *tx_ring = adapter->tx_ring; e1000_clean_tx_ring(adapter); vfree(tx_ring->buffer_info); 
tx_ring->buffer_info = NULL;

	dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
			  tx_ring->dma);
	tx_ring->desc = NULL;
}

/**
 * e1000e_free_rx_resources - Free Rx Resources
 * @adapter: board private structure
 *
 * Free all receive software resources: the per-descriptor ps_pages arrays,
 * the buffer_info array and the descriptor ring memory.
 **/

void e1000e_free_rx_resources(struct e1000_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_ring *rx_ring = adapter->rx_ring;
	int i;

	e1000_clean_rx_ring(adapter);

	for (i = 0; i < rx_ring->count; i++) {
		kfree(rx_ring->buffer_info[i].ps_pages);
	}

	vfree(rx_ring->buffer_info);
	rx_ring->buffer_info = NULL;

	dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
			  rx_ring->dma);
	rx_ring->desc = NULL;
}

/**
 * e1000_update_itr - update the dynamic ITR value based on statistics
 * @adapter: pointer to adapter
 * @itr_setting: current adapter->itr
 * @packets: the number of packets during this measurement interval
 * @bytes: the number of bytes during this measurement interval
 *
 *      Stores a new ITR value based on packets and byte
 *      counts during the last interrupt.  The advantage of per interrupt
 *      computation is faster updates and more accurate ITR for the current
 *      traffic pattern.  Constants in this function were computed
 *      based on theoretical maximum wire speed and thresholds were set based
 *      on testing data as well as attempting to minimize response time
 *      while increasing bulk throughput.
* this functionality is controlled by the InterruptThrottleRate module * parameter (see e1000_param.c) **/ static unsigned int e1000_update_itr(struct e1000_adapter *adapter, u16 itr_setting, int packets, int bytes) { unsigned int retval = itr_setting; if (packets == 0) goto update_itr_done; switch (itr_setting) { case lowest_latency: /* handle TSO and jumbo frames */ if (bytes/packets > 8000) retval = bulk_latency; else if ((packets < 5) && (bytes > 512)) { retval = low_latency; } break; case low_latency: /* 50 usec aka 20000 ints/s */ if (bytes > 10000) { /* this if handles the TSO accounting */ if (bytes/packets > 8000) { retval = bulk_latency; } else if ((packets < 10) || ((bytes/packets) > 1200)) { retval = bulk_latency; } else if ((packets > 35)) { retval = lowest_latency; } } else if (bytes/packets > 2000) { retval = bulk_latency; } else if (packets <= 2 && bytes < 512) { retval = lowest_latency; } break; case bulk_latency: /* 250 usec aka 4000 ints/s */ if (bytes > 25000) { if (packets > 35) { retval = low_latency; } } else if (bytes < 6000) { retval = low_latency; } break; } update_itr_done: return retval; } static void e1000_set_itr(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u16 current_itr; u32 new_itr = adapter->itr; /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ if (adapter->link_speed != SPEED_1000) { current_itr = 0; new_itr = 4000; goto set_itr_now; } adapter->tx_itr = e1000_update_itr(adapter, adapter->tx_itr, adapter->total_tx_packets, adapter->total_tx_bytes); /* conservative mode (itr 3) eliminates the lowest_latency setting */ if (adapter->itr_setting == 3 && adapter->tx_itr == lowest_latency) adapter->tx_itr = low_latency; adapter->rx_itr = e1000_update_itr(adapter, adapter->rx_itr, adapter->total_rx_packets, adapter->total_rx_bytes); /* conservative mode (itr 3) eliminates the lowest_latency setting */ if (adapter->itr_setting == 3 && adapter->rx_itr == lowest_latency) adapter->rx_itr = 
low_latency;

	/* use the larger of the Rx and Tx latency classes */
	current_itr = max(adapter->rx_itr, adapter->tx_itr);

	switch (current_itr) {
	/* counts and packets in update_itr are dependent on these numbers */
	case lowest_latency:
		new_itr = 70000;
		break;
	case low_latency:
		new_itr = 20000; /* aka hwitr = ~200 */
		break;
	case bulk_latency:
		new_itr = 4000;
		break;
	default:
		break;
	}

set_itr_now:
	if (new_itr != adapter->itr) {
		/*
		 * this attempts to bias the interrupt rate towards Bulk
		 * by adding intermediate steps when interrupt rate is
		 * increasing
		 */
		new_itr = new_itr > adapter->itr ?
			     min(adapter->itr + (new_itr >> 2), new_itr) :
			     new_itr;
		adapter->itr = new_itr;
		/* same interrupts/s -> register value conversion as used in
		 * e1000_configure_rx */
		ew32(ITR, 1000000000 / (new_itr * 256));
	}
}

/**
 * e1000_clean - NAPI Rx polling callback
 * @napi: struct associated with this polling callback
 * @budget: amount of packets driver is allowed to process this poll
 *
 * Returns the amount of work done; this is forced to @budget whenever the
 * Tx clean routine returned non-zero, which keeps the poll scheduled.
 **/
static int e1000_clean(struct napi_struct *napi, int budget)
{
	struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi);
	struct net_device *poll_dev = adapter->netdev;
	int tx_cleaned = 0, work_done = 0;

	/* Must NOT use netdev_priv macro here. */
	adapter = poll_dev->priv;

	/*
	 * e1000_clean is called per-cpu.  This lock protects
	 * tx_ring from being cleaned by multiple cpus
	 * simultaneously.  A failure obtaining the lock means
	 * tx_ring is currently being cleaned anyway.
*/
	if (spin_trylock(&adapter->tx_queue_lock)) {
		tx_cleaned = e1000_clean_tx_irq(adapter);
		spin_unlock(&adapter->tx_queue_lock);
	}

	adapter->clean_rx(adapter, &work_done, budget);

	/* report a full budget so polling continues after Tx work */
	if (tx_cleaned)
		work_done = budget;

	/* If budget not fully consumed, exit the polling mode */
	if (work_done < budget) {
		if (adapter->itr_setting & 3)
			e1000_set_itr(adapter);
		netif_rx_complete(poll_dev, napi);
		e1000_irq_enable(adapter);
	}

	return work_done;
}

/**
 * e1000_vlan_rx_add_vid - set a VLAN ID in the hardware filter table
 * @netdev: network interface device structure
 * @vid: VLAN ID to add
 *
 * Does nothing when @vid is the management VLAN that is already programmed.
 **/
static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 vfta, index;

	/* don't update vlan cookie if already programmed */
	if ((adapter->hw.mng_cookie.status &
	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
	    (vid == adapter->mng_vlan_id))
		return;
	/* add VID to filter table */
	/* 32 VIDs per VFTA word: bits 5 and up pick the word, low 5 the bit */
	index = (vid >> 5) & 0x7F;
	vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index);
	vfta |= (1 << (vid & 0x1F));
	e1000e_write_vfta(hw, index, vfta);
}

/**
 * e1000_vlan_rx_kill_vid - clear a VLAN ID from the hardware filter table
 * @netdev: network interface device structure
 * @vid: VLAN ID to remove
 *
 * Drops the VLAN device from the group with interrupts masked (unless the
 * interface is down).  For the management VLAN the filter table is left
 * untouched and control is released to firmware instead.
 **/
static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 vfta, index;

	if (!test_bit(__E1000_DOWN, &adapter->state))
		e1000_irq_disable(adapter);
	vlan_group_set_device(adapter->vlgrp, vid, NULL);

	if (!test_bit(__E1000_DOWN, &adapter->state))
		e1000_irq_enable(adapter);

	if ((adapter->hw.mng_cookie.status &
	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
	    (vid == adapter->mng_vlan_id)) {
		/* release control to f/w */
		e1000_release_hw_control(adapter);
		return;
	}

	/* remove VID from filter table */
	index = (vid >> 5) & 0x7F;
	vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index);
	vfta &= ~(1 << (vid & 0x1F));
	e1000e_write_vfta(hw, index, vfta);
}

/**
 * e1000_update_mng_vlan - sync the driver's management VLAN ID
 * @adapter: board private structure
 *
 * Compares the VLAN ID from the manageability cookie with the one the
 * driver has recorded and updates the filter table and
 * adapter->mng_vlan_id accordingly.  No-op without a VLAN group.
 **/
static void e1000_update_mng_vlan(struct e1000_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (!adapter->vlgrp)
		return;

	if (!vlan_group_get_device(adapter->vlgrp, vid)) {
		adapter->mng_vlan_id =
E1000_MNG_VLAN_NONE;
		if (adapter->hw.mng_cookie.status &
			E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
			e1000_vlan_rx_add_vid(netdev, vid);
			adapter->mng_vlan_id = vid;
		}

		/* drop the previous management VID if nothing else uses it */
		if ((old_vid != (u16)E1000_MNG_VLAN_NONE) &&
				(vid != old_vid) &&
		    !vlan_group_get_device(adapter->vlgrp, old_vid))
			e1000_vlan_rx_kill_vid(netdev, old_vid);
	} else {
		adapter->mng_vlan_id = vid;
	}
}

/**
 * e1000_vlan_rx_register - attach or detach a VLAN group
 * @netdev: network interface device structure
 * @grp: VLAN group to use, or NULL to disable VLAN handling
 *
 * Runs with interrupts masked unless the interface is down.  With a group,
 * VLAN tag insert/strip is enabled (and receive filtering where the part
 * has a hardware VLAN filter); without one, tag handling is disabled and
 * any management VLAN entry is removed.
 **/
static void e1000_vlan_rx_register(struct net_device *netdev,
				   struct vlan_group *grp)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;

	if (!test_bit(__E1000_DOWN, &adapter->state))
		e1000_irq_disable(adapter);
	adapter->vlgrp = grp;

	if (grp) {
		/* enable VLAN tag insert/strip */
		ctrl = er32(CTRL);
		ctrl |= E1000_CTRL_VME;
		ew32(CTRL, ctrl);

		if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
			/* enable VLAN receive filtering */
			rctl = er32(RCTL);
			rctl &= ~E1000_RCTL_CFIEN;
			ew32(RCTL, rctl);
			e1000_update_mng_vlan(adapter);
		}
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = er32(CTRL);
		ctrl &= ~E1000_CTRL_VME;
		ew32(CTRL, ctrl);

		if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
			if (adapter->mng_vlan_id !=
			    (u16)E1000_MNG_VLAN_NONE) {
				e1000_vlan_rx_kill_vid(netdev,
						       adapter->mng_vlan_id);
				adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
			}
		}
	}

	if (!test_bit(__E1000_DOWN, &adapter->state))
		e1000_irq_enable(adapter);
}

/**
 * e1000_restore_vlan - reprogram VLAN state into the hardware
 * @adapter: board private structure
 *
 * Re-registers the current VLAN group and re-adds every VID that has a
 * device in the group.
 **/
static void e1000_restore_vlan(struct e1000_adapter *adapter)
{
	u16 vid;

	e1000_vlan_rx_register(adapter->netdev, adapter->vlgrp);

	if (!adapter->vlgrp)
		return;

	for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
		if (!vlan_group_get_device(adapter->vlgrp, vid))
			continue;
		e1000_vlan_rx_add_vid(adapter->netdev, vid);
	}
}

/**
 * e1000_init_manageability - route management packets to the host
 * @adapter: board private structure
 *
 * No-op unless FLAG_MNG_PT_ENABLED is set.
 **/
static void e1000_init_manageability(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 manc, manc2h;

	if (!(adapter->flags & FLAG_MNG_PT_ENABLED))
		return;

	manc = er32(MANC);

	/*
	 * enable receiving management packets to the host.
this will probably
	 * generate destination unreachable messages from the host OS, but
	 * the packets will be handled on SMBUS
	 */
	manc |= E1000_MANC_EN_MNG2HOST;
	manc2h = er32(MANC2H);
#define E1000_MNG2HOST_PORT_623 (1 << 5)
#define E1000_MNG2HOST_PORT_664 (1 << 6)
	manc2h |= E1000_MNG2HOST_PORT_623;
	manc2h |= E1000_MNG2HOST_PORT_664;
	ew32(MANC2H, manc2h);
	ew32(MANC, manc);
}

/**
 * e1000_configure_tx - Configure 8254x Transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 **/
static void e1000_configure_tx(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_ring *tx_ring = adapter->tx_ring;
	u64 tdba;
	u32 tdlen, tctl, tipg, tarc;
	u32 ipgr1, ipgr2;

	/* Setup the HW Tx Head and Tail descriptor pointers */
	tdba = tx_ring->dma;
	tdlen = tx_ring->count * sizeof(struct e1000_tx_desc);
	ew32(TDBAL, (tdba & DMA_32BIT_MASK));
	ew32(TDBAH, (tdba >> 32));
	ew32(TDLEN, tdlen);
	ew32(TDH, 0);
	ew32(TDT, 0);
	/* remember the register offsets for later writel() based access */
	tx_ring->head = E1000_TDH;
	tx_ring->tail = E1000_TDT;

	/* Set the default values for the Tx Inter Packet Gap timer */
	tipg = DEFAULT_82543_TIPG_IPGT_COPPER;          /*  8  */
	ipgr1 = DEFAULT_82543_TIPG_IPGR1;               /*  8  */
	ipgr2 = DEFAULT_82543_TIPG_IPGR2;               /*  6  */

	if (adapter->flags & FLAG_TIPG_MEDIUM_FOR_80003ESLAN)
		ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; /*  7  */

	tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT;
	tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT;
	ew32(TIPG, tipg);

	/* Set the Tx Interrupt Delay register */
	ew32(TIDV, adapter->tx_int_delay);
	/* Tx irq moderation */
	ew32(TADV, adapter->tx_abs_int_delay);

	/* Program the Transmit Control Register */
	/* tctl is only written back at the end of this function */
	tctl = er32(TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);

	if (adapter->flags & FLAG_TARC_SPEED_MODE_BIT) {
		tarc = er32(TARC(0));
		/*
		 * set the speed mode bit, we'll clear it if we're not at
		 * gigabit link later
		 */
#define SPEED_MODE_BIT (1 << 21)
		tarc |= SPEED_MODE_BIT;
		ew32(TARC(0), tarc);
	}

	/* errata:
program both queues to unweighted RR */
	if (adapter->flags & FLAG_TARC_SET_BIT_ZERO) {
		tarc = er32(TARC(0));
		tarc |= 1;
		ew32(TARC(0), tarc);
		tarc = er32(TARC(1));
		tarc |= 1;
		ew32(TARC(1), tarc);
	}

	e1000e_config_collision_dist(hw);

	/* Setup Transmit Descriptor Settings for eop descriptor */
	adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;

	/* only set IDE if we are delaying interrupts using the timers */
	if (adapter->tx_int_delay)
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;

	/* enable Report Status bit */
	adapter->txd_cmd |= E1000_TXD_CMD_RS;

	ew32(TCTL, tctl);

	/* saved so e1000e_down can restore netdev->tx_queue_len */
	adapter->tx_queue_len = adapter->netdev->tx_queue_len;
}

/**
 * e1000_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 **/
/* number of pages needed to hold S bytes, rounding a partial page up */
#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \
			   (((S) & (PAGE_SIZE - 1)) ? 1 : 0))
static void e1000_setup_rctl(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rctl, rfctl;
	u32 psrctl = 0;
	u32 pages = 0;

	/* Program MC offset vector base */
	rctl = er32(RCTL);
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
		E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
		(adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Do not Store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Long Packet receive */
	if (adapter->netdev->mtu <= ETH_DATA_LEN)
		rctl &= ~E1000_RCTL_LPE;
	else
		rctl |= E1000_RCTL_LPE;

	/* Enable hardware CRC frame stripping */
	rctl |= E1000_RCTL_SECRC;

	/* Setup buffer sizes */
	/* BSEX is set up front and cleared again for the sizes up to 2048 */
	rctl &= ~E1000_RCTL_SZ_4096;
	rctl |= E1000_RCTL_BSEX;
	switch (adapter->rx_buffer_len) {
	case 256:
		rctl |= E1000_RCTL_SZ_256;
		rctl &= ~E1000_RCTL_BSEX;
		break;
	case 512:
		rctl |= E1000_RCTL_SZ_512;
		rctl &= ~E1000_RCTL_BSEX;
		break;
	case 1024:
		rctl |= E1000_RCTL_SZ_1024;
		rctl &= ~E1000_RCTL_BSEX;
		break;
	case 2048:
	default:
		/* any unlisted length is treated as 2048 */
		rctl |= E1000_RCTL_SZ_2048;
		rctl &= ~E1000_RCTL_BSEX;
		break;
	case 4096:
		rctl |= E1000_RCTL_SZ_4096;
		break;
	case 8192:
		rctl |= E1000_RCTL_SZ_8192;
		break;
	case 16384:
		rctl |= E1000_RCTL_SZ_16384;
		break;
	}
/*
	 * 82571 and greater support packet-split where the protocol
	 * header is placed in skb->data and the packet data is
	 * placed in pages hanging off of skb_shinfo(skb)->nr_frags.
	 * In the case of a non-split, skb->data is linearly filled,
	 * followed by the page buffers.  Therefore, skb->data is
	 * sized to hold the largest protocol header.
	 *
	 * allocations using alloc_page take too long for regular MTU
	 * so only enable packet split for jumbo frames
	 *
	 * Using pages when the page size is greater than 16k wastes
	 * a lot of memory, since we allocate 3 pages at all times
	 * per packet.
	 */
	pages = PAGE_USE_COUNT(adapter->netdev->mtu);
	if (!(adapter->flags & FLAG_IS_ICH) && (pages <= 3) &&
	    (PAGE_SIZE <= 16384) && (rctl & E1000_RCTL_LPE))
		adapter->rx_ps_pages = pages;
	else
		adapter->rx_ps_pages = 0;

	if (adapter->rx_ps_pages) {
		/* Configure extra packet-split registers */
		rfctl = er32(RFCTL);
		rfctl |= E1000_RFCTL_EXTEN;
		/*
		 * disable packet split support for IPv6 extension headers,
		 * because some malformed IPv6 headers can hang the Rx
		 */
		rfctl |= (E1000_RFCTL_IPV6_EX_DIS |
			  E1000_RFCTL_NEW_IPV6_EXT_DIS);

		ew32(RFCTL, rfctl);

		/* Enable Packet split descriptors */
		rctl |= E1000_RCTL_DTYP_PS;

		psrctl |= adapter->rx_ps_bsize0 >>
			E1000_PSRCTL_BSIZE0_SHIFT;

		/* each case deliberately falls into the next one */
		switch (adapter->rx_ps_pages) {
		case 3:
			psrctl |= PAGE_SIZE <<
				E1000_PSRCTL_BSIZE3_SHIFT;
			/* fall through */
		case 2:
			psrctl |= PAGE_SIZE <<
				E1000_PSRCTL_BSIZE2_SHIFT;
			/* fall through */
		case 1:
			psrctl |= PAGE_SIZE >>
				E1000_PSRCTL_BSIZE1_SHIFT;
			break;
		}

		ew32(PSRCTL, psrctl);
	}

	ew32(RCTL, rctl);
	/* just started the receive unit, no need to restart */
	adapter->flags &= ~FLAG_RX_RESTART_NOW;
}

/**
 * e1000_configure_rx - Configure Receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
**/
static void e1000_configure_rx(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_ring *rx_ring = adapter->rx_ring;
	u64 rdba;
	u32 rdlen, rctl, rxcsum, ctrl_ext;

	/* pick descriptor size and the clean/alloc routines for this mode */
	if (adapter->rx_ps_pages) {
		/* this is a 32 byte descriptor */
		rdlen = rx_ring->count *
			sizeof(union e1000_rx_desc_packet_split);
		adapter->clean_rx = e1000_clean_rx_irq_ps;
		adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps;
	} else if (adapter->netdev->mtu > ETH_FRAME_LEN + ETH_FCS_LEN) {
		rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
		adapter->clean_rx = e1000_clean_jumbo_rx_irq;
		adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
	} else {
		rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
		adapter->clean_rx = e1000_clean_rx_irq;
		adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
	}

	/* disable receives while setting up the descriptors */
	/* the unmodified rctl value is written back at the end */
	rctl = er32(RCTL);
	ew32(RCTL, rctl & ~E1000_RCTL_EN);
	e1e_flush();
	msleep(10);

	/* set the Receive Delay Timer Register */
	ew32(RDTR, adapter->rx_int_delay);

	/* irq moderation */
	ew32(RADV, adapter->rx_abs_int_delay);
	if (adapter->itr_setting != 0)
		ew32(ITR, 1000000000 / (adapter->itr * 256));

	ctrl_ext = er32(CTRL_EXT);
	/* Reset delay timers after every interrupt */
	ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR;
	/* Auto-Mask interrupts upon ICR access */
	ctrl_ext |= E1000_CTRL_EXT_IAME;
	ew32(IAM, 0xffffffff);
	ew32(CTRL_EXT, ctrl_ext);
	e1e_flush();

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers and
	 * the Base and Length of the Rx Descriptor Ring
	 */
	rdba = rx_ring->dma;
	ew32(RDBAL, (rdba & DMA_32BIT_MASK));
	ew32(RDBAH, (rdba >> 32));
	ew32(RDLEN, rdlen);
	ew32(RDH, 0);
	ew32(RDT, 0);
	rx_ring->head = E1000_RDH;
	rx_ring->tail = E1000_RDT;

	/* Enable Receive Checksum Offload for TCP and UDP */
	rxcsum = er32(RXCSUM);
	if (adapter->flags & FLAG_RX_CSUM_ENABLED) {
		rxcsum |= E1000_RXCSUM_TUOFL;

		/*
		 * IPv4 payload checksum for UDP fragments must be
		 * used in conjunction with packet-split.
*/
		if (adapter->rx_ps_pages)
			rxcsum |= E1000_RXCSUM_IPPCSE;
	} else {
		rxcsum &= ~E1000_RXCSUM_TUOFL;
		/* no need to clear IPPCSE as it defaults to 0 */
	}
	ew32(RXCSUM, rxcsum);

	/*
	 * Enable early receives on supported devices, only takes effect when
	 * packet size is equal or larger than the specified value (in 8 byte
	 * units), e.g. using jumbo frames when setting to E1000_ERT_2048
	 */
	if ((adapter->flags & FLAG_HAS_ERT) &&
	    (adapter->netdev->mtu > ETH_DATA_LEN)) {
		u32 rxdctl = er32(RXDCTL(0));
		ew32(RXDCTL(0), rxdctl | 0x3);
		ew32(ERT, E1000_ERT_2048 | (1 << 13));
		/*
		 * With jumbo frames and early-receive enabled, excessive
		 * C4->C2 latencies result in dropped transactions.
		 */
		pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
					  e1000e_driver_name, 55);
	} else {
		/* drop back to the default latency requirement */
		pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
					  e1000e_driver_name,
					  PM_QOS_DEFAULT_VALUE);
	}

	/* Enable Receives */
	ew32(RCTL, rctl);
}

/**
 *  e1000_update_mc_addr_list - Update Multicast addresses
 *  @hw: pointer to the HW structure
 *  @mc_addr_list: array of multicast addresses to program
 *  @mc_addr_count: number of multicast addresses to program
 *  @rar_used_count: the first RAR register free to program
 *  @rar_count: total number of supported Receive Address Registers
 *
 *  Updates the Receive Address Registers and Multicast Table Array.
 *  The caller must have a packed mc_addr_list of multicast addresses.
 *  The parameter rar_count will usually be hw->mac.rar_entry_count
 *  unless there are workarounds that change this.  Currently no func pointer
 *  exists and all implementations are handled in the generic version of this
 *  function.
**/ static void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count, u32 rar_used_count, u32 rar_count) { hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count, rar_used_count, rar_count); } /** * e1000_set_multi - Multicast and Promiscuous mode set * @netdev: network interface device structure * * The set_multi entry point is called whenever the multicast address * list or the network interface flags are updated. This routine is * responsible for configuring the hardware for proper multicast, * promiscuous mode, and all-multi behavior. **/ static void e1000_set_multi(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct dev_mc_list *mc_ptr; u8 *mta_list; u32 rctl; int i; /* Check for Promiscuous and All Multicast modes */ rctl = er32(RCTL); if (netdev->flags & IFF_PROMISC) { rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); rctl &= ~E1000_RCTL_VFE; } else { if (netdev->flags & IFF_ALLMULTI) { rctl |= E1000_RCTL_MPE; rctl &= ~E1000_RCTL_UPE; } else { rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); } if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) rctl |= E1000_RCTL_VFE; } ew32(RCTL, rctl); if (netdev->mc_count) { mta_list = kmalloc(netdev->mc_count * 6, GFP_ATOMIC); if (!mta_list) return; /* prepare a packed array of only addresses. 
*/
		mc_ptr = netdev->mc_list;

		for (i = 0; i < netdev->mc_count; i++) {
			/* stop early if the list is shorter than mc_count */
			if (!mc_ptr)
				break;
			memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr,
			       ETH_ALEN);
			mc_ptr = mc_ptr->next;
		}

		/* i is the number of addresses actually copied */
		e1000_update_mc_addr_list(hw, mta_list, i, 1,
					  mac->rar_entry_count);
		kfree(mta_list);
	} else {
		/*
		 * if we're called from probe, we might not have
		 * anything to do here, so clear out the list
		 */
		e1000_update_mc_addr_list(hw, NULL, 0, 1,
					  mac->rar_entry_count);
	}
}

/**
 * e1000_configure - configure the hardware for Rx and Tx
 * @adapter: private board structure
 *
 * Programs multicast, VLAN and manageability settings, then the Tx and Rx
 * units, and finally fills the Rx ring through adapter->alloc_rx_buf.
 **/
static void e1000_configure(struct e1000_adapter *adapter)
{
	e1000_set_multi(adapter->netdev);

	e1000_restore_vlan(adapter);
	e1000_init_manageability(adapter);

	e1000_configure_tx(adapter);
	e1000_setup_rctl(adapter);
	/* configure_rx selects alloc_rx_buf, so it must run before the call below */
	e1000_configure_rx(adapter);
	adapter->alloc_rx_buf(adapter,
			      e1000_desc_unused(adapter->rx_ring));
}

/**
 * e1000e_power_up_phy - restore link in case the phy was powered down
 * @adapter: address of board private structure
 *
 * The phy may be powered down to save power and turn off link when the
 * driver is unloaded and wake on lan is not enabled (among others)
 * *** this routine MUST be followed by a call to e1000e_reset ***
 **/
void e1000e_power_up_phy(struct e1000_adapter *adapter)
{
	u16 mii_reg = 0;

	/* Just clear the power down bit to wake the phy back up */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		/*
		 * According to the manual, the phy will retain its
		 * settings across a power-down/up cycle
		 */
		e1e_rphy(&adapter->hw, PHY_CONTROL, &mii_reg);
		mii_reg &= ~MII_CR_POWER_DOWN;
		e1e_wphy(&adapter->hw, PHY_CONTROL, mii_reg);
	}

	adapter->hw.mac.ops.setup_link(&adapter->hw);
}

/**
 * e1000_power_down_phy - Power down the PHY
 * @adapter: address of board private structure
 *
 * Power down the PHY so no link is implied when interface is down
 * The PHY cannot be powered down if management or WoL is active
 */
static void e1000_power_down_phy(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 mii_reg;

	/* WoL is enabled */
	if (adapter->wol)
		return;
/* non-copper PHY? */
	if (adapter->hw.phy.media_type != e1000_media_type_copper)
		return;

	/* reset is blocked because of a SoL/IDER session */
	if (e1000e_check_mng_mode(hw) || e1000_check_reset_block(hw))
		return;

	/* manageability (AMT) is enabled */
	if (er32(MANC) & E1000_MANC_SMBUS_EN)
		return;

	/* power down the PHY */
	e1e_rphy(hw, PHY_CONTROL, &mii_reg);
	mii_reg |= MII_CR_POWER_DOWN;
	e1e_wphy(hw, PHY_CONTROL, mii_reg);
	mdelay(1);
}

/**
 * e1000e_reset - bring the hardware into a known good state
 * @adapter: board private structure
 *
 * This function boots the hardware and enables some settings that
 * require a configuration cycle of the hardware - those cannot be
 * set/changed during runtime. After reset the device needs to be
 * properly configured for Rx, Tx etc.
 */
void e1000e_reset(struct e1000_adapter *adapter)
{
	struct e1000_mac_info *mac = &adapter->hw.mac;
	struct e1000_fc_info *fc = &adapter->hw.fc;
	struct e1000_hw *hw = &adapter->hw;
	u32 tx_space, min_tx_space, min_rx_space;
	u32 pba = adapter->pba;
	u16 hwm;

	/* reset Packet Buffer Allocation to default */
	ew32(PBA, pba);

	/* only larger-than-standard frames need the FIFO split re-balanced */
	if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) {
		/*
		 * To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB.
*/
		pba = er32(PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/*
		 * the Tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it
		 */
		min_tx_space = (adapter->max_frame_size +
				sizeof(struct e1000_tx_desc) -
				ETH_FCS_LEN) * 2;
		/* round up to whole KB, then convert bytes to KB */
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/*
		 * If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation
		 */
		if ((tx_space < min_tx_space) &&
		    ((min_tx_space - tx_space) < pba)) {
			pba -= min_tx_space - tx_space;

			/*
			 * if short on Rx space, Rx wins and must trump tx
			 * adjustment or use Early Receive if available
			 */
			if ((pba < min_rx_space) &&
			    (!(adapter->flags & FLAG_HAS_ERT)))
				/* ERT enabled in e1000_configure_rx */
				pba = min_rx_space;
		}

		ew32(PBA, pba);
	}


	/*
	 * flow control settings
	 *
	 * The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
* Set it to the lower of:
	 * - 90% of the Rx FIFO size, and
	 * - the full Rx FIFO size minus the early receive size (for parts
	 *   with ERT support assuming ERT set to E1000_ERT_2048), or
	 * - the full Rx FIFO size minus one full frame
	 */
	/* pba is in KB; << 10 converts it to bytes */
	if (adapter->flags & FLAG_HAS_ERT)
		hwm = min(((pba << 10) * 9 / 10),
			  ((pba << 10) - (E1000_ERT_2048 << 3)));
	else
		hwm = min(((pba << 10) * 9 / 10),
			  ((pba << 10) - adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
	fc->low_water = fc->high_water - 8;

	if (adapter->flags & FLAG_DISABLE_FC_PAUSE_TIME)
		fc->pause_time = 0xFFFF;
	else
		fc->pause_time = E1000_FC_PAUSE_TIME;
	fc->send_xon = 1;
	fc->type = fc->original_type;

	/* Allow time for pending master requests to run */
	mac->ops.reset_hw(hw);

	/*
	 * For parts with AMT enabled, let the firmware know
	 * that the network interface is in control
	 */
	if ((adapter->flags & FLAG_HAS_AMT) && e1000e_check_mng_mode(hw))
		e1000_get_hw_control(adapter);

	ew32(WUC, 0);

	/* an init_hw failure is only logged; the reset sequence carries on */
	if (mac->ops.init_hw(hw))
		ndev_err(adapter->netdev, "Hardware Error\n");

	e1000_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	ew32(VET, ETH_P_8021Q);

	e1000e_reset_adaptive(hw);
	e1000_get_phy_info(hw);

	if (!(adapter->flags & FLAG_SMART_POWER_DOWN)) {
		u16 phy_data = 0;
		/*
		 * speed up time to link by disabling smart power down, ignore
		 * the return value of this function because there is nothing
		 * different we would do if it failed
		 */
		e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data);
		phy_data &= ~IGP02E1000_PM_SPD;
		e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, phy_data);
	}
}

/**
 * e1000e_up - configure the hardware and start the interface
 * @adapter: board private structure
 *
 * Configures the hardware, clears __E1000_DOWN, enables NAPI and
 * interrupts, then raises a link-status-change interrupt.
 *
 * Always returns 0.
 **/
int e1000e_up(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/* hardware has been reset, we need to reload some things */
	e1000_configure(adapter);

	clear_bit(__E1000_DOWN, &adapter->state);

	napi_enable(&adapter->napi);
	e1000_irq_enable(adapter);

	/* fire a link change interrupt to start the watchdog */
	ew32(ICS, E1000_ICS_LSC);
	return 0;
}

/**
 * e1000e_down - quiesce the interface and reset the hardware
 * @adapter: board private structure
 **/
void e1000e_down(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;

	/*
	 * signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer
	 */
	set_bit(__E1000_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = er32(RCTL);
	ew32(RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = er32(TCTL);
	tctl &= ~E1000_TCTL_EN;
	ew32(TCTL, tctl);
	/* flush both disables and wait for them to finish */
	e1e_flush();
	msleep(10);

	napi_disable(&adapter->napi);
	e1000_irq_disable(adapter);

	/* both timers are stopped synchronously before the rings are touched */
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	/* restore the queue length saved in the adapter and drop the carrier */
	netdev->tx_queue_len = adapter->tx_queue_len;
	netif_carrier_off(netdev);
	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	/* the hardware reset is skipped while the PCI channel is offline */
	if (!pci_channel_offline(adapter->pdev))
		e1000e_reset(adapter);
	e1000_clean_tx_ring(adapter);
	e1000_clean_rx_ring(adapter);

	/*
	 * TODO: for power management, we could drop the link and
	 * pci_disable_device here.
	 */
}

/**
 * e1000e_reinit_locked - restart the interface while owning the reset bit
 * @adapter: board private structure
 *
 * Waits, sleeping between attempts, until it owns __E1000_RESETTING, then
 * takes the interface down and up again and releases the bit.  May sleep.
 **/
void e1000e_reinit_locked(struct e1000_adapter *adapter)
{
	might_sleep();
	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
		msleep(1);
	e1000e_down(adapter);
	e1000e_up(adapter);
	clear_bit(__E1000_RESETTING, &adapter->state);
}

/**
 * e1000_sw_init - Initialize general software structures (struct e1000_adapter)
 * @adapter: board private structure to initialize
 *
 * e1000_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
**/
static int __devinit e1000_sw_init(struct e1000_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;

	/* default receive buffer and frame size limits */
	adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN;
	adapter->rx_ps_bsize0 = 128;
	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

	/* one zeroed ring structure per direction */
	adapter->tx_ring = kzalloc(sizeof(*adapter->tx_ring), GFP_KERNEL);
	if (!adapter->tx_ring)
		goto err_alloc;

	adapter->rx_ring = kzalloc(sizeof(*adapter->rx_ring), GFP_KERNEL);
	if (!adapter->rx_ring)
		goto err_alloc;

	spin_lock_init(&adapter->tx_queue_lock);

	/* Explicitly disable IRQ since the NIC can be in any state. */
	e1000_irq_disable(adapter);

	spin_lock_init(&adapter->stats_lock);

	/* the interface starts out in the "down" state */
	set_bit(__E1000_DOWN, &adapter->state);
	return 0;

err_alloc:
	ndev_err(netdev, "Unable to allocate memory for queues\n");
	/* kfree() accepts NULL, so both pointers can be released blindly */
	kfree(adapter->rx_ring);
	kfree(adapter->tx_ring);
	return -ENOMEM;
}

/**
 * e1000_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
**/
static int e1000_open(struct net_device *netdev)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	/* disallow open during test */
	if (test_bit(__E1000_TESTING, &adapter->state))
		return -EBUSY;

	/* allocate transmit descriptors */
	err = e1000e_setup_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = e1000e_setup_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	e1000e_power_up_phy(adapter);

	/* start without a manageability VLAN; pick one up if the cookie has it */
	adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN)
		e1000_update_mng_vlan(adapter);

	/*
	 * If AMT is enabled, let the firmware know that the network
	 * interface is now open
	 */
	if ((adapter->flags & FLAG_HAS_AMT) && e1000e_check_mng_mode(hw))
		e1000_get_hw_control(adapter);

	/*
	 * before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so.
	 */
	e1000_configure(adapter);

	err = e1000_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* From here on the code is the same as e1000e_up() */
	clear_bit(__E1000_DOWN, &adapter->state);

	napi_enable(&adapter->napi);

	e1000_irq_enable(adapter);

	netif_tx_start_all_queues(netdev);

	/* fire a link status change interrupt to start the watchdog */
	ew32(ICS, E1000_ICS_LSC);

	return 0;

	/* unwind in the reverse order of the setup steps above */
err_req_irq:
	e1000_release_hw_control(adapter);
	e1000_power_down_phy(adapter);
	e1000e_free_rx_resources(adapter);
err_setup_rx:
	e1000e_free_tx_resources(adapter);
err_setup_tx:
	e1000e_reset(adapter);

	return err;
}

/**
 * e1000_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the drivers control, but
 * needs to be disabled.
A global MAC reset is issued to stop the * hardware, and all transmit and receive resources are freed. **/ static int e1000_close(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); e1000e_down(adapter); e1000_power_down_phy(adapter); e1000_free_irq(adapter); e1000e_free_tx_resources(adapter); e1000e_free_rx_resources(adapter); /* * kill manageability vlan ID if supported, but not if a vlan with * the same ID is registered on the host OS (let 8021q kill it) */ if ((adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && !(adapter->vlgrp && vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id))) e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); /* * If AMT is enabled, let the firmware know that the network * interface is now closed */ if ((adapter->flags & FLAG_HAS_AMT) && e1000e_check_mng_mode(&adapter->hw)) e1000_release_hw_control(adapter); return 0; } /** * e1000_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure * @p: pointer to an address structure * * Returns 0 on success, negative on failure **/ static int e1000_set_mac(struct net_device *netdev, void *p) { struct e1000_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len); e1000e_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); if (adapter->flags & FLAG_RESET_OVERWRITES_LAA) { /* activate the work around */ e1000e_set_laa_state_82571(&adapter->hw, 1); /* * Hold a copy of the LAA in RAR[14] This is done so that * between the time RAR[0] gets clobbered and the time it * gets fixed (in e1000_watchdog), the actual LAA is in one * of the RARs and no incoming packets directed to this port * are dropped. 
Eventually the LAA will be in RAR[0] and
		 * RAR[14]
		 */
		e1000e_rar_set(&adapter->hw,
			      adapter->hw.mac.addr,
			      adapter->hw.mac.rar_entry_count - 1);
	}

	return 0;
}

/**
 * e1000_update_phy_info - timer callback that refreshes the PHY information
 * @data: pointer to the adapter cast into an unsigned long
 *
 * Need to wait a few seconds after link up to get diagnostic information from
 * the phy
 **/
static void e1000_update_phy_info(unsigned long data)
{
	struct e1000_adapter *adapter = (struct e1000_adapter *) data;
	e1000_get_phy_info(&adapter->hw);
}

/**
 * e1000e_update_stats - Update the board statistics counters
 * @adapter: board private structure
 *
 * Accumulates the hardware statistics registers into adapter->stats and
 * derives the adapter->net_stats fields from them, all under
 * adapter->stats_lock.  Nothing is updated while link_speed is 0 or the
 * PCI channel is offline.
 **/
void e1000e_update_stats(struct e1000_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	unsigned long irq_flags;
	u16 phy_tmp;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	spin_lock_irqsave(&adapter->stats_lock, irq_flags);

	/*
	 * these counters are modified from e1000_adjust_tbi_stats,
	 * called from the interrupt context, so they must only
	 * be written while holding adapter->stats_lock
	 */

	adapter->stats.crcerrs += er32(CRCERRS);
	adapter->stats.gprc += er32(GPRC);
	/* only the low half is accumulated; the high half is read and dropped */
	adapter->stats.gorc += er32(GORCL);
	er32(GORCH); /* Clear gorc */
	adapter->stats.bprc += er32(BPRC);
	adapter->stats.mprc += er32(MPRC);
	adapter->stats.roc += er32(ROC);

	adapter->stats.mpc += er32(MPC);
	adapter->stats.scc += er32(SCC);
	adapter->stats.ecol += er32(ECOL);
	adapter->stats.mcc += er32(MCC);
	adapter->stats.latecol += er32(LATECOL);
	adapter->stats.dc += er32(DC);
	adapter->stats.xonrxc += er32(XONRXC);
	adapter->stats.xontxc += er32(XONTXC);
	adapter->stats.xoffrxc += er32(XOFFRXC);
	adapter->stats.xofftxc += er32(XOFFTXC);
	adapter->stats.gptc += er32(GPTC);
	/* only the low half is accumulated; the high half is read and dropped */
	adapter->stats.gotc += er32(GOTCL);
	er32(GOTCH); /* Clear gotc */
	adapter->stats.rnbc += er32(RNBC);
	adapter->stats.ruc += er32(RUC);

	adapter->stats.mptc += er32(MPTC);
	adapter->stats.bptc += er32(BPTC);

	/* used for adaptive IFS */
hw->mac.tx_packet_delta = er32(TPT);
	adapter->stats.tpt += hw->mac.tx_packet_delta;
	hw->mac.collision_delta = er32(COLC);
	adapter->stats.colc += hw->mac.collision_delta;

	adapter->stats.algnerrc += er32(ALGNERRC);
	adapter->stats.rxerrc += er32(RXERRC);
	adapter->stats.tncrs += er32(TNCRS);
	adapter->stats.cexterr += er32(CEXTERR);
	adapter->stats.tsctc += er32(TSCTC);
	adapter->stats.tsctfc += er32(TSCTFC);

	/* Fill out the OS statistics structure */
	adapter->net_stats.multicast = adapter->stats.mprc;
	adapter->net_stats.collisions = adapter->stats.colc;

	/* Rx Errors */

	/*
	 * RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC
	 */
	adapter->net_stats.rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	adapter->net_stats.rx_length_errors = adapter->stats.ruc +
					      adapter->stats.roc;
	adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
	adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
	adapter->net_stats.rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	adapter->net_stats.tx_errors = adapter->stats.ecol +
				       adapter->stats.latecol;
	adapter->net_stats.tx_aborted_errors = adapter->stats.ecol;
	adapter->net_stats.tx_window_errors = adapter->stats.latecol;
	adapter->net_stats.tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	if (hw->phy.media_type == e1000_media_type_copper) {
		/* idle errors are only sampled at 1000 Mbps and on a good read */
		if ((adapter->link_speed == SPEED_1000) &&
		   (!e1e_rphy(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += er32(MGTPTC);
	adapter->stats.mgprc += er32(MGTPRC);
	adapter->stats.mgpdc += er32(MGTPDC);

	spin_unlock_irqrestore(&adapter->stats_lock, irq_flags);
}

/**
 * e1000_phy_read_status - Update the PHY register status snapshot
 * @adapter: board private structure
 *
 * Fills adapter->phy_regs either from the PHY (link up on copper media)
 * or with fixed default values, while holding adapter->stats_lock.
 **/
static void
e1000_phy_read_status(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_phy_regs *phy = &adapter->phy_regs; int ret_val; unsigned long irq_flags; spin_lock_irqsave(&adapter->stats_lock, irq_flags); if ((er32(STATUS) & E1000_STATUS_LU) && (adapter->hw.phy.media_type == e1000_media_type_copper)) { ret_val = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr); ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr); ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise); ret_val |= e1e_rphy(hw, PHY_LP_ABILITY, &phy->lpa); ret_val |= e1e_rphy(hw, PHY_AUTONEG_EXP, &phy->expansion); ret_val |= e1e_rphy(hw, PHY_1000T_CTRL, &phy->ctrl1000); ret_val |= e1e_rphy(hw, PHY_1000T_STATUS, &phy->stat1000); ret_val |= e1e_rphy(hw, PHY_EXT_STATUS, &phy->estatus); if (ret_val) ndev_warn(adapter->netdev, "Error reading PHY register\n"); } else { /* * Do not read PHY registers if link is not up * Set values to typical power-on defaults */ phy->bmcr = (BMCR_SPEED1000 | BMCR_ANENABLE | BMCR_FULLDPLX); phy->bmsr = (BMSR_100FULL | BMSR_100HALF | BMSR_10FULL | BMSR_10HALF | BMSR_ESTATEN | BMSR_ANEGCAPABLE | BMSR_ERCAP); phy->advertise = (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP | ADVERTISE_ALL | ADVERTISE_CSMA); phy->lpa = 0; phy->expansion = EXPANSION_ENABLENPAGE; phy->ctrl1000 = ADVERTISE_1000FULL; phy->stat1000 = 0; phy->estatus = (ESTATUS_1000_TFULL | ESTATUS_1000_THALF); } spin_unlock_irqrestore(&adapter->stats_lock, irq_flags); } static void e1000_print_link_info(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; u32 ctrl = er32(CTRL); ndev_info(netdev, "Link is Up %d Mbps %s, Flow Control: %s\n", adapter->link_speed, (adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex", ((ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl & E1000_CTRL_RFCE) ? "RX" : ((ctrl & E1000_CTRL_TFCE) ? 
"TX" : "None" ))); } static bool e1000_has_link(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; bool link_active = 0; s32 ret_val = 0; /* * get_link_status is set on LSC (link status) interrupt or * Rx sequence error interrupt. get_link_status will stay * false until the check_for_link establishes link * for copper adapters ONLY */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { ret_val = hw->mac.ops.check_for_link(hw); link_active = !hw->mac.get_link_status; } else { link_active = 1; } break; case e1000_media_type_fiber: ret_val = hw->mac.ops.check_for_link(hw); link_active = !!(er32(STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: ret_val = hw->mac.ops.check_for_link(hw); link_active = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ ndev_info(adapter->netdev, "Gigabit has been disabled, downgrading speed\n"); } return link_active; } static void e1000e_enable_receives(struct e1000_adapter *adapter) { /* make sure the receive unit is started */ if ((adapter->flags & FLAG_RX_NEEDS_RESTART) && (adapter->flags & FLAG_RX_RESTART_NOW)) { struct e1000_hw *hw = &adapter->hw; u32 rctl = er32(RCTL); ew32(RCTL, rctl | E1000_RCTL_EN); adapter->flags &= ~FLAG_RX_RESTART_NOW; } } /** * e1000_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ static void e1000_watchdog(unsigned long data) { struct e1000_adapter *adapter = (struct e1000_adapter *) data; /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); /* TODO: make this use queue_delayed_work() */ } static void e1000_watchdog_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, watchdog_task); struct net_device 
*netdev = adapter->netdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;
	struct e1000_ring *tx_ring = adapter->tx_ring;
	struct e1000_hw *hw = &adapter->hw;
	u32 link, tctl;
	int tx_pending = 0;

	link = e1000_has_link(adapter);
	/* link was up and still is: only the statistics part below is needed */
	if ((netif_carrier_ok(netdev)) && link) {
		e1000e_enable_receives(adapter);
		goto link_up;
	}

	if ((e1000e_enable_tx_pkt_filtering(hw)) &&
	    (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id))
		e1000_update_mng_vlan(adapter);

	if (link) {
		/* down -> up transition */
		if (!netif_carrier_ok(netdev)) {
			bool txb2b = 1;
			/* update snapshot of PHY registers on LSC */
			e1000_phy_read_status(adapter);
			mac->ops.get_link_up_info(&adapter->hw,
						   &adapter->link_speed,
						   &adapter->link_duplex);
			e1000_print_link_info(adapter);
			/*
			 * tweak tx_queue_len according to speed/duplex
			 * and adjust the timeout factor
			 */
			netdev->tx_queue_len = adapter->tx_queue_len;
			adapter->tx_timeout_factor = 1;
			switch (adapter->link_speed) {
			case SPEED_10:
				txb2b = 0;
				netdev->tx_queue_len = 10;
				adapter->tx_timeout_factor = 14;
				break;
			case SPEED_100:
				txb2b = 0;
				netdev->tx_queue_len = 100;
				/* maybe add some timeout factor ?
*/
				break;
			}

			/*
			 * workaround: re-program speed mode bit after
			 * link-up event
			 */
			if ((adapter->flags & FLAG_TARC_SPEED_MODE_BIT) &&
			    !txb2b) {
				u32 tarc0;
				tarc0 = er32(TARC(0));
				tarc0 &= ~SPEED_MODE_BIT;
				ew32(TARC(0), tarc0);
			}

			/*
			 * disable TSO for pcie and 10/100 speeds, to avoid
			 * some hardware issues
			 */
			if (!(adapter->flags & FLAG_TSO_FORCE)) {
				switch (adapter->link_speed) {
				case SPEED_10:
				case SPEED_100:
					ndev_info(netdev,
					"10/100 speed: disabling TSO\n");
					netdev->features &= ~NETIF_F_TSO;
					netdev->features &= ~NETIF_F_TSO6;
					break;
				case SPEED_1000:
					netdev->features |= NETIF_F_TSO;
					netdev->features |= NETIF_F_TSO6;
					break;
				default:
					/* oops */
					break;
				}
			}

			/*
			 * enable transmits in the hardware, need to do this
			 * after setting TARC(0)
			 */
			tctl = er32(TCTL);
			tctl |= E1000_TCTL_EN;
			ew32(TCTL, tctl);

			netif_carrier_on(netdev);
			netif_tx_wake_all_queues(netdev);

			/* refresh the PHY information about two seconds from now */
			if (!test_bit(__E1000_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
		}
	} else {
		/* up -> down transition */
		if (netif_carrier_ok(netdev)) {
			adapter->link_speed = 0;
			adapter->link_duplex = 0;
			ndev_info(netdev, "Link is Down\n");
			netif_carrier_off(netdev);
			netif_tx_stop_all_queues(netdev);
			if (!test_bit(__E1000_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));

			if (adapter->flags & FLAG_RX_NEEDS_RESTART)
				schedule_work(&adapter->reset_task);
		}
	}

link_up:
	e1000e_update_stats(adapter);

	/* per-period deltas: current totals minus the values saved last time */
	mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
	adapter->tpt_old = adapter->stats.tpt;
	mac->collision_delta = adapter->stats.colc - adapter->colc_old;
	adapter->colc_old = adapter->stats.colc;

	adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
	adapter->gorc_old = adapter->stats.gorc;
	adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
	adapter->gotc_old = adapter->stats.gotc;

	e1000e_update_adaptive(&adapter->hw);

	if (!netif_carrier_ok(netdev)) {
		/* fewer than count - 1 unused descriptors: Tx work is queued */
		tx_pending = (e1000_desc_unused(tx_ring) + 1 <
			       tx_ring->count);
		if (tx_pending) {
			/*
			 * We've lost link, so the
controller stops DMA,
			 * but we've got queued Tx work that's never going
			 * to get done, so reset controller to flush Tx.
			 * (Do the reset outside of interrupt context).
			 */
			adapter->tx_timeout_count++;
			schedule_work(&adapter->reset_task);
		}
	}

	/* Cause software interrupt to ensure Rx ring is cleaned */
	ew32(ICS, E1000_ICS_RXDMT0);

	/* Force detection of hung controller every watchdog period */
	adapter->detect_tx_hung = 1;

	/*
	 * With 82571 controllers, LAA may be overwritten due to controller
	 * reset from the other port. Set the appropriate LAA in RAR[0]
	 */
	if (e1000e_get_laa_state_82571(hw))
		e1000e_rar_set(hw, adapter->hw.mac.addr, 0);

	/* Reset the timer */
	if (!test_bit(__E1000_DOWN, &adapter->state))
		mod_timer(&adapter->watchdog_timer,
			  round_jiffies(jiffies + 2 * HZ));
}

/*
 * Per-packet transmit flags built in e1000_xmit_frame() and consumed by
 * e1000_tx_queue(); the VLAN tag is carried in the upper 16 bits.
 */
#define E1000_TX_FLAGS_CSUM		0x00000001
#define E1000_TX_FLAGS_VLAN		0x00000002
#define E1000_TX_FLAGS_TSO		0x00000004
#define E1000_TX_FLAGS_IPV4		0x00000008
#define E1000_TX_FLAGS_VLAN_MASK	0xffff0000
#define E1000_TX_FLAGS_VLAN_SHIFT	16

/**
 * e1000_tso - set up a TSO context descriptor for a GSO skb
 * @adapter: board private structure
 * @skb: buffer being transmitted
 *
 * Returns 0 when @skb is not a GSO packet, 1 after a context descriptor
 * has been written and next_to_use advanced, or the error code from
 * pskb_expand_head(), which is called when the skb header is cloned.
 **/
static int e1000_tso(struct e1000_adapter *adapter,
		     struct sk_buff *skb)
{
	struct e1000_ring *tx_ring = adapter->tx_ring;
	struct e1000_context_desc *context_desc;
	struct e1000_buffer *buffer_info;
	unsigned int i;
	u32 cmd_length = 0;
	u16 ipcse = 0, tucse, mss;
	u8 ipcss, ipcso, tucss, tucso, hdr_len;
	int err;

	if (skb_is_gso(skb)) {
		if (skb_header_cloned(skb)) {
			err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
			if (err)
				return err;
		}

		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
		mss = skb_shinfo(skb)->gso_size;
		if (skb->protocol == htons(ETH_P_IP)) {
			struct iphdr *iph = ip_hdr(skb);
			/* length and checksum fields are zeroed in the header */
			iph->tot_len = 0;
			iph->check = 0;
			/* seed the TCP checksum with the pseudo-header sum */
			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
								 iph->daddr, 0,
								 IPPROTO_TCP,
								 0);
			cmd_length = E1000_TXD_CMD_IP;
			ipcse = skb_transport_offset(skb) - 1;
		} else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
			ipv6_hdr(skb)->payload_len = 0;
			tcp_hdr(skb)->check =
				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						 &ipv6_hdr(skb)->daddr,
						 0, IPPROTO_TCP, 0);
			ipcse =
0; } ipcss = skb_network_offset(skb); ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data; tucss = skb_transport_offset(skb); tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; tucse = 0; cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | E1000_TXD_CMD_TCP | (skb->len - (hdr_len))); i = tx_ring->next_to_use; context_desc = E1000_CONTEXT_DESC(*tx_ring, i); buffer_info = &tx_ring->buffer_info[i]; context_desc->lower_setup.ip_fields.ipcss = ipcss; context_desc->lower_setup.ip_fields.ipcso = ipcso; context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse); context_desc->upper_setup.tcp_fields.tucss = tucss; context_desc->upper_setup.tcp_fields.tucso = tucso; context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse); context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss); context_desc->tcp_seg_setup.fields.hdr_len = hdr_len; context_desc->cmd_and_length = cpu_to_le32(cmd_length); buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; i++; if (i == tx_ring->count) i = 0; tx_ring->next_to_use = i; return 1; } return 0; } static bool e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb) { struct e1000_ring *tx_ring = adapter->tx_ring; struct e1000_context_desc *context_desc; struct e1000_buffer *buffer_info; unsigned int i; u8 css; if (skb->ip_summed == CHECKSUM_PARTIAL) { css = skb_transport_offset(skb); i = tx_ring->next_to_use; buffer_info = &tx_ring->buffer_info[i]; context_desc = E1000_CONTEXT_DESC(*tx_ring, i); context_desc->lower_setup.ip_config = 0; context_desc->upper_setup.tcp_fields.tucss = css; context_desc->upper_setup.tcp_fields.tucso = css + skb->csum_offset; context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.data = 0; context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; i++; if (i == tx_ring->count) i = 0; tx_ring->next_to_use = i; return 1; } return 0; } #define E1000_MAX_PER_TXD 8192 #define 
E1000_MAX_TXD_PWR 12 static int e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb, unsigned int first, unsigned int max_per_txd, unsigned int nr_frags, unsigned int mss) { struct e1000_ring *tx_ring = adapter->tx_ring; struct e1000_buffer *buffer_info; unsigned int len = skb->len - skb->data_len; unsigned int offset = 0, size, count = 0, i; unsigned int f; i = tx_ring->next_to_use; while (len) { buffer_info = &tx_ring->buffer_info[i]; size = min(len, max_per_txd); /* Workaround for premature desc write-backs * in TSO mode. Append 4-byte sentinel desc */ if (mss && !nr_frags && size == len && size > 8) size -= 4; buffer_info->length = size; /* set time_stamp *before* dma to help avoid a possible race */ buffer_info->time_stamp = jiffies; buffer_info->dma = pci_map_single(adapter->pdev, skb->data + offset, size, PCI_DMA_TODEVICE); if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); adapter->tx_dma_failed++; return -1; } buffer_info->next_to_watch = i; len -= size; offset += size; count++; i++; if (i == tx_ring->count) i = 0; } for (f = 0; f < nr_frags; f++) { struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[f]; len = frag->size; offset = frag->page_offset; while (len) { buffer_info = &tx_ring->buffer_info[i]; size = min(len, max_per_txd); /* Workaround for premature desc write-backs * in TSO mode. 
Append 4-byte sentinel desc */ if (mss && f == (nr_frags-1) && size == len && size > 8) size -= 4; buffer_info->length = size; buffer_info->time_stamp = jiffies; buffer_info->dma = pci_map_page(adapter->pdev, frag->page, offset, size, PCI_DMA_TODEVICE); if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA page map failed\n"); adapter->tx_dma_failed++; return -1; } buffer_info->next_to_watch = i; len -= size; offset += size; count++; i++; if (i == tx_ring->count) i = 0; } } if (i == 0) i = tx_ring->count - 1; else i--; tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[first].next_to_watch = i; return count; } static void e1000_tx_queue(struct e1000_adapter *adapter, int tx_flags, int count) { struct e1000_ring *tx_ring = adapter->tx_ring; struct e1000_tx_desc *tx_desc = NULL; struct e1000_buffer *buffer_info; u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; unsigned int i; if (tx_flags & E1000_TX_FLAGS_TSO) { txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | E1000_TXD_CMD_TSE; txd_upper |= E1000_TXD_POPTS_TXSM << 8; if (tx_flags & E1000_TX_FLAGS_IPV4) txd_upper |= E1000_TXD_POPTS_IXSM << 8; } if (tx_flags & E1000_TX_FLAGS_CSUM) { txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; txd_upper |= E1000_TXD_POPTS_TXSM << 8; } if (tx_flags & E1000_TX_FLAGS_VLAN) { txd_lower |= E1000_TXD_CMD_VLE; txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); } i = tx_ring->next_to_use; while (count--) { buffer_info = &tx_ring->buffer_info[i]; tx_desc = E1000_TX_DESC(*tx_ring, i); tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); tx_desc->lower.data = cpu_to_le32(txd_lower | buffer_info->length); tx_desc->upper.data = cpu_to_le32(txd_upper); i++; if (i == tx_ring->count) i = 0; } tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); /* * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). 
*/ wmb(); tx_ring->next_to_use = i; writel(i, adapter->hw.hw_addr + tx_ring->tail); /* * we need this if more than one processor can write to our tail * at a time, it synchronizes IO on IA64/Altix systems */ mmiowb(); } #define MINIMUM_DHCP_PACKET_SIZE 282 static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct sk_buff *skb) { struct e1000_hw *hw = &adapter->hw; u16 length, offset; if (vlan_tx_tag_present(skb)) { if (!((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) && (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN))) return 0; } if (skb->len <= MINIMUM_DHCP_PACKET_SIZE) return 0; if (((struct ethhdr *) skb->data)->h_proto != htons(ETH_P_IP)) return 0; { const struct iphdr *ip = (struct iphdr *)((u8 *)skb->data+14); struct udphdr *udp; if (ip->protocol != IPPROTO_UDP) return 0; udp = (struct udphdr *)((u8 *)ip + (ip->ihl << 2)); if (ntohs(udp->dest) != 67) return 0; offset = (u8 *)udp + 8 - skb->data; length = skb->len - offset; return e1000e_mng_write_dhcp_info(hw, (u8 *)udp + 8, length); } return 0; } static int __e1000_maybe_stop_tx(struct net_device *netdev, int size) { struct e1000_adapter *adapter = netdev_priv(netdev); netif_stop_queue(netdev); /* * Herbert's original patch had: * smp_mb__after_netif_stop_queue(); * but since that doesn't exist yet, just open code it. */ smp_mb(); /* * We need to check again in a case another CPU has just * made room available. */ if (e1000_desc_unused(adapter->tx_ring) < size) return -EBUSY; /* A reprieve! 
*/
	netif_start_queue(netdev);
	++adapter->restart_queue;
	return 0;
}

/**
 * e1000_maybe_stop_tx - stop the queue if the Tx ring is getting full
 * @netdev: network interface device structure
 * @size: number of free descriptors required
 *
 * Returns 0 when at least @size descriptors are unused; otherwise the
 * result of __e1000_maybe_stop_tx(), which stops the queue and re-checks.
 **/
static int e1000_maybe_stop_tx(struct net_device *netdev, int size)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);

	if (e1000_desc_unused(adapter->tx_ring) >= size)
		return 0;
	return __e1000_maybe_stop_tx(netdev, size);
}

/* descriptors budgeted for S bytes when one descriptor holds 2^X bytes */
#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 )
/**
 * e1000_xmit_frame - transmit entry point
 * @skb: buffer to transmit
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK when the skb was queued or dropped,
 * NETDEV_TX_LOCKED when the Tx queue lock could not be taken, and
 * NETDEV_TX_BUSY when the ring does not have enough free descriptors.
 **/
static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	struct e1000_ring *tx_ring = adapter->tx_ring;
	unsigned int first;
	unsigned int max_per_txd = E1000_MAX_PER_TXD;
	unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
	unsigned int tx_flags = 0;
	unsigned int len = skb->len - skb->data_len;
	unsigned long irq_flags;
	unsigned int nr_frags;
	unsigned int mss;
	int count = 0;
	int tso;
	unsigned int f;

	/* frames are silently dropped while the adapter is down */
	if (test_bit(__E1000_DOWN, &adapter->state)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* empty frames are dropped as well */
	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	mss = skb_shinfo(skb)->gso_size;
	/*
	 * The controller does a simple calculation to
	 * make sure there is enough room in the FIFO before
	 * initiating the DMA for each buffer.  The calc is:
	 * 4 = ceil(buffer len/mss).  To make sure we don't
	 * overrun the FIFO, adjust the max buffer len if mss
	 * drops.
*/
	if (mss) {
		u8 hdr_len;
		max_per_txd = min(mss << 2, max_per_txd);
		max_txd_pwr = fls(max_per_txd) - 1;

		/*
		 * TSO Workaround for 82571/2/3 Controllers -- if skb->data
		 * points to just header, pull a few bytes of payload from
		 * frags into skb->data
		 */
		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
		/*
		 * we do this workaround for ES2LAN, but it is un-necessary,
		 * avoiding it could save a lot of cycles
		 */
		if (skb->data_len && (hdr_len == len)) {
			unsigned int pull_size;

			pull_size = min((unsigned int)4, skb->data_len);
			if (!__pskb_pull_tail(skb, pull_size)) {
				ndev_err(netdev,
					 "__pskb_pull_tail failed.\n");
				dev_kfree_skb_any(skb);
				return NETDEV_TX_OK;
			}
			/* the linear length changed, recompute it */
			len = skb->len - skb->data_len;
		}
	}

	/* reserve a descriptor for the offload context */
	if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
		count++;
	/* NOTE(review): presumably the slot for the sentinel descriptor - confirm */
	count++;

	count += TXD_USE_COUNT(len, max_txd_pwr);

	nr_frags = skb_shinfo(skb)->nr_frags;
	for (f = 0; f < nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
				       max_txd_pwr);

	if (adapter->hw.mac.tx_pkt_filtering)
		e1000_transfer_dhcp_info(adapter, skb);

	if (!spin_trylock_irqsave(&adapter->tx_queue_lock, irq_flags))
		/* Collision - tell upper layer to requeue */
		return NETDEV_TX_LOCKED;

	/*
	 * need: count + 2 desc gap to keep tail from touching
	 * head, otherwise try next time
	 */
	if (e1000_maybe_stop_tx(netdev, count + 2)) {
		spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags);
		return NETDEV_TX_BUSY;
	}

	if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
		tx_flags |= E1000_TX_FLAGS_VLAN;
		tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
	}

	/* remember where this packet starts in the ring */
	first = tx_ring->next_to_use;

	tso = e1000_tso(adapter, skb);
	if (tso < 0) {
		dev_kfree_skb_any(skb);
		spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags);
		return NETDEV_TX_OK;
	}

	/* checksum offload is only tried when no TSO context was set up */
	if (tso)
		tx_flags |= E1000_TX_FLAGS_TSO;
	else if (e1000_tx_csum(adapter, skb))
		tx_flags |= E1000_TX_FLAGS_CSUM;

	/*
	 * Old method was to assume IPv4 packet by default if TSO was enabled.
* 82571 hardware supports TSO capabilities for IPv6 as well... * no longer assume, we must. */ if (skb->protocol == htons(ETH_P_IP)) tx_flags |= E1000_TX_FLAGS_IPV4; count = e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss); if (count < 0) { /* handle pci_map_single() error in e1000_tx_map */ dev_kfree_skb_any(skb); spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags); return NETDEV_TX_OK; } e1000_tx_queue(adapter, tx_flags, count); netdev->trans_start = jiffies; /* Make sure there is space in the ring for the next send. */ e1000_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 2); spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags); return NETDEV_TX_OK; } /** * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ static void e1000_tx_timeout(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); } static void e1000_reset_task(struct work_struct *work) { struct e1000_adapter *adapter; adapter = container_of(work, struct e1000_adapter, reset_task); e1000e_reinit_locked(adapter); } /** * e1000_get_stats - Get System Network Statistics * @netdev: network interface device structure * * Returns the address of the device statistics structure. * The statistics are actually updated from the timer callback. 
**/
static struct net_device_stats *e1000_get_stats(struct net_device *netdev)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);

	/* only return the current stats */
	return &adapter->net_stats;
}

/**
 * e1000_change_mtu - Change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: requested MTU; the frame size checked below is
 *           @new_mtu + ETH_HLEN + ETH_FCS_LEN
 *
 * Returns 0 on success, -EINVAL when the resulting frame size is out of
 * range or jumbo frames are not supported by the adapter
 **/
static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct e1000_adapter *adapter = netdev_priv(netdev);
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;

	if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
	    (max_frame > MAX_JUMBO_FRAME_SIZE)) {
		ndev_err(netdev, "Invalid MTU setting\n");
		return -EINVAL;
	}

	/* Jumbo frame size limits */
	if (max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) {
		if (!(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) {
			ndev_err(netdev, "Jumbo Frames not supported.\n");
			return -EINVAL;
		}
		/* jumbo frames are also refused for the e1000_phy_ife PHY type */
		if (adapter->hw.phy.type == e1000_phy_ife) {
			ndev_err(netdev, "Jumbo Frames not supported.\n");
			return -EINVAL;
		}
	}

#define MAX_STD_JUMBO_FRAME_SIZE 9234
	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
		ndev_err(netdev, "MTU > 9216 not supported.\n");
		return -EINVAL;
	}

	/* wait, sleeping between attempts, until we own the resetting bit */
	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
		msleep(1);
	/* e1000e_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;
	if (netif_running(netdev))
		e1000e_down(adapter);

	/*
	 * NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
	 * means we reserve 2 more, this pushes us to allocate from the next
	 * larger slab size.
	 * i.e.
RXBUFFER_2048 --> size-4096 slab * However with the new *_jumbo_rx* routines, jumbo receives will use * fragmented skbs */ if (max_frame <= 256) adapter->rx_buffer_len = 256; else if (max_frame <= 512) adapter->rx_buffer_len = 512; else if (max_frame <= 1024) adapter->rx_buffer_len = 1024; else if (max_frame <= 2048) adapter->rx_buffer_len = 2048; else adapter->rx_buffer_len = 4096; /* adjust allocation if LPE protects us, and we aren't using SBP */ if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; ndev_info(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) e1000e_up(adapter); else e1000e_reset(adapter); clear_bit(__E1000_RESETTING, &adapter->state); return 0; } static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct e1000_adapter *adapter = netdev_priv(netdev); struct mii_ioctl_data *data = if_mii(ifr); if (adapter->hw.phy.media_type != e1000_media_type_copper) return -EOPNOTSUPP; switch (cmd) { case SIOCGMIIPHY: data->phy_id = adapter->hw.phy.addr; break; case SIOCGMIIREG: if (!capable(CAP_NET_ADMIN)) return -EPERM; switch (data->reg_num & 0x1F) { case MII_BMCR: data->val_out = adapter->phy_regs.bmcr; break; case MII_BMSR: data->val_out = adapter->phy_regs.bmsr; break; case MII_PHYSID1: data->val_out = (adapter->hw.phy.id >> 16); break; case MII_PHYSID2: data->val_out = (adapter->hw.phy.id & 0xFFFF); break; case MII_ADVERTISE: data->val_out = adapter->phy_regs.advertise; break; case MII_LPA: data->val_out = adapter->phy_regs.lpa; break; case MII_EXPANSION: data->val_out = adapter->phy_regs.expansion; break; case MII_CTRL1000: data->val_out = adapter->phy_regs.ctrl1000; break; case MII_STAT1000: data->val_out = adapter->phy_regs.stat1000; break; case MII_ESTATUS: data->val_out = adapter->phy_regs.estatus; break; default: return -EIO; } break; case 
SIOCSMIIREG: default: return -EOPNOTSUPP; } return 0; } static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: return e1000_mii_ioctl(netdev, ifr, cmd); default: return -EOPNOTSUPP; } } static int e1000_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 ctrl, ctrl_ext, rctl, status; u32 wufc = adapter->wol; int retval = 0; netif_device_detach(netdev); if (netif_running(netdev)) { WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); e1000e_down(adapter); e1000_free_irq(adapter); } retval = pci_save_state(pdev); if (retval) return retval; status = er32(STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; if (wufc) { e1000_setup_rctl(adapter); e1000_set_multi(netdev); /* turn on all-multi mode if wake on multicast is enabled */ if (wufc & E1000_WUFC_MC) { rctl = er32(RCTL); rctl |= E1000_RCTL_MPE; ew32(RCTL, rctl); } ctrl = er32(CTRL); /* advertise wake from D3Cold */ #define E1000_CTRL_ADVD3WUC 0x00100000 /* phy power management enable */ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ctrl |= E1000_CTRL_ADVD3WUC | E1000_CTRL_EN_PHY_PWR_MGMT; ew32(CTRL, ctrl); if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { /* keep the laser running in D3 */ ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP7_DATA; ew32(CTRL_EXT, ctrl_ext); } if (adapter->flags & FLAG_IS_ICH) e1000e_disable_gig_wol_ich8lan(&adapter->hw); /* Allow time for pending master requests to run */ e1000e_disable_pcie_master(&adapter->hw); ew32(WUC, E1000_WUC_PME_EN); ew32(WUFC, wufc); pci_enable_wake(pdev, PCI_D3hot, 1); pci_enable_wake(pdev, PCI_D3cold, 1); } else { ew32(WUC, 0); ew32(WUFC, 0); pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); } /* make 
sure adapter isn't asleep if manageability is enabled */ if (adapter->flags & FLAG_MNG_PT_ENABLED) { pci_enable_wake(pdev, PCI_D3hot, 1); pci_enable_wake(pdev, PCI_D3cold, 1); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); /* * Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ e1000_release_hw_control(adapter); pci_disable_device(pdev); pci_set_power_state(pdev, pci_choose_state(pdev, state)); return 0; } static void e1000e_disable_l1aspm(struct pci_dev *pdev) { int pos; u16 val; /* * 82573 workaround - disable L1 ASPM on mobile chipsets * * L1 ASPM on various mobile (ich7) chipsets do not behave properly * resulting in lost data or garbage information on the pci-e link * level. This could result in (false) bad EEPROM checksum errors, * long ping times (up to 2s) or even a system freeze/hang. * * Unfortunately this feature saves about 1W power consumption when * active. 
*/ pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &val); if (val & 0x2) { dev_warn(&pdev->dev, "Disabling L1 ASPM\n"); val &= ~0x2; pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, val); } } #ifdef CONFIG_PM static int e1000_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 err; pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); e1000e_disable_l1aspm(pdev); if (adapter->need_ioport) err = pci_enable_device(pdev); else err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); return err; } pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); if (netif_running(netdev)) { err = e1000_request_irq(adapter); if (err) return err; } e1000e_power_up_phy(adapter); e1000e_reset(adapter); ew32(WUS, ~0); e1000_init_manageability(adapter); if (netif_running(netdev)) e1000e_up(adapter); netif_device_attach(netdev); /* * If the controller has AMT, do not set DRV_LOAD until the interface * is up. For all other cases, let the f/w know that the h/w is now * under the control of the driver. */ if (!(adapter->flags & FLAG_HAS_AMT) || !e1000e_check_mng_mode(&adapter->hw)) e1000_get_hw_control(adapter); return 0; } #endif static void e1000_shutdown(struct pci_dev *pdev) { e1000_suspend(pdev, PMSG_SUSPEND); } #ifdef CONFIG_NET_POLL_CONTROLLER /* * Polling 'interrupt' - used by things like netconsole to send skbs * without having to re-enable interrupts. It's not called while * the interrupt routine is executing. 
*/ static void e1000_netpoll(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); disable_irq(adapter->pdev->irq); e1000_intr(adapter->pdev->irq, netdev); enable_irq(adapter->pdev->irq); } #endif /** * e1000_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device * @state: The current pci connection state * * This function is called after a PCI bus error affecting * this device has been detected. */ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); netif_device_detach(netdev); if (netif_running(netdev)) e1000e_down(adapter); pci_disable_device(pdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; } /** * e1000_io_slot_reset - called after the pci bus has been reset. * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation * resembles the first-half of the e1000_resume routine. */ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; int err; e1000e_disable_l1aspm(pdev); if (adapter->need_ioport) err = pci_enable_device(pdev); else err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } pci_set_master(pdev); pci_restore_state(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); e1000e_reset(adapter); ew32(WUS, ~0); return PCI_ERS_RESULT_RECOVERED; } /** * e1000_io_resume - called when traffic can start flowing again. * @pdev: Pointer to PCI device * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. 
Implementation resembles the * second-half of the e1000_resume routine. */ static void e1000_io_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); e1000_init_manageability(adapter); if (netif_running(netdev)) { if (e1000e_up(adapter)) { dev_err(&pdev->dev, "can't bring device back up after reset\n"); return; } } netif_device_attach(netdev); /* * If the controller has AMT, do not set DRV_LOAD until the interface * is up. For all other cases, let the f/w know that the h/w is now * under the control of the driver. */ if (!(adapter->flags & FLAG_HAS_AMT) || !e1000e_check_mng_mode(&adapter->hw)) e1000_get_hw_control(adapter); } static void e1000_print_device_info(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; u32 pba_num; /* print bus type/speed/width info */ ndev_info(netdev, "(PCI Express:2.5GB/s:%s) " "%02x:%02x:%02x:%02x:%02x:%02x\n", /* bus width */ ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : "Width x1"), /* MAC address */ netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]); ndev_info(netdev, "Intel(R) PRO/%s Network Connection\n", (hw->phy.type == e1000_phy_ife) ? 
"10/100" : "1000"); e1000e_read_pba_num(hw, &pba_num); ndev_info(netdev, "MAC: %d, PHY: %d, PBA No: %06x-%03x\n", hw->mac.type, hw->phy.type, (pba_num >> 8), (pba_num & 0xff)); } /** * e1000e_is_need_ioport - determine if an adapter needs ioport resources or not * @pdev: PCI device information struct * * Returns true if an adapters needs ioport resources **/ static int e1000e_is_need_ioport(struct pci_dev *pdev) { switch (pdev->device) { /* Currently there are no adapters that need ioport resources */ default: return false; } } /** * e1000_probe - Device Initialization Routine * @pdev: PCI device information struct * @ent: entry in e1000_pci_tbl * * Returns 0 on success, negative on failure * * e1000_probe initializes an adapter identified by a pci_dev structure. * The OS initialization, configuring of the adapter private structure, * and a hardware reset occur. **/ static int __devinit e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct e1000_adapter *adapter; struct e1000_hw *hw; const struct e1000_info *ei = e1000_info_tbl[ent->driver_data]; resource_size_t mmio_start, mmio_len; resource_size_t flash_start, flash_len; static int cards_found; int i, err, pci_using_dac; u16 eeprom_data = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; int bars, need_ioport; e1000e_disable_l1aspm(pdev); /* do not allocate ioport bars when not needed */ need_ioport = e1000e_is_need_ioport(pdev); if (need_ioport) { bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO); err = pci_enable_device(pdev); } else { bars = pci_select_bars(pdev, IORESOURCE_MEM); err = pci_enable_device_mem(pdev); } if (err) return err; pci_using_dac = 0; err = pci_set_dma_mask(pdev, DMA_64BIT_MASK); if (!err) { err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); if (!err) pci_using_dac = 1; } else { err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); if (err) { err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); if (err) { dev_err(&pdev->dev, "No 
usable DMA " "configuration, aborting\n"); goto err_dma; } } } err = pci_request_selected_regions(pdev, bars, e1000e_driver_name); if (err) goto err_pci_reg; pci_set_master(pdev); pci_save_state(pdev); err = -ENOMEM; netdev = alloc_etherdev(sizeof(struct e1000_adapter)); if (!netdev) goto err_alloc_etherdev; SET_NETDEV_DEV(netdev, &pdev->dev); pci_set_drvdata(pdev, netdev); adapter = netdev_priv(netdev); hw = &adapter->hw; adapter->netdev = netdev; adapter->pdev = pdev; adapter->ei = ei; adapter->pba = ei->pba; adapter->flags = ei->flags; adapter->hw.adapter = adapter; adapter->hw.mac.type = ei->mac; adapter->msg_enable = (1 << NETIF_MSG_DRV | NETIF_MSG_PROBE) - 1; adapter->bars = bars; adapter->need_ioport = need_ioport; mmio_start = pci_resource_start(pdev, 0); mmio_len = pci_resource_len(pdev, 0); err = -EIO; adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); if (!adapter->hw.hw_addr) goto err_ioremap; if ((adapter->flags & FLAG_HAS_FLASH) && (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { flash_start = pci_resource_start(pdev, 1); flash_len = pci_resource_len(pdev, 1); adapter->hw.flash_address = ioremap(flash_start, flash_len); if (!adapter->hw.flash_address) goto err_flashmap; } /* construct the net_device struct */ netdev->open = &e1000_open; netdev->stop = &e1000_close; netdev->hard_start_xmit = &e1000_xmit_frame; netdev->get_stats = &e1000_get_stats; netdev->set_multicast_list = &e1000_set_multi; netdev->set_mac_address = &e1000_set_mac; netdev->change_mtu = &e1000_change_mtu; netdev->do_ioctl = &e1000_ioctl; e1000e_set_ethtool_ops(netdev); netdev->tx_timeout = &e1000_tx_timeout; netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); netdev->vlan_rx_register = e1000_vlan_rx_register; netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid; netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid; #ifdef CONFIG_NET_POLL_CONTROLLER netdev->poll_controller = e1000_netpoll; #endif strncpy(netdev->name, pci_name(pdev), 
sizeof(netdev->name) - 1); netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len; adapter->bd_number = cards_found++; /* setup adapter struct */ err = e1000_sw_init(adapter); if (err) goto err_sw_init; err = -EIO; memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); err = ei->get_variants(adapter); if (err) goto err_hw_init; hw->mac.ops.get_bus_info(&adapter->hw); adapter->hw.phy.autoneg_wait_to_complete = 0; /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = 0; adapter->hw.phy.ms_type = e1000_ms_hw_default; } if (e1000_check_reset_block(&adapter->hw)) ndev_info(netdev, "PHY reset is blocked due to SOL/IDER session.\n"); netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) netdev->features |= NETIF_F_HW_VLAN_FILTER; netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_HW_CSUM; netdev->vlan_features |= NETIF_F_SG; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; /* * We should not be using LLTX anymore, but we are still Tx faster with * it. */ netdev->features |= NETIF_F_LLTX; if (e1000e_enable_mng_pass_thru(&adapter->hw)) adapter->flags |= FLAG_MNG_PT_ENABLED; /* * before reading the NVM, reset the controller to * put the device in a known good starting state */ adapter->hw.mac.ops.reset_hw(&adapter->hw); /* * systems with ASPM and others may see the checksum fail on the first * attempt. 
Let's give it a few tries */ for (i = 0;; i++) { if (e1000_validate_nvm_checksum(&adapter->hw) >= 0) break; if (i == 2) { ndev_err(netdev, "The NVM Checksum Is Not Valid\n"); err = -EIO; goto err_eeprom; } } /* copy the MAC address out of the NVM */ if (e1000e_read_mac_addr(&adapter->hw)) ndev_err(netdev, "NVM Read Error while reading MAC address\n"); memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); if (!is_valid_ether_addr(netdev->perm_addr)) { ndev_err(netdev, "Invalid MAC Address: " "%02x:%02x:%02x:%02x:%02x:%02x\n", netdev->perm_addr[0], netdev->perm_addr[1], netdev->perm_addr[2], netdev->perm_addr[3], netdev->perm_addr[4], netdev->perm_addr[5]); err = -EIO; goto err_eeprom; } init_timer(&adapter->watchdog_timer); adapter->watchdog_timer.function = &e1000_watchdog; adapter->watchdog_timer.data = (unsigned long) adapter; init_timer(&adapter->phy_info_timer); adapter->phy_info_timer.function = &e1000_update_phy_info; adapter->phy_info_timer.data = (unsigned long) adapter; INIT_WORK(&adapter->reset_task, e1000_reset_task); INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); e1000e_check_options(adapter); /* Initialize link parameters. 
User can change them with ethtool */ adapter->hw.mac.autoneg = 1; adapter->fc_autoneg = 1; adapter->hw.fc.original_type = e1000_fc_default; adapter->hw.fc.type = e1000_fc_default; adapter->hw.phy.autoneg_advertised = 0x2f; /* ring size defaults */ adapter->rx_ring->count = 256; adapter->tx_ring->count = 256; /* * Initial Wake on LAN setting - If APM wake is enabled in * the EEPROM, enable the ACPI Magic Packet filter */ if (adapter->flags & FLAG_APME_IN_WUC) { /* APME bit in EEPROM is mapped to WUC.APME */ eeprom_data = er32(WUC); eeprom_apme_mask = E1000_WUC_APME; } else if (adapter->flags & FLAG_APME_IN_CTRL3) { if (adapter->flags & FLAG_APME_CHECK_PORT_B && (adapter->hw.bus.func == 1)) e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); } /* fetch WoL from EEPROM */ if (eeprom_data & eeprom_apme_mask) adapter->eeprom_wol |= E1000_WUFC_MAG; /* * now that we have the eeprom settings, apply the special cases * where the eeprom may be wrong or the board simply won't support * wake on lan on a particular port */ if (!(adapter->flags & FLAG_HAS_WOL)) adapter->eeprom_wol = 0; /* initialize the wol settings based on the eeprom settings */ adapter->wol = adapter->eeprom_wol; /* reset the hardware with the new settings */ e1000e_reset(adapter); /* * If the controller has AMT, do not set DRV_LOAD until the interface * is up. For all other cases, let the f/w know that the h/w is now * under the control of the driver. 
*/ if (!(adapter->flags & FLAG_HAS_AMT) || !e1000e_check_mng_mode(&adapter->hw)) e1000_get_hw_control(adapter); /* tell the stack to leave us alone until e1000_open() is called */ netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); strcpy(netdev->name, "eth%d"); err = register_netdev(netdev); if (err) goto err_register; e1000_print_device_info(adapter); return 0; err_register: err_hw_init: e1000_release_hw_control(adapter); err_eeprom: if (!e1000_check_reset_block(&adapter->hw)) e1000_phy_hw_reset(&adapter->hw); if (adapter->hw.flash_address) iounmap(adapter->hw.flash_address); err_flashmap: kfree(adapter->tx_ring); kfree(adapter->rx_ring); err_sw_init: iounmap(adapter->hw.hw_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, bars); err_pci_reg: err_dma: pci_disable_device(pdev); return err; } /** * e1000_remove - Device Removal Routine * @pdev: PCI device information struct * * e1000_remove is called by the PCI subsystem to alert the driver * that it should release a PCI device. The could be caused by a * Hot-Plug event, or because the driver is going to be removed from * memory. **/ static void __devexit e1000_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); /* * flush_scheduled work may reschedule our watchdog task, so * explicitly disable watchdog tasks from being rescheduled */ set_bit(__E1000_DOWN, &adapter->state); del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); flush_scheduled_work(); /* * Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. 
*/ e1000_release_hw_control(adapter); unregister_netdev(netdev); if (!e1000_check_reset_block(&adapter->hw)) e1000_phy_hw_reset(&adapter->hw); kfree(adapter->tx_ring); kfree(adapter->rx_ring); iounmap(adapter->hw.hw_addr); if (adapter->hw.flash_address) iounmap(adapter->hw.flash_address); pci_release_selected_regions(pdev, adapter->bars); free_netdev(netdev); pci_disable_device(pdev); } /* PCI Error Recovery (ERS) */ static struct pci_error_handlers e1000_err_handler = { .error_detected = e1000_io_error_detected, .slot_reset = e1000_io_slot_reset, .resume = e1000_io_resume, }; static struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI), board_82572 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_COPPER), board_82572 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_FIBER), board_82572 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_SERDES), board_82572 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E), board_82573 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E_IAMT), board_82573 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573L), board_82573 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_DPT), board_80003es2lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_SPT), board_80003es2lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_DPT), board_80003es2lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_SPT), 
board_80003es2lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE), board_ich8lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_G), board_ich8lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_GT), board_ich8lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_AMT), board_ich8lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_C), board_ich8lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M), board_ich8lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M_AMT), board_ich8lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_G), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_GT), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_AMT), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_C), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_AMT), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LM), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LF), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_V), board_ich9lan }, { } /* terminate list */ }; MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); /* PCI Device API Driver */ static struct pci_driver e1000_driver = { .name = e1000e_driver_name, .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = __devexit_p(e1000_remove), #ifdef CONFIG_PM /* Power Management Hooks */ .suspend = e1000_suspend, .resume = e1000_resume, #endif .shutdown = e1000_shutdown, .err_handler = &e1000_err_handler }; /** * e1000_init_module - Driver Registration Routine * * e1000_init_module is the first routine called when the driver is * loaded. All it does is register with the PCI subsystem. 
**/ static int __init e1000_init_module(void) { int ret; printk(KERN_INFO "%s: Intel(R) PRO/1000 Network Driver - %s\n", e1000e_driver_name, e1000e_driver_version); printk(KERN_INFO "%s: Copyright (c) 1999-2008 Intel Corporation.\n", e1000e_driver_name); ret = pci_register_driver(&e1000_driver); pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name, PM_QOS_DEFAULT_VALUE); return ret; } module_init(e1000_init_module); /** * e1000_exit_module - Driver Exit Cleanup Routine * * e1000_exit_module is called just before the driver is removed * from memory. **/ static void __exit e1000_exit_module(void) { pci_unregister_driver(&e1000_driver); pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name); } module_exit(e1000_exit_module); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); /* e1000_main.c */