summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-05-05 19:31:50 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-05 19:31:50 -0400
commitc60cd8c505eab52cfead464e0621ff3039c429ac (patch)
tree2539768dd0c876b63a12a8b039a301860cec9da7
parent6decd63acacb3b8de81ccc435cf9acea8b6bdfeb (diff)
parent637d3e99735102f06c3eee095d2bbebf1863ca24 (diff)
Merge branch 'cxgb4-next'
Hariprasad Shenai says: ==================== Trivial fixes and changes for SGE This patch series adds the following. Discard packet if length is greater than MTU, move sge monitor code to a new routine, add device node to ULD info, add congestion notification from SGE for ingress queue and freelists and for T5, setting up the Congestion Manager values of the new RX Ethernet Queue is done by firmware now. This patch series has been created against net-next tree and includes patches on cxgb4 driver. We have included all the maintainers of respective drivers. Kindly review the change and let us know in case of any review comments. Thanks V2: Align parenthesis for PATCH 2/6 and PATCH 5/6 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h26
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c11
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c160
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c133
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_values.h9
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/sge.c15
8 files changed, 262 insertions, 96 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 524d11098c56..1f52d9f66e41 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -328,6 +328,17 @@ struct adapter_params {
unsigned int max_ird_adapter; /* Max read depth per adapter */
};
+/* State needed to monitor the forward progress of SGE Ingress DMA activities
+ * and possible hangs.
+ */
+struct sge_idma_monitor_state {
+ unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */
+ unsigned int idma_stalled[2]; /* synthesized stalled timers in HZ */
+ unsigned int idma_state[2]; /* IDMA Hang detect state */
+ unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */
+ unsigned int idma_warn[2]; /* time to warning in HZ */
+};
+
#include "t4fw_api.h"
#define FW_VERSION(chip) ( \
@@ -630,12 +641,7 @@ struct sge {
u32 fl_align; /* response queue message alignment */
u32 fl_starve_thres; /* Free List starvation threshold */
- /* State variables for detecting an SGE Ingress DMA hang */
- unsigned int idma_1s_thresh;/* SGE same State Counter 1s threshold */
- unsigned int idma_stalled[2];/* SGE synthesized stalled timers in HZ */
- unsigned int idma_state[2]; /* SGE IDMA Hang detect state */
- unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */
-
+ struct sge_idma_monitor_state idma_monitor;
unsigned int egr_start;
unsigned int egr_sz;
unsigned int ingr_start;
@@ -1055,7 +1061,7 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb);
int t4_ofld_send(struct adapter *adap, struct sk_buff *skb);
int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
struct net_device *dev, int intr_idx,
- struct sge_fl *fl, rspq_handler_t hnd);
+ struct sge_fl *fl, rspq_handler_t hnd, int cong);
int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
struct net_device *dev, struct netdev_queue *netdevq,
unsigned int iqid);
@@ -1215,6 +1221,7 @@ int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data,
u64 *parity);
int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data,
u64 *parity);
+unsigned int t4_get_mps_bg_map(struct adapter *adapter, int idx);
void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]);
void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]);
int t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data,
@@ -1310,4 +1317,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
u32 addr, u32 val);
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
void t4_free_mem(void *addr);
+void t4_idma_monitor_init(struct adapter *adapter,
+ struct sge_idma_monitor_state *idma);
+void t4_idma_monitor(struct adapter *adapter,
+ struct sge_idma_monitor_state *idma,
+ int hz, int ticks);
#endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 803d91beec6f..6c781c1b8fb8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -977,7 +977,7 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
err = t4_sge_alloc_rxq(adap, &q->rspq, false,
adap->port[i / per_chan],
msi_idx, q->fl.size ? &q->fl : NULL,
- uldrx_handler);
+ uldrx_handler, 0);
if (err)
return err;
memset(&q->stats, 0, sizeof(q->stats));
@@ -1007,7 +1007,7 @@ static int setup_sge_queues(struct adapter *adap)
msi_idx = 1; /* vector 0 is for non-queue interrupts */
else {
err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
- NULL, NULL);
+ NULL, NULL, -1);
if (err)
return err;
msi_idx = -((int)s->intrq.abs_id + 1);
@@ -1027,7 +1027,7 @@ static int setup_sge_queues(struct adapter *adap)
* new/deleted queues.
*/
err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
- msi_idx, NULL, fwevtq_handler);
+ msi_idx, NULL, fwevtq_handler, -1);
if (err) {
freeout: t4_free_sge_resources(adap);
return err;
@@ -1044,7 +1044,9 @@ freeout: t4_free_sge_resources(adap);
msi_idx++;
err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
msi_idx, &q->fl,
- t4_ethrx_handler);
+ t4_ethrx_handler,
+ t4_get_mps_bg_map(adap,
+ pi->tx_chan));
if (err)
goto freeout;
q->rspq.idx = j;
@@ -2432,6 +2434,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
lli.max_ordird_qp = adap->params.max_ordird_qp;
lli.max_ird_adapter = adap->params.max_ird_adapter;
lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
+ lli.nodeid = dev_to_node(adap->pdev_dev);
handle = ulds[uld].add(&lli);
if (IS_ERR(handle)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 78ab4d406ce2..df34293f35e8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -264,6 +264,7 @@ struct cxgb4_lld_info {
unsigned int max_ordird_qp; /* Max ORD/IRD depth per RDMA QP */
unsigned int max_ird_adapter; /* Max IRD memory per adapter */
bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */
+ int nodeid; /* device numa node id */
};
struct cxgb4_uld_info {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 0d2eddab04ef..898842df38fc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -100,16 +100,6 @@
*/
#define TX_QCHECK_PERIOD (HZ / 2)
-/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate
- * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA
- * State Machines in the same state for this amount of time (in HZ) then we'll
- * issue a warning about a potential hang. We'll repeat the warning as the
- * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till
- * the situation clears. If the situation clears, we'll note that as well.
- */
-#define SGE_IDMA_WARN_THRESH (1 * HZ)
-#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD)
-
/*
* Max number of Tx descriptors to be reclaimed by the Tx timer.
*/
@@ -1130,7 +1120,6 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap,
*/
netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
- int len;
u32 wr_mid;
u64 cntrl, *end;
int qidx, credits;
@@ -1143,6 +1132,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
const struct skb_shared_info *ssi;
dma_addr_t addr[MAX_SKB_FRAGS + 1];
bool immediate = false;
+ int len, max_pkt_len;
#ifdef CONFIG_CHELSIO_T4_FCOE
int err;
#endif /* CONFIG_CHELSIO_T4_FCOE */
@@ -1156,6 +1146,13 @@ out_free: dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
+ /* Discard the packet if the length is greater than mtu */
+ max_pkt_len = ETH_HLEN + dev->mtu;
+ if (skb_vlan_tag_present(skb))
+ max_pkt_len += VLAN_HLEN;
+ if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+ goto out_free;
+
pi = netdev_priv(dev);
adap = pi->adapter;
qidx = skb_get_queue_mapping(skb);
@@ -2279,7 +2276,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap)
static void sge_rx_timer_cb(unsigned long data)
{
unsigned long m;
- unsigned int i, idma_same_state_cnt[2];
+ unsigned int i;
struct adapter *adap = (struct adapter *)data;
struct sge *s = &adap->sge;
@@ -2300,67 +2297,16 @@ static void sge_rx_timer_cb(unsigned long data)
set_bit(id, s->starving_fl);
}
}
+ /* The remainder of the SGE RX Timer Callback routine is dedicated to
+ * global Master PF activities like checking for chip ingress stalls,
+ * etc.
+ */
+ if (!(adap->flags & MASTER_PF))
+ goto done;
- t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13);
- idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A);
- idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
-
- for (i = 0; i < 2; i++) {
- u32 debug0, debug11;
-
- /* If the Ingress DMA Same State Counter ("timer") is less
- * than 1s, then we can reset our synthesized Stall Timer and
- * continue. If we have previously emitted warnings about a
- * potential stalled Ingress Queue, issue a note indicating
- * that the Ingress Queue has resumed forward progress.
- */
- if (idma_same_state_cnt[i] < s->idma_1s_thresh) {
- if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH)
- CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n",
- i, s->idma_qid[i],
- s->idma_stalled[i]/HZ);
- s->idma_stalled[i] = 0;
- continue;
- }
-
- /* Synthesize an SGE Ingress DMA Same State Timer in the Hz
- * domain. The first time we get here it'll be because we
- * passed the 1s Threshold; each additional time it'll be
- * because the RX Timer Callback is being fired on its regular
- * schedule.
- *
- * If the stall is below our Potential Hung Ingress Queue
- * Warning Threshold, continue.
- */
- if (s->idma_stalled[i] == 0)
- s->idma_stalled[i] = HZ;
- else
- s->idma_stalled[i] += RX_QCHECK_PERIOD;
-
- if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH)
- continue;
-
- /* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */
- if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0)
- continue;
-
- /* Read and save the SGE IDMA State and Queue ID information.
- * We do this every time in case it changes across time ...
- */
- t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0);
- debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
- s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
-
- t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11);
- debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
- s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
-
- CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n",
- i, s->idma_qid[i], s->idma_state[i],
- s->idma_stalled[i]/HZ, debug0, debug11);
- t4_sge_decode_idma_state(adap, s->idma_state[i]);
- }
+ t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD);
+done:
mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
}
@@ -2437,9 +2383,12 @@ static void __iomem *bar2_address(struct adapter *adapter,
return adapter->bar2 + bar2_qoffset;
}
+/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0
+ * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map
+ */
int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
struct net_device *dev, int intr_idx,
- struct sge_fl *fl, rspq_handler_t hnd)
+ struct sge_fl *fl, rspq_handler_t hnd, int cong)
{
int ret, flsz = 0;
struct fw_iq_cmd c;
@@ -2471,8 +2420,19 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4));
c.iqsize = htons(iq->size);
c.iqaddr = cpu_to_be64(iq->phys_addr);
+ if (cong >= 0)
+ c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F);
if (fl) {
+ /* Allocate the ring for the hardware free list (with space
+ * for its status page) along with the associated software
+ * descriptor ring. The free list size needs to be a multiple
+ * of the Egress Queue Unit and at least 2 Egress Units larger
+ * than the SGE's Egress Congrestion Threshold
+ * (fl_starve_thres - 1).
+ */
+ if (fl->size < s->fl_starve_thres - 1 + 2 * 8)
+ fl->size = s->fl_starve_thres - 1 + 2 * 8;
fl->size = roundup(fl->size, 8);
fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
sizeof(struct rx_sw_desc), &fl->addr,
@@ -2481,10 +2441,15 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
goto fl_nomem;
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
- c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F |
- FW_IQ_CMD_FL0FETCHRO_F |
- FW_IQ_CMD_FL0DATARO_F |
- FW_IQ_CMD_FL0PADEN_F);
+ c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
+ FW_IQ_CMD_FL0FETCHRO_F |
+ FW_IQ_CMD_FL0DATARO_F |
+ FW_IQ_CMD_FL0PADEN_F);
+ if (cong >= 0)
+ c.iqns_to_fl0congen |=
+ htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) |
+ FW_IQ_CMD_FL0CONGCIF_F |
+ FW_IQ_CMD_FL0CONGEN_F);
c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) |
FW_IQ_CMD_FL0FBMAX_V(3));
c.fl0size = htons(flsz);
@@ -2532,6 +2497,41 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
&fl->bar2_qid);
refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
}
+
+ /* For T5 and later we attempt to set up the Congestion Manager values
+ * of the new RX Ethernet Queue. This should really be handled by
+ * firmware because it's more complex than any host driver wants to
+ * get involved with and it's different per chip and this is almost
+ * certainly wrong. Firmware would be wrong as well, but it would be
+ * a lot easier to fix in one place ... For now we do something very
+ * simple (and hopefully less wrong).
+ */
+ if (!is_t4(adap->params.chip) && cong >= 0) {
+ u32 param, val;
+ int i;
+
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
+ FW_PARAMS_PARAM_YZ_V(iq->cntxt_id));
+ if (cong == 0) {
+ val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X);
+ } else {
+ val =
+ CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X);
+ for (i = 0; i < 4; i++) {
+ if (cong & (1 << i))
+ val |=
+ CONMCTXT_CNGCHMAP_V(1 << (i << 2));
+ }
+ }
+ ret = t4_set_params(adap, adap->mbox, adap->fn, 0, 1,
+ &param, &val);
+ if (ret)
+ dev_warn(adap->pdev_dev, "Failed to set Congestion"
+ " Manager Context for Ingress Queue %d: %d\n",
+ iq->cntxt_id, -ret);
+ }
+
return 0;
fl_nomem:
@@ -2637,7 +2637,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
sizeof(struct tx_desc), 0, &txq->q.phys_addr,
- NULL, 0, NUMA_NO_NODE);
+ NULL, 0, dev_to_node(adap->pdev_dev));
if (!txq->q.desc)
return -ENOMEM;
@@ -3067,11 +3067,11 @@ int t4_sge_init(struct adapter *adap)
egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
s->fl_starve_thres = 2*egress_threshold + 1;
+ t4_idma_monitor_init(adap, &s->idma_monitor);
+
setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
- s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */
- s->idma_stalled[0] = 0;
- s->idma_stalled[1] = 0;
+
spin_lock_init(&s->intrq_lock);
return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index e8578a742f2a..6164ef3e1376 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -3401,7 +3401,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[])
}
/**
- * get_mps_bg_map - return the buffer groups associated with a port
+ * t4_get_mps_bg_map - return the buffer groups associated with a port
* @adap: the adapter
* @idx: the port index
*
@@ -3409,7 +3409,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[])
* with the given port. Bit i is set if buffer group i is used by the
* port.
*/
-static unsigned int get_mps_bg_map(struct adapter *adap, int idx)
+unsigned int t4_get_mps_bg_map(struct adapter *adap, int idx)
{
u32 n = NUMPORTS_G(t4_read_reg(adap, MPS_CMN_CTL_A));
@@ -3460,7 +3460,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type)
*/
void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
{
- u32 bgmap = get_mps_bg_map(adap, idx);
+ u32 bgmap = t4_get_mps_bg_map(adap, idx);
#define GET_STAT(name) \
t4_read_reg64(adap, \
@@ -5717,3 +5717,130 @@ void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr)
t4_write_reg(adap, TP_DBG_LA_CONFIG_A,
cfg | adap->params.tp.la_mask);
}
+
+/* SGE Hung Ingress DMA Warning Threshold time and Warning Repeat Rate (in
+ * seconds). If we find one of the SGE Ingress DMA State Machines in the same
+ * state for more than the Warning Threshold then we'll issue a warning about
+ * a potential hang. We'll repeat the warning as the SGE Ingress DMA Channel
+ * appears to be hung every Warning Repeat second till the situation clears.
+ * If the situation clears, we'll note that as well.
+ */
+#define SGE_IDMA_WARN_THRESH 1
+#define SGE_IDMA_WARN_REPEAT 300
+
+/**
+ * t4_idma_monitor_init - initialize SGE Ingress DMA Monitor
+ * @adapter: the adapter
+ * @idma: the adapter IDMA Monitor state
+ *
+ * Initialize the state of an SGE Ingress DMA Monitor.
+ */
+void t4_idma_monitor_init(struct adapter *adapter,
+ struct sge_idma_monitor_state *idma)
+{
+ /* Initialize the state variables for detecting an SGE Ingress DMA
+ * hang. The SGE has internal counters which count up on each clock
+ * tick whenever the SGE finds its Ingress DMA State Engines in the
+ * same state they were on the previous clock tick. The clock used is
+ * the Core Clock so we have a limit on the maximum "time" they can
+ * record; typically a very small number of seconds. For instance,
+ * with a 600MHz Core Clock, we can only count up to a bit more than
+ * 7s. So we'll synthesize a larger counter in order to not run the
+ * risk of having the "timers" overflow and give us the flexibility to
+ * maintain a Hung SGE State Machine of our own which operates across
+ * a longer time frame.
+ */
+ idma->idma_1s_thresh = core_ticks_per_usec(adapter) * 1000000; /* 1s */
+ idma->idma_stalled[0] = 0;
+ idma->idma_stalled[1] = 0;
+}
+
+/**
+ * t4_idma_monitor - monitor SGE Ingress DMA state
+ * @adapter: the adapter
+ * @idma: the adapter IDMA Monitor state
+ * @hz: number of ticks/second
+ * @ticks: number of ticks since the last IDMA Monitor call
+ */
+void t4_idma_monitor(struct adapter *adapter,
+ struct sge_idma_monitor_state *idma,
+ int hz, int ticks)
+{
+ int i, idma_same_state_cnt[2];
+
+ /* Read the SGE Debug Ingress DMA Same State Count registers. These
+ * are counters inside the SGE which count up on each clock when the
+ * SGE finds its Ingress DMA State Engines in the same states they
+ * were in the previous clock. The counters will peg out at
+ * 0xffffffff without wrapping around so once they pass the 1s
+ * threshold they'll stay above that till the IDMA state changes.
+ */
+ t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 13);
+ idma_same_state_cnt[0] = t4_read_reg(adapter, SGE_DEBUG_DATA_HIGH_A);
+ idma_same_state_cnt[1] = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
+
+ for (i = 0; i < 2; i++) {
+ u32 debug0, debug11;
+
+ /* If the Ingress DMA Same State Counter ("timer") is less
+ * than 1s, then we can reset our synthesized Stall Timer and
+ * continue. If we have previously emitted warnings about a
+ * potential stalled Ingress Queue, issue a note indicating
+ * that the Ingress Queue has resumed forward progress.
+ */
+ if (idma_same_state_cnt[i] < idma->idma_1s_thresh) {
+ if (idma->idma_stalled[i] >= SGE_IDMA_WARN_THRESH * hz)
+ dev_warn(adapter->pdev_dev, "SGE idma%d, queue %u, "
+ "resumed after %d seconds\n",
+ i, idma->idma_qid[i],
+ idma->idma_stalled[i] / hz);
+ idma->idma_stalled[i] = 0;
+ continue;
+ }
+
+ /* Synthesize an SGE Ingress DMA Same State Timer in the Hz
+ * domain. The first time we get here it'll be because we
+ * passed the 1s Threshold; each additional time it'll be
+ * because the RX Timer Callback is being fired on its regular
+ * schedule.
+ *
+ * If the stall is below our Potential Hung Ingress Queue
+ * Warning Threshold, continue.
+ */
+ if (idma->idma_stalled[i] == 0) {
+ idma->idma_stalled[i] = hz;
+ idma->idma_warn[i] = 0;
+ } else {
+ idma->idma_stalled[i] += ticks;
+ idma->idma_warn[i] -= ticks;
+ }
+
+ if (idma->idma_stalled[i] < SGE_IDMA_WARN_THRESH * hz)
+ continue;
+
+ /* We'll issue a warning every SGE_IDMA_WARN_REPEAT seconds.
+ */
+ if (idma->idma_warn[i] > 0)
+ continue;
+ idma->idma_warn[i] = SGE_IDMA_WARN_REPEAT * hz;
+
+ /* Read and save the SGE IDMA State and Queue ID information.
+ * We do this every time in case it changes across time ...
+ * can't be too careful ...
+ */
+ t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 0);
+ debug0 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
+ idma->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
+
+ t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 11);
+ debug11 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
+ idma->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
+
+ dev_warn(adapter->pdev_dev, "SGE idma%u, queue %u, potentially stuck in "
+ "state %u for %d seconds (debug0=%#x, debug11=%#x)\n",
+ i, idma->idma_qid[i], idma->idma_state[i],
+ idma->idma_stalled[i] / hz,
+ debug0, debug11);
+ t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
+ }
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
index 19b2dcf6acde..c4d9952f814b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
@@ -61,6 +61,15 @@
#define SGE_TIMERREGS 6
#define TIMERREG_COUNTER0_X 0
+/* Congestion Manager Definitions.
+ */
+#define CONMCTXT_CNGTPMODE_S 19
+#define CONMCTXT_CNGTPMODE_V(x) ((x) << CONMCTXT_CNGTPMODE_S)
+#define CONMCTXT_CNGCHMAP_S 0
+#define CONMCTXT_CNGCHMAP_V(x) ((x) << CONMCTXT_CNGCHMAP_S)
+#define CONMCTXT_CNGTPMODE_CHANNEL_X 2
+#define CONMCTXT_CNGTPMODE_QUEUE_X 1
+
/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues.
* The User Doorbells are each 128 bytes in length with a Simple Doorbell at
* offsets 8x and a Write Combining single 64-byte Egress Queue Unit
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 03fbfd1fb3df..d75fca7695eb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1123,6 +1123,7 @@ enum fw_params_param_dmaq {
FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11,
FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13,
+ FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20,
};
enum fw_params_param_dev_diag {
@@ -1377,6 +1378,7 @@ struct fw_iq_cmd {
#define FW_IQ_CMD_IQFLINTCONGEN_S 27
#define FW_IQ_CMD_IQFLINTCONGEN_V(x) ((x) << FW_IQ_CMD_IQFLINTCONGEN_S)
+#define FW_IQ_CMD_IQFLINTCONGEN_F FW_IQ_CMD_IQFLINTCONGEN_V(1U)
#define FW_IQ_CMD_IQFLINTISCSIC_S 26
#define FW_IQ_CMD_IQFLINTISCSIC_V(x) ((x) << FW_IQ_CMD_IQFLINTISCSIC_S)
@@ -1399,6 +1401,7 @@ struct fw_iq_cmd {
#define FW_IQ_CMD_FL0CONGCIF_S 11
#define FW_IQ_CMD_FL0CONGCIF_V(x) ((x) << FW_IQ_CMD_FL0CONGCIF_S)
+#define FW_IQ_CMD_FL0CONGCIF_F FW_IQ_CMD_FL0CONGCIF_V(1U)
#define FW_IQ_CMD_FL0ONCHIP_S 10
#define FW_IQ_CMD_FL0ONCHIP_V(x) ((x) << FW_IQ_CMD_FL0ONCHIP_S)
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 482f6de6817d..98cd47c373c5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -1160,7 +1160,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
u32 wr_mid;
u64 cntrl, *end;
- int qidx, credits;
+ int qidx, credits, max_pkt_len;
unsigned int flits, ndesc;
struct adapter *adapter;
struct sge_eth_txq *txq;
@@ -1183,6 +1183,13 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(skb->len < fw_hdr_copy_len))
goto out_free;
+ /* Discard the packet if the length is greater than mtu */
+ max_pkt_len = ETH_HLEN + dev->mtu;
+ if (skb_vlan_tag_present(skb))
+ max_pkt_len += VLAN_HLEN;
+ if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+ goto out_free;
+
/*
* Figure out which TX Queue we're going to use.
*/
@@ -2243,8 +2250,12 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
* Allocate the ring for the hardware free list (with space
* for its status page) along with the associated software
* descriptor ring. The free list size needs to be a multiple
- * of the Egress Queue Unit.
+ * of the Egress Queue Unit and at least 2 Egress Units larger
+ * than the SGE's Egress Congrestion Threshold
+ * (fl_starve_thres - 1).
*/
+ if (fl->size < s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT)
+ fl->size = s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT;
fl->size = roundup(fl->size, FL_PER_EQ_UNIT);
fl->desc = alloc_ring(adapter->pdev_dev, fl->size,
sizeof(__be64), sizeof(struct rx_sw_desc),