summaryrefslogtreecommitdiff
path: root/drivers/infiniband/ulp/ipoib
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h58
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c209
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c46
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c82
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c209
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c83
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c69
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
10 files changed, 521 insertions, 262 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 1f76bad020f3..9d9a9dc51f18 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_IPOIB
tristate "IP-over-InfiniBand"
depends on NETDEVICES && INET && (IPV6 || IPV6=n)
+ select INET_LRO
---help---
Support for the IP-over-InfiniBand protocol (IPoIB). This
transports IP packets over InfiniBand so you can use your IB
@@ -10,16 +11,17 @@ config INFINIBAND_IPOIB
config INFINIBAND_IPOIB_CM
bool "IP-over-InfiniBand Connected Mode support"
- depends on INFINIBAND_IPOIB && EXPERIMENTAL
+ depends on INFINIBAND_IPOIB
default n
---help---
- This option enables experimental support for IPoIB connected mode.
- After enabling this option, you need to switch to connected mode through
- /sys/class/net/ibXXX/mode to actually create connections, and then increase
- the interface MTU with e.g. ifconfig ib0 mtu 65520.
+ This option enables support for IPoIB connected mode. After
+ enabling this option, you need to switch to connected mode
+ through /sys/class/net/ibXXX/mode to actually create
+ connections, and then increase the interface MTU with
+ e.g. ifconfig ib0 mtu 65520.
- WARNING: Enabling connected mode will trigger some
- packet drops for multicast and UD mode traffic from this interface,
+ WARNING: Enabling connected mode will trigger some packet
+ drops for multicast and UD mode traffic from this interface,
unless you limit mtu for these destinations to 2044.
config INFINIBAND_IPOIB_DEBUG
@@ -32,9 +34,10 @@ config INFINIBAND_IPOIB_DEBUG
debug_level and mcast_debug_level module parameters (which
can also be set after the driver is loaded through sysfs).
- This option also creates an "ipoib_debugfs," which can be
- mounted to expose debugging information about IB multicast
- groups used by the IPoIB driver.
+ This option also creates a directory tree under ipoib/ in
+ debugfs, which contains files that expose debugging
+ information about IB multicast groups used by the IPoIB
+ driver.
config INFINIBAND_IPOIB_DEBUG_DATA
bool "IP-over-InfiniBand data path debugging"
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca126fc2b853..68ba5c3482e4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $
*/
#ifndef _IPOIB_H
@@ -52,9 +50,16 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
+#include <linux/inet_lro.h>
/* constants */
+enum ipoib_flush_level {
+ IPOIB_FLUSH_LIGHT,
+ IPOIB_FLUSH_NORMAL,
+ IPOIB_FLUSH_HEAVY
+};
+
enum {
IPOIB_ENCAP_LEN = 4,
@@ -65,8 +70,8 @@ enum {
IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
- IPOIB_RX_RING_SIZE = 128,
- IPOIB_TX_RING_SIZE = 64,
+ IPOIB_RX_RING_SIZE = 256,
+ IPOIB_TX_RING_SIZE = 128,
IPOIB_MAX_QUEUE_SIZE = 8192,
IPOIB_MIN_QUEUE_SIZE = 2,
IPOIB_CM_MAX_CONN_QP = 4096,
@@ -84,7 +89,6 @@ enum {
IPOIB_FLAG_SUBINTERFACE = 5,
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
- IPOIB_MCAST_STARTED = 8,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
IPOIB_FLAG_CSUM = 11,
@@ -96,7 +100,11 @@ enum {
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3,
+ IPOIB_MAX_LRO_DESCRIPTORS = 8,
+ IPOIB_LRO_MAX_AGGR = 64,
+
MAX_SEND_CQE = 16,
+ IPOIB_CM_COPYBREAK = 256,
};
#define IPOIB_OP_RECV (1ul << 31)
@@ -149,6 +157,11 @@ struct ipoib_tx_buf {
u64 mapping[MAX_SKB_FRAGS + 1];
};
+struct ipoib_cm_tx_buf {
+ struct sk_buff *skb;
+ u64 mapping;
+};
+
struct ib_cm_id;
struct ipoib_cm_data {
@@ -207,7 +220,7 @@ struct ipoib_cm_tx {
struct net_device *dev;
struct ipoib_neigh *neigh;
struct ipoib_path *path;
- struct ipoib_tx_buf *tx_ring;
+ struct ipoib_cm_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
unsigned long flags;
@@ -249,11 +262,15 @@ struct ipoib_ethtool_st {
u16 max_coalesced_frames;
};
+struct ipoib_lro {
+ struct net_lro_mgr lro_mgr;
+ struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
+};
+
/*
- * Device private locking: tx_lock protects members used in TX fast
- * path (and we use LLTX so upper layers don't do extra locking).
- * lock protects everything else. lock nests inside of tx_lock (ie
- * tx_lock must be acquired first if needed).
+ * Device private locking: network stack tx_lock protects members used
+ * in TX fast path, lock protects everything else. lock nests inside
+ * of tx_lock (ie tx_lock must be acquired first if needed).
*/
struct ipoib_dev_priv {
spinlock_t lock;
@@ -264,7 +281,6 @@ struct ipoib_dev_priv {
unsigned long flags;
- struct mutex mcast_mutex;
struct mutex vlan_mutex;
struct rb_root path_tree;
@@ -276,10 +292,12 @@ struct ipoib_dev_priv {
struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
- struct work_struct flush_task;
+ struct work_struct carrier_on_task;
+ struct work_struct flush_light;
+ struct work_struct flush_normal;
+ struct work_struct flush_heavy;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
- struct work_struct pkey_event_task;
struct ib_device *ca;
u8 port;
@@ -301,7 +319,6 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring;
- spinlock_t tx_lock;
struct ipoib_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
@@ -335,6 +352,8 @@ struct ipoib_dev_priv {
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
+
+ struct ipoib_lro lro;
};
struct ipoib_ah {
@@ -359,6 +378,7 @@ struct ipoib_path {
struct rb_node rb_node;
struct list_head list;
+ int valid;
};
struct ipoib_neigh {
@@ -423,11 +443,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn);
void ipoib_reap_ah(struct work_struct *work);
+void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
-void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_ib_dev_flush_light(struct work_struct *work);
+void ipoib_ib_dev_flush_normal(struct work_struct *work);
+void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
@@ -440,6 +463,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
+void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
@@ -466,9 +490,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
#endif
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
- union ib_gid *mgid);
-int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
- union ib_gid *mgid);
+ union ib_gid *mgid, int set_qkey);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 97e67d36378f..7b14c2c39500 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
#include <rdma/ib_cm.h>
@@ -113,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
}
static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
- struct ipoib_cm_rx *rx, int id)
+ struct ipoib_cm_rx *rx,
+ struct ib_recv_wr *wr,
+ struct ib_sge *sge, int id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
- priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+ wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
for (i = 0; i < IPOIB_CM_RX_SG; ++i)
- priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i];
+ sge[i].addr = rx->rx_ring[id].mapping[i];
- ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr);
+ ret = ib_post_recv(rx->qp, wr, &bad_wr);
if (unlikely(ret)) {
ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -202,7 +202,7 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev,
dev_kfree_skb_any(rx_ring[i].skb);
}
- kfree(rx_ring);
+ vfree(rx_ring);
}
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
@@ -322,16 +322,52 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
return 0;
}
+static void ipoib_cm_init_rx_wr(struct net_device *dev,
+ struct ib_recv_wr *wr,
+ struct ib_sge *sge)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < priv->cm.num_frags; ++i)
+ sge[i].lkey = priv->mr->lkey;
+
+ sge[0].length = IPOIB_CM_HEAD_SIZE;
+ for (i = 1; i < priv->cm.num_frags; ++i)
+ sge[i].length = PAGE_SIZE;
+
+ wr->next = NULL;
+ wr->sg_list = sge;
+ wr->num_sge = priv->cm.num_frags;
+}
+
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
struct ipoib_cm_rx *rx)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct {
+ struct ib_recv_wr wr;
+ struct ib_sge sge[IPOIB_CM_RX_SG];
+ } *t;
int ret;
int i;
- rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL);
- if (!rx->rx_ring)
+ rx->rx_ring = vmalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
+ if (!rx->rx_ring) {
+ printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n",
+ priv->ca->name, ipoib_recvq_size);
return -ENOMEM;
+ }
+
+ memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring);
+
+ t = kmalloc(sizeof *t, GFP_KERNEL);
+ if (!t) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
spin_lock_irq(&priv->lock);
@@ -351,8 +387,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
ret = -ENOMEM;
goto err_count;
- }
- ret = ipoib_cm_post_receive_nonsrq(dev, rx, i);
+ }
+ ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
if (ret) {
ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
"failed for buf %d\n", i);
@@ -363,6 +399,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
rx->recv_count = ipoib_recvq_size;
+ kfree(t);
+
return 0;
err_count:
@@ -371,6 +409,7 @@ err_count:
spin_unlock_irq(&priv->lock);
err_free:
+ kfree(t);
ipoib_cm_free_rx_ring(dev, rx->rx_ring);
return ret;
@@ -525,6 +564,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
u64 mapping[IPOIB_CM_RX_SG];
int frags;
int has_srq;
+ struct sk_buff *small_skb;
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -579,6 +619,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
}
+ if (wc->byte_len < IPOIB_CM_COPYBREAK) {
+ int dlen = wc->byte_len;
+
+ small_skb = dev_alloc_skb(dlen + 12);
+ if (small_skb) {
+ skb_reserve(small_skb, 12);
+ ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
+ dlen, DMA_FROM_DEVICE);
+ skb_copy_from_linear_data(skb, small_skb->data, dlen);
+ ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
+ dlen, DMA_FROM_DEVICE);
+ skb_put(small_skb, dlen);
+ skb = small_skb;
+ goto copied;
+ }
+ }
+
frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
(unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
@@ -601,6 +658,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
+copied:
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
@@ -620,7 +678,10 @@ repost:
ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
"for buf %d\n", wr_id);
} else {
- if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) {
+ if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
+ &priv->cm.rx_wr,
+ priv->cm.rx_sge,
+ wr_id))) {
--p->recv_count;
ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
"for buf %d\n", wr_id);
@@ -647,7 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_tx_buf *tx_req;
+ struct ipoib_cm_tx_buf *tx_req;
u64 addr;
if (unlikely(skb->len > tx->mtu)) {
@@ -678,7 +739,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
return;
}
- tx_req->mapping[0] = addr;
+ tx_req->mapping = addr;
if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
addr, skb->len))) {
@@ -703,7 +764,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
- struct ipoib_tx_buf *tx_req;
+ struct ipoib_cm_tx_buf *tx_req;
unsigned long flags;
ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
@@ -717,7 +778,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
tx_req = &tx->tx_ring[wr_id];
- ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);
+ ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
/* FIXME: is this right? Shouldn't we only increment on success? */
++dev->stats.tx_packets;
@@ -725,7 +786,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
dev_kfree_skb_any(tx_req->skb);
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock(dev);
+
++tx->tx_tail;
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
@@ -740,7 +802,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
if (neigh) {
@@ -760,10 +822,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock(dev);
}
int ipoib_cm_dev_open(struct net_device *dev)
@@ -1087,8 +1149,7 @@ err_tx:
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
- struct ipoib_tx_buf *tx_req;
- unsigned long flags;
+ struct ipoib_cm_tx_buf *tx_req;
unsigned long begin;
ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
@@ -1115,16 +1176,16 @@ timeout:
while ((int) p->tx_tail - (int) p->tx_head < 0) {
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
- ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,
+ ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock_bh(p->dev);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(p->dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(p->dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock_bh(p->dev);
}
if (p->qp)
@@ -1141,6 +1202,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
+ unsigned long flags;
int ret;
switch (event->event) {
@@ -1159,8 +1221,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
case IB_CM_REJ_RECEIVED:
case IB_CM_TIMEWAIT_EXIT:
ipoib_dbg(priv, "CM error %d.\n", event->event);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
if (neigh) {
@@ -1178,8 +1240,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
queue_work(ipoib_workqueue, &priv->cm.reap_task);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
break;
default:
break;
@@ -1233,19 +1295,24 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ib_sa_path_rec pathrec;
u32 qpn;
- spin_lock_irqsave(&priv->tx_lock, flags);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
while (!list_empty(&priv->cm.start_list)) {
p = list_entry(priv->cm.start_list.next, typeof(*p), list);
list_del_init(&p->list);
neigh = p->neigh;
qpn = IPOIB_QPN(neigh->neighbour->ha);
memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
- spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+
ret = ipoib_cm_tx_init(p, qpn, &pathrec);
- spin_lock_irqsave(&priv->tx_lock, flags);
- spin_lock(&priv->lock);
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
if (ret) {
neigh = p->neigh;
if (neigh) {
@@ -1259,44 +1326,52 @@ static void ipoib_cm_tx_start(struct work_struct *work)
kfree(p);
}
}
- spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
static void ipoib_cm_tx_reap(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.reap_task);
+ struct net_device *dev = priv->dev;
struct ipoib_cm_tx *p;
+ unsigned long flags;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
while (!list_empty(&priv->cm.reap_list)) {
p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
list_del(&p->list);
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
ipoib_cm_tx_destroy(p);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
static void ipoib_cm_skb_reap(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.skb_task);
+ struct net_device *dev = priv->dev;
struct sk_buff *skb;
-
+ unsigned long flags;
unsigned mtu = priv->mcast_mtu;
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+
if (skb->protocol == htons(ETH_P_IP))
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -1304,11 +1379,13 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
#endif
dev_kfree_skb_any(skb);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
@@ -1384,7 +1461,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
ipoib_warn(priv, "enabling connected mode "
"will cause multicast packet drops\n");
+ rtnl_lock();
dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
+ rtnl_unlock();
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
ipoib_flush_paths(dev);
@@ -1393,14 +1472,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
if (!strcmp(buf, "datagram\n")) {
clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
- dev->mtu = min(priv->mcast_mtu, dev->mtu);
- ipoib_flush_paths(dev);
+ rtnl_lock();
if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
dev->features |= NETIF_F_TSO;
}
+ dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+ rtnl_unlock();
+ ipoib_flush_paths(dev);
return count;
}
@@ -1434,14 +1515,16 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
return;
}
- priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
- GFP_KERNEL);
+ priv->cm.srq_ring = vmalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring);
if (!priv->cm.srq_ring) {
printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
priv->ca->name, ipoib_recvq_size);
ib_destroy_srq(priv->cm.srq);
priv->cm.srq = NULL;
+ return;
}
+
+ memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring);
}
int ipoib_cm_dev_init(struct net_device *dev)
@@ -1485,15 +1568,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
priv->cm.num_frags = IPOIB_CM_RX_SG;
}
- for (i = 0; i < priv->cm.num_frags; ++i)
- priv->cm.rx_sge[i].lkey = priv->mr->lkey;
-
- priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
- for (i = 1; i < priv->cm.num_frags; ++i)
- priv->cm.rx_sge[i].length = PAGE_SIZE;
- priv->cm.rx_wr.next = NULL;
- priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
- priv->cm.rx_wr.num_sge = priv->cm.num_frags;
+ ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
if (ipoib_cm_has_srq(dev)) {
for (i = 0; i < ipoib_recvq_size; ++i) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 10279b79c44d..66af5c1a76e5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
return 0;
}
+static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
+ "LRO aggregated", "LRO flushed",
+ "LRO avg aggr", "LRO no desc"
+};
+
+static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ switch (stringset) {
+ case ETH_SS_STATS:
+ memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys));
+ break;
+ }
+}
+
+static int ipoib_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(ipoib_stats_keys);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void ipoib_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, uint64_t *data)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int index = 0;
+
+ /* Get LRO statistics */
+ data[index++] = priv->lro.lro_mgr.stats.aggregated;
+ data[index++] = priv->lro.lro_mgr.stats.flushed;
+ if (priv->lro.lro_mgr.stats.flushed)
+ data[index++] = priv->lro.lro_mgr.stats.aggregated /
+ priv->lro.lro_mgr.stats.flushed;
+ else
+ data[index++] = 0;
+ data[index++] = priv->lro.lro_mgr.stats.no_desc;
+}
+
static const struct ethtool_ops ipoib_ethtool_ops = {
.get_drvinfo = ipoib_get_drvinfo,
.get_tso = ethtool_op_get_tso,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
+ .get_flags = ethtool_op_get_flags,
+ .set_flags = ethtool_op_set_flags,
+ .get_strings = ipoib_get_strings,
+ .get_sset_count = ipoib_get_sset_count,
+ .get_ethtool_stats = ipoib_get_ethtool_stats,
};
void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 8b882bbd1d05..961c585da216 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $
*/
#include <linux/err.h>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f429bce24c20..0e748aeeae99 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -31,8 +31,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $
*/
#include <linux/delay.h>
@@ -290,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
skb->ip_summed = CHECKSUM_UNNECESSARY;
- netif_receive_skb(skb);
+ if (dev->features & NETIF_F_LRO)
+ lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
+ else
+ netif_receive_skb(skb);
repost:
if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -442,6 +443,9 @@ poll_more:
}
if (done < budget) {
+ if (dev->features & NETIF_F_LRO)
+ lro_flush_all(&priv->lro.lro_mgr);
+
netif_rx_complete(dev, napi);
if (unlikely(ib_req_notify_cq(priv->recv_cq,
IB_CQ_NEXT_COMP |
@@ -464,21 +468,22 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
static void drain_tx_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- unsigned long flags;
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock(dev);
while (poll_tx(priv))
; /* nothing */
if (netif_queue_stopped(dev))
mod_timer(&priv->poll_timer, jiffies + 1);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock(dev);
}
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
- drain_tx_cq((struct net_device *)dev_ptr);
+ struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+
+ mod_timer(&priv->poll_timer, jiffies);
}
static inline int post_send(struct ipoib_dev_priv *priv,
@@ -610,17 +615,20 @@ static void __ipoib_reap_ah(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
+ unsigned long flags;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
ib_destroy_ah(ah->ah);
kfree(ah);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
void ipoib_reap_ah(struct work_struct *work)
@@ -757,6 +765,14 @@ void ipoib_drain_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int i, n;
+
+ /*
+ * We call completion handling routines that expect to be
+ * called from the BH-disabled NAPI poll context, so disable
+ * BHs here too.
+ */
+ local_bh_disable();
+
do {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
@@ -780,6 +796,8 @@ void ipoib_drain_cq(struct net_device *dev)
while (poll_tx(priv))
; /* nothing */
+
+ local_bh_enable();
}
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
@@ -898,7 +916,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
return 0;
}
-static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
+ enum ipoib_flush_level level)
{
struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
@@ -911,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
* the parent is down.
*/
list_for_each_entry(cpriv, &priv->child_intfs, list)
- __ipoib_ib_dev_flush(cpriv, pkey_event);
+ __ipoib_ib_dev_flush(cpriv, level);
mutex_unlock(&priv->vlan_mutex);
@@ -925,7 +944,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
return;
}
- if (pkey_event) {
+ if (level == IPOIB_FLUSH_HEAVY) {
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ipoib_ib_dev_down(dev, 0);
@@ -943,11 +962,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
priv->pkey_index = new_index;
}
- ipoib_dbg(priv, "flushing\n");
+ if (level == IPOIB_FLUSH_LIGHT) {
+ ipoib_mark_paths_invalid(dev);
+ ipoib_mcast_dev_flush(dev);
+ }
- ipoib_ib_dev_down(dev, 0);
+ if (level >= IPOIB_FLUSH_NORMAL)
+ ipoib_ib_dev_down(dev, 0);
- if (pkey_event) {
+ if (level == IPOIB_FLUSH_HEAVY) {
ipoib_ib_dev_stop(dev, 0);
ipoib_ib_dev_open(dev);
}
@@ -957,27 +980,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
* we get here, don't bring it back up if it's not configured up
*/
if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
- ipoib_ib_dev_up(dev);
+ if (level >= IPOIB_FLUSH_NORMAL)
+ ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task);
}
}
-void ipoib_ib_dev_flush(struct work_struct *work)
+void ipoib_ib_dev_flush_light(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, flush_light);
+
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT);
+}
+
+void ipoib_ib_dev_flush_normal(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, flush_task);
+ container_of(work, struct ipoib_dev_priv, flush_normal);
- ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
- __ipoib_ib_dev_flush(priv, 0);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL);
}
-void ipoib_pkey_event(struct work_struct *work)
+void ipoib_ib_dev_flush_heavy(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, pkey_event_task);
+ container_of(work, struct ipoib_dev_priv, flush_heavy);
- ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
- __ipoib_ib_dev_flush(priv, 1);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2442090ac8d1..c0ee514396df 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $
*/
#include "ipoib.h"
@@ -62,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+static int lro;
+module_param(lro, bool, 0444);
+MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
+
+static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
+module_param(lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
+ "(default = 64)");
+
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level;
@@ -149,14 +156,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- /*
- * Now flush workqueue to make sure a scheduled task doesn't
- * bring our internal state back up.
- */
- flush_workqueue(ipoib_workqueue);
-
- ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_down(dev, 0);
+ ipoib_ib_dev_stop(dev, 0);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -350,14 +351,32 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
+void ipoib_mark_paths_invalid(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_path *path, *tp;
+
+ spin_lock_irq(&priv->lock);
+
+ list_for_each_entry_safe(path, tp, &priv->path_list, list) {
+ ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
+ be16_to_cpu(path->pathrec.dlid),
+ IPOIB_GID_ARG(path->pathrec.dgid));
+ path->valid = 0;
+ }
+
+ spin_unlock_irq(&priv->lock);
+}
+
void ipoib_flush_paths(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path, *tp;
LIST_HEAD(remove_list);
+ unsigned long flags;
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->path_list, &remove_list);
@@ -367,15 +386,16 @@ void ipoib_flush_paths(struct net_device *dev)
list_for_each_entry_safe(path, tp, &remove_list, list) {
if (path->query)
ib_sa_cancel_query(path->query_id, path->query);
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
wait_for_completion(&path->done);
path_free(dev, path);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
static void path_rec_completion(int status,
@@ -386,6 +406,7 @@ static void path_rec_completion(int status,
struct net_device *dev = path->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah = NULL;
+ struct ipoib_ah *old_ah = NULL;
struct ipoib_neigh *neigh, *tn;
struct sk_buff_head skqueue;
struct sk_buff *skb;
@@ -409,11 +430,12 @@ static void path_rec_completion(int status,
spin_lock_irqsave(&priv->lock, flags);
- path->ah = ah;
-
if (ah) {
path->pathrec = *pathrec;
+ old_ah = path->ah;
+ path->ah = ah;
+
ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
@@ -421,6 +443,17 @@ static void path_rec_completion(int status,
__skb_queue_tail(&skqueue, skb);
list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
+ if (neigh->ah) {
+ WARN_ON(neigh->ah != old_ah);
+ /*
+ * Dropping the ah reference inside
+ * priv->lock is safe here, because we
+ * will hold one more reference from
+ * the original value of path->ah (ie
+ * old_ah).
+ */
+ ipoib_put_ah(neigh->ah);
+ }
kref_get(&path->ah->ref);
neigh->ah = path->ah;
memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
@@ -443,6 +476,7 @@ static void path_rec_completion(int status,
while ((skb = __skb_dequeue(&neigh->queue)))
__skb_queue_tail(&skqueue, skb);
}
+ path->valid = 1;
}
path->query = NULL;
@@ -450,6 +484,9 @@ static void path_rec_completion(int status,
spin_unlock_irqrestore(&priv->lock, flags);
+ if (old_ah)
+ ipoib_put_ah(old_ah);
+
while ((skb = __skb_dequeue(&skqueue))) {
skb->dev = dev;
if (dev_queue_xmit(skb))
@@ -507,7 +544,7 @@ static int path_rec_start(struct net_device *dev,
path_rec_completion,
path, &path->query);
if (path->query_id < 0) {
- ipoib_warn(priv, "ib_sa_path_rec_get failed\n");
+ ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
path->query = NULL;
return path->query_id;
}
@@ -520,6 +557,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
+ unsigned long flags;
neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
if (!neigh) {
@@ -528,11 +566,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
return;
}
- /*
- * We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
- */
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
path = __path_find(dev, skb->dst->neighbour->ha + 4);
if (!path) {
@@ -579,7 +613,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
__skb_queue_tail(&neigh->queue, skb);
}
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
return;
err_list:
@@ -591,7 +625,7 @@ err_drop:
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
@@ -615,23 +649,21 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
+ unsigned long flags;
- /*
- * We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
- */
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
path = __path_find(dev, phdr->hwaddr + 4);
- if (!path) {
- path = path_rec_create(dev, phdr->hwaddr + 4);
+ if (!path || !path->valid) {
+ if (!path)
+ path = path_rec_create(dev, phdr->hwaddr + 4);
if (path) {
/* put pseudoheader back on for next time */
skb_push(skb, sizeof *phdr);
__skb_queue_tail(&path->queue, skb);
if (path_rec_start(dev, path)) {
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
path_free(dev, path);
return;
} else
@@ -641,7 +673,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
dev_kfree_skb_any(skb);
}
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
return;
}
@@ -660,7 +692,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
dev_kfree_skb_any(skb);
}
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -669,13 +701,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct ipoib_neigh *neigh;
unsigned long flags;
- if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
- return NETDEV_TX_LOCKED;
-
if (likely(skb->dst && skb->dst->neighbour)) {
if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
ipoib_path_lookup(skb, dev);
- goto out;
+ return NETDEV_TX_OK;
}
neigh = *to_ipoib_neigh(skb->dst->neighbour);
@@ -685,7 +714,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb->dst->neighbour->ha + 4,
sizeof(union ib_gid))) ||
(neigh->dev != dev))) {
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
/*
* It's safe to call ipoib_put_ah() inside
* priv->lock here, because we know that
@@ -696,25 +725,25 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
ipoib_put_ah(neigh->ah);
list_del(&neigh->list);
ipoib_neigh_free(dev, neigh);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_path_lookup(skb, dev);
- goto out;
+ return NETDEV_TX_OK;
}
if (ipoib_cm_get(neigh)) {
if (ipoib_cm_up(neigh)) {
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
- goto out;
+ return NETDEV_TX_OK;
}
} else if (neigh->ah) {
ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
- goto out;
+ return NETDEV_TX_OK;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
@@ -743,16 +772,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
dev_kfree_skb_any(skb);
++dev->stats.tx_dropped;
- goto out;
+ return NETDEV_TX_OK;
}
unicast_arp_send(skb, dev, phdr);
}
}
-out:
- spin_unlock_irqrestore(&priv->tx_lock, flags);
-
return NETDEV_TX_OK;
}
@@ -938,6 +964,54 @@ static const struct header_ops ipoib_header_ops = {
.create = ipoib_hard_header,
};
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+ void **tcph, u64 *hdr_flags, void *priv)
+{
+ unsigned int ip_len;
+ struct iphdr *iph;
+
+ if (unlikely(skb->protocol != htons(ETH_P_IP)))
+ return -1;
+
+ /*
+ * In the future we may add an else clause that verifies the
+ * checksum and allows devices which do not calculate checksum
+ * to use LRO.
+ */
+ if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
+ return -1;
+
+ /* Check for non-TCP packet */
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_TCP)
+ return -1;
+
+ ip_len = ip_hdrlen(skb);
+ skb_set_transport_header(skb, ip_len);
+ *tcph = tcp_hdr(skb);
+
+ /* check if IP header and TCP header are complete */
+ if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+ return -1;
+
+ *hdr_flags = LRO_IPV4 | LRO_TCP;
+ *iphdr = iph;
+
+ return 0;
+}
+
+static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
+{
+ priv->lro.lro_mgr.max_aggr = lro_max_aggr;
+ priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
+ priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
+ priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
+ priv->lro.lro_mgr.features = LRO_F_NAPI;
+ priv->lro.lro_mgr.dev = priv->dev;
+ priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
static void ipoib_setup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -968,7 +1042,6 @@ static void ipoib_setup(struct net_device *dev)
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = ipoib_sendq_size * 2;
dev->features = (NETIF_F_VLAN_CHALLENGED |
- NETIF_F_LLTX |
NETIF_F_HIGHDMA);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
@@ -977,10 +1050,10 @@ static void ipoib_setup(struct net_device *dev)
priv->dev = dev;
+ ipoib_lro_setup(priv);
+
spin_lock_init(&priv->lock);
- spin_lock_init(&priv->tx_lock);
- mutex_init(&priv->mcast_mutex);
mutex_init(&priv->vlan_mutex);
INIT_LIST_HEAD(&priv->path_list);
@@ -989,9 +1062,11 @@ static void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->multicast_list);
INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
- INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
- INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
+ INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
+ INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
+ INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
+ INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
}
@@ -1154,6 +1229,9 @@ static struct net_device *ipoib_add_port(const char *format,
priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
}
+ if (lro)
+ priv->dev->features |= NETIF_F_LRO;
+
/*
* Set the full membership bit, so that we join the right
* broadcast group, etc.
@@ -1219,7 +1297,7 @@ sysfs_failed:
register_failed:
ib_unregister_event_handler(&priv->event_handler);
- flush_scheduled_work();
+ flush_workqueue(ipoib_workqueue);
event_failed:
ipoib_dev_cleanup(priv->dev);
@@ -1278,7 +1356,12 @@ static void ipoib_remove_one(struct ib_device *device)
list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler);
- flush_scheduled_work();
+
+ rtnl_lock();
+ dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
+ rtnl_unlock();
+
+ flush_workqueue(ipoib_workqueue);
unregister_netdev(priv->dev);
ipoib_dev_cleanup(priv->dev);
@@ -1304,6 +1387,12 @@ static int __init ipoib_init_module(void)
ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif
+ /*
+ * When copying small received packets, we only copy from the
+ * linear data part of the SKB, so we rely on this condition.
+ */
+ BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);
+
ret = ipoib_register_debugfs();
if (ret)
return ret;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 3f663fb852c1..d9d1223c3fd5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $
*/
#include <linux/skbuff.h>
@@ -71,14 +69,13 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh, *tmp;
- unsigned long flags;
int tx_dropped = 0;
ipoib_dbg_mcast(netdev_priv(dev),
"deleting multicast group " IPOIB_GID_FMT "\n",
IPOIB_GID_ARG(mcast->mcmember.mgid));
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
/*
@@ -92,7 +89,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
ipoib_neigh_free(dev, neigh);
}
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
if (mcast->ah)
ipoib_put_ah(mcast->ah);
@@ -102,9 +99,9 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock_bh(dev);
dev->stats.tx_dropped += tx_dropped;
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock_bh(dev);
kfree(mcast);
}
@@ -188,6 +185,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah;
int ret;
+ int set_qkey = 0;
mcast->mcmember = *mcmember;
@@ -202,6 +200,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+ set_qkey = 1;
}
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -214,7 +213,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
}
ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid);
+ &mcast->mcmember.mgid, set_qkey);
if (ret < 0) {
ipoib_warn(priv, "couldn't attach QP to multicast group "
IPOIB_GID_FMT "\n",
@@ -259,10 +258,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
}
/* actually send any queued packets */
- spin_lock_irq(&priv->tx_lock);
+ netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
- spin_unlock_irq(&priv->tx_lock);
+ netif_tx_unlock_bh(dev);
skb->dev = dev;
@@ -273,9 +272,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
if (dev_queue_xmit(skb))
ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
- spin_lock_irq(&priv->tx_lock);
+ netif_tx_lock_bh(dev);
}
- spin_unlock_irq(&priv->tx_lock);
+ netif_tx_unlock_bh(dev);
return 0;
}
@@ -286,7 +285,6 @@ ipoib_mcast_sendonly_join_complete(int status,
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
/* We trap for port events ourselves. */
if (status == -ENETRESET)
@@ -302,12 +300,12 @@ ipoib_mcast_sendonly_join_complete(int status,
IPOIB_GID_ARG(mcast->mcmember.mgid), status);
/* Flush out any queued packets */
- spin_lock_irq(&priv->tx_lock);
+ netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
- spin_unlock_irq(&priv->tx_lock);
+ netif_tx_unlock_bh(dev);
/* Clear the busy flag so we try again */
status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
@@ -366,6 +364,21 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
return ret;
}
+void ipoib_mcast_carrier_on_task(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ carrier_on_task);
+
+ /*
+ * Take rtnl_lock to avoid racing with ipoib_stop() and
+ * turning the carrier back on while a device is being
+ * removed.
+ */
+ rtnl_lock();
+ netif_carrier_on(priv->dev);
+ rtnl_unlock();
+}
+
static int ipoib_mcast_join_complete(int status,
struct ib_sa_multicast *multicast)
{
@@ -392,8 +405,12 @@ static int ipoib_mcast_join_complete(int status,
&priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
+ /*
+ * Defer carrier on work to ipoib_workqueue to avoid a
+ * deadlock on rtnl_lock here.
+ */
if (mcast == priv->broadcast)
- netif_carrier_on(dev);
+ queue_work(ipoib_workqueue, &priv->carrier_on_task);
return 0;
}
@@ -575,8 +592,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
- if (!ipoib_cm_admin_enabled(dev))
- dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+ if (!ipoib_cm_admin_enabled(dev)) {
+ rtnl_lock();
+ dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+ rtnl_unlock();
+ }
ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
@@ -594,10 +614,6 @@ int ipoib_mcast_start_thread(struct net_device *dev)
queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
- spin_lock_irq(&priv->lock);
- set_bit(IPOIB_MCAST_STARTED, &priv->flags);
- spin_unlock_irq(&priv->lock);
-
return 0;
}
@@ -607,10 +623,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
- spin_lock_irq(&priv->lock);
- clear_bit(IPOIB_MCAST_STARTED, &priv->flags);
- spin_unlock_irq(&priv->lock);
-
mutex_lock(&mcast_mutex);
clear_bit(IPOIB_MCAST_RUN, &priv->flags);
cancel_delayed_work(&priv->mcast_task);
@@ -635,10 +647,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
IPOIB_GID_ARG(mcast->mcmember.mgid));
/* Remove ourselves from the multicast group */
- ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid);
+ ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid));
if (ret)
- ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+ ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
}
return 0;
@@ -648,12 +660,9 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_mcast *mcast;
+ unsigned long flags;
- /*
- * We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
- */
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
!priv->broadcast ||
@@ -724,7 +733,7 @@ out:
}
unlock:
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
void ipoib_mcast_dev_flush(struct net_device *dev)
@@ -774,7 +783,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
ipoib_mcast_stop_thread(dev, 0);
local_irq_save(flags);
- netif_tx_lock(dev);
+ netif_addr_lock(dev);
spin_lock(&priv->lock);
/*
@@ -851,7 +860,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
}
spin_unlock(&priv->lock);
- netif_tx_unlock(dev);
+ netif_addr_unlock(dev);
local_irq_restore(flags);
/* We have to cancel outside of the spinlock */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 8766d29ce3b7..68325119f740 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -29,24 +29,17 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
*/
#include "ipoib.h"
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
+int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_qp_attr *qp_attr;
+ struct ib_qp_attr *qp_attr = NULL;
int ret;
u16 pkey_index;
- ret = -ENOMEM;
- qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
- if (!qp_attr)
- goto out;
-
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO;
@@ -54,18 +47,23 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- /* set correct QKey for QP */
- qp_attr->qkey = priv->qkey;
- ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
- if (ret) {
- ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
- goto out;
+ if (set_qkey) {
+ ret = -ENOMEM;
+ qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+ if (!qp_attr)
+ goto out;
+
+ /* set correct QKey for QP */
+ qp_attr->qkey = priv->qkey;
+ ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
+ goto out;
+ }
}
/* attach QP to multicast group */
- mutex_lock(&priv->mcast_mutex);
ret = ib_attach_mcast(priv->qp, mgid, mlid);
- mutex_unlock(&priv->mcast_mutex);
if (ret)
ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);
@@ -74,20 +72,6 @@ out:
return ret;
}
-int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int ret;
-
- mutex_lock(&priv->mcast_mutex);
- ret = ib_detach_mcast(priv->qp, mgid, mlid);
- mutex_unlock(&priv->mcast_mutex);
- if (ret)
- ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
-
- return ret;
-}
-
int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -201,7 +185,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.recv_cq = priv->recv_cq;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
- init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
+ init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+
+ if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
+ init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
@@ -289,15 +276,17 @@ void ipoib_event(struct ib_event_handler *handler,
if (record->element.port_num != priv->port)
return;
- if (record->event == IB_EVENT_PORT_ERR ||
- record->event == IB_EVENT_PORT_ACTIVE ||
- record->event == IB_EVENT_LID_CHANGE ||
- record->event == IB_EVENT_SM_CHANGE ||
+ ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
+ record->device->name, record->element.port_num);
+
+ if (record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) {
- ipoib_dbg(priv, "Port state change event\n");
- queue_work(ipoib_workqueue, &priv->flush_task);
+ queue_work(ipoib_workqueue, &priv->flush_light);
+ } else if (record->event == IB_EVENT_PORT_ERR ||
+ record->event == IB_EVENT_PORT_ACTIVE ||
+ record->event == IB_EVENT_LID_CHANGE) {
+ queue_work(ipoib_workqueue, &priv->flush_normal);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
- ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
- queue_work(ipoib_workqueue, &priv->pkey_event_task);
+ queue_work(ipoib_workqueue, &priv->flush_heavy);
}
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 1cdb5cfb0ff1..b08eb56196d3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/module.h>