Skip to content

Commit

Permalink
IPoIB: Add send gather support
Browse files Browse the repository at this point in the history
This patch acts as a preparation for using checksum offload for IB
devices capable of inserting/verifying checksum in IP packets.  The
patch does not actaully turn on NETIF_F_SG - we defer that to the
patches adding checksum offload capabilities.

We only add support for send gathers for datagram mode, since existing
HW does not support checksum offload on connected QPs.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Eli Cohen authored and Roland Dreier committed Feb 8, 2008
1 parent eb14032 commit 7143740
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 30 deletions.
4 changes: 2 additions & 2 deletions drivers/infiniband/ulp/ipoib/ipoib.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ struct ipoib_rx_buf {

struct ipoib_tx_buf {
struct sk_buff *skb;
u64 mapping;
u64 mapping[MAX_SKB_FRAGS + 1];
};

struct ib_cm_id;
Expand Down Expand Up @@ -296,7 +296,7 @@ struct ipoib_dev_priv {
struct ipoib_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
struct ib_sge tx_sge;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_send_wr tx_wr;
unsigned tx_outstanding;

Expand Down
10 changes: 5 additions & 5 deletions drivers/infiniband/ulp/ipoib/ipoib_cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -634,8 +634,8 @@ static inline int post_send(struct ipoib_dev_priv *priv,
{
struct ib_send_wr *bad_wr;

priv->tx_sge.addr = addr;
priv->tx_sge.length = len;
priv->tx_sge[0].addr = addr;
priv->tx_sge[0].length = len;

priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;

Expand Down Expand Up @@ -676,7 +676,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
return;
}

tx_req->mapping = addr;
tx_req->mapping[0] = addr;

if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
addr, skb->len))) {
Expand Down Expand Up @@ -715,7 +715,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)

tx_req = &tx->tx_ring[wr_id];

ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);

/* FIXME: is this right? Shouldn't we only increment on success? */
++dev->stats.tx_packets;
Expand Down Expand Up @@ -1110,7 +1110,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)

while ((int) p->tx_tail - (int) p->tx_head < 0) {
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
Expand Down
89 changes: 69 additions & 20 deletions drivers/infiniband/ulp/ipoib/ipoib_ib.c
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,54 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
"for buf %d\n", wr_id);
}

static int ipoib_dma_map_tx(struct ib_device *ca,
struct ipoib_tx_buf *tx_req)
{
struct sk_buff *skb = tx_req->skb;
u64 *mapping = tx_req->mapping;
int i;

mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
return -EIO;

for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
mapping[i + 1] = ib_dma_map_page(ca, frag->page,
frag->page_offset, frag->size,
DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
goto partial_error;
}
return 0;

partial_error:
ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);

for (; i > 0; --i) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
}
return -EIO;
}

static void ipoib_dma_unmap_tx(struct ib_device *ca,
struct ipoib_tx_buf *tx_req)
{
struct sk_buff *skb = tx_req->skb;
u64 *mapping = tx_req->mapping;
int i;

ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);

for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
DMA_TO_DEVICE);
}
}

static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
Expand All @@ -257,8 +305,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)

tx_req = &priv->tx_ring[wr_id];

ib_dma_unmap_single(priv->ca, tx_req->mapping,
tx_req->skb->len, DMA_TO_DEVICE);
ipoib_dma_unmap_tx(priv->ca, tx_req);

++dev->stats.tx_packets;
dev->stats.tx_bytes += tx_req->skb->len;
Expand Down Expand Up @@ -341,16 +388,23 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
struct ib_ah *address, u32 qpn,
u64 addr, int len)
u64 *mapping, int headlen,
skb_frag_t *frags,
int nr_frags)
{
struct ib_send_wr *bad_wr;
int i;

priv->tx_sge.addr = addr;
priv->tx_sge.length = len;

priv->tx_wr.wr_id = wr_id;
priv->tx_wr.wr.ud.remote_qpn = qpn;
priv->tx_wr.wr.ud.ah = address;
priv->tx_sge[0].addr = mapping[0];
priv->tx_sge[0].length = headlen;
for (i = 0; i < nr_frags; ++i) {
priv->tx_sge[i + 1].addr = mapping[i + 1];
priv->tx_sge[i + 1].length = frags[i].size;
}
priv->tx_wr.num_sge = nr_frags + 1;
priv->tx_wr.wr_id = wr_id;
priv->tx_wr.wr.ud.remote_qpn = qpn;
priv->tx_wr.wr.ud.ah = address;

return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
}
Expand All @@ -360,7 +414,6 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
u64 addr;

if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
Expand All @@ -383,20 +436,19 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
*/
tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
tx_req->skb = skb;
addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
tx_req->mapping = addr;

if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
address->ah, qpn, addr, skb->len))) {
address->ah, qpn,
tx_req->mapping, skb_headlen(skb),
skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
ipoib_warn(priv, "post_send failed\n");
++dev->stats.tx_errors;
ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
Expand Down Expand Up @@ -615,10 +667,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
tx_req = &priv->tx_ring[priv->tx_tail &
(ipoib_sendq_size - 1)];
ib_dma_unmap_single(priv->ca,
tx_req->mapping,
tx_req->skb->len,
DMA_TO_DEVICE);
ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
--priv->tx_outstanding;
Expand Down
10 changes: 7 additions & 3 deletions drivers/infiniband/ulp/ipoib/ipoib_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
};

int ret, size;
int i;

priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
Expand Down Expand Up @@ -191,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.send_cq = priv->cq;
init_attr.recv_cq = priv->cq;

if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;

priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
Expand All @@ -201,11 +205,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;

priv->tx_sge.lkey = priv->mr->lkey;
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->mr->lkey;

priv->tx_wr.opcode = IB_WR_SEND;
priv->tx_wr.sg_list = &priv->tx_sge;
priv->tx_wr.num_sge = 1;
priv->tx_wr.sg_list = priv->tx_sge;
priv->tx_wr.send_flags = IB_SEND_SIGNALED;

return 0;
Expand Down

0 comments on commit 7143740

Please sign in to comment.