[PATCH v3 05/13] tile: support jumbo frames in the tilegx network driver

From: Chris Metcalf
Date: Thu Aug 01 2013 - 15:35:38 EST


Signed-off-by: Chris Metcalf <cmetcalf@xxxxxxxxxx>
---
arch/tile/gxio/iorpc_mpipe.c | 47 +++++
arch/tile/gxio/mpipe.c | 18 +-
arch/tile/include/gxio/iorpc_mpipe.h | 4 +
arch/tile/include/gxio/mpipe.h | 101 +++++++++-
drivers/net/ethernet/tile/tilegx.c | 347 +++++++++++++++++++----------------
5 files changed, 351 insertions(+), 166 deletions(-)

diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c
index 31b87bf..c2fb1516 100644
--- a/arch/tile/gxio/iorpc_mpipe.c
+++ b/arch/tile/gxio/iorpc_mpipe.c
@@ -387,6 +387,27 @@ int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac)

EXPORT_SYMBOL(gxio_mpipe_link_close_aux);

+struct link_set_attr_aux_param {
+ int mac;
+ uint32_t attr;
+ int64_t val;
+};
+
+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac,
+ uint32_t attr, int64_t val)
+{
+ struct link_set_attr_aux_param temp;
+ struct link_set_attr_aux_param *params = &temp;
+
+ params->mac = mac;
+ params->attr = attr;
+ params->val = val;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_LINK_SET_ATTR_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_link_set_attr_aux);

struct get_timestamp_aux_param {
uint64_t sec;
@@ -454,6 +475,32 @@ int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context,

EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux);

+struct config_edma_ring_blks_param {
+ unsigned int ering;
+ unsigned int max_blks;
+ unsigned int min_snf_blks;
+ unsigned int db;
+};
+
+int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context,
+ unsigned int ering, unsigned int max_blks,
+ unsigned int min_snf_blks, unsigned int db)
+{
+ struct config_edma_ring_blks_param temp;
+ struct config_edma_ring_blks_param *params = &temp;
+
+ params->ering = ering;
+ params->max_blks = max_blks;
+ params->min_snf_blks = min_snf_blks;
+ params->db = db;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_CONFIG_EDMA_RING_BLKS);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks);
+
struct arm_pollfd_param {
union iorpc_pollfd pollfd;
};
diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c
index e71c633..0567cf0 100644
--- a/arch/tile/gxio/mpipe.c
+++ b/arch/tile/gxio/mpipe.c
@@ -383,7 +383,7 @@ EXPORT_SYMBOL_GPL(gxio_mpipe_iqueue_init);

int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
gxio_mpipe_context_t *context,
- unsigned int edma_ring_id,
+ unsigned int ering,
unsigned int channel,
void *mem, unsigned int mem_size,
unsigned int mem_flags)
@@ -394,7 +394,7 @@ int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
/* Offset used to read number of completed commands. */
MPIPE_EDMA_POST_REGION_ADDR_t offset;

- int result = gxio_mpipe_init_edma_ring(context, edma_ring_id, channel,
+ int result = gxio_mpipe_init_edma_ring(context, ering, channel,
mem, mem_size, mem_flags);
if (result < 0)
return result;
@@ -405,7 +405,7 @@ int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
offset.region =
MPIPE_MMIO_ADDR__REGION_VAL_EDMA -
MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
- offset.ring = edma_ring_id;
+ offset.ring = ering;

__gxio_dma_queue_init(&equeue->dma_queue,
context->mmio_fast_base + offset.word,
@@ -413,6 +413,9 @@ int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
equeue->edescs = mem;
equeue->mask_num_entries = num_entries - 1;
equeue->log2_num_entries = __builtin_ctz(num_entries);
+ equeue->context = context;
+ equeue->ering = ering;
+ equeue->channel = channel;

return 0;
}
@@ -543,3 +546,12 @@ int gxio_mpipe_link_close(gxio_mpipe_link_t *link)
}

EXPORT_SYMBOL_GPL(gxio_mpipe_link_close);
+
+int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr,
+ int64_t val)
+{
+ return gxio_mpipe_link_set_attr_aux(link->context, link->mac, attr,
+ val);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_link_set_attr);
diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h
index 9d50fce..eef60fd 100644
--- a/arch/tile/include/gxio/iorpc_mpipe.h
+++ b/arch/tile/include/gxio/iorpc_mpipe.h
@@ -44,10 +44,12 @@
#define GXIO_MPIPE_OP_REGISTER_CLIENT_MEMORY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1210)
#define GXIO_MPIPE_OP_LINK_OPEN_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1211)
#define GXIO_MPIPE_OP_LINK_CLOSE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1212)
+#define GXIO_MPIPE_OP_LINK_SET_ATTR_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1213)

#define GXIO_MPIPE_OP_GET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x121e)
#define GXIO_MPIPE_OP_SET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x121f)
#define GXIO_MPIPE_OP_ADJUST_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1220)
+#define GXIO_MPIPE_OP_CONFIG_EDMA_RING_BLKS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1221)
#define GXIO_MPIPE_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000)
#define GXIO_MPIPE_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001)
#define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
@@ -114,6 +116,8 @@ int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context,

int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac);

+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac,
+ uint32_t attr, int64_t val);

int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec,
uint64_t * nsec, uint64_t * cycles);
diff --git a/arch/tile/include/gxio/mpipe.h b/arch/tile/include/gxio/mpipe.h
index b74f470..ed742e3 100644
--- a/arch/tile/include/gxio/mpipe.h
+++ b/arch/tile/include/gxio/mpipe.h
@@ -810,7 +810,7 @@ extern int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
/* Initialize an eDMA ring, using the given memory and size.
*
* @param context An initialized mPIPE context.
- * @param ring The eDMA ring index.
+ * @param ering The eDMA ring index.
* @param channel The channel to use. This must be one of the channels
* associated with the context's set of open links.
* @param mem A physically contiguous region of memory to be filled
@@ -823,10 +823,37 @@ extern int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
* ::GXIO_ERR_INVAL_MEMORY_SIZE on failure.
*/
extern int gxio_mpipe_init_edma_ring(gxio_mpipe_context_t *context,
- unsigned int ring, unsigned int channel,
+ unsigned int ering, unsigned int channel,
void *mem, size_t mem_size,
unsigned int mem_flags);

+/* Set the "max_blks", "min_snf_blks", and "db" fields of
+ * ::MPIPE_EDMA_RG_INIT_DAT_THRESH_t for a given edma ring.
+ *
+ * The global pool of dynamic blocks will be automatically adjusted.
+ *
+ * This function should not be called after any egress has been done
+ * on the edma ring.
+ *
+ * Most applications should just use gxio_mpipe_equeue_set_snf_size().
+ *
+ * @param context An initialized mPIPE context.
+ * @param ering The eDMA ring index.
+ * @param max_blks The number of blocks to dedicate to the ring
+ * (normally min_snf_blks + 1). Must be greater than min_snf_blks.
+ * @param min_snf_blks The number of blocks which must be stored
+ * prior to starting to send the packet (normally 12).
+ * @param db Whether to allow use of dynamic blocks by the ring
+ * (normally 1).
+ *
+ * @return 0 on success, negative on error.
+ */
+extern int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context,
+ unsigned int ering,
+ unsigned int max_blks,
+ unsigned int min_snf_blks,
+ unsigned int db);
+
/*****************************************************************
* Classifier Program *
******************************************************************/
@@ -1288,15 +1315,39 @@ typedef struct {
/* The log2() of the number of entries. */
unsigned long log2_num_entries;

+ /* The context. */
+ gxio_mpipe_context_t *context;
+
+ /* The ering. */
+ unsigned int ering;
+
+ /* The channel. */
+ unsigned int channel;
+
} gxio_mpipe_equeue_t;

/* Initialize an "equeue".
*
- * Takes the equeue plus the same args as gxio_mpipe_init_edma_ring().
+ * This function uses gxio_mpipe_init_edma_ring() to initialize the
+ * underlying edma_ring using the provided arguments.
+ *
+ * @param equeue An egress queue to be initialized.
+ * @param context An initialized mPIPE context.
+ * @param ering The eDMA ring index.
+ * @param channel The channel to use. This must be one of the channels
+ * associated with the context's set of open links.
+ * @param mem A physically contiguous region of memory to be filled
+ * with a ring of ::gxio_mpipe_edesc_t structures.
+ * @param mem_size Number of bytes in the ring. Must be 512, 2048,
+ * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)).
+ * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
+ *
+ * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or
+ * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure.
*/
extern int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
gxio_mpipe_context_t *context,
- unsigned int edma_ring_id,
+ unsigned int ering,
unsigned int channel,
void *mem, unsigned int mem_size,
unsigned int mem_flags);
@@ -1494,6 +1545,37 @@ static inline int gxio_mpipe_equeue_is_complete(gxio_mpipe_equeue_t *equeue,
completion_slot, update);
}

+/* Set the snf (store and forward) size for an equeue.
+ *
+ * The snf size for an equeue defaults to 1536, and encodes the size
+ * of the largest packet for which egress is guaranteed to avoid
+ * transmission underruns and/or corrupt checksums under heavy load.
+ *
+ * The snf size affects a global resource pool which cannot support,
+ * for example, all 24 equeues each requesting an snf size of 8K.
+ *
+ * To ensure that jumbo packets can be egressed properly, the snf size
+ * should be set to the size of the largest possible packet, which
+ * will usually be limited by the size of the app's largest buffer.
+ *
+ * This is a convenience wrapper around
+ * gxio_mpipe_config_edma_ring_blks().
+ *
+ * This function should not be called after any egress has been done
+ * on the equeue.
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param size The snf size, in bytes.
+ * @return Zero on success, negative error otherwise.
+ */
+static inline int gxio_mpipe_equeue_set_snf_size(gxio_mpipe_equeue_t *equeue,
+ size_t size)
+{
+ int blks = (size + 127) / 128;
+ return gxio_mpipe_config_edma_ring_blks(equeue->context, equeue->ering,
+ blks + 1, blks, 1);
+}
+
/*****************************************************************
* Link Management *
******************************************************************/
@@ -1697,6 +1779,17 @@ static inline int gxio_mpipe_link_channel(gxio_mpipe_link_t *link)
return link->channel;
}

+/* Set a link attribute.
+ *
+ * @param link A properly initialized link state object.
+ * @param attr An attribute from the set of @ref gxio_mpipe_link_attrs.
+ * @param val New value of the attribute.
+ * @return 0 if the attribute was successfully set, or a negative error
+ * code.
+ */
+extern int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr,
+ int64_t val);
+
///////////////////////////////////////////////////////////////////
// Timestamp //
///////////////////////////////////////////////////////////////////
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 6085571..39c1e9e 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -76,6 +76,9 @@

#define MAX_FRAGS (MAX_SKB_FRAGS + 1)

+/* The "kinds" of buffer stacks (small/large/jumbo). */
+#define MAX_KINDS 3
+
/* Size of completions data to allocate.
* ISSUE: Probably more than needed since we don't use all the channels.
*/
@@ -141,10 +144,8 @@ struct tile_net_info {
/* NAPI flags. */
bool napi_added;
bool napi_enabled;
- /* Number of small sk_buffs which must still be provided. */
- unsigned int num_needed_small_buffers;
- /* Number of large sk_buffs which must still be provided. */
- unsigned int num_needed_large_buffers;
+ /* Number of buffers (by kind) which must still be provided. */
+ unsigned int num_needed_buffers[MAX_KINDS];
/* A timer for handling egress completions. */
struct hrtimer egress_timer;
/* True if "egress_timer" is scheduled. */
@@ -200,24 +201,25 @@ static DEFINE_PER_CPU(struct tile_net_info, per_cpu_info);
/* The "context" for all devices. */
static gxio_mpipe_context_t context;

-/* Buffer sizes and mpipe enum codes for buffer stacks.
+/* The buffer size enums for each buffer stack.
* See arch/tile/include/gxio/mpipe.h for the set of possible values.
+ * We avoid the "10368" size because it can induce "false chaining"
+ * on "cut-through" jumbo packets.
*/
-#define BUFFER_SIZE_SMALL_ENUM GXIO_MPIPE_BUFFER_SIZE_128
-#define BUFFER_SIZE_SMALL 128
-#define BUFFER_SIZE_LARGE_ENUM GXIO_MPIPE_BUFFER_SIZE_1664
-#define BUFFER_SIZE_LARGE 1664
+static gxio_mpipe_buffer_size_enum_t buffer_size_enums[MAX_KINDS] = {
+ GXIO_MPIPE_BUFFER_SIZE_128,
+ GXIO_MPIPE_BUFFER_SIZE_1664,
+ GXIO_MPIPE_BUFFER_SIZE_16384
+};

-/* The small/large "buffer stacks". */
-static int small_buffer_stack = -1;
-static int large_buffer_stack = -1;
+/* The actual memory allocated for the buffer stacks. */
+static void *buffer_stack_vas[MAX_KINDS];

-/* Amount of memory allocated for each buffer stack. */
-static size_t buffer_stack_size;
+/* The amount of memory allocated for each buffer stack. */
+static size_t buffer_stack_bytes[MAX_KINDS];

-/* The actual memory allocated for the buffer stacks. */
-static void *small_buffer_stack_va;
-static void *large_buffer_stack_va;
+/* The first buffer stack index (small = +0, large = +1, jumbo = +2). */
+static int first_buffer_stack = -1;

/* The buckets. */
static int first_bucket = -1;
@@ -238,6 +240,9 @@ static char *loopify_link_name;
/* If "tile_net.custom" was specified, this is non-NULL. */
static char *custom_str;

+/* If "tile_net.jumbo=NUM" was specified, this is "NUM". */
+static uint jumbo_num;
+
/* The "tile_net.cpus" argument specifies the cpus that are dedicated
* to handle ingress packets.
*
@@ -292,6 +297,12 @@ MODULE_PARM_DESC(loopify, "name the device to use loop0/1 for ingress/egress");
module_param_named(custom, custom_str, charp, 0444);
MODULE_PARM_DESC(custom, "indicates a (heavily) customized classifier");

+/* The "tile_net.jumbo" argument causes us to support "jumbo" packets,
+ * and to allocate the given number of "jumbo" buffers.
+ */
+module_param_named(jumbo, jumbo_num, uint, 0444);
+MODULE_PARM_DESC(jumbo, "the number of buffers to support jumbo packets");
+
/* Atomically update a statistics field.
* Note that on TILE-Gx, this operation is fire-and-forget on the
* issuing core (single-cycle dispatch) and takes only a few cycles
@@ -305,15 +316,15 @@ static void tile_net_stats_add(unsigned long value, unsigned long *field)
}

/* Allocate and push a buffer. */
-static bool tile_net_provide_buffer(bool small)
+static bool tile_net_provide_buffer(int kind)
{
- int stack = small ? small_buffer_stack : large_buffer_stack;
+ gxio_mpipe_buffer_size_enum_t bse = buffer_size_enums[kind];
+ size_t bs = gxio_mpipe_buffer_size_enum_to_buffer_size(bse);
const unsigned long buffer_alignment = 128;
struct sk_buff *skb;
int len;

- len = sizeof(struct sk_buff **) + buffer_alignment;
- len += (small ? BUFFER_SIZE_SMALL : BUFFER_SIZE_LARGE);
+ len = sizeof(struct sk_buff **) + buffer_alignment + bs;
skb = dev_alloc_skb(len);
if (skb == NULL)
return false;
@@ -328,7 +339,7 @@ static bool tile_net_provide_buffer(bool small)
/* Make sure "skb" and the back-pointer have been flushed. */
wmb();

- gxio_mpipe_push_buffer(&context, stack,
+ gxio_mpipe_push_buffer(&context, first_buffer_stack + kind,
(void *)va_to_tile_io_addr(skb->data));

return true;
@@ -369,24 +380,19 @@ static void tile_net_pop_all_buffers(int stack)
static void tile_net_provide_needed_buffers(void)
{
struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
-
- while (info->num_needed_small_buffers != 0) {
- if (!tile_net_provide_buffer(true))
- goto oops;
- info->num_needed_small_buffers--;
- }
-
- while (info->num_needed_large_buffers != 0) {
- if (!tile_net_provide_buffer(false))
- goto oops;
- info->num_needed_large_buffers--;
+ int kind;
+
+ for (kind = 0; kind < MAX_KINDS; kind++) {
+ while (info->num_needed_buffers[kind] != 0) {
+ if (!tile_net_provide_buffer(kind)) {
+ /* Add info to the allocation failure dump. */
+ pr_notice("Tile %d still needs some buffers\n",
+ info->my_cpu);
+ return;
+ }
+ info->num_needed_buffers[kind]--;
+ }
}
-
- return;
-
-oops:
- /* Add a description to the page allocation failure dump. */
- pr_notice("Tile %d still needs some buffers\n", info->my_cpu);
}

static inline bool filter_packet(struct net_device *dev, void *buf)
@@ -426,10 +432,12 @@ static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb,
tile_net_stats_add(len, &priv->stats.rx_bytes);

/* Need a new buffer. */
- if (idesc->size == BUFFER_SIZE_SMALL_ENUM)
- info->num_needed_small_buffers++;
+ if (idesc->size == buffer_size_enums[0])
+ info->num_needed_buffers[0]++;
+ else if (idesc->size == buffer_size_enums[1])
+ info->num_needed_buffers[1]++;
else
- info->num_needed_large_buffers++;
+ info->num_needed_buffers[2]++;
}

/* Handle a packet. Return true if "processed", false if "filtered". */
@@ -437,29 +445,29 @@ static bool tile_net_handle_packet(gxio_mpipe_idesc_t *idesc)
{
struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
struct net_device *dev = tile_net_devs_for_channel[idesc->channel];
+ struct tile_net_priv *priv = netdev_priv(dev);
uint8_t l2_offset;
void *va;
void *buf;
unsigned long len;
bool filter;

- /* Drop packets for which no buffer was available.
- * NOTE: This happens under heavy load.
+ /* Drop packets for which no buffer was available (which can
+ * happen under heavy load), or for which the me/tr/ce flags
+ * are set (which can happen for jumbo cut-through packets,
+ * or with a customized classifier).
*/
- if (idesc->be) {
- struct tile_net_priv *priv = netdev_priv(dev);
- tile_net_stats_add(1, &priv->stats.rx_dropped);
- gxio_mpipe_iqueue_consume(&info->iqueue, idesc);
- if (net_ratelimit())
- pr_info("Dropping packet (insufficient buffers).\n");
- return false;
+ if (idesc->be || idesc->me || idesc->tr || idesc->ce) {
+ if (dev)
+ tile_net_stats_add(1, &priv->stats.rx_errors);
+ goto drop;
}

/* Get the "l2_offset", if allowed. */
l2_offset = custom_str ? 0 : gxio_mpipe_idesc_get_l2_offset(idesc);

- /* Get the raw buffer VA (includes "headroom"). */
- va = tile_io_addr_to_va((unsigned long)(long)idesc->va);
+ /* Get the VA (including NET_IP_ALIGN bytes of "headroom"). */
+ va = tile_io_addr_to_va((unsigned long)idesc->va);

/* Get the actual packet start/length. */
buf = va + l2_offset;
@@ -470,6 +478,9 @@ static bool tile_net_handle_packet(gxio_mpipe_idesc_t *idesc)

filter = filter_packet(dev, buf);
if (filter) {
+ if (dev)
+ tile_net_stats_add(1, &priv->stats.rx_dropped);
+drop:
gxio_mpipe_iqueue_drop(&info->iqueue, idesc);
} else {
struct sk_buff *skb = mpipe_buf_to_skb(va);
@@ -722,86 +733,95 @@ static int tile_net_update(struct net_device *dev)
return 0;
}

-/* Allocate and initialize mpipe buffer stacks, and register them in
- * the mPIPE TLBs, for both small and large packet sizes.
- * This routine supports tile_net_init_mpipe(), below.
- */
-static int init_buffer_stacks(struct net_device *dev, int num_buffers)
+/* Initialize a buffer stack. */
+static int create_buffer_stack(struct net_device *dev,
+ int kind, size_t num_buffers)
{
pte_t hash_pte = pte_set_home((pte_t) { 0 }, PAGE_HOME_HASH);
- int rc;
+ size_t needed = gxio_mpipe_calc_buffer_stack_bytes(num_buffers);
+ int stack_idx = first_buffer_stack + kind;
+ void *va;
+ int i, rc;

- /* Compute stack bytes; we round up to 64KB and then use
- * alloc_pages() so we get the required 64KB alignment as well.
+ /* Round up to 64KB and then use alloc_pages_exact() so we get the
+ * required 64KB alignment.
*/
- buffer_stack_size =
- ALIGN(gxio_mpipe_calc_buffer_stack_bytes(num_buffers),
- 64 * 1024);
-
- /* Allocate two buffer stack indices. */
- rc = gxio_mpipe_alloc_buffer_stacks(&context, 2, 0, 0);
- if (rc < 0) {
- netdev_err(dev, "gxio_mpipe_alloc_buffer_stacks failed: %d\n",
- rc);
- return rc;
- }
- small_buffer_stack = rc;
- large_buffer_stack = rc + 1;
+ buffer_stack_bytes[kind] = ALIGN(needed, 64 * 1024);

- /* Allocate the small memory stack. */
- small_buffer_stack_va =
- alloc_pages_exact(buffer_stack_size, GFP_KERNEL);
- if (small_buffer_stack_va == NULL) {
+ va = alloc_pages_exact(buffer_stack_bytes[kind], GFP_KERNEL);
+ if (va == NULL) {
netdev_err(dev,
- "Could not alloc %zd bytes for buffer stacks\n",
- buffer_stack_size);
+ "Could not alloc %zd bytes for buffer stack %d\n",
+ buffer_stack_bytes[kind], kind);
return -ENOMEM;
}
- rc = gxio_mpipe_init_buffer_stack(&context, small_buffer_stack,
- BUFFER_SIZE_SMALL_ENUM,
- small_buffer_stack_va,
- buffer_stack_size, 0);
+
+ /* Initialize the buffer stack. */
+ rc = gxio_mpipe_init_buffer_stack(&context, stack_idx,
+ buffer_size_enums[kind],
+ va, buffer_stack_bytes[kind], 0);
if (rc != 0) {
netdev_err(dev, "gxio_mpipe_init_buffer_stack: %d\n", rc);
+ free_pages_exact(va, buffer_stack_bytes[kind]);
return rc;
}
- rc = gxio_mpipe_register_client_memory(&context, small_buffer_stack,
+
+ buffer_stack_vas[kind] = va;
+
+ rc = gxio_mpipe_register_client_memory(&context, stack_idx,
hash_pte, 0);
if (rc != 0) {
- netdev_err(dev,
- "gxio_mpipe_register_buffer_memory failed: %d\n",
- rc);
+ netdev_err(dev, "gxio_mpipe_register_client_memory: %d\n", rc);
return rc;
}

- /* Allocate the large buffer stack. */
- large_buffer_stack_va =
- alloc_pages_exact(buffer_stack_size, GFP_KERNEL);
- if (large_buffer_stack_va == NULL) {
- netdev_err(dev,
- "Could not alloc %zd bytes for buffer stacks\n",
- buffer_stack_size);
- return -ENOMEM;
- }
- rc = gxio_mpipe_init_buffer_stack(&context, large_buffer_stack,
- BUFFER_SIZE_LARGE_ENUM,
- large_buffer_stack_va,
- buffer_stack_size, 0);
- if (rc != 0) {
- netdev_err(dev, "gxio_mpipe_init_buffer_stack failed: %d\n",
- rc);
- return rc;
+ /* Provide initial buffers. */
+ for (i = 0; i < num_buffers; i++) {
+ if (!tile_net_provide_buffer(kind)) {
+ netdev_err(dev, "Cannot allocate initial sk_bufs!\n");
+ return -ENOMEM;
+ }
}
- rc = gxio_mpipe_register_client_memory(&context, large_buffer_stack,
- hash_pte, 0);
- if (rc != 0) {
- netdev_err(dev,
- "gxio_mpipe_register_buffer_memory failed: %d\n",
- rc);
+
+ return 0;
+}
+
+/* Allocate and initialize mpipe buffer stacks, and register them in
+ * the mPIPE TLBs, for small, large, and (possibly) jumbo packet sizes.
+ * This routine supports tile_net_init_mpipe(), below.
+ */
+static int init_buffer_stacks(struct net_device *dev,
+ int network_cpus_count)
+{
+ int num_kinds = MAX_KINDS - (jumbo_num == 0);
+ size_t num_buffers;
+ int rc;
+
+ /* Allocate the buffer stacks. */
+ rc = gxio_mpipe_alloc_buffer_stacks(&context, num_kinds, 0, 0);
+ if (rc < 0) {
+ netdev_err(dev, "gxio_mpipe_alloc_buffer_stacks: %d\n", rc);
return rc;
}
+ first_buffer_stack = rc;

- return 0;
+ /* Enough small/large buffers to (normally) avoid buffer errors. */
+ num_buffers =
+ network_cpus_count * (IQUEUE_ENTRIES + TILE_NET_BATCH);
+
+ /* Allocate the small buffer stack. */
+ if (rc >= 0)
+ rc = create_buffer_stack(dev, 0, num_buffers);
+
+ /* Allocate the large buffer stack. */
+ if (rc >= 0)
+ rc = create_buffer_stack(dev, 1, num_buffers);
+
+ /* Allocate the jumbo buffer stack if needed. */
+ if (rc >= 0 && jumbo_num != 0)
+ rc = create_buffer_stack(dev, 2, jumbo_num);
+
+ return rc;
}

/* Allocate per-cpu resources (memory for completions and idescs).
@@ -940,13 +960,14 @@ static int tile_net_setup_interrupts(struct net_device *dev)
/* Undo any state set up partially by a failed call to tile_net_init_mpipe. */
static void tile_net_init_mpipe_fail(void)
{
- int cpu;
+ int kind, cpu;

/* Do cleanups that require the mpipe context first. */
- if (small_buffer_stack >= 0)
- tile_net_pop_all_buffers(small_buffer_stack);
- if (large_buffer_stack >= 0)
- tile_net_pop_all_buffers(large_buffer_stack);
+ for (kind = 0; kind < MAX_KINDS; kind++) {
+ if (buffer_stack_vas[kind] != NULL) {
+ tile_net_pop_all_buffers(first_buffer_stack + kind);
+ }
+ }

/* Destroy mpipe context so the hardware no longer owns any memory. */
gxio_mpipe_destroy(&context);
@@ -961,15 +982,15 @@ static void tile_net_init_mpipe_fail(void)
info->iqueue.idescs = NULL;
}

- if (small_buffer_stack_va)
- free_pages_exact(small_buffer_stack_va, buffer_stack_size);
- if (large_buffer_stack_va)
- free_pages_exact(large_buffer_stack_va, buffer_stack_size);
+ for (kind = 0; kind < MAX_KINDS; kind++) {
+ if (buffer_stack_vas[kind] != NULL) {
+ free_pages_exact(buffer_stack_vas[kind],
+ buffer_stack_bytes[kind]);
+ buffer_stack_vas[kind] = NULL;
+ }
+ }

- small_buffer_stack_va = NULL;
- large_buffer_stack_va = NULL;
- large_buffer_stack = -1;
- small_buffer_stack = -1;
+ first_buffer_stack = -1;
first_bucket = -1;
}

@@ -984,7 +1005,7 @@ static void tile_net_init_mpipe_fail(void)
*/
static int tile_net_init_mpipe(struct net_device *dev)
{
- int i, num_buffers, rc;
+ int rc;
int cpu;
int first_ring, ring;
int network_cpus_count = cpus_weight(network_cpus_map);
@@ -1001,27 +1022,10 @@ static int tile_net_init_mpipe(struct net_device *dev)
}

/* Set up the buffer stacks. */
- num_buffers =
- network_cpus_count * (IQUEUE_ENTRIES + TILE_NET_BATCH);
- rc = init_buffer_stacks(dev, num_buffers);
+ rc = init_buffer_stacks(dev, network_cpus_count);
if (rc != 0)
goto fail;

- /* Provide initial buffers. */
- rc = -ENOMEM;
- for (i = 0; i < num_buffers; i++) {
- if (!tile_net_provide_buffer(true)) {
- netdev_err(dev, "Cannot allocate initial sk_bufs!\n");
- goto fail;
- }
- }
- for (i = 0; i < num_buffers; i++) {
- if (!tile_net_provide_buffer(false)) {
- netdev_err(dev, "Cannot allocate initial sk_bufs!\n");
- goto fail;
- }
- }
-
/* Allocate one NotifRing for each network cpu. */
rc = gxio_mpipe_alloc_notif_rings(&context, network_cpus_count, 0, 0);
if (rc < 0) {
@@ -1063,13 +1067,13 @@ fail:
*/
static int tile_net_init_egress(struct net_device *dev, int echannel)
{
+ static int ering = -1;
struct page *headers_page, *edescs_page, *equeue_page;
gxio_mpipe_edesc_t *edescs;
gxio_mpipe_equeue_t *equeue;
unsigned char *headers;
int headers_order, edescs_order, equeue_order;
size_t edescs_size;
- int edma;
int rc = -ENOMEM;

/* Only initialize once. */
@@ -1110,25 +1114,37 @@ static int tile_net_init_egress(struct net_device *dev, int echannel)
}
equeue = pfn_to_kaddr(page_to_pfn(equeue_page));

- /* Allocate an edma ring. Note that in practice this can't
- * fail, which is good, because we will leak an edma ring if so.
- */
- rc = gxio_mpipe_alloc_edma_rings(&context, 1, 0, 0);
- if (rc < 0) {
- netdev_warn(dev, "gxio_mpipe_alloc_edma_rings failed: %d\n",
- rc);
- goto fail_equeue;
+ /* Allocate an edma ring (using a one entry "free list"). */
+ if (ering < 0) {
+ rc = gxio_mpipe_alloc_edma_rings(&context, 1, 0, 0);
+ if (rc < 0) {
+ netdev_warn(dev, "gxio_mpipe_alloc_edma_rings: %d\n",
+ rc);
+ goto fail_equeue;
+ }
+ ering = rc;
}
- edma = rc;

/* Initialize the equeue. */
- rc = gxio_mpipe_equeue_init(equeue, &context, edma, echannel,
+ rc = gxio_mpipe_equeue_init(equeue, &context, ering, echannel,
edescs, edescs_size, 0);
if (rc != 0) {
netdev_err(dev, "gxio_mpipe_equeue_init failed: %d\n", rc);
goto fail_equeue;
}

+ /* Don't reuse the ering later. */
+ ering = -1;
+
+ if (jumbo_num != 0) {
+ /* Make sure "jumbo" packets can be egressed safely. */
+ if (gxio_mpipe_equeue_set_snf_size(equeue, 10368) < 0) {
+ /* ISSUE: There is no "gxio_mpipe_equeue_destroy()". */
+ netdev_warn(dev, "Jumbo packets may not be egressed"
+ " properly on channel %d\n", echannel);
+ }
+ }
+
/* Done. */
egress_for_echannel[echannel].equeue = equeue;
egress_for_echannel[echannel].headers = headers;
@@ -1156,6 +1172,17 @@ static int tile_net_link_open(struct net_device *dev, gxio_mpipe_link_t *link,
netdev_err(dev, "Failed to open '%s'\n", link_name);
return rc;
}
+ if (jumbo_num != 0) {
+ u32 attr = GXIO_MPIPE_LINK_RECEIVE_JUMBO;
+ rc = gxio_mpipe_link_set_attr(link, attr, 1);
+ if (rc != 0) {
+ netdev_err(dev,
+ "Cannot receive jumbo packets on '%s'\n",
+ link_name);
+ gxio_mpipe_link_close(link);
+ return rc;
+ }
+ }
rc = gxio_mpipe_link_channel(link);
if (rc < 0 || rc >= TILE_NET_CHANNELS) {
netdev_err(dev, "gxio_mpipe_link_channel bad value: %d\n", rc);
@@ -1499,8 +1526,8 @@ static void tso_egress(struct net_device *dev, gxio_mpipe_equeue_t *equeue,
edesc_head.xfer_size = sh_len;

/* This is only used to specify the TLB. */
- edesc_head.stack_idx = large_buffer_stack;
- edesc_body.stack_idx = large_buffer_stack;
+ edesc_head.stack_idx = first_buffer_stack;
+ edesc_body.stack_idx = first_buffer_stack;

/* Egress all the edescs. */
for (segment = 0; segment < sh->gso_segs; segment++) {
@@ -1660,7 +1687,7 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
num_edescs = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));

/* This is only used to specify the TLB. */
- edesc.stack_idx = large_buffer_stack;
+ edesc.stack_idx = first_buffer_stack;

/* Prepare the edescs. */
for (i = 0; i < num_edescs; i++) {
@@ -1740,7 +1767,9 @@ static struct net_device_stats *tile_net_get_stats(struct net_device *dev)
/* Change the MTU. */
static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
{
- if ((new_mtu < 68) || (new_mtu > 1500))
+ if (new_mtu < 68)
+ return -EINVAL;
+ if (new_mtu > ((jumbo_num != 0) ? 9000 : 1500))
return -EINVAL;
dev->mtu = new_mtu;
return 0;
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/