[PATCH V2 net-next] net: fec: add CBS offload support

From: wei . fang
Date: Mon Feb 13 2023 - 04:35:23 EST


From: Wei Fang <wei.fang@xxxxxxx>

The FEC hardware supports the Credit-based shaper (CBS), which controls
the bandwidth distribution between normal traffic and time-sensitive
traffic with respect to the total link bandwidth available.
Note, however, that the bandwidth allocation of the hardware is restricted
to certain values. Below is the equation used to calculate the
BW (bandwidth) fraction for each class:
BW fraction = 1 / (1 + 512 / idle_slope)

For values of idle_slope less than 128, idle_slope = 2 ^ n, where n =
0,1,2,...,6. For values equal to or greater than 128, idle_slope =
128 * m, where m = 1,2,3,...,12.
Example 1. idle_slope = 64, therefore BW fraction = 0.111.
Example 2. idle_slope = 128, therefore BW fraction = 0.200.

Here is an example command to set 200Mbps bandwidth on 1000Mbps port
for TC 2 and 111Mbps for TC 3.
tc qdisc add dev eth0 parent root handle 100 mqprio num_tc 3 map \
0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@0 1@1 1@2 hw 0
tc qdisc replace dev eth0 parent 100:2 cbs idleslope 200000 \
sendslope -800000 hicredit 153 locredit -1389 offload 1
tc qdisc replace dev eth0 parent 100:3 cbs idleslope 111000 \
sendslope -889000 hicredit 90 locredit -892 offload 1

Signed-off-by: Wei Fang <wei.fang@xxxxxxx>
---
V2 changes:
1. Based on Simon's suggestion, modified the description in
fec_enet_get_idle_slope to make it more clear.
2. Adopted Simon's suggestion to use macro DIV_ROUND_CLOSEST to calculate
idle_slope. And also amended some nits.
3. According to Andrew's comments, the speed may be equal to 0 when the
link is not up, so added a check to see if speed is equal to 0. In
addition, a change in link speed also needs to be taken into account.
Since a change of link speed invalidates the original configuration,
we just fall back to the default setting.
4. Since some events cause the MAC to reset and clear the CBS registers
(such as link status change, transmit timeout, checksum offload
feature change and so on), reconfigure the CBS in fec_restart.
5. Added more checks for parameters passed in from user space.
---
drivers/net/ethernet/freescale/fec.h | 13 ++
drivers/net/ethernet/freescale/fec_main.c | 179 ++++++++++++++++++++++
2 files changed, 192 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 5ba1e0d71c68..5383681ac273 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -340,6 +340,10 @@ struct bufdesc_ex {
#define RCMR_CMP(X) (((X) == 1) ? RCMR_CMP_1 : RCMR_CMP_2)
#define FEC_TX_BD_FTYPE(X) (((X) & 0xf) << 20)

+#define FEC_QOS_TX_SHEME_MASK GENMASK(2, 0) /* scheme bits in FEC_QOS_SCHEME ("SHEME" sic) */
+#define CREDIT_BASED_SCHEME 0 /* scheme value written when CBS is enabled */
+#define ROUND_ROBIN_SCHEME 1 /* scheme value written when CBS is disabled */
+
/* The number of Tx and Rx buffers. These are allocated from the page
* pool. The code may assume these are power of two, so it it best
* to keep them that size.
@@ -571,6 +575,12 @@ struct fec_stop_mode_gpr {
u8 bit;
};

+struct fec_cbs_params {
+ bool enable[FEC_ENET_MAX_TX_QS]; /* per Tx queue: CBS parameters have been set */
+ int idleslope[FEC_ENET_MAX_TX_QS]; /* per Tx queue idle slope, in kbit/s */
+ int sendslope[FEC_ENET_MAX_TX_QS]; /* per Tx queue send slope, in kbit/s */
+};
+
/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and
* tx_bd_base always point to the base of the buffer descriptors. The
* cur_rx and cur_tx point to the currently available buffer.
@@ -679,6 +689,9 @@ struct fec_enet_private {
/* XDP BPF Program */
struct bpf_prog *xdp_prog;

+ /* CBS parameters */
+ struct fec_cbs_params cbs;
+
u64 ethtool_stats[];
};

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c73e25f8995e..91394ad05121 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -66,6 +66,7 @@
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
#include <soc/imx/cpuidle.h>
+#include <net/pkt_sched.h>
#include <linux/filter.h>
#include <linux/bpf.h>

@@ -1023,6 +1024,174 @@ static void fec_enet_reset_skb(struct net_device *ndev)
}
}

+static u32 fec_enet_get_idle_slope(u8 bw)
+{
+ int msb, power;
+ u32 idle_slope;
+
+ if (bw >= 100) /* 100 - bw below would be <= 0 */
+ return 0;
+
+ /* Invert bw% = 100 / (1 + 512 / idle_slope) to get the raw idle slope */
+ idle_slope = (512 * bw) / (100 - bw);
+
+ if (idle_slope >= 128) {
+ /* Values of 128 or more are rounded to the nearest multiple
+ * of 128. NOTE(review): no upper bound is applied here.
+ */
+ idle_slope = DIV_ROUND_CLOSEST(idle_slope, 128U) * 128U;
+
+ return idle_slope;
+ }
+
+ /* Values below 128 are rounded to the nearest power of 2;
+ * 0 and 1 both map to 1, the smallest power of 2.
+ */
+ if (idle_slope <= 1)
+ return 1;
+
+ msb = __fls(idle_slope); /* index of the highest set bit */
+ power = BIT(msb); /* largest power of 2 <= idle_slope */
+ idle_slope = DIV_ROUND_CLOSEST(idle_slope, power) * power;
+
+ return idle_slope;
+}
+
+static void fec_enet_set_cbs_idle_slope(struct fec_enet_private *fep)
+{
+ u32 bw, val, idle_slope;
+ int speed = fep->speed;
+ int idle_slope_sum = 0;
+ int i;
+
+ if (!speed) /* no link speed known: leave the hardware untouched */
+ return;
+
+ for (i = 1; i < FEC_ENET_MAX_TX_QS; i++) { /* queue 0 is skipped */
+ int port_tx_rate;
+
+ /* As defined in IEEE 802.1Q-2014 Section 8.6.8.2:
+ * sendslope = idleslope - port_tx_rate
+ * If port_tx_rate does not match the current link rate,
+ * return without touching the hardware.
+ */
+ port_tx_rate = fep->cbs.idleslope[i] - fep->cbs.sendslope[i];
+ if (port_tx_rate != speed * 1000)
+ return;
+
+ idle_slope_sum += fep->cbs.idleslope[i];
+ }
+
+ /* The total bandwidth of queue 1 and queue 2 can't be greater
+ * than the link rate.
+ */
+ if (idle_slope_sum > speed * 1000)
+ return;
+
+ /* idleslope is in kilobits per second.
+ * speed is the port rate in megabits per second.
+ * So the bandwidth ratio, bw, is idleslope / (speed * 1000) * 100,
+ * expressed as a percentage (the integer division truncates).
+ */
+ for (i = 1; i < FEC_ENET_MAX_TX_QS; i++) {
+ bw = fep->cbs.idleslope[i] / (speed * 10);
+ idle_slope = fec_enet_get_idle_slope(bw);
+
+ val = readl(fep->hwp + FEC_DMA_CFG(i));
+ val &= ~IDLE_SLOPE_MASK;
+ val |= idle_slope & IDLE_SLOPE_MASK;
+ writel(val, fep->hwp + FEC_DMA_CFG(i));
+ }
+
+ /* Select the credit-based scheme to enable the shaper. */
+ val = readl(fep->hwp + FEC_QOS_SCHEME);
+ val &= ~FEC_QOS_TX_SHEME_MASK;
+ val |= CREDIT_BASED_SCHEME;
+ writel(val, fep->hwp + FEC_QOS_SCHEME);
+}
+
+static int fec_enet_setup_tc_cbs(struct net_device *ndev, void *type_data)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ struct tc_cbs_qopt_offload *cbs = type_data;
+ int queue = cbs->queue;
+ int speed = fep->speed;
+ int queue2;
+
+ if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
+ return -EOPNOTSUPP;
+
+ /* Queue 1 is for Class A and queue 2 for Class B, so the ENET
+ * must have three queues.
+ */
+ if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
+ return -EOPNOTSUPP;
+
+ if (!speed) { /* checked before the disable path below as well */
+ netdev_err(ndev, "Link speed is 0!\n");
+ return -ECANCELED;
+ }
+
+ /* Queue 0 is not AVB capable */
+ if (queue <= 0 || queue >= fep->num_tx_queues) {
+ netdev_err(ndev, "The queue: %d is invalid!\n", queue);
+ return -EINVAL;
+ }
+
+ if (!cbs->enable) { /* disable: back to round-robin */
+ u32 val;
+
+ val = readl(fep->hwp + FEC_QOS_SCHEME);
+ val &= ~FEC_QOS_TX_SHEME_MASK;
+ val |= ROUND_ROBIN_SCHEME;
+ writel(val, fep->hwp + FEC_QOS_SCHEME);
+
+ memset(&fep->cbs, 0, sizeof(fep->cbs)); /* clears the state of every queue */
+
+ return 0;
+ }
+
+ if (cbs->idleslope - cbs->sendslope != speed * 1000 ||
+ cbs->idleslope <= 0 || cbs->sendslope >= 0)
+ return -EINVAL;
+
+ /* The other AVB queue */
+ queue2 = (queue == 1) ? 2 : 1;
+ if (cbs->idleslope + fep->cbs.idleslope[queue2] > speed * 1000) {
+ netdev_err(ndev,
+ "The sum of all idle slope can't exceed link speed!\n");
+ return -EINVAL;
+ }
+
+ fep->cbs.enable[queue] = true;
+ fep->cbs.idleslope[queue] = cbs->idleslope;
+ fep->cbs.sendslope[queue] = cbs->sendslope;
+ /* Only program the hardware credit-based shaper once the CBS
+ * parameters of both queue 1 and queue 2 have been configured.
+ * This avoids parameter conflicts between the two queues that
+ * would cause one of them to fail to be configured. In addition,
+ * once the FEC_QOS_SCHEME field selects the credit-based scheme,
+ * queue 1 and queue 2 take effect as AVB queues immediately, so
+ * it is better to switch schemes after both queues are
+ * configured.
+ */
+ if (fep->cbs.enable[queue2])
+ fec_enet_set_cbs_idle_slope(fep);
+
+ return 0;
+}
+
+static int fec_enet_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_QDISC_CBS: /* type_data is handed to the CBS handler as-is */
+ return fec_enet_setup_tc_cbs(ndev, type_data);
+ default: /* every other setup type is rejected */
+ return -EOPNOTSUPP;
+ }
+}
+
/*
* This function is called to start or restart the FEC during a link
* change, transmit timeout, or to reconfigure the FEC. The network
@@ -1173,6 +1342,15 @@ fec_restart(struct net_device *ndev)

writel(rcntl, fep->hwp + FEC_R_CNTRL);

+ /* Reconfigure the CBS here because some events reset the MAC
+ * and clear the CBS registers, such as link change, transmit
+ * timeout, checksum feature change and so on.
+ */
+ if (fep->quirks & FEC_QUIRK_HAS_AVB &&
+ fep->num_tx_queues == FEC_ENET_MAX_TX_QS &&
+ fep->cbs.enable[1] && fep->cbs.enable[2])
+ fec_enet_set_cbs_idle_slope(fep);
+
/* Setup multicast filter. */
set_multicast_list(ndev);
#ifndef CONFIG_M5272
@@ -3882,6 +4060,7 @@ static const struct net_device_ops fec_netdev_ops = {
.ndo_tx_timeout = fec_timeout,
.ndo_set_mac_address = fec_set_mac_address,
.ndo_eth_ioctl = fec_enet_ioctl,
+ .ndo_setup_tc = fec_enet_setup_tc,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = fec_poll_controller,
#endif
--
2.25.1