Re: [net-next v5 1/2] seg6: inherit DSCP of inner IPv4 packets

From: David Ahern
Date: Wed Aug 26 2020 - 15:41:12 EST


On 8/26/20 6:12 AM, Ahmed Abdelsalam wrote:
>
> On 26/08/2020 02:45, David Ahern wrote:
>> On 8/25/20 5:45 PM, Ahmed Abdelsalam wrote:
>>>
>>> Hi David
>>>
>>> The seg6 encap is implemented through the seg6_lwt rather than
>>> seg6_local_lwt.
>>
>> ok. I don't know the seg6 code; just taking a guess from a quick look.
>>
>>> We can add a flag(SEG6_IPTUNNEL_DSCP) in seg6_iptunnel.h if we do not
>>> want to go the sysctl direction.
>>
>> sysctl is just a big hammer with side effects.
>>
>> It struck me that the DSCP propagation is very similar to the TTL
>> propagation with MPLS which is per route entry (MPLS_IPTUNNEL_TTL and
>> stored as ttl_propagate in mpls_iptunnel_encap). Hence the question of
>> whether SR could make this a per route attribute. Consistency across
>> implementations is best.
>> SRv6 does not have an issue of having this per route.
> Actually, as SRv6 leverage IPv6 encapsulation, I would say it should
> consistent with ip6_tunnel not MPLS.
>
> In ip6_tunnel, both ttl and flowinfo (tclass and flowlabel) are provided.
>
> Ideally, SRv6 code should have done the same with:
> TTL       := VLAUE | DEFAULT | inherit.
> TCLASS    := 0x00 .. 0xFF | inherit
> FLOWLABEL := { 0x00000 .. 0xfffff | inherit | compute.
>

New attributes get added all the time. Why does something like this now
work for these features:

diff --git a/include/uapi/linux/seg6_iptunnel.h
b/include/uapi/linux/seg6_iptunnel.h
index eb815e0d0ac3..b628333ba100 100644
--- a/include/uapi/linux/seg6_iptunnel.h
+++ b/include/uapi/linux/seg6_iptunnel.h
@@ -20,6 +20,8 @@
enum {
SEG6_IPTUNNEL_UNSPEC,
SEG6_IPTUNNEL_SRH,
+ SEG6_IPTUNNEL_TTL, /* u8 */
+ SEG6_IPTUNNEL_TCLASS, /* u8 */
__SEG6_IPTUNNEL_MAX,
};
#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1)
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..7cb512b65bc3 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -46,6 +46,11 @@ static size_t seg6_lwt_headroom(struct
seg6_iptunnel_encap *tuninfo)

struct seg6_lwt {
struct dst_cache cache;
+ u8 ttl_propagate; /* propagate ttl from inner header */
+ u8 default_ttl; /* ttl value to use */
+ u8 tclass_inherit; /* inherit tclass from inner header */
+ u8 tclass; /* tclass value to use */
+
struct seg6_iptunnel_encap tuninfo[];
};

@@ -61,7 +66,10 @@ seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
}

static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX +
1] = {
- [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY },
+ [SEG6_IPTUNNEL_UNSPEC] = { .strict_start_type =
SEG6_IPTUNNEL_SRH + 1 },
+ [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY },
+ [SEG6_IPTUNNEL_TTL] = { .type = NLA_U8 },
+ [SEG6_IPTUNNEL_TCLASS] = { .type = NLA_U8 },
};

static int nla_put_srh(struct sk_buff *skb, int attrtype,
@@ -460,6 +468,22 @@ static int seg6_build_state(struct net *net, struct
nlattr *nla,

memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);

+ if (tb[SEG6_IPTUNNEL_TTL]) {
+ slwt->default_ttl = nla_get_u8(tb[SEG6_IPTUNNEL_TTL]);
+ slwt->ttl_propagate = slwt->default_ttl ? 0 : 1;
+ }
+ if (tb[SEG6_IPTUNNEL_TCLASS]) {
+ u32 tmp = nla_get_u32(tb[SEG6_IPTUNNEL_TCLASS]);
+
+ if (tmp == (u32)-1) {
+ slwt->tclass_inherit = true;
+ } else if (tmp & <some valid range mask>) {
+ error
+ } else {
+ slwt->tclass = ...
+ }
+ }
+
newts->type = LWTUNNEL_ENCAP_SEG6;
newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;


And the use the values in slwt as needed.