Re: Linux 5.1.16

From: Greg KH
Date: Wed Jul 03 2019 - 08:30:26 EST


diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
index 6c42c75103eb..6361fb01c9c1 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/robust-futexes.txt
@@ -218,5 +218,4 @@ All other architectures should build just fine too - but they won't have
the new syscalls yet.

Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
-inline function before writing up the syscalls (that function returns
--ENOSYS right now).
+inline function before writing up the syscalls.
diff --git a/Makefile b/Makefile
index d7b3c8e3ff3e..46a0ae537182 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 1
-SUBLEVEL = 15
+SUBLEVEL = 16
EXTRAVERSION =
NAME = Shy Crocodile

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 8fbd583b18e1..e9d2e578cbe6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -51,7 +51,7 @@ endif

KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS += -Wno-psabi
+KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)

KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 2d78ea6932b7..bdb3c05070a2 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -134,7 +134,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
: "memory");
uaccess_disable();

- *uval = val;
+ if (!ret)
+ *uval = val;
+
return ret;
}
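
For context: the hunk above tightens the contract of
futex_atomic_cmpxchg_inatomic() so that *uval is written only when the user
access did not fault. A minimal caller-side sketch of that contract
(illustrative only; futex_cmpxchg_value() is a made-up helper, not kernel
code):

	static int futex_cmpxchg_value(u32 __user *uaddr, u32 old, u32 new,
				       u32 *oldval)
	{
		int ret = futex_atomic_cmpxchg_inatomic(oldval, uaddr, old, new);

		if (ret)
			return ret;	/* faulted: *oldval was never written */
		return *oldval == old ? 0 : -EAGAIN;
	}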

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 9c01f04db64d..f71b84d9f294 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -277,6 +277,7 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
@@ -394,6 +395,13 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register state,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size);
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant,
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 7820a4a688fa..9e2b5882cdeb 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
state);
}

+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size)
+{
+ u32 insn = aarch64_insn_get_ldadd_value();
+
+ switch (size) {
+ case AARCH64_INSN_SIZE_32:
+ case AARCH64_INSN_SIZE_64:
+ break;
+ default:
+ pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_ldst_size(size, insn);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+ result);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ address);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+ value);
+}
+
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size)
+{
+ /*
+ * STADD is simply encoded as an alias for LDADD with XZR as
+ * the destination register.
+ */
+ return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
+ value, size);
+}
+
static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
enum aarch64_insn_prfm_policy policy,
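
For context: as the comment in the hunk notes, STADD is simply LDADD with XZR
as the destination. A hedged sketch of using the new generator (register
choices are arbitrary, for illustration only):

	/* Encode "stadd x1, [x0]": atomically add x1 to the u64 at [x0]. */
	static u32 example_stadd64(void)
	{
		return aarch64_insn_gen_stadd(AARCH64_INSN_REG_0, /* Rn: address */
					      AARCH64_INSN_REG_1, /* Rs: value */
					      AARCH64_INSN_SIZE_64);
	}

On an unsupported size the generator returns AARCH64_BREAK_FAULT, exactly as
the ldadd path above does.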
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 6c881659ee8a..76606e87233f 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -100,6 +100,10 @@
#define A64_STXR(sf, Rt, Rn, Rs) \
A64_LSX(sf, Rt, Rn, Rs, STORE_EX)

+/* LSE atomics */
+#define A64_STADD(sf, Rn, Rs) \
+ aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
+
/* Add/subtract (immediate) */
#define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a1420626fca2..df845cee438e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -365,7 +365,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
BPF_CLASS(code) == BPF_JMP;
const bool isdw = BPF_SIZE(code) == BPF_DW;
- u8 jmp_cond;
+ u8 jmp_cond, reg;
s32 jmp_offset;

#define check_imm(bits, imm) do { \
@@ -756,18 +756,28 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
}
break;
+
/* STX XADD: lock *(u32 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_W:
/* STX XADD: lock *(u64 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_DW:
- emit_a64_mov_i(1, tmp, off, ctx);
- emit(A64_ADD(1, tmp, tmp, dst), ctx);
- emit(A64_LDXR(isdw, tmp2, tmp), ctx);
- emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
- emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
- jmp_offset = -3;
- check_imm19(jmp_offset);
- emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+ if (!off) {
+ reg = dst;
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_ADD(1, tmp, tmp, dst), ctx);
+ reg = tmp;
+ }
+ if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
+ emit(A64_STADD(isdw, reg, src), ctx);
+ } else {
+ emit(A64_LDXR(isdw, tmp2, reg), ctx);
+ emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+ emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
+ jmp_offset = -3;
+ check_imm19(jmp_offset);
+ emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+ }
break;

default:
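
For context: the operation being JITed is a plain atomic fetch-add with
relaxed ordering, roughly this C for the BPF_DW case (a sketch using the
__atomic builtin, not kernel code):

	static inline void bpf_xadd64(u64 *addr, u64 val)
	{
		/* one STADD on LSE parts, an LDXR/ADD/STXR loop otherwise */
		__atomic_fetch_add(addr, val, __ATOMIC_RELAXED);
	}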
diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h
index 558059a8f218..0277b56157af 100644
--- a/arch/mips/include/asm/mips-gic.h
+++ b/arch/mips/include/asm/mips-gic.h
@@ -314,6 +314,36 @@ static inline bool mips_gic_present(void)
return IS_ENABLED(CONFIG_MIPS_GIC) && mips_gic_base;
}

+/**
+ * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset
+ * @intr: A GIC local interrupt
+ *
+ * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register
+ * within the block of GIC map registers. This is almost the same as the order
+ * of interrupts in the pending & mask registers, as used by enum
+ * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the
+ * interrupts after it...
+ *
+ * Return: The map register index corresponding to @intr.
+ *
+ * The return value is suitable for use with the (read|write)_gic_v[lo]_map
+ * accessor functions.
+ */
+static inline unsigned int
+mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr)
+{
+ /* WD, Compare & Timer are 1:1 */
+ if (intr <= GIC_LOCAL_INT_TIMER)
+ return intr;
+
+ /* FDC moves to after Timer... */
+ if (intr == GIC_LOCAL_INT_FDC)
+ return GIC_LOCAL_INT_TIMER + 1;
+
+ /* As a result everything else is offset by 1 */
+ return intr + 1;
+}
+
/**
* gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq
*
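
For context, a worked example of the remapping (interrupt numbers follow enum
mips_gic_local_interrupt; illustrative only):

	/*
	 * pending/mask order: WD=0 COMPARE=1 TIMER=2 PERFCTR=3 SWINT0=4
	 *                     SWINT1=5 FDC=6
	 * map register order: WD=0 COMPARE=1 TIMER=2 FDC=3 PERFCTR=4
	 *                     SWINT0=5 SWINT1=6
	 */
	WARN_ON(mips_gic_vx_map_reg(GIC_LOCAL_INT_FDC) !=
		GIC_LOCAL_INT_TIMER + 1);	/* FDC's MAP register is #3 */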
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03b4cc0ec3a7..66ca906aa790 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -835,6 +835,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
break;
}

+ /*
+ * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+ * bit in the mask to allow guests to use the mitigation even in the
+ * case where the host does not enable it.
+ */
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ }
+
/*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}
}
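
For context: the mask matters because the guest's SPEC_CTRL value is filtered
through it on VM entry/exit, roughly as below (simplified sketch of the logic
in x86_virt_spec_ctrl() in the same file):

	guestval = hostval & ~x86_spec_ctrl_mask;	  /* host-owned bits */
	guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; /* guest-owned bits */

Setting SPEC_CTRL_SSBD in the mask therefore lets a guest toggle SSBD even
when the host itself runs with the mitigation off.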
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index f53658dde639..dfc36cd56676 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -793,13 +793,16 @@ static struct syscore_ops mc_syscore_ops = {
.resume = mc_bp_resume,
};

-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
{
- struct device *dev;
-
- dev = get_cpu_device(cpu);
microcode_update_cpu(cpu);
pr_debug("CPU%d added\n", cpu);
+ return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);

if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
@@ -876,7 +879,9 @@ int __init microcode_init(void)
goto out_ucode_group;

register_syscore_ops(&mc_syscore_ops);
- cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+ mc_cpu_starting, NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
mc_cpu_online, mc_cpu_down_prep);

pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
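
For context: the loader is split across two hotplug states because they run
in different contexts. CPUHP_AP_MICROCODE_LOADER callbacks run on the
incoming CPU in the "starting" section with interrupts off - the only safe
place to apply microcode - while CPUHP_AP_ONLINE_DYN callbacks run later in
sleepable process context, which sysfs_create_group() requires. A sketch
mirroring the registration above:

	/* early, executed on the hotplugged CPU itself, atomic context */
	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER,
				  "x86/microcode:starting",
				  mc_cpu_starting, NULL);
	/* late, sleepable, where the sysfs work belongs */
	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
				  mc_cpu_online, mc_cpu_down_prep);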
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index c51b56e29948..f70a617b31b0 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- u32 sw_shareable = 0, hw_shareable = 0;
- u32 exclusive = 0, pseudo_locked = 0;
+ /*
+ * Use unsigned long even though only 32 bits are used to ensure
+ * test_bit() is used safely.
+ */
+ unsigned long sw_shareable = 0, hw_shareable = 0;
+ unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
@@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
}
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
- hwb = test_bit(i, (unsigned long *)&hw_shareable);
- swb = test_bit(i, (unsigned long *)&sw_shareable);
- excl = test_bit(i, (unsigned long *)&exclusive);
- psl = test_bit(i, (unsigned long *)&pseudo_locked);
+ hwb = test_bit(i, &hw_shareable);
+ swb = test_bit(i, &sw_shareable);
+ excl = test_bit(i, &exclusive);
+ psl = test_bit(i, &pseudo_locked);
if (hwb && swb)
seq_putc(seq, 'X');
else if (hwb && !swb)
@@ -2494,26 +2498,19 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
*/
static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
{
- /*
- * Convert the u32 _val to an unsigned long required by all the bit
- * operations within this function. No more than 32 bits of this
- * converted value can be accessed because all bit operations are
- * additionally provided with cbm_len that is initialized during
- * hardware enumeration using five bits from the EAX register and
- * thus never can exceed 32 bits.
- */
- unsigned long *val = (unsigned long *)_val;
+ unsigned long val = *_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;

- if (*val == 0)
+ if (val == 0)
return;

- first_bit = find_first_bit(val, cbm_len);
- zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

/* Clear any remaining bits to ensure contiguous region */
- bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+ bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+ *_val = (u32)val;
}

/**
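
For context: the casts being removed were unsafe because the generic bitops
dereference whole unsigned longs, i.e. 8 bytes on 64-bit, from what is only a
4-byte object here. A sketch of the hazard and of the safe pattern now used
(illustrative only):

	u32 cbm = 0x5;
	unsigned long tmp;

	/* BAD: loads 8 bytes past the end of the 4-byte 'cbm' on 64-bit */
	find_first_bit((unsigned long *)&cbm, 32);

	/* OK: widen into a properly sized object, operate on the copy */
	tmp = cbm;
	find_first_bit(&tmp, 32);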
diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c
index 8281dfbf38c2..5bed36e12951 100644
--- a/drivers/clk/socfpga/clk-s10.c
+++ b/drivers/clk/socfpga/clk-s10.c
@@ -103,9 +103,9 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = {
{ STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux),
0, 0, 0, 0x3C, 1},
{ STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux),
- 0, 0, 4, 0xB0, 0},
+ 0, 0, 2, 0xB0, 0},
{ STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux),
- 0, 0, 4, 0xB0, 1},
+ 0, 0, 2, 0xB0, 1},
{ STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux,
ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2},
{ STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux,
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 7545af763d7a..af4ace8f7369 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -3377,6 +3377,8 @@ static struct tegra_clk_init_table init_table[] __initdata = {
{ TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
{ TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
{ TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
+ { TEGRA210_CLK_HDA, TEGRA210_CLK_PLL_P, 51000000, 0 },
+ { TEGRA210_CLK_HDA2CODEC_2X, TEGRA210_CLK_PLL_P, 48000000, 0 },
/* This MUST be the last entry. */
{ TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 },
};
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 55b77c576c42..6d9995cbf651 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -1007,14 +1007,16 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)

/* first try to find a slot in an existing linked list entry */
for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
- rsv = __va(prsv);
+ rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size);
if (index < rsv->size) {
rsv->entry[index].base = addr;
rsv->entry[index].size = size;

+ memunmap(rsv);
return 0;
}
+ memunmap(rsv);
}

/* no slot found - allocate a new linked list entry */
@@ -1022,7 +1024,13 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
if (!rsv)
return -ENOMEM;

- rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE);
+ /*
+ * The memremap() call above assumes that a linux_efi_memreserve entry
+ * never crosses a page boundary, so let's ensure that this remains true
+ * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by
+ * using SZ_4K explicitly in the size calculation below.
+ */
+ rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K);
atomic_set(&rsv->count, 1);
rsv->entry[0].base = addr;
rsv->entry[0].size = size;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a67a63b5aa84..0c4a76bca5c6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -280,7 +280,8 @@ struct drm_i915_display_funcs {
void (*get_cdclk)(struct drm_i915_private *dev_priv,
struct intel_cdclk_state *cdclk_state);
void (*set_cdclk)(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state);
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe);
int (*get_fifo_size)(struct drm_i915_private *dev_priv,
enum i9xx_plane_id i9xx_plane);
int (*compute_pipe_wm)(struct intel_crtc_state *cstate);
@@ -1622,6 +1623,8 @@ struct drm_i915_private {
struct intel_cdclk_state actual;
/* The current hardware cdclk state */
struct intel_cdclk_state hw;
+
+ int force_min_cdclk;
} cdclk;

/**
@@ -1741,6 +1744,7 @@ struct drm_i915_private {
*
*/
struct mutex av_mutex;
+ int audio_power_refcount;

struct {
struct mutex mutex;
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 5104c6bbd66f..2f3fd5bf0f11 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -741,15 +741,71 @@ void intel_init_audio_hooks(struct drm_i915_private *dev_priv)
}
}

+static void glk_force_audio_cdclk(struct drm_i915_private *dev_priv,
+ bool enable)
+{
+ struct drm_modeset_acquire_ctx ctx;
+ struct drm_atomic_state *state;
+ int ret;
+
+ drm_modeset_acquire_init(&ctx, 0);
+ state = drm_atomic_state_alloc(&dev_priv->drm);
+ if (WARN_ON(!state))
+ return;
+
+ state->acquire_ctx = &ctx;
+
+retry:
+ to_intel_atomic_state(state)->cdclk.force_min_cdclk_changed = true;
+ to_intel_atomic_state(state)->cdclk.force_min_cdclk =
+ enable ? 2 * 96000 : 0;
+
+ /*
+ * Protects dev_priv->cdclk.force_min_cdclk
+ * Need to lock this here in case we have no active pipes
+ * and thus wouldn't lock it during the commit otherwise.
+ */
+ ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
+ &ctx);
+ if (!ret)
+ ret = drm_atomic_commit(state);
+
+ if (ret == -EDEADLK) {
+ drm_atomic_state_clear(state);
+ drm_modeset_backoff(&ctx);
+ goto retry;
+ }
+
+ WARN_ON(ret);
+
+ drm_atomic_state_put(state);
+
+ drm_modeset_drop_locks(&ctx);
+ drm_modeset_acquire_fini(&ctx);
+}
+
static void i915_audio_component_get_power(struct device *kdev)
{
- intel_display_power_get(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO);
+ struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
+
+ intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
+
+ /* Force CDCLK to 2*BCLK as long as we need audio to be powered. */
+ if (dev_priv->audio_power_refcount++ == 0)
+ if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+ glk_force_audio_cdclk(dev_priv, true);
}

static void i915_audio_component_put_power(struct device *kdev)
{
- intel_display_power_put_unchecked(kdev_to_i915(kdev),
- POWER_DOMAIN_AUDIO);
+ struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
+
+ /* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */
+ if (--dev_priv->audio_power_refcount == 0)
+ if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+ glk_force_audio_cdclk(dev_priv, false);
+
+ intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_AUDIO);
}

static void i915_audio_component_codec_wake_override(struct device *kdev,
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 15ba950dee00..00f76261924c 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -516,7 +516,8 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv)
}

static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
int cdclk = cdclk_state->cdclk;
u32 val, cmd = cdclk_state->voltage_level;
@@ -598,7 +599,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
}

static void chv_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
int cdclk = cdclk_state->cdclk;
u32 val, cmd = cdclk_state->voltage_level;
@@ -697,7 +699,8 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv,
}

static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
int cdclk = cdclk_state->cdclk;
u32 val;
@@ -987,7 +990,8 @@ static void skl_dpll0_disable(struct drm_i915_private *dev_priv)
}

static void skl_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
int cdclk = cdclk_state->cdclk;
int vco = cdclk_state->vco;
@@ -1158,7 +1162,7 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv)
cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco);
cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk);

- skl_set_cdclk(dev_priv, &cdclk_state);
+ skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
}

/**
@@ -1176,7 +1180,7 @@ void skl_uninit_cdclk(struct drm_i915_private *dev_priv)
cdclk_state.vco = 0;
cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk);

- skl_set_cdclk(dev_priv, &cdclk_state);
+ skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
}

static int bxt_calc_cdclk(int min_cdclk)
@@ -1355,7 +1359,8 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco)
}

static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
int cdclk = cdclk_state->cdclk;
int vco = cdclk_state->vco;
@@ -1408,11 +1413,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
bxt_de_pll_enable(dev_priv, vco);

val = divider | skl_cdclk_decimal(cdclk);
- /*
- * FIXME if only the cd2x divider needs changing, it could be done
- * without shutting off the pipe (if only one pipe is active).
- */
- val |= BXT_CDCLK_CD2X_PIPE_NONE;
+ if (pipe == INVALID_PIPE)
+ val |= BXT_CDCLK_CD2X_PIPE_NONE;
+ else
+ val |= BXT_CDCLK_CD2X_PIPE(pipe);
/*
* Disable SSA Precharge when CD clock frequency < 500 MHz,
* enable otherwise.
@@ -1421,6 +1425,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
I915_WRITE(CDCLK_CTL, val);

+ if (pipe != INVALID_PIPE)
+ intel_wait_for_vblank(dev_priv, pipe);
+
mutex_lock(&dev_priv->pcu_lock);
/*
* The timeout isn't specified, the 2ms used here is based on
@@ -1525,7 +1532,7 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv)
}
cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk);

- bxt_set_cdclk(dev_priv, &cdclk_state);
+ bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
}

/**
@@ -1543,7 +1550,7 @@ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv)
cdclk_state.vco = 0;
cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk);

- bxt_set_cdclk(dev_priv, &cdclk_state);
+ bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
}

static int cnl_calc_cdclk(int min_cdclk)
@@ -1663,7 +1670,8 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco)
}

static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
int cdclk = cdclk_state->cdclk;
int vco = cdclk_state->vco;
@@ -1704,13 +1712,15 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
cnl_cdclk_pll_enable(dev_priv, vco);

val = divider | skl_cdclk_decimal(cdclk);
- /*
- * FIXME if only the cd2x divider needs changing, it could be done
- * without shutting off the pipe (if only one pipe is active).
- */
- val |= BXT_CDCLK_CD2X_PIPE_NONE;
+ if (pipe == INVALID_PIPE)
+ val |= BXT_CDCLK_CD2X_PIPE_NONE;
+ else
+ val |= BXT_CDCLK_CD2X_PIPE(pipe);
I915_WRITE(CDCLK_CTL, val);

+ if (pipe != INVALID_PIPE)
+ intel_wait_for_vblank(dev_priv, pipe);
+
/* inform PCU of the change */
mutex_lock(&dev_priv->pcu_lock);
sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
@@ -1847,7 +1857,8 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk)
}

static void icl_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
unsigned int cdclk = cdclk_state->cdclk;
unsigned int vco = cdclk_state->vco;
@@ -1872,6 +1883,11 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
if (dev_priv->cdclk.hw.vco != vco)
cnl_cdclk_pll_enable(dev_priv, vco);

+ /*
+ * On ICL CD2X_DIV can only be 1, so we'll never end up changing the
+ * divider here synchronized to a pipe while CDCLK is on, nor will we
+ * need the corresponding vblank wait.
+ */
I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE |
skl_cdclk_decimal(cdclk));

@@ -2002,7 +2018,7 @@ void icl_init_cdclk(struct drm_i915_private *dev_priv)
sanitized_state.voltage_level =
icl_calc_voltage_level(sanitized_state.cdclk);

- icl_set_cdclk(dev_priv, &sanitized_state);
+ icl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE);
}

/**
@@ -2020,7 +2036,7 @@ void icl_uninit_cdclk(struct drm_i915_private *dev_priv)
cdclk_state.vco = 0;
cdclk_state.voltage_level = icl_calc_voltage_level(cdclk_state.cdclk);

- icl_set_cdclk(dev_priv, &cdclk_state);
+ icl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
}

/**
@@ -2048,7 +2064,7 @@ void cnl_init_cdclk(struct drm_i915_private *dev_priv)
cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk);
cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk);

- cnl_set_cdclk(dev_priv, &cdclk_state);
+ cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
}

/**
@@ -2066,7 +2082,7 @@ void cnl_uninit_cdclk(struct drm_i915_private *dev_priv)
cdclk_state.vco = 0;
cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk);

- cnl_set_cdclk(dev_priv, &cdclk_state);
+ cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE);
}

/**
@@ -2085,6 +2101,27 @@ bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a,
a->ref != b->ref;
}

+/**
+ * intel_cdclk_needs_cd2x_update - Determine if two CDCLK states require a cd2x divider update
+ * @dev_priv: i915 device
+ * @a: first CDCLK state
+ * @b: second CDCLK state
+ *
+ * Returns:
+ * True if the CDCLK states require just a cd2x divider update, false if not.
+ */
+bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_state *a,
+ const struct intel_cdclk_state *b)
+{
+ /* Older hw doesn't have the capability */
+ if (INTEL_GEN(dev_priv) < 10 && !IS_GEN9_LP(dev_priv))
+ return false;
+
+ return a->cdclk != b->cdclk &&
+ a->vco == b->vco &&
+ a->ref == b->ref;
+}
+
/**
* intel_cdclk_changed - Determine if two CDCLK states are different
* @a: first CDCLK state
@@ -2100,6 +2137,26 @@ bool intel_cdclk_changed(const struct intel_cdclk_state *a,
a->voltage_level != b->voltage_level;
}

+/**
+ * intel_cdclk_swap_state - make atomic CDCLK configuration effective
+ * @state: atomic state
+ *
+ * This is the CDCLK version of drm_atomic_helper_swap_state() since the
+ * helper does not handle driver-specific global state.
+ *
+ * Similarly to the atomic helpers this function does a complete swap,
+ * i.e. it also puts the old state into @state. This is used by the commit
+ * code to determine how CDCLK has changed (for instance did it increase or
+ * decrease).
+ */
+void intel_cdclk_swap_state(struct intel_atomic_state *state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+
+ swap(state->cdclk.logical, dev_priv->cdclk.logical);
+ swap(state->cdclk.actual, dev_priv->cdclk.actual);
+}
+
void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state,
const char *context)
{
@@ -2113,12 +2170,14 @@ void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state,
* intel_set_cdclk - Push the CDCLK state to the hardware
* @dev_priv: i915 device
* @cdclk_state: new CDCLK state
+ * @pipe: pipe with which to synchronize the update
*
* Program the hardware based on the passed in CDCLK state,
* if necessary.
*/
-void intel_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state)
+static void intel_set_cdclk(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_state *cdclk_state,
+ enum pipe pipe)
{
if (!intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state))
return;
@@ -2128,7 +2187,7 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv,

intel_dump_cdclk_state(cdclk_state, "Changing CDCLK to");

- dev_priv->display.set_cdclk(dev_priv, cdclk_state);
+ dev_priv->display.set_cdclk(dev_priv, cdclk_state, pipe);

if (WARN(intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state),
"cdclk state doesn't match!\n")) {
@@ -2137,6 +2196,46 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv,
}
}

+/**
+ * intel_set_cdclk_pre_plane_update - Push the CDCLK state to the hardware
+ * @dev_priv: i915 device
+ * @old_state: old CDCLK state
+ * @new_state: new CDCLK state
+ * @pipe: pipe with which to synchronize the update
+ *
+ * Program the hardware before updating the HW plane state based on the passed
+ * in CDCLK state, if necessary.
+ */
+void
+intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_state *old_state,
+ const struct intel_cdclk_state *new_state,
+ enum pipe pipe)
+{
+ if (pipe == INVALID_PIPE || old_state->cdclk <= new_state->cdclk)
+ intel_set_cdclk(dev_priv, new_state, pipe);
+}
+
+/**
+ * intel_set_cdclk_post_plane_update - Push the CDCLK state to the hardware
+ * @dev_priv: i915 device
+ * @old_state: old CDCLK state
+ * @new_state: new CDCLK state
+ * @pipe: pipe with which to synchronize the update
+ *
+ * Program the hardware after updating the HW plane state based on the passed
+ * in CDCLK state, if necessary.
+ */
+void
+intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_state *old_state,
+ const struct intel_cdclk_state *new_state,
+ enum pipe pipe)
+{
+ if (pipe != INVALID_PIPE && old_state->cdclk > new_state->cdclk)
+ intel_set_cdclk(dev_priv, new_state, pipe);
+}
+
static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv,
int pixel_rate)
{
@@ -2187,19 +2286,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
/*
* According to BSpec, "The CD clock frequency must be at least twice
* the frequency of the Azalia BCLK." and BCLK is 96 MHz by default.
- *
- * FIXME: Check the actual, not default, BCLK being used.
- *
- * FIXME: This does not depend on ->has_audio because the higher CDCLK
- * is required for audio probe, also when there are no audio capable
- * displays connected at probe time. This leads to unnecessarily high
- * CDCLK when audio is not required.
- *
- * FIXME: This limit is only applied when there are displays connected
- * at probe time. If we probe without displays, we'll still end up using
- * the platform minimum CDCLK, failing audio probe.
*/
- if (INTEL_GEN(dev_priv) >= 9)
+ if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9)
min_cdclk = max(2 * 96000, min_cdclk);

/*
@@ -2239,7 +2327,7 @@ static int intel_compute_min_cdclk(struct drm_atomic_state *state)
intel_state->min_cdclk[i] = min_cdclk;
}

- min_cdclk = 0;
+ min_cdclk = intel_state->cdclk.force_min_cdclk;
for_each_pipe(dev_priv, pipe)
min_cdclk = max(intel_state->min_cdclk[pipe], min_cdclk);

@@ -2300,7 +2388,8 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state)
vlv_calc_voltage_level(dev_priv, cdclk);

if (!intel_state->active_crtcs) {
- cdclk = vlv_calc_cdclk(dev_priv, 0);
+ cdclk = vlv_calc_cdclk(dev_priv,
+ intel_state->cdclk.force_min_cdclk);

intel_state->cdclk.actual.cdclk = cdclk;
intel_state->cdclk.actual.voltage_level =
@@ -2333,7 +2422,7 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state)
bdw_calc_voltage_level(cdclk);

if (!intel_state->active_crtcs) {
- cdclk = bdw_calc_cdclk(0);
+ cdclk = bdw_calc_cdclk(intel_state->cdclk.force_min_cdclk);

intel_state->cdclk.actual.cdclk = cdclk;
intel_state->cdclk.actual.voltage_level =
@@ -2405,7 +2494,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
skl_calc_voltage_level(cdclk);

if (!intel_state->active_crtcs) {
- cdclk = skl_calc_cdclk(0, vco);
+ cdclk = skl_calc_cdclk(intel_state->cdclk.force_min_cdclk, vco);

intel_state->cdclk.actual.vco = vco;
intel_state->cdclk.actual.cdclk = cdclk;
@@ -2444,10 +2533,10 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state)

if (!intel_state->active_crtcs) {
if (IS_GEMINILAKE(dev_priv)) {
- cdclk = glk_calc_cdclk(0);
+ cdclk = glk_calc_cdclk(intel_state->cdclk.force_min_cdclk);
vco = glk_de_pll_vco(dev_priv, cdclk);
} else {
- cdclk = bxt_calc_cdclk(0);
+ cdclk = bxt_calc_cdclk(intel_state->cdclk.force_min_cdclk);
vco = bxt_de_pll_vco(dev_priv, cdclk);
}

@@ -2483,7 +2572,7 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state)
cnl_compute_min_voltage_level(intel_state));

if (!intel_state->active_crtcs) {
- cdclk = cnl_calc_cdclk(0);
+ cdclk = cnl_calc_cdclk(intel_state->cdclk.force_min_cdclk);
vco = cnl_cdclk_pll_vco(dev_priv, cdclk);

intel_state->cdclk.actual.vco = vco;
@@ -2519,7 +2608,7 @@ static int icl_modeset_calc_cdclk(struct drm_atomic_state *state)
cnl_compute_min_voltage_level(intel_state));

if (!intel_state->active_crtcs) {
- cdclk = icl_calc_cdclk(0, ref);
+ cdclk = icl_calc_cdclk(intel_state->cdclk.force_min_cdclk, ref);
vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk);

intel_state->cdclk.actual.vco = vco;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index be4024f0e3a8..9803d713f746 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12770,10 +12770,16 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
return -EINVAL;
}

+ /* keep the current setting */
+ if (!intel_state->cdclk.force_min_cdclk_changed)
+ intel_state->cdclk.force_min_cdclk =
+ dev_priv->cdclk.force_min_cdclk;
+
intel_state->modeset = true;
intel_state->active_crtcs = dev_priv->active_crtcs;
intel_state->cdclk.logical = dev_priv->cdclk.logical;
intel_state->cdclk.actual = dev_priv->cdclk.actual;
+ intel_state->cdclk.pipe = INVALID_PIPE;

for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
if (new_crtc_state->active)
@@ -12793,6 +12799,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
* adjusted_mode bits in the crtc directly.
*/
if (dev_priv->display.modeset_calc_cdclk) {
+ enum pipe pipe;
+
ret = dev_priv->display.modeset_calc_cdclk(state);
if (ret < 0)
return ret;
@@ -12809,12 +12817,36 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
return ret;
}

+ if (is_power_of_2(intel_state->active_crtcs)) {
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *crtc_state;
+
+ pipe = ilog2(intel_state->active_crtcs);
+ crtc = &intel_get_crtc_for_pipe(dev_priv, pipe)->base;
+ crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
+ if (crtc_state && needs_modeset(crtc_state))
+ pipe = INVALID_PIPE;
+ } else {
+ pipe = INVALID_PIPE;
+ }
+
/* All pipes must be switched off while we change the cdclk. */
- if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual,
- &intel_state->cdclk.actual)) {
+ if (pipe != INVALID_PIPE &&
+ intel_cdclk_needs_cd2x_update(dev_priv,
+ &dev_priv->cdclk.actual,
+ &intel_state->cdclk.actual)) {
+ ret = intel_lock_all_pipes(state);
+ if (ret < 0)
+ return ret;
+
+ intel_state->cdclk.pipe = pipe;
+ } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual,
+ &intel_state->cdclk.actual)) {
ret = intel_modeset_all_pipes(state);
if (ret < 0)
return ret;
+
+ intel_state->cdclk.pipe = INVALID_PIPE;
}

DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n",
@@ -12823,8 +12855,6 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n",
intel_state->cdclk.logical.voltage_level,
intel_state->cdclk.actual.voltage_level);
- } else {
- to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical;
}

intel_modeset_clear_plls(state);
@@ -12892,7 +12922,7 @@ static int intel_atomic_check(struct drm_device *dev,
struct drm_crtc *crtc;
struct drm_crtc_state *old_crtc_state, *crtc_state;
int ret, i;
- bool any_ms = false;
+ bool any_ms = intel_state->cdclk.force_min_cdclk_changed;

/* Catch I915_MODE_FLAG_INHERITED */
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
@@ -13248,7 +13278,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
if (intel_state->modeset) {
drm_atomic_helper_update_legacy_modeset_state(state->dev, state);

- intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual);
+ intel_set_cdclk_pre_plane_update(dev_priv,
+ &intel_state->cdclk.actual,
+ &dev_priv->cdclk.actual,
+ intel_state->cdclk.pipe);

/*
* SKL workaround: bspec recommends we disable the SAGV when we
@@ -13277,6 +13310,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
/* Now enable the clocks, plane, pipe, and connectors that we set up. */
dev_priv->display.update_crtcs(state);

+ if (intel_state->modeset)
+ intel_set_cdclk_post_plane_update(dev_priv,
+ &intel_state->cdclk.actual,
+ &dev_priv->cdclk.actual,
+ intel_state->cdclk.pipe);
+
/* FIXME: We should call drm_atomic_helper_commit_hw_done() here
* already, but still need the state for the delayed optimization. To
* fix this:
@@ -13478,8 +13517,10 @@ static int intel_atomic_commit(struct drm_device *dev,
intel_state->min_voltage_level,
sizeof(intel_state->min_voltage_level));
dev_priv->active_crtcs = intel_state->active_crtcs;
- dev_priv->cdclk.logical = intel_state->cdclk.logical;
- dev_priv->cdclk.actual = intel_state->cdclk.actual;
+ dev_priv->cdclk.force_min_cdclk =
+ intel_state->cdclk.force_min_cdclk;
+
+ intel_cdclk_swap_state(intel_state);
}

drm_atomic_state_get(state);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 18e17b422701..ceaa05dfa0d0 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -480,6 +480,11 @@ struct intel_atomic_state {
* state only when all crtc's are DPMS off.
*/
struct intel_cdclk_state actual;
+
+ int force_min_cdclk;
+ bool force_min_cdclk_changed;
+ /* pipe to which cd2x update is synchronized */
+ enum pipe pipe;
} cdclk;

bool dpll_set, modeset;
@@ -1590,12 +1595,24 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv);
void intel_update_max_cdclk(struct drm_i915_private *dev_priv);
void intel_update_cdclk(struct drm_i915_private *dev_priv);
void intel_update_rawclk(struct drm_i915_private *dev_priv);
+bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_state *a,
+ const struct intel_cdclk_state *b);
bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a,
const struct intel_cdclk_state *b);
bool intel_cdclk_changed(const struct intel_cdclk_state *a,
const struct intel_cdclk_state *b);
-void intel_set_cdclk(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_state *cdclk_state);
+void intel_cdclk_swap_state(struct intel_atomic_state *state);
+void
+intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_state *old_state,
+ const struct intel_cdclk_state *new_state,
+ enum pipe pipe);
+void
+intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_state *old_state,
+ const struct intel_cdclk_state *new_state,
+ enum pipe pipe);
void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state,
const char *context);

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0dce94e3c495..d0b04b0d309f 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -730,8 +730,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
if (rec->roce.route_resolved)
return 0;

- rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
- rdma_gid2ip(&dgid._sockaddr, &rec->dgid);
+ rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid);
+ rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid);

if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
return -EINVAL;
@@ -742,7 +742,7 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
dev_addr.net = &init_net;
dev_addr.sgid_attr = attr;

- ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
+ ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid,
&dev_addr, false, true, 0);
if (ret)
return ret;
@@ -814,22 +814,22 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
union {
- struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
int ret;

- rdma_gid2ip(&sgid_addr._sockaddr, sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
+ rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid);

memset(&dev_addr, 0, sizeof(dev_addr));
dev_addr.net = &init_net;
dev_addr.sgid_attr = sgid_attr;

init_completion(&ctx.comp);
- ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
- &dev_addr, 1000, resolve_cb, true, &ctx);
+ ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr,
+ (struct sockaddr *)&dgid_addr, &dev_addr, 1000,
+ resolve_cb, true, &ctx);
if (ret)
return ret;
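
For context: dropping the struct sockaddr member and casting the union's
address directly works because both remaining members carry the address
family at offset zero. The idiom in miniature (illustrative; 'gid' is assumed
to be a union ib_gid pointer):

	union {
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} addr;
	struct sockaddr *sa = (struct sockaddr *)&addr;

	rdma_gid2ip(sa, gid);		/* fills the IPv4 or IPv6 form */
	if (sa->sa_family == AF_INET)
		/* IPv4 path */;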

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index a7295322efbc..f4500d77f314 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -83,7 +83,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct iphdr ipv4;
const struct ib_global_route *ib_grh;
union {
- struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
@@ -133,9 +132,9 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
ipv4.tot_len = htons(0);
ipv4.ttl = ib_grh->hop_limit;
ipv4.protocol = nxthdr;
- rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
- rdma_gid2ip(&dgid_addr._sockaddr, &ib_grh->dgid);
+ rdma_gid2ip((struct sockaddr *)&dgid_addr, &ib_grh->dgid);
ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
} else {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 097e5ab2a19f..94ca0f4a4073 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -2499,7 +2499,6 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
u32 vlan_id = 0xFFFF;
u8 mac_addr[6], hdr_type;
union {
- struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
@@ -2541,8 +2540,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,

hdr_type = rdma_gid_attr_network_type(sgid_attr);
if (hdr_type == RDMA_NETWORK_IPV4) {
- rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid);
- rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid_attr->gid);
+ rdma_gid2ip((struct sockaddr *)&dgid_addr, &grh->dgid);
memcpy(&cmd->params.dgid[0],
&dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
memcpy(&cmd->params.sgid[0],
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index d32268cc1174..f3985469c221 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online(struct irq_data *d)
intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
cd = irq_data_get_irq_chip_data(d);

- write_gic_vl_map(intr, cd->map);
+ write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
if (cd->mask)
write_gic_vl_smask(BIT(intr));
}
@@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
spin_lock_irqsave(&gic_lock, flags);
for_each_online_cpu(cpu) {
write_gic_vl_other(mips_cm_vp_id(cpu));
- write_gic_vo_map(intr, map);
+ write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
}
spin_unlock_irqrestore(&gic_lock, flags);

diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index 352e803f566e..64611633e77c 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -140,8 +140,8 @@ static char __init *dm_parse_table_entry(struct dm_device *dev, char *str)
return ERR_PTR(-EINVAL);
}
/* target_args */
- dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL,
- DM_MAX_STR_SIZE);
+ dev->target_args_array[n] = kstrndup(field[3], DM_MAX_STR_SIZE,
+ GFP_KERNEL);
if (!dev->target_args_array[n])
return ERR_PTR(-ENOMEM);

@@ -275,7 +275,7 @@ static int __init dm_init_init(void)
DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE);
return -EINVAL;
}
- str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE);
+ str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL);
if (!str)
return -ENOMEM;

diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 9ea2b0291f20..e549392e0ea5 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -60,6 +60,7 @@

#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
+#define WRITE_LOG_SUPER_SECTOR 0

/*
* The disk format for this is braindead simple.
@@ -115,6 +116,7 @@ struct log_writes_c {
struct list_head logging_blocks;
wait_queue_head_t wait;
struct task_struct *log_kthread;
+ struct completion super_done;
};

struct pending_block {
@@ -180,6 +182,14 @@ static void log_end_io(struct bio *bio)
bio_put(bio);
}

+static void log_end_super(struct bio *bio)
+{
+ struct log_writes_c *lc = bio->bi_private;
+
+ complete(&lc->super_done);
+ log_end_io(bio);
+}
+
/*
* Meant to be called if there is an error, it will free all the pages
* associated with the block.
@@ -215,7 +225,8 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, lc->logdev->bdev);
- bio->bi_end_io = log_end_io;
+ bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
+ log_end_super : log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

@@ -418,11 +429,18 @@ static int log_super(struct log_writes_c *lc)
super.nr_entries = cpu_to_le64(lc->logged_entries);
super.sectorsize = cpu_to_le32(lc->sectorsize);

- if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) {
+ if (write_metadata(lc, &super, sizeof(super), NULL, 0,
+ WRITE_LOG_SUPER_SECTOR)) {
DMERR("Couldn't write super");
return -1;
}

+ /*
+ * Super sector should be written in order, otherwise the
+ * nr_entries could be rewritten incorrectly by an old bio.
+ */
+ wait_for_completion_io(&lc->super_done);
+
return 0;
}
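
For context: the completion acts as an ordering barrier for superblock
updates - the super bio's end_io completes super_done and log_super() blocks
on it, so a newer superblock can never be overtaken by an older write still
in flight. The generic pattern, sketched with this driver's names:

	init_completion(&lc->super_done);	/* once, in the constructor */

	submit_bio(bio);			/* bi_end_io -> complete() */
	wait_for_completion_io(&lc->super_done);/* super write has ended */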

@@ -531,6 +549,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_LIST_HEAD(&lc->unflushed_blocks);
INIT_LIST_HEAD(&lc->logging_blocks);
init_waitqueue_head(&lc->wait);
+ init_completion(&lc->super_done);
atomic_set(&lc->io_blocks, 0);
atomic_set(&lc->pending_blocks, 0);

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f96efa363d34..59e919b92873 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4321,12 +4321,12 @@ void bond_setup(struct net_device *bond_dev)
bond_dev->features |= NETIF_F_NETNS_LOCAL;

bond_dev->hw_features = BOND_VLAN_FEATURES |
- NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;

bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
bond_dev->features |= bond_dev->hw_features;
+ bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
}

/* Destroy a bonding device.
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 18bc035da850..1fff462a4175 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -843,9 +843,14 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
return err;

if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
- if (hweight < AQ_VLAN_MAX_FILTERS)
- err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, true);
+ if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) {
+ err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw,
+ !(aq_nic->packet_filter & IFF_PROMISC));
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = false;
+ } else {
/* otherwise left in promiscuous mode */
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
+ }
}

return err;
@@ -866,6 +871,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic)
if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl))
return -EOPNOTSUPP;

+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
if (err)
return err;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index ff83667410bd..550abfe6973d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -117,6 +117,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)

cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk;
cfg->features = cfg->aq_hw_caps->hw_features;
+ cfg->is_vlan_force_promisc = true;
}

static int aq_nic_update_link_status(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 8e34c1e49bf2..65e681be9b5d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -36,6 +36,7 @@ struct aq_nic_cfg_s {
u32 flow_control;
u32 link_speed_msk;
u32 wol;
+ bool is_vlan_force_promisc;
u16 is_mc_list_enabled;
u16 mc_list_count;
bool is_autoneg;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index ec302fdfec63..a4cc04741115 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -771,8 +771,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
unsigned int packet_filter)
{
unsigned int i = 0U;
+ struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+
+ hw_atl_rpfl2promiscuous_mode_en_set(self,
+ IS_FILTER_ENABLED(IFF_PROMISC));
+
+ hw_atl_rpf_vlan_prom_mode_en_set(self,
+ IS_FILTER_ENABLED(IFF_PROMISC) ||
+ cfg->is_vlan_force_promisc);

- hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC));
hw_atl_rpfl2multicast_flr_en_set(self,
IS_FILTER_ENABLED(IFF_ALLMULTI), 0);

@@ -781,13 +788,13 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,

hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));

- self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
+ cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);

for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
hw_atl_rpfl2_uc_flr_en_set(self,
- (self->aq_nic_cfg->is_mc_list_enabled &&
- (i <= self->aq_nic_cfg->mc_list_count)) ?
- 1U : 0U, i);
+ (cfg->is_mc_list_enabled &&
+ (i <= cfg->mc_list_count)) ?
+ 1U : 0U, i);

return aq_hw_err_from_flags(self);
}
@@ -1079,7 +1086,7 @@ static int hw_atl_b0_hw_vlan_set(struct aq_hw_s *self,
static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
{
/* set promisc in case of disabling the vlan filter */
- hw_atl_rpf_vlan_prom_mode_en_set(self, !!!enable);
+ hw_atl_rpf_vlan_prom_mode_en_set(self, !enable);

return aq_hw_err_from_flags(self);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 8d9cc2157afd..7423262ce590 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -122,7 +122,7 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
* programmed with (2^32 - <new_sec_value>)
*/
if (gmac4)
- sec = (100000000ULL - sec);
+ sec = -sec;

value = readl(ioaddr + PTP_TCR);
if (value & PTP_TCR_TSCTRLSSR)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 635d88d82610..a634054dcb11 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2957,12 +2957,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)

/* Manage tx mitigation */
tx_q->tx_count_frames += nfrags + 1;
- if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+ (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- tx_q->tx_count_frames = 0;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}

skb_tx_timestamp(skb);
@@ -3176,12 +3179,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
* element in case of no SG.
*/
tx_q->tx_count_frames += nfrags + 1;
- if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+ (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- tx_q->tx_count_frames = 0;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}

skb_tx_timestamp(skb);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 16963f7a88f7..351f25e1fc48 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2135,12 +2135,12 @@ static void team_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;

dev->hw_features = TEAM_VLAN_FEATURES |
- NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;

dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
dev->features |= dev->hw_features;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
}

static int team_newlink(struct net *src_net, struct net_device *dev,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index f4c933ac6edf..d1cafb29eca9 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1024,18 +1024,8 @@ static void tun_net_uninit(struct net_device *dev)
/* Net device open. */
static int tun_net_open(struct net_device *dev)
{
- struct tun_struct *tun = netdev_priv(dev);
- int i;
-
netif_tx_start_all_queues(dev);

- for (i = 0; i < tun->numqueues; i++) {
- struct tun_file *tfile;
-
- tfile = rtnl_dereference(tun->tfiles[i]);
- tfile->socket.sk->sk_write_space(tfile->socket.sk);
- }
-
return 0;
}

@@ -3636,6 +3626,7 @@ static int tun_device_event(struct notifier_block *unused,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tun_struct *tun = netdev_priv(dev);
+ int i;

if (dev->rtnl_link_ops != &tun_link_ops)
return NOTIFY_DONE;
@@ -3645,6 +3636,14 @@ static int tun_device_event(struct notifier_block *unused,
if (tun_queue_resize(tun))
return NOTIFY_BAD;
break;
+ case NETDEV_UP:
+ for (i = 0; i < tun->numqueues; i++) {
+ struct tun_file *tfile;
+
+ tfile = rtnl_dereference(tun->tfiles[i]);
+ tfile->socket.sk->sk_write_space(tfile->socket.sk);
+ }
+ break;
default:
break;
}
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index d9a6699abe59..e657d8947125 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1412,7 +1412,7 @@ static int qmi_wwan_probe(struct usb_interface *intf,
* different. Ignore the current interface if the number of endpoints
* equals the number for the diag interface (two).
*/
- info = (void *)&id->driver_info;
+ info = (void *)id->driver_info;

if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
if (desc->bNumEndpoints == 2)
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index ecee4b3ff073..377b07b2feeb 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -763,6 +763,7 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd
struct pvscsi_adapter *adapter = shost_priv(host);
struct pvscsi_ctx *ctx;
unsigned long flags;
+ unsigned char op;

spin_lock_irqsave(&adapter->hw_lock, flags);

@@ -775,13 +776,14 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd
}

cmd->scsi_done = done;
+ op = cmd->cmnd[0];

dev_dbg(&cmd->device->sdev_gendev,
- "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]);
+ "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);

spin_unlock_irqrestore(&adapter->hw_lock, flags);

- pvscsi_kick_io(adapter, cmd->cmnd[0]);
+ pvscsi_kick_io(adapter, op);

return 0;
}
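
For context: this closes a use-after-free window - once the adapter lock is
dropped the device can complete the command on another CPU, after which 'cmd'
may be recycled and must not be dereferenced. The before/after in miniature:

	/* before: 'cmd' may already be gone when this dereference runs */
	spin_unlock_irqrestore(&adapter->hw_lock, flags);
	pvscsi_kick_io(adapter, cmd->cmnd[0]);

	/* after: sample the opcode while the command is still owned */
	op = cmd->cmnd[0];
	spin_unlock_irqrestore(&adapter->hw_lock, flags);
	pvscsi_kick_io(adapter, op);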
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 82a48e830018..e4b59e76afb0 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -856,9 +856,14 @@ static int load_flat_file(struct linux_binprm *bprm,

static int load_flat_shared_library(int id, struct lib_info *libs)
{
+ /*
+ * This is a fake bprm struct; only the members "buf", "file" and
+ * "filename" are actually used.
+ */
struct linux_binprm bprm;
int res;
char buf[16];
+ loff_t pos = 0;

memset(&bprm, 0, sizeof(bprm));

@@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
if (IS_ERR(bprm.file))
return res;

- bprm.cred = prepare_exec_creds();
- res = -ENOMEM;
- if (!bprm.cred)
- goto out;
-
- /* We don't really care about recalculating credentials at this point
- * as we're past the point of no return and are dealing with shared
- * libraries.
- */
- bprm.called_set_creds = 1;
+ res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos);

- res = prepare_binprm(&bprm);
-
- if (!res)
+ if (res >= 0)
res = load_flat_file(&bprm, libs, id, NULL);

- abort_creds(bprm.cred);
-
-out:
allow_write_access(bprm.file);
fput(bprm.file);

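The binfmt_flat change above no longer fabricates credentials just to call prepare_binprm(); all it needs is the first BINPRM_BUF_SIZE bytes of the library, which kernel_read() fetches directly. A sketch of that read as a hypothetical helper:

#include <linux/fs.h>
#include <linux/binfmts.h>

/* Read the flat-binary header of an already-opened file into buf
 * (buf must be at least BINPRM_BUF_SIZE bytes). */
static int demo_read_header(struct file *file, char *buf)
{
	loff_t pos = 0;
	int res = kernel_read(file, buf, BINPRM_BUF_SIZE, &pos);

	return res < 0 ? res : 0;
}
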
diff --git a/fs/inode.c b/fs/inode.c
index 9a453f3637f8..5bd1dd2e942f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -349,7 +349,7 @@ EXPORT_SYMBOL(inc_nlink);

static void __address_space_init_once(struct address_space *mapping)
{
- xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e82adbf8adc1..b897695c91c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -533,6 +533,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
state->cur_req++;
}

+ req->file = NULL;
req->ctx = ctx;
req->flags = 0;
/* one is dropped after submission, the other at completion */
@@ -1684,10 +1685,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
flags = READ_ONCE(s->sqe->flags);
fd = READ_ONCE(s->sqe->fd);

- if (!io_op_needs_file(s->sqe)) {
- req->file = NULL;
+ if (!io_op_needs_file(s->sqe))
return 0;
- }

if (flags & IOSQE_FIXED_FILE) {
if (unlikely(!ctx->user_files ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index a809989807d6..19f856f45689 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -18,7 +18,7 @@

#define NFSDBG_FACILITY NFSDBG_PNFS_LD

-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
static unsigned int dataserver_retrans;

static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 63c6bb1f8c4d..8c286f8228e5 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -355,6 +355,10 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
/* Mark is just getting destroyed or created? */
if (!conn)
continue;
+ if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID))
+ continue;
+ /* Pairs with smp_wmb() in fsnotify_add_mark_list() */
+ smp_rmb();
fsid = conn->fsid;
if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
continue;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 22acb0a79b53..e9d49191d39e 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -495,10 +495,13 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
conn->type = type;
conn->obj = connp;
/* Cache fsid of filesystem containing the object */
- if (fsid)
+ if (fsid) {
conn->fsid = *fsid;
- else
+ conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID;
+ } else {
conn->fsid.val[0] = conn->fsid.val[1] = 0;
+ conn->flags = 0;
+ }
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
inode = igrab(fsnotify_conn_inode(conn));
/*
@@ -573,7 +576,12 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
if (err)
return err;
goto restart;
- } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) &&
+ } else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) {
+ conn->fsid = *fsid;
+ /* Pairs with smp_rmb() in fanotify_get_fsid() */
+ smp_wmb();
+ conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID;
+ } else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) &&
(fsid->val[0] != conn->fsid.val[0] ||
fsid->val[1] != conn->fsid.val[1])) {
/*
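
The two fsnotify hunks above form a classic publish/subscribe pairing: the writer fills in conn->fsid first, issues smp_wmb(), and only then sets FSNOTIFY_CONN_FLAG_HAS_FSID; the reader checks the flag, issues smp_rmb(), and only then trusts conn->fsid. The same pattern in isolation (hypothetical struct and names):

#include <linux/compiler.h>
#include <linux/errno.h>
#include <asm/barrier.h>

#define DEMO_FLAG_VALID 0x01

struct demo_rec {
	unsigned short flags;
	int payload;
};

static void demo_publish(struct demo_rec *r, int value)
{
	r->payload = value;
	/* Order the payload before the flag that advertises it. */
	smp_wmb();
	WRITE_ONCE(r->flags, r->flags | DEMO_FLAG_VALID);
}

static int demo_consume(struct demo_rec *r, int *out)
{
	if (!(READ_ONCE(r->flags) & DEMO_FLAG_VALID))
		return -EAGAIN;
	/* Pairs with smp_wmb() in demo_publish(). */
	smp_rmb();
	*out = r->payload;
	return 0;
}
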
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 2edbb657f859..55180501b915 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
* a program is not able to use ptrace(2) in that case. It is
* safe because the task has stopped executing permanently.
*/
- if (permitted && (task->flags & PF_DUMPCORE)) {
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
if (try_get_task_stack(task)) {
eip = KSTK_EIP(task);
esp = KSTK_ESP(task);
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index fcb61b4659b3..8666fe7f35d7 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -23,7 +23,9 @@
*
* Return:
* 0 - On success
- * <0 - On error
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Operation not supported
*/
static inline int
arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
@@ -85,7 +87,9 @@ arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
*
* Return:
* 0 - On success
- * <0 - On error
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
*/
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a4c644c1c091..ada0246d43ab 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -230,6 +230,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)

+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
+
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
@@ -319,6 +325,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 094b38f2d9a1..0f67cabbbec8 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -292,7 +292,9 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
*/
struct fsnotify_mark_connector {
spinlock_t lock;
- unsigned int type; /* Type of object [lock] */
+ unsigned short type; /* Type of object [lock] */
+#define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01
+ unsigned short flags; /* flags [lock] */
__kernel_fsid_t fsid; /* fsid of filesystem containing object */
union {
/* Object pointer [lock] */
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 0e01e6129145..5921599b6dc4 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -265,6 +265,7 @@ enum xa_lock_type {
#define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U)
#define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U)
#define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U)
+#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U)
#define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
(__force unsigned)(mark)))

diff --git a/include/net/tls.h b/include/net/tls.h
index 053082d98906..a67ad7d56ff2 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -347,21 +347,6 @@ static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
return !!ctx->partially_sent_record;
}

-static inline int tls_complete_pending_work(struct sock *sk,
- struct tls_context *ctx,
- int flags, long *timeo)
-{
- int rc = 0;
-
- if (unlikely(sk->sk_write_pending))
- rc = wait_on_pending_writer(sk, timeo);
-
- if (!rc && tls_is_partially_sent_record(ctx))
- rc = tls_push_partial_record(sk, ctx, flags);
-
- return rc;
-}
-
static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
{
return tls_ctx->pending_open_record_frags;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 929c8e537a14..9d01f4788d3e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -187,6 +187,8 @@ enum bpf_attach_type {
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
+ BPF_CGROUP_UDP4_RECVMSG = 19,
+ BPF_CGROUP_UDP6_RECVMSG,
__MAX_BPF_ATTACH_TYPE
};

@@ -3104,8 +3106,8 @@ struct bpf_raw_tracepoint_args {
/* DIRECT: Skip the FIB rules and go to FIB table associated with device
* OUTPUT: Do lookup from egress perspective; default is ingress
*/
-#define BPF_FIB_LOOKUP_DIRECT BIT(0)
-#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+#define BPF_FIB_LOOKUP_DIRECT (1U << 0)
+#define BPF_FIB_LOOKUP_OUTPUT (1U << 1)

enum {
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
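
Two details of the uapi hunk above are worth spelling out. The new attach types are pinned with an explicit "= 19" so the backported values match mainline, where another attach type (BPF_CGROUP_SYSCTL) occupies slot 18; and BIT() is replaced by plain shifts because uapi headers are compiled by userspace, which does not have the kernel's BIT() macro. With sanitized headers from this release installed, a plain userspace program can use both directly; a trivial check:

#include <linux/bpf.h>
#include <stdio.h>

int main(void)
{
	/* Prints "19 2", with no kernel-only macros involved. */
	printf("%d %u\n", BPF_CGROUP_UDP4_RECVMSG, BPF_FIB_LOOKUP_OUTPUT);
	return 0;
}
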
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 93a5cbbde421..3b03a7342f3c 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -715,9 +715,14 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
* have exact two children, so this function will never return NULL.
*/
for (node = search_root; node;) {
- if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
+ if (node->flags & LPM_TREE_NODE_FLAG_IM) {
+ node = rcu_dereference(node->child[0]);
+ } else {
next_node = node;
- node = rcu_dereference(node->child[0]);
+ node = rcu_dereference(node->child[0]);
+ if (!node)
+ node = rcu_dereference(next_node->child[1]);
+ }
}
do_copy:
next_key->prefixlen = next_node->prefixlen;
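
The lpm_trie fix above corrects the descent to the leftmost leaf in postorder: intermediate (IM) nodes only route and always have two children, so the walk just takes their left child, while a real node is remembered as the candidate and the walk continues down its left child, falling back to the right child when the left is absent. A standalone userspace sketch of the fixed loop (hypothetical node type):

#include <stddef.h>

#define DEMO_FLAG_IM 0x1

struct demo_node {
	unsigned int flags;
	struct demo_node *child[2];
};

/* Return the leftmost non-intermediate node at or below search_root. */
static struct demo_node *demo_leftmost_real(struct demo_node *search_root)
{
	struct demo_node *node, *next_node = NULL;

	for (node = search_root; node;) {
		if (node->flags & DEMO_FLAG_IM) {
			node = node->child[0];
		} else {
			next_node = node;
			node = node->child[0];
			if (!node)
				node = node->child[1];
		}
	}
	return next_node;
}
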
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index db6e825e2958..29b8f20d500c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1520,6 +1520,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
return 0;
default:
return -EINVAL;
@@ -1809,6 +1811,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
@@ -1891,6 +1895,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
@@ -1939,6 +1945,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 950fac024fbb..4ff130ddfbf6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5145,9 +5145,12 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum range = tnum_range(0, 1);

switch (env->prog->type) {
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
+ env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
+ range = tnum_range(1, 1);
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
- case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
break;
@@ -5163,16 +5166,17 @@ static int check_return_code(struct bpf_verifier_env *env)
}

if (!tnum_in(range, reg->var_off)) {
+ char tn_buf[48];
+
verbose(env, "At program exit the register R0 ");
if (!tnum_is_unknown(reg->var_off)) {
- char tn_buf[48];
-
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
- verbose(env, " should have been 0 or 1\n");
+ tnum_strn(tn_buf, sizeof(tn_buf), range);
+ verbose(env, " should have been in %s\n", tn_buf);
return -EINVAL;
}
return 0;
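
For the new recvmsg hooks, the verifier hunk above narrows the accepted return range to tnum_range(1, 1) before falling through to the shared handling: returning 0 (reject) makes no sense once a datagram has already been received, so these programs must return 1. A minimal sketch of such a program (compiled with clang -target bpf; the SEC() name follows the convention libbpf uses for these hooks):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/recvmsg4")
int demo_recvmsg4(struct bpf_sock_addr *ctx)
{
	/* ctx->user_ip4 / ctx->user_port carry the peer address in
	 * network byte order and may be rewritten here, e.g. to undo
	 * an address translation done at sendmsg time. */
	return 1;	/* the only return value the verifier accepts */
}

char _license[] SEC("license") = "GPL";
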
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9cc8b6fdb2dc..6170034f4118 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2315,6 +2315,9 @@ static int __init mitigations_parse_cmdline(char *arg)
cpu_mitigations = CPU_MITIGATIONS_AUTO;
else if (!strcmp(arg, "auto,nosmt"))
cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+ else
+ pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
+ arg);

return 0;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d64c00afceb5..568a0e839903 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -402,8 +402,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
.arg4_type = ARG_CONST_SIZE,
};

-static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
-
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_sample_data *sd)
@@ -434,24 +432,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
return perf_event_output(event, sd, regs);
}

+/*
+ * Support executing tracepoints in normal, irq, and nmi contexts, each of
+ * which can call bpf_perf_event_output()
+ */
+struct bpf_trace_sample_data {
+ struct perf_sample_data sds[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
+static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
- struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
+ struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+ int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
struct perf_raw_record raw = {
.frag = {
.size = size,
.data = data,
},
};
+ struct perf_sample_data *sd;
+ int err;

- if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
- return -EINVAL;
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ sd = &sds->sds[nest_level - 1];
+
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+ err = -EINVAL;
+ goto out;
+ }

perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;

- return __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, sd);
+
+out:
+ this_cpu_dec(bpf_trace_nest_level);
+ return err;
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -808,16 +832,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
/*
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
* to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
+ *
+ * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
+ * in normal, irq, and nmi context.
*/
-static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+struct bpf_raw_tp_regs {
+ struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
+static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
+static struct pt_regs *get_bpf_raw_tp_regs(void)
+{
+ struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
+
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ this_cpu_dec(bpf_raw_tp_nest_level);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_raw_tp_regs(void)
+{
+ this_cpu_dec(bpf_raw_tp_nest_level);
+}
+
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);

perf_fetch_caller_regs(regs);
- return ____bpf_perf_event_output(regs, map, flags, data, size);
+ ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+ put_bpf_raw_tp_regs();
+ return ret;
}

static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
@@ -834,12 +890,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);

perf_fetch_caller_regs(regs);
/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
- return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
- flags, 0, 0);
+ ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+ flags, 0, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}

static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
@@ -854,11 +916,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
void *, buf, u32, size, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);

perf_fetch_caller_regs(regs);
- return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
- (unsigned long) size, flags, 0);
+ ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+ (unsigned long) size, flags, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}

static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
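
The bpf_trace hunks above replace single per-CPU scratch objects with small arrays indexed by a per-CPU nesting counter, so that a BPF program interrupted in task context by an irq, and that irq by an NMI, each get a private slot instead of silently sharing one. The same pattern in isolation (hypothetical demo names):

#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/errno.h>

struct demo_scratch {
	char buf[3][64];		/* task, irq, nmi */
};

static DEFINE_PER_CPU(struct demo_scratch, demo_scratch);
static DEFINE_PER_CPU(int, demo_nest_level);

static char *demo_get_buf(void)
{
	struct demo_scratch *s = this_cpu_ptr(&demo_scratch);
	int nest_level = this_cpu_inc_return(demo_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(s->buf))) {
		this_cpu_dec(demo_nest_level);
		return ERR_PTR(-EBUSY);
	}
	return s->buf[nest_level - 1];
}

static void demo_put_buf(void)
{
	this_cpu_dec(demo_nest_level);
}
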
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 3ea65cdff30d..4ad967453b6f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -205,8 +205,6 @@ void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
void ftrace_likely_update(struct ftrace_likely_data *f, int val,
int expect, int is_constant)
{
- unsigned long flags = user_access_save();
-
/* A constant is always correct */
if (is_constant) {
f->constant++;
@@ -225,8 +223,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
f->data.correct++;
else
f->data.incorrect++;
-
- user_access_restore(flags);
}
EXPORT_SYMBOL(ftrace_likely_update);

diff --git a/lib/xarray.c b/lib/xarray.c
index 6be3acbb861f..446b956c9188 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -298,6 +298,8 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
xas_destroy(xas);
return false;
}
+ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
+ gfp |= __GFP_ACCOUNT;
xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
if (!xas->xa_alloc)
return false;
@@ -325,6 +327,8 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
xas_destroy(xas);
return false;
}
+ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
+ gfp |= __GFP_ACCOUNT;
if (gfpflags_allow_blocking(gfp)) {
xas_unlock_type(xas, lock_type);
xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
@@ -358,8 +362,12 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift)
if (node) {
xas->xa_alloc = NULL;
} else {
- node = kmem_cache_alloc(radix_tree_node_cachep,
- GFP_NOWAIT | __GFP_NOWARN);
+ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
+
+ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
+ gfp |= __GFP_ACCOUNT;
+
+ node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
if (!node) {
xas_set_err(xas, -ENOMEM);
return NULL;
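
With the three lib/xarray.c hunks above, any XArray created with XA_FLAGS_ACCOUNT has its interior node allocations charged to the current memory cgroup, which is what the fs/inode.c hunk enables for every page cache mapping. Opting in from other code would look roughly like this (hypothetical array name):

#include <linux/xarray.h>

static DEFINE_XARRAY_FLAGS(demo_array, XA_FLAGS_ACCOUNT);

static int demo_store(unsigned long index, void *entry)
{
	/* Node allocations triggered here now carry __GFP_ACCOUNT. */
	return xa_err(xa_store(&demo_array, index, entry, GFP_KERNEL));
}
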
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5b4f00be325d..ec81808830ae 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1491,16 +1491,29 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,

/*
* Dissolve a given free hugepage into free buddy pages. This function does
- * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the
- * dissolution fails because a give page is not a free hugepage, or because
- * free hugepages are fully reserved.
+ * nothing for in-use hugepages and non-hugepages.
+ * This function returns values as follows:
+ *
+ * -EBUSY: failed to dissolve a free hugepage, or the hugepage is in use
+ * (allocated or reserved)
+ * 0: successfully dissolved a free hugepage, or the page is not a
+ * hugepage (considered as already dissolved)
*/
int dissolve_free_huge_page(struct page *page)
{
int rc = -EBUSY;

+ /* Avoid disrupting the normal path by needlessly taking hugetlb_lock */
+ if (!PageHuge(page))
+ return 0;
+
spin_lock(&hugetlb_lock);
- if (PageHuge(page) && !page_count(page)) {
+ if (!PageHuge(page)) {
+ rc = 0;
+ goto out;
+ }
+
+ if (!page_count(page)) {
struct page *head = compound_head(page);
struct hstate *h = page_hstate(head);
int nid = page_to_nid(head);
@@ -1545,11 +1558,9 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)

for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
page = pfn_to_page(pfn);
- if (PageHuge(page) && !page_count(page)) {
- rc = dissolve_free_huge_page(page);
- if (rc)
- break;
- }
+ rc = dissolve_free_huge_page(page);
+ if (rc)
+ break;
}

return rc;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fc8b51744579..3a83e279cc98 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1733,6 +1733,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
if (!ret) {
if (set_hwpoison_free_buddy_page(page))
num_poisoned_pages_inc();
+ else
+ ret = -EBUSY;
}
}
return ret;
@@ -1857,11 +1859,8 @@ static int soft_offline_in_use_page(struct page *page, int flags)

static int soft_offline_free_page(struct page *page)
{
- int rc = 0;
- struct page *head = compound_head(page);
+ int rc = dissolve_free_huge_page(page);

- if (PageHuge(head))
- rc = dissolve_free_huge_page(page);
if (!rc) {
if (set_hwpoison_free_buddy_page(page))
num_poisoned_pages_inc();
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 2219e747df49..5b3bf1747c19 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -306,7 +306,7 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
else {
nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed,
*nodes);
- pol->w.cpuset_mems_allowed = tmp;
+ pol->w.cpuset_mems_allowed = *nodes;
}

if (nodes_empty(tmp))
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 0b39ec0c945c..295512465065 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -136,7 +136,7 @@ static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj,

end_pfn = pfn + count * BITS_PER_BYTE;
if (end_pfn > max_pfn)
- end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+ end_pfn = max_pfn;

for (; pfn < end_pfn; pfn++) {
bit = pfn % BITMAP_CHUNK_BITS;
@@ -181,7 +181,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,

end_pfn = pfn + count * BITS_PER_BYTE;
if (end_pfn > max_pfn)
- end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+ end_pfn = max_pfn;

for (; pfn < end_pfn; pfn++) {
bit = pfn % BITMAP_CHUNK_BITS;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2e8019d0e048..189415852077 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -29,10 +29,9 @@
static struct bio *get_swap_bio(gfp_t gfp_flags,
struct page *page, bio_end_io_t end_io)
{
- int i, nr = hpage_nr_pages(page);
struct bio *bio;

- bio = bio_alloc(gfp_flags, nr);
+ bio = bio_alloc(gfp_flags, 1);
if (bio) {
struct block_device *bdev;

@@ -41,9 +40,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
bio->bi_end_io = end_io;

- for (i = 0; i < nr; i++)
- bio_add_page(bio, page + i, PAGE_SIZE, 0);
- VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
+ bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0);
}
return bio;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 27e61ffd9039..b76f14197128 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6420,6 +6420,7 @@ static bool sock_addr_is_valid_access(int off, int size,
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
break;
default:
return false;
@@ -6430,6 +6431,7 @@ static bool sock_addr_is_valid_access(int off, int size,
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
break;
default:
return false;
diff --git a/net/core/sock.c b/net/core/sock.c
index 067878a1e4c5..30afb072eecf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1482,9 +1482,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
u32 meminfo[SK_MEMINFO_VARS];

- if (get_user(len, optlen))
- return -EFAULT;
-
sk_get_meminfo(sk, meminfo);

len = min_t(unsigned int, len, sizeof(meminfo));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dc91c27bb788..3f6e95cb21b0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -201,7 +201,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
}
sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
iph->saddr, iph->daddr,
- skb->dev->ifindex, sdif);
+ dif, sdif);
}
out:
read_unlock(&raw_v4_hashinfo.lock);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3b179ce6170f..ee999cb5e670 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -503,7 +503,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table);
+ const struct iphdr *iph = ip_hdr(skb);
+
+ return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+ iph->daddr, dport, inet_iif(skb),
+ inet_sdif(skb), &udp_table, NULL);
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);

@@ -1783,6 +1787,10 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
+
+ if (cgroup_bpf_enabled)
+ BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
+ (struct sockaddr *)sin);
}

if (udp_sk(sk)->gro_enabled)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 622eeaf5732b..3a01d3d2f105 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -242,7 +242,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,

return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), &udp_table, skb);
+ inet6_sdif(skb), &udp_table, NULL);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);

@@ -370,6 +370,10 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
inet6_iif(skb));
}
*addr_len = sizeof(*sin6);
+
+ if (cgroup_bpf_enabled)
+ BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
+ (struct sockaddr *)sin6);
}

if (udp_sk(sk)->gro_enabled)
@@ -516,7 +520,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);

sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), inet6_sdif(skb), udptable, skb);
+ inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
if (!sk) {
/* No socket for error: try tunnels before discarding */
sk = ERR_PTR(-ENOENT);
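
The udp4/udp6 hunks above invoke the new hooks only after the peer address has been copied into the sockaddr that recvmsg() will return, so a program sees (and may rewrite) exactly what userspace is about to see. Attaching such a program could look roughly like this libbpf sketch (error handling trimmed, function names hypothetical; assumes a libbpf recent enough to know the new attach type):

#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <fcntl.h>
#include <unistd.h>

int demo_attach_recvmsg4(const char *obj_path, const char *cgroup_path)
{
	struct bpf_object *obj = bpf_object__open(obj_path);
	struct bpf_program *prog;
	int cg_fd, err;

	if (!obj || bpf_object__load(obj))
		return -1;
	prog = bpf_program__next(NULL, obj);	/* first program in the object */
	if (!prog)
		return -1;
	cg_fd = open(cgroup_path, O_RDONLY);
	if (cg_fd < 0)
		return -1;
	err = bpf_prog_attach(bpf_program__fd(prog), cg_fd,
			      BPF_CGROUP_UDP4_RECVMSG, 0);
	close(cg_fd);
	return err;
}
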
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 71d5544243d2..21814acb862d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2409,6 +2409,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb)

ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
+
+ if (!packet_read_pending(&po->tx_ring))
+ complete(&po->skb_completion);
}

sock_wfree(skb);
@@ -2593,7 +2596,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct net_device *dev;
struct virtio_net_hdr *vnet_hdr = NULL;
struct sockcm_cookie sockc;
@@ -2608,6 +2611,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
+ long timeo = 0;

mutex_lock(&po->pg_vec_lock);

@@ -2654,12 +2658,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
size_max = dev->mtu + reserve + VLAN_HLEN;

+ reinit_completion(&po->skb_completion);
+
do {
ph = packet_current_frame(po, &po->tx_ring,
TP_STATUS_SEND_REQUEST);
if (unlikely(ph == NULL)) {
- if (need_wait && need_resched())
- schedule();
+ if (need_wait && skb) {
+ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+ timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
+ if (timeo <= 0) {
+ err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
+ goto out_put;
+ }
+ }
+ /* check for additional frames */
continue;
}

@@ -3215,6 +3228,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sock_init_data(sock, sk);

po = pkt_sk(sk);
+ init_completion(&po->skb_completion);
sk->sk_family = PF_PACKET;
po->num = proto;
po->xmit = dev_queue_xmit;
@@ -4327,7 +4341,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
req3->tp_sizeof_priv ||
req3->tp_feature_req_word) {
err = -EINVAL;
- goto out;
+ goto out_free_pg_vec;
}
}
break;
@@ -4391,6 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
prb_shutdown_retire_blk_timer(po, rb_queue);
}

+out_free_pg_vec:
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 3bb7c5fb3bff..c70a2794456f 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -128,6 +128,7 @@ struct packet_sock {
unsigned int tp_hdrlen;
unsigned int tp_reserve;
unsigned int tp_tstamp;
+ struct completion skb_completion;
struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb);
struct packet_type prot_hook ____cacheline_aligned_in_smp;
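
The af_packet change above replaces an open-coded schedule() loop in tpacket_snd() with a completion: the skb destructor signals po->skb_completion when the last pending frame is released, and the sender sleeps on it with the socket's send timeout instead of spinning. The handshake in isolation (hypothetical ring type; the pending-count locking is elided):

#include <linux/completion.h>
#include <linux/errno.h>

struct demo_tx_ring {
	struct completion skb_done;
	unsigned int pending;	/* frames still in flight */
};

static void demo_ring_init(struct demo_tx_ring *r)
{
	init_completion(&r->skb_done);
	r->pending = 0;
}

/* Buffer-destructor side: wake the sender on the last release. */
static void demo_destruct(struct demo_tx_ring *r)
{
	if (--r->pending == 0)
		complete(&r->skb_done);
}

/* Sender side: sleep until the destructor fires or the timeout ends. */
static long demo_wait_for_room(struct demo_tx_ring *r, long timeo)
{
	reinit_completion(&r->skb_done);
	timeo = wait_for_completion_interruptible_timeout(&r->skb_done, timeo);
	if (!timeo)
		return -ETIMEDOUT;
	if (timeo < 0)
		return -ERESTARTSYS;
	return 0;
}
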
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 0448b68fce74..bcfc81ee153d 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -133,10 +133,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Initialize the bind addr area */
sctp_bind_addr_init(&ep->base.bind_addr, 0);

- /* Remember who we are attached to. */
- ep->base.sk = sk;
- sock_hold(ep->base.sk);
-
/* Create the lists of associations. */
INIT_LIST_HEAD(&ep->asocs);

@@ -169,6 +165,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
ep->prsctp_enable = net->sctp.prsctp_enable;
ep->reconf_enable = net->sctp.reconf_enable;

+ /* Remember who we are attached to. */
+ ep->base.sk = sk;
+ sock_hold(ep->base.sk);
+
return ep;

nomem_shkey:
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 732d4b57411a..a437ee8ae482 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -950,6 +950,8 @@ static int xs_local_send_request(struct rpc_rqst *req)
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ rpc_fraghdr rm = xs_stream_record_marker(xdr);
+ unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
int status;
int sent = 0;

@@ -964,9 +966,7 @@ static int xs_local_send_request(struct rpc_rqst *req)

req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr,
- transport->xmit.offset,
- xs_stream_record_marker(xdr),
- &sent);
+ transport->xmit.offset, rm, &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);

@@ -976,7 +976,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
if (likely(sent > 0) || status == 0) {
transport->xmit.offset += sent;
req->rq_bytes_sent = transport->xmit.offset;
- if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
return 0;
@@ -1097,6 +1097,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ rpc_fraghdr rm = xs_stream_record_marker(xdr);
+ unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
bool vm_wait = false;
int status;
int sent;
@@ -1122,9 +1124,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
while (1) {
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
- transport->xmit.offset,
- xs_stream_record_marker(xdr),
- &sent);
+ transport->xmit.offset, rm, &sent);

dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - transport->xmit.offset, status);
@@ -1133,7 +1133,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
* reset the count of bytes sent. */
transport->xmit.offset += sent;
req->rq_bytes_sent = transport->xmit.offset;
- if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
return 0;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3ecca3b88bf8..eb0f701f9bf1 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -132,7 +132,7 @@ static int __init tipc_init(void)
if (err)
goto out_sysctl;

- err = register_pernet_subsys(&tipc_net_ops);
+ err = register_pernet_device(&tipc_net_ops);
if (err)
goto out_pernet;

@@ -140,7 +140,7 @@ static int __init tipc_init(void)
if (err)
goto out_socket;

- err = register_pernet_subsys(&tipc_topsrv_net_ops);
+ err = register_pernet_device(&tipc_topsrv_net_ops);
if (err)
goto out_pernet_topsrv;

@@ -151,11 +151,11 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
out_bearer:
- unregister_pernet_subsys(&tipc_topsrv_net_ops);
+ unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
tipc_socket_stop();
out_socket:
- unregister_pernet_subsys(&tipc_net_ops);
+ unregister_pernet_device(&tipc_net_ops);
out_pernet:
tipc_unregister_sysctl();
out_sysctl:
@@ -170,9 +170,9 @@ static int __init tipc_init(void)
static void __exit tipc_exit(void)
{
tipc_bearer_cleanup();
- unregister_pernet_subsys(&tipc_topsrv_net_ops);
+ unregister_pernet_device(&tipc_topsrv_net_ops);
tipc_socket_stop();
- unregister_pernet_subsys(&tipc_net_ops);
+ unregister_pernet_device(&tipc_net_ops);
tipc_netlink_stop();
tipc_netlink_compat_stop();
tipc_unregister_sysctl();
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 340a6e7c43a7..8836aebd6180 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;

- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;

@@ -537,7 +541,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,

name = (char *)TLV_DATA(msg->req);

- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;

@@ -815,7 +823,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
if (!link)
return -EMSGSIZE;

- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;

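All three tipc compat hunks above add the same guard: TLV_GET_DATA_LEN() can be zero or negative for a truncated message, and feeding that straight into min_t() let string_is_valid() run over an empty payload. The check in isolation (hypothetical helper; string_is_valid() in this file is a memchr() for a terminating NUL):

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>

static int demo_check_name(const char *name, int data_len, int name_max)
{
	int len = data_len;		/* from TLV_GET_DATA_LEN() */

	if (len <= 0)			/* empty or truncated TLV */
		return -EINVAL;
	len = min_t(int, len, name_max);
	if (!memchr(name, '\0', len))	/* no terminator within bounds */
		return -EINVAL;
	return 0;
}
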
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 4d85d71f16e2..c86f136e5962 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -176,7 +176,6 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
goto tx_error;
}

- skb->dev = rt->dst.dev;
ttl = ip4_dst_hoplimit(&rt->dst);
udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
dst->ipv4.s_addr, 0, ttl, 0, src->port,
@@ -195,10 +194,9 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
if (err)
goto tx_error;
ttl = ip6_dst_hoplimit(ndst);
- err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
- ndst->dev, &src->ipv6,
- &dst->ipv6, 0, ttl, 0, src->port,
- dst->port, false);
+ err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
+ &src->ipv6, &dst->ipv6, 0, ttl, 0,
+ src->port, dst->port, false);
#endif
}
return err;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 478603f43964..f4f632824247 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
goto skip_tx_cleanup;
}

- if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
+ if (unlikely(sk->sk_write_pending) &&
+ !wait_on_pending_writer(sk, &timeo))
tls_handle_open_record(sk, 0);

/* We need these for tls_sw_fallback handling of other packets */
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 02d7c871862a..006be3963977 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -573,13 +573,13 @@ static void test_lpm_get_next_key(void)

/* add one more element (total two) */
key_p->prefixlen = 24;
- inet_pton(AF_INET, "192.168.0.0", key_p->data);
+ inet_pton(AF_INET, "192.168.128.0", key_p->data);
assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);

memset(key_p, 0, key_size);
assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
- key_p->data[1] == 168 && key_p->data[2] == 0);
+ key_p->data[1] == 168 && key_p->data[2] == 128);

memset(next_key_p, 0, key_size);
assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
@@ -592,7 +592,7 @@ static void test_lpm_get_next_key(void)

/* Add one more element (total three) */
key_p->prefixlen = 24;
- inet_pton(AF_INET, "192.168.128.0", key_p->data);
+ inet_pton(AF_INET, "192.168.0.0", key_p->data);
assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);

memset(key_p, 0, key_size);
@@ -643,6 +643,41 @@ static void test_lpm_get_next_key(void)
assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
errno == ENOENT);

+ /* Add one more element (total five) */
+ key_p->prefixlen = 28;
+ inet_pton(AF_INET, "192.168.1.128", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1 &&
+ next_key_p->data[3] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
/* no exact matching key should return the first one in post order */
key_p->prefixlen = 22;
inet_pton(AF_INET, "192.168.1.0", key_p->data);