Re: Linux 4.6.7

From: Greg KH
Date: Tue Aug 16 2016 - 16:06:11 EST


diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt
index 0a94224ad296..9e3c3b33514c 100644
--- a/Documentation/cpu-freq/pcc-cpufreq.txt
+++ b/Documentation/cpu-freq/pcc-cpufreq.txt
@@ -159,8 +159,8 @@ to be strictly associated with a P-state.

2.2 cpuinfo_transition_latency:
-------------------------------
-The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
-does not include a field to expose this value currently.
+The cpuinfo_transition_latency field is 0. The PCC specification does
+not include a field to expose this value currently.

2.3 cpuinfo_cur_freq:
---------------------
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 54944c71b819..2a4ee6302122 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
"debugpat" boot parameter. With this parameter, various debug messages are
printed to dmesg log.

+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ MTRR PAT Call Sequence PAT State PAT MSR
+ =========================================================
+ E E MTRR -> PAT init Enabled OS
+ E D MTRR -> PAT init Disabled -
+ D E MTRR -> PAT disable Disabled BIOS
+ D D MTRR -> PAT disable Disabled -
+ - np/E PAT -> PAT disable Disabled BIOS
+ - np/D PAT -> PAT disable Disabled -
+ E !P/E MTRR -> PAT init Disabled BIOS
+ D !P/E MTRR -> PAT disable Disabled BIOS
+ !M !P/E MTRR stub -> PAT disable Disabled BIOS
+
+ Legend
+ ------------------------------------------------
+ E Feature enabled in CPU
+ D Feature disabled/unsupported in CPU
+ np "nopat" boot option specified
+ !P CONFIG_X86_PAT option unset
+ !M CONFIG_MTRR option unset
+ Enabled PAT state set to enabled
+ Disabled PAT state set to disabled
+ OS PAT initializes PAT MSR with OS setting
+ BIOS PAT keeps PAT MSR with BIOS setting
+
diff --git a/Makefile b/Makefile
index bee1a1692fed..5fe9a7a9cd65 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 6
-SUBLEVEL = 6
+SUBLEVEL = 7
EXTRAVERSION =
NAME = Charred Weasel

diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 087acb569b63..5f221acd21ae 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
mm_segment_t fs;
long ret, err, i;

- if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+ if (maxevents <= 0 ||
+ maxevents > (INT_MAX/sizeof(*kbuf)) ||
+ maxevents > (INT_MAX/sizeof(*events)))
return -EINVAL;
+ if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+ return -EFAULT;
kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,

if (nsops < 1 || nsops > SEMOPM)
return -EINVAL;
+ if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+ return -EFAULT;
sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
if (!sops)
return -ENOMEM;
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 71f99d5f7a06..6021318bfbb0 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -344,7 +344,7 @@ EXPORT(sysn32_call_table)
PTR sys_ni_syscall /* available, was setaltroot */
PTR sys_add_key
PTR sys_request_key
- PTR sys_keyctl /* 6245 */
+ PTR compat_sys_keyctl /* 6245 */
PTR sys_set_thread_area
PTR sys_inotify_init
PTR sys_inotify_add_watch
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 91b43eea2d5a..71fe3259a5e3 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -500,7 +500,7 @@ EXPORT(sys32_call_table)
PTR sys_ni_syscall /* available, was setaltroot */
PTR sys_add_key /* 4280 */
PTR sys_request_key
- PTR sys_keyctl
+ PTR compat_sys_keyctl
PTR sys_set_thread_area
PTR sys_inotify_init
PTR sys_inotify_add_watch /* 4285 */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cd32c038286..4e1b060b8481 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -126,7 +126,7 @@ config PPC
select IRQ_FORCED_THREADING
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_SYSCALL_TRACEPOINTS
- select HAVE_BPF_JIT
+ select HAVE_BPF_JIT if CPU_BIG_ENDIAN
select HAVE_ARCH_JUMP_LABEL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 31e4c7e1a4b4..c42627645b54 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -648,7 +648,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
if (pe->type & EEH_PE_VF) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
} else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_lock_rescan_remove();
pcibios_remove_pci_devices(bus);
pci_unlock_rescan_remove();
@@ -698,10 +697,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- if (pe->type & EEH_PE_VF)
+ if (pe->type & EEH_PE_VF) {
eeh_add_virt_device(edev, NULL);
- else
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pcibios_add_pci_devices(bus);
+ }
} else if (frozen_bus && rmv_data->removed) {
pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
ssleep(5);
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4cddd17153fb..f848572169ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -294,7 +294,7 @@
# 285 sys_setaltroot
286 i386 add_key sys_add_key
287 i386 request_key sys_request_key
-288 i386 keyctl sys_keyctl
+288 i386 keyctl sys_keyctl compat_sys_keyctl
289 i386 ioprio_set sys_ioprio_set
290 i386 ioprio_get sys_ioprio_get
291 i386 inotify_init sys_inotify_init
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 9d3a96c4da78..01c2d14ec05f 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -133,13 +133,11 @@ static inline unsigned int x86_cpuid_family(void)
#ifdef CONFIG_MICROCODE
extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
-extern int __init save_microcode_in_initrd(void);
void reload_early_microcode(void);
extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
#else
static inline void __init load_ucode_bsp(void) { }
static inline void load_ucode_ap(void) { }
-static inline int __init save_microcode_in_initrd(void) { return 0; }
static inline void reload_early_microcode(void) { }
static inline bool
get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index b94f6f64e23d..dbff1456d215 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,6 +24,7 @@
#define _ASM_X86_MTRR_H

#include <uapi/asm/mtrr.h>
+#include <asm/pat.h>


/*
@@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
+static inline void mtrr_bp_init(void)
+{
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+}

#define mtrr_ap_init() do {} while (0)
-#define mtrr_bp_init() do {} while (0)
#define set_mtrr_aps_delayed_init() do {} while (0)
#define mtrr_aps_init() do {} while (0)
#define mtrr_bp_restore() do {} while (0)
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index ca6c228d5e62..0b1ff4c1c14e 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -5,8 +5,8 @@
#include <asm/pgtable_types.h>

bool pat_enabled(void);
+void pat_disable(const char *reason);
extern void pat_init(void);
-void pat_init_cache_modes(u64);

extern int reserve_memtype(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index ac360bfbbdb6..12823b6ebd6d 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -175,7 +175,7 @@ void load_ucode_ap(void)
}
}

-int __init save_microcode_in_initrd(void)
+static int __init save_microcode_in_initrd(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;

@@ -691,4 +691,5 @@ int __init microcode_init(void)
return error;

}
+fs_initcall(save_microcode_in_initrd);
late_initcall(microcode_init);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 19f57360dfd2..8d7a29ed9377 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}

+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ prepare_set();
+
+ pat_init();
+
+ post_set();
+ local_irq_restore(flags);
+}
+
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
- unsigned long flags;
unsigned lo, dummy;
unsigned int i;

@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)

mtrr_state_set = 1;

- /* PAT setup for BP. We need to go through sync steps here */
- local_irq_save(flags);
- prepare_set();
-
- pat_init();
-
- post_set();
- local_irq_restore(flags);
-
return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 10f8d4796240..7d393ecdeee6 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();

+ if (mtrr_enabled())
+ mtrr_bp_pat_init();
+
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
}
}

- if (!mtrr_enabled())
+ if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n");
+
+ /*
+ * PAT initialization relies on MTRR's rendezvous handler.
+ * Skip PAT init until the handler can initialize both
+ * features independently.
+ */
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ }
}

void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884dcc433..6c7ced07d16d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);

extern void set_mtrr_ops(const struct mtrr_ops *ops);

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9d56f271d519..6df291c2987c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -696,13 +696,6 @@ void free_initmem(void)
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
/*
- * Remember, initrd memory may contain microcode or other useful things.
- * Before we lose initrd mem, we need to find a place to hold them
- * now that normal virtual memory is enabled.
- */
- save_microcode_in_initrd();
-
- /*
* end could be not aligned, and We can not align that,
* decompresser could be confused by aligned initrd_end
* We already reserve the end partial page before in
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index faec01e7a17d..fb0604f11eec 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -40,11 +40,22 @@
static bool boot_cpu_done;

static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
+static void init_cache_modes(void);

-static inline void pat_disable(const char *reason)
+void pat_disable(const char *reason)
{
+ if (!__pat_enabled)
+ return;
+
+ if (boot_cpu_done) {
+ WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+ return;
+ }
+
__pat_enabled = 0;
pr_info("x86/PAT: %s\n", reason);
+
+ init_cache_modes();
}

static int __init nopat(char *str)
@@ -181,7 +192,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
* configuration.
* Using lower indices is preferred, so we start with highest index.
*/
-void pat_init_cache_modes(u64 pat)
+static void __init_cache_modes(u64 pat)
{
enum page_cache_mode cache;
char pat_msg[33];
@@ -202,14 +213,11 @@ static void pat_bsp_init(u64 pat)
{
u64 tmp_pat;

- if (!cpu_has_pat) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
pat_disable("PAT not supported by CPU.");
return;
}

- if (!pat_enabled())
- goto done;
-
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
pat_disable("PAT MSR is 0, disabled.");
@@ -218,16 +226,12 @@ static void pat_bsp_init(u64 pat)

wrmsrl(MSR_IA32_CR_PAT, pat);

-done:
- pat_init_cache_modes(pat);
+ __init_cache_modes(pat);
}

static void pat_ap_init(u64 pat)
{
- if (!pat_enabled())
- return;
-
- if (!cpu_has_pat) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
/*
* If this happens we are on a secondary CPU, but switched to
* PAT on the boot CPU. We have no way to undo PAT.
@@ -238,18 +242,32 @@ static void pat_ap_init(u64 pat)
wrmsrl(MSR_IA32_CR_PAT, pat);
}

-void pat_init(void)
+static void init_cache_modes(void)
{
- u64 pat;
- struct cpuinfo_x86 *c = &boot_cpu_data;
+ u64 pat = 0;
+ static int init_cm_done;

- if (!pat_enabled()) {
+ if (init_cm_done)
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_PAT)) {
+ /*
+ * CPU supports PAT. Set PAT table to be consistent with
+ * PAT MSR. This case supports "nopat" boot option, and
+ * virtual machine environments which support PAT without
+ * MTRRs. In specific, Xen has unique setup to PAT MSR.
+ *
+ * If PAT MSR returns 0, it is considered invalid and emulates
+ * as No PAT.
+ */
+ rdmsrl(MSR_IA32_CR_PAT, pat);
+ }
+
+ if (!pat) {
/*
* No PAT. Emulate the PAT table that corresponds to the two
- * cache bits, PWT (Write Through) and PCD (Cache Disable). This
- * setup is the same as the BIOS default setup when the system
- * has PAT but the "nopat" boot option has been specified. This
- * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+ * cache bits, PWT (Write Through) and PCD (Cache Disable).
+ * This setup is also the same as the BIOS default setup.
*
* PTE encoding:
*
@@ -266,10 +284,36 @@ void pat_init(void)
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+ }
+
+ __init_cache_modes(pat);
+
+ init_cm_done = 1;
+}
+
+/**
+ * pat_init - Initialize PAT MSR and PAT table
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC and WT.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+ u64 pat;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (!pat_enabled()) {
+ init_cache_modes();
+ return;
+ }

- } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
/*
* PAT support with the lower four entries. Intel Pentium 2,
* 3, M, and 4 are affected by PAT errata, which makes the
@@ -734,25 +778,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (file->f_flags & O_DSYNC)
pcm = _PAGE_CACHE_MODE_UC_MINUS;

-#ifdef CONFIG_X86_32
- /*
- * On the PPro and successors, the MTRRs are used to set
- * memory types for physical addresses outside main memory,
- * so blindly setting UC or PWT on those pages is wrong.
- * For Pentiums and earlier, the surround logic should disable
- * caching for the high addresses through the KEN pin, but
- * we maintain the tradition of paranoia in this code.
- */
- if (!pat_enabled() &&
- !(boot_cpu_has(X86_FEATURE_MTRR) ||
- boot_cpu_has(X86_FEATURE_K6_MTRR) ||
- boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
- boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
- pcm = _PAGE_CACHE_MODE_UC;
- }
-#endif
-
*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
cachemode2protval(pcm));
return 1;
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 4400a43b9e28..f2a990285a5c 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -24,7 +24,6 @@
#include <asm/frame.h>

ENTRY(swsusp_arch_suspend)
- FRAME_BEGIN
movq $saved_context, %rax
movq %rsp, pt_regs_sp(%rax)
movq %rbp, pt_regs_bp(%rax)
@@ -51,6 +50,7 @@ ENTRY(swsusp_arch_suspend)
movq %cr3, %rax
movq %rax, restore_cr3(%rip)

+ FRAME_BEGIN
call swsusp_save
FRAME_END
ret
@@ -111,7 +111,6 @@ ENTRY(core_restore_code)
*/

ENTRY(restore_registers)
- FRAME_BEGIN
/* go back to the original page tables */
movq %rbx, %cr3

@@ -152,6 +151,5 @@ ENTRY(restore_registers)
/* tell the hibernation core that we've just restored the memory */
movq %rax, in_suspend(%rip)

- FRAME_END
ret
ENDPROC(restore_registers)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 880862c7d9dd..d8cca75e3b3e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,7 +75,6 @@
#include <asm/mach_traps.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
-#include <asm/pat.h>
#include <asm/cpu.h>

#ifdef CONFIG_ACPI
@@ -1511,7 +1510,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
- u64 pat;
int rc;

if (!xen_start_info)
@@ -1618,13 +1616,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_start_info->nr_pages);
xen_reserve_special_pages();

- /*
- * Modify the cache mode translation tables to match Xen's PAT
- * configuration.
- */
- rdmsrl(MSR_IA32_CR_PAT, pat);
- pat_init_cache_modes(pat);
-
/* keep using Xen gdt for now; no urgent need to change it */

#ifdef CONFIG_X86_32
diff --git a/block/genhd.c b/block/genhd.c
index 9f42526b4d62..3eebd256b765 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -856,6 +856,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v)
if (iter) {
class_dev_iter_exit(iter);
kfree(iter);
+ seqf->private = NULL;
}
}

diff --git a/crypto/gcm.c b/crypto/gcm.c
index bec329b3de8d..d9ea5f9c0574 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,

ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
CRYPTO_ALG_TYPE_HASH,
- CRYPTO_ALG_TYPE_AHASH_MASK);
+ CRYPTO_ALG_TYPE_AHASH_MASK |
+ crypto_requires_sync(algt->type,
+ algt->mask));
if (IS_ERR(ghash_alg))
return PTR_ERR(ghash_alg);

diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index ea5815c5e128..bc769c448d4a 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,

void scatterwalk_done(struct scatter_walk *walk, int out, int more)
{
- if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
+ if (!more || walk->offset >= walk->sg->offset + walk->sg->length ||
+ !(walk->offset & (PAGE_SIZE - 1)))
scatterwalk_pagedone(walk, out, more);
}
EXPORT_SYMBOL_GPL(scatterwalk_done);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b583e5336630..e511f34be177 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -722,15 +722,18 @@ retry:
}
}

-static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
+static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
{
const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));

+ if (nbits < 0)
+ return -EINVAL;
+
/* Cap the value to avoid overflows */
nbits = min(nbits, nbits_max);
- nbits = max(nbits, -nbits_max);

credit_entropy_bits(r, nbits);
+ return 0;
}

/*********************************************************************
@@ -1542,8 +1545,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EPERM;
if (get_user(ent_count, p))
return -EFAULT;
- credit_entropy_bits_safe(&input_pool, ent_count);
- return 0;
+ return credit_entropy_bits_safe(&input_pool, ent_count);
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1557,8 +1559,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
size);
if (retval < 0)
return retval;
- credit_entropy_bits_safe(&input_pool, ent_count);
- return 0;
+ return credit_entropy_bits_safe(&input_pool, ent_count);
case RNDZAPENTCNT:
case RNDCLEARPOOL:
/*
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 808a320e9d5d..2a0d58959acf 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->min = policy->cpuinfo.min_freq =
ioread32(&pcch_hdr->minimum_frequency) * 1000;

- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-
pr_debug("init: policy->max is %d, policy->min is %d\n",
policy->max, policy->min);
out:
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 585a3b7915bd..f1b0eafecd0d 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -244,6 +244,13 @@ struct i801_priv {
struct platform_device *mux_pdev;
#endif
struct platform_device *tco_pdev;
+
+ /*
+ * If set to true the host controller registers are reserved for
+ * ACPI AML use. Protected by acpi_lock.
+ */
+ bool acpi_reserved;
+ struct mutex acpi_lock;
};

#define FEATURE_SMBUS_PEC (1 << 0)
@@ -714,9 +721,15 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
{
int hwpec;
int block = 0;
- int ret, xact = 0;
+ int ret = 0, xact = 0;
struct i801_priv *priv = i2c_get_adapdata(adap);

+ mutex_lock(&priv->acpi_lock);
+ if (priv->acpi_reserved) {
+ mutex_unlock(&priv->acpi_lock);
+ return -EBUSY;
+ }
+
hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
&& size != I2C_SMBUS_QUICK
&& size != I2C_SMBUS_I2C_BLOCK_DATA;
@@ -773,7 +786,8 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
default:
dev_err(&priv->pci_dev->dev, "Unsupported transaction %d\n",
size);
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ goto out;
}

if (hwpec) /* enable/disable hardware PEC */
@@ -796,11 +810,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));

if (block)
- return ret;
+ goto out;
if (ret)
- return ret;
+ goto out;
if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
- return 0;
+ goto out;

switch (xact & 0x7f) {
case I801_BYTE: /* Result put in SMBHSTDAT0 */
@@ -812,7 +826,10 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
(inb_p(SMBHSTDAT1(priv)) << 8);
break;
}
- return 0;
+
+out:
+ mutex_unlock(&priv->acpi_lock);
+ return ret;
}


@@ -1249,6 +1266,72 @@ static void i801_add_tco(struct i801_priv *priv)
priv->tco_pdev = pdev;
}

+#ifdef CONFIG_ACPI
+static acpi_status
+i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
+ u64 *value, void *handler_context, void *region_context)
+{
+ struct i801_priv *priv = handler_context;
+ struct pci_dev *pdev = priv->pci_dev;
+ acpi_status status;
+
+ /*
+ * Once BIOS AML code touches the OpRegion we warn and inhibit any
+ * further access from the driver itself. This device is now owned
+ * by the system firmware.
+ */
+ mutex_lock(&priv->acpi_lock);
+
+ if (!priv->acpi_reserved) {
+ priv->acpi_reserved = true;
+
+ dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
+ dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
+ }
+
+ if ((function & ACPI_IO_MASK) == ACPI_READ)
+ status = acpi_os_read_port(address, (u32 *)value, bits);
+ else
+ status = acpi_os_write_port(address, (u32)*value, bits);
+
+ mutex_unlock(&priv->acpi_lock);
+
+ return status;
+}
+
+static int i801_acpi_probe(struct i801_priv *priv)
+{
+ struct acpi_device *adev;
+ acpi_status status;
+
+ adev = ACPI_COMPANION(&priv->pci_dev->dev);
+ if (adev) {
+ status = acpi_install_address_space_handler(adev->handle,
+ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
+ NULL, priv);
+ if (ACPI_SUCCESS(status))
+ return 0;
+ }
+
+ return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
+}
+
+static void i801_acpi_remove(struct i801_priv *priv)
+{
+ struct acpi_device *adev;
+
+ adev = ACPI_COMPANION(&priv->pci_dev->dev);
+ if (!adev)
+ return;
+
+ acpi_remove_address_space_handler(adev->handle,
+ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
+}
+#else
+static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
+static inline void i801_acpi_remove(struct i801_priv *priv) { }
+#endif
+
static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
unsigned char temp;
@@ -1266,6 +1349,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
priv->adapter.dev.parent = &dev->dev;
ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
priv->adapter.retries = 3;
+ mutex_init(&priv->acpi_lock);

priv->pci_dev = dev;
switch (dev->device) {
@@ -1328,10 +1412,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
return -ENODEV;
}

- err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
- if (err) {
+ if (i801_acpi_probe(priv))
return -ENODEV;
- }

err = pcim_iomap_regions(dev, 1 << SMBBAR,
dev_driver_string(&dev->dev));
@@ -1340,6 +1422,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
"Failed to request SMBus region 0x%lx-0x%Lx\n",
priv->smba,
(unsigned long long)pci_resource_end(dev, SMBBAR));
+ i801_acpi_remove(priv);
return err;
}

@@ -1404,6 +1487,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
err = i2c_add_adapter(&priv->adapter);
if (err) {
dev_err(&dev->dev, "Failed to add SMBus adapter\n");
+ i801_acpi_remove(priv);
return err;
}

@@ -1422,6 +1506,7 @@ static void i801_remove(struct pci_dev *dev)

i801_del_mux(priv);
i2c_del_adapter(&priv->adapter);
+ i801_acpi_remove(priv);
pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);

platform_device_unregister(priv->tco_pdev);
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index db760e84119f..b8df0f5e8c25 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev,
if (err < 0)
return err;

- return register_netdevice(bond_dev);
+ err = register_netdevice(bond_dev);
+
+ netif_carrier_off(bond_dev);
+
+ return err;
}

static size_t bond_get_size(const struct net_device *bond_dev)
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 38db2e4d7d54..832401b41b98 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -231,7 +231,7 @@ err_dma:
dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
DMA_TO_DEVICE);

- while (i > 0) {
+ while (i-- > 0) {
int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
struct bgmac_slot_info *slot = &ring->slots[index];
u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 89469d5aae25..40e6f6c11f20 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -791,13 +791,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
* in a bitmap and increasing the chain consumer only
* for the first successive completed entries.
*/
- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
+ __set_bit(pos, p_spq->p_comp_bitmap);

while (test_bit(p_spq->comp_bitmap_idx,
p_spq->p_comp_bitmap)) {
- bitmap_clear(p_spq->p_comp_bitmap,
- p_spq->comp_bitmap_idx,
- SPQ_RING_SIZE);
+ __clear_bit(p_spq->comp_bitmap_idx,
+ p_spq->p_comp_bitmap);
p_spq->comp_bitmap_idx++;
qed_chain_return_produced(&p_spq->chain);
}
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 8f3c55d03d5d..f58858b7972c 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -914,7 +914,6 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
}

macsec_skb_cb(skb)->req = req;
- macsec_skb_cb(skb)->rx_sa = rx_sa;
skb->dev = dev;
aead_request_set_callback(req, 0, macsec_decrypt_done, skb);

@@ -1141,6 +1140,8 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
}
}

+ macsec_skb_cb(skb)->rx_sa = rx_sa;
+
/* Disabled && !changed text => skip validation */
if (hdr->tci_an & MACSEC_TCI_C ||
secy->validate_frames != MACSEC_VALIDATE_DISABLED)
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index f572b31a2b20..9ab88e1ed394 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2404,8 +2404,6 @@ ppp_unregister_channel(struct ppp_channel *chan)
spin_lock_bh(&pn->all_channels_lock);
list_del(&pch->list);
spin_unlock_bh(&pn->all_channels_lock);
- put_net(pch->chan_net);
- pch->chan_net = NULL;

pch->file.dead = 1;
wake_up_interruptible(&pch->file.rwait);
@@ -2999,6 +2997,9 @@ ppp_disconnect_channel(struct channel *pch)
*/
static void ppp_destroy_channel(struct channel *pch)
{
+ put_net(pch->chan_net);
+ pch->chan_net = NULL;
+
atomic_dec(&channel_count);

if (!pch->file.dead) {
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index 8e343a3ca873..9d2704c83fa7 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -33,7 +33,9 @@ struct nd_pfn_sb {
/* minor-version-1 additions for section alignment */
__le32 start_pad;
__le32 end_trunc;
- u8 padding[4004];
+ /* minor-version-2 record the base alignment of the mapping */
+ __le32 align;
+ u8 padding[4000];
__le64 checksum;
};

diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index e071e214feba..84f2372dd0bb 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -329,6 +329,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
int nd_pfn_validate(struct nd_pfn *nd_pfn)
{
u64 checksum, offset;
+ unsigned long align;
+ enum nd_pfn_mode mode;
struct nd_namespace_io *nsio;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
struct nd_namespace_common *ndns = nd_pfn->ndns;
@@ -360,6 +362,9 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
pfn_sb->end_trunc = 0;
}

+ if (__le16_to_cpu(pfn_sb->version_minor) < 2)
+ pfn_sb->align = 0;
+
switch (le32_to_cpu(pfn_sb->mode)) {
case PFN_MODE_RAM:
case PFN_MODE_PMEM:
@@ -368,20 +373,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
return -ENXIO;
}

+ align = le32_to_cpu(pfn_sb->align);
+ offset = le64_to_cpu(pfn_sb->dataoff);
+ if (align == 0)
+ align = 1UL << ilog2(offset);
+ mode = le32_to_cpu(pfn_sb->mode);
+
if (!nd_pfn->uuid) {
- /* from probe we allocate */
+ /*
+ * When probing a namepace via nd_pfn_probe() the uuid
+ * is NULL (see: nd_pfn_devinit()) we init settings from
+ * pfn_sb
+ */
nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
if (!nd_pfn->uuid)
return -ENOMEM;
+ nd_pfn->align = align;
+ nd_pfn->mode = mode;
} else {
- /* from init we validate */
+ /*
+ * When probing a pfn / dax instance we validate the
+ * live settings against the pfn_sb
+ */
if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
return -ENODEV;
+
+ /*
+ * If the uuid validates, but other settings mismatch
+ * return EINVAL because userspace has managed to change
+ * the configuration without specifying new
+ * identification.
+ */
+ if (nd_pfn->align != align || nd_pfn->mode != mode) {
+ dev_err(&nd_pfn->dev,
+ "init failed, settings mismatch\n");
+ dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
+ nd_pfn->align, align, nd_pfn->mode,
+ mode);
+ return -EINVAL;
+ }
}

- if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
+ if (align > nvdimm_namespace_capacity(ndns)) {
dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
- nd_pfn->align, nvdimm_namespace_capacity(ndns));
+ align, nvdimm_namespace_capacity(ndns));
return -EINVAL;
}

@@ -391,7 +426,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
* namespace has changed since the pfn superblock was
* established.
*/
- offset = le64_to_cpu(pfn_sb->dataoff);
nsio = to_nd_namespace_io(&ndns->dev);
if (offset >= resource_size(&nsio->res)) {
dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
@@ -399,10 +433,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
return -EBUSY;
}

- nd_pfn->align = 1UL << ilog2(offset);
- if (!is_power_of_2(offset) || offset < PAGE_SIZE) {
- dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
- offset);
+ if ((align && !IS_ALIGNED(offset, align))
+ || !IS_ALIGNED(offset, PAGE_SIZE)) {
+ dev_err(&nd_pfn->dev,
+ "bad offset: %#llx dax disabled align: %#lx\n",
+ offset, align);
return -ENXIO;
}

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 92f536596b24..368efac7a950 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -426,9 +426,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
pfn_sb->version_major = cpu_to_le16(1);
- pfn_sb->version_minor = cpu_to_le16(1);
+ pfn_sb->version_minor = cpu_to_le16(2);
pfn_sb->start_pad = cpu_to_le32(start_pad);
pfn_sb->end_trunc = cpu_to_le32(end_trunc);
+ pfn_sb->align = cpu_to_le32(nd_pfn->align);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);

@@ -501,7 +502,6 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
pmem = dev_get_drvdata(dev);
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
pmem->pfn_pad = start_pad + end_trunc;
- nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
if (nd_pfn->mode == PFN_MODE_RAM) {
if (pmem->data_offset < SZ_8K)
return -EINVAL;
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 6c7fe4778793..7e156a0b65dd 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -140,7 +140,6 @@ static const struct regulator_ops rpm_smps_ldo_ops = {
.enable = rpm_reg_enable,
.disable = rpm_reg_disable,
.is_enabled = rpm_reg_is_enabled,
- .list_voltage = regulator_list_voltage_linear_range,

.get_voltage = rpm_reg_get_voltage,
.set_voltage = rpm_reg_set_voltage,
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index ae8a70f703eb..23e60feb81bb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -678,8 +678,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- bool sc4_bit = has_sc4_bit(packet);
- u8 sc;
+ u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
u32 bth1;
int is_mcast;
struct ib_grh *grh = NULL;
@@ -697,10 +696,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
*/
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- u8 sl, sc5;
+ u8 sl;

- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- sc5 |= sc4_bit;
sl = ibp->sc_to_sl[sc5];

process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);
@@ -717,10 +714,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)

if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
u16 slid = be16_to_cpu(hdr->lrh[3]);
- u8 sc5;
-
- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- sc5 |= sc4_bit;

return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
}
@@ -745,10 +738,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (qp->ibqp.qp_num > 1) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u16 slid;
- u8 sc5;
-
- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- sc5 |= sc4_bit;

slid = be16_to_cpu(hdr->lrh[3]);
if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
@@ -790,10 +779,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
/* Received on QP0, and so by definition, this is an SMP */
struct opa_smp *smp = (struct opa_smp *)data;
u16 slid = be16_to_cpu(hdr->lrh[3]);
- u8 sc5;
-
- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- sc5 |= sc4_bit;

if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
goto drop;
@@ -890,9 +875,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
}

wc.slid = be16_to_cpu(hdr->lrh[3]);
- sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- sc |= sc4_bit;
- wc.sl = ibp->sc_to_sl[sc];
+ wc.sl = ibp->sc_to_sl[sc5];

/*
* Save the LMC lower bits if the destination LID is a unicast LID.
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c
index bc95c4112c61..d8fb056526f8 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.c
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.c
@@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx)

struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
struct rvt_qp *qp)
+ __must_hold(&qp->s_lock)
{
struct verbs_txreq *tx = ERR_PTR(-EBUSY);
- unsigned long flags;

- spin_lock_irqsave(&qp->s_lock, flags);
write_seqlock(&dev->iowait_lock);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct hfi1_qp_priv *priv;
@@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
}
out:
write_sequnlock(&dev->iowait_lock);
- spin_unlock_irqrestore(&qp->s_lock, flags);
return tx;
}

diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h
index 1cf69b2fe4a5..a1d6e0807f97 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.h
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.h
@@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,

static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
struct rvt_qp *qp)
+ __must_hold(&qp->slock)
{
struct verbs_txreq *tx;
struct hfi1_qp_priv *priv = qp->priv;
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index 0ff27818bb87..25b9f178c8d3 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -299,6 +299,8 @@ static int mvebu_uart_startup(struct uart_port *port)
static void mvebu_uart_shutdown(struct uart_port *port)
{
writel(0, port->membase + UART_CTRL);
+
+ free_irq(port->irq, port);
}

static void mvebu_uart_set_termios(struct uart_port *port,
diff --git a/fs/dcache.c b/fs/dcache.c
index 44008e3fafc4..5612631b7b46 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -578,7 +578,6 @@ static struct dentry *dentry_kill(struct dentry *dentry)

failed:
spin_unlock(&dentry->d_lock);
- cpu_relax();
return dentry; /* try again with same dentry */
}

@@ -752,6 +751,8 @@ void dput(struct dentry *dentry)
return;

repeat:
+ might_sleep();
+
rcu_read_lock();
if (likely(fast_dput(dentry))) {
rcu_read_unlock();
@@ -783,8 +784,10 @@ repeat:

kill_it:
dentry = dentry_kill(dentry);
- if (dentry)
+ if (dentry) {
+ cond_resched();
goto repeat;
+ }
}
EXPORT_SYMBOL(dput);

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index fe1f50fe764f..f97110461c19 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb,
memset(bh->b_data, 0, sb->s_blocksize);

bit_max = ext4_num_base_meta_clusters(sb, block_group);
+ if ((bit_max >> 3) >= bh->b_size)
+ return -EFSCORRUPTED;
+
for (bit = 0; bit < bit_max; bit++)
ext4_set_bit(bit, bh->b_data);

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index de692b91c166..8211698600c2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
- ext4_lblk_t last = lblock + len - 1;

- if (len == 0 || lblock > last)
+ /*
+ * We allow neither:
+ * - zero length
+ * - overflow/wrap-around
+ */
+ if (lblock + len <= lblock)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 250c2df04a92..58197a7c3c2c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inode)
* Note that directories do not have this problem because they
* don't use page cache.
*/
- if (ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
- inode->i_ino != EXT4_JOURNAL_INO) {
+ if (inode->i_ino != EXT4_JOURNAL_INO &&
+ ext4_should_journal_data(inode) &&
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;

@@ -2741,13 +2741,36 @@ retry:
done = true;
}
}
- ext4_journal_stop(handle);
+ /*
+ * Caution: If the handle is synchronous,
+ * ext4_journal_stop() can wait for transaction commit
+ * to finish which may depend on writeback of pages to
+ * complete or on page lock to be released. In that
+ * case, we have to wait until after after we have
+ * submitted all the IO, released page locks we hold,
+ * and dropped io_end reference (for extent conversion
+ * to be able to complete) before stopping the handle.
+ */
+ if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
+ ext4_journal_stop(handle);
+ handle = NULL;
+ }
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, give_up_on_write);
- /* Drop our io_end reference we got from init */
- ext4_put_io_end(mpd.io_submit.io_end);
+ /*
+ * Drop our io_end reference we got from init. We have
+ * to be careful and use deferred io_end finishing if
+ * we are still holding the transaction as we can
+ * release the last reference to io_end which may end
+ * up doing unwritten extent conversion.
+ */
+ if (handle) {
+ ext4_put_io_end_defer(mpd.io_submit.io_end);
+ ext4_journal_stop(handle);
+ } else
+ ext4_put_io_end(mpd.io_submit.io_end);

if (ret == -ENOSPC && sbi->s_journal) {
/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9d26fa2188f6..5f7ae0898ef7 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2939,7 +2939,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"fs metadata", block, block+len);
/* File system mounted not to panic on error
- * Fix the bitmap and repeat the block allocation
+ * Fix the bitmap and return EFSCORRUPTED
* We leak some of the blocks here.
*/
ext4_lock_group(sb, ac->ac_b_ex.fe_group);
@@ -2948,7 +2948,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!err)
- err = -EAGAIN;
+ err = -EFSCORRUPTED;
goto out_err;
}

@@ -4513,18 +4513,7 @@ repeat:
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
- if (*errp == -EAGAIN) {
- /*
- * drop the reference that we took
- * in ext4_mb_use_best_found
- */
- ext4_mb_release_context(ac);
- ac->ac_b_ex.fe_group = 0;
- ac->ac_b_ex.fe_start = 0;
- ac->ac_b_ex.fe_len = 0;
- ac->ac_status = AC_STATUS_CONTINUE;
- goto repeat;
- } else if (*errp) {
+ if (*errp) {
ext4_discard_allocated_blocks(ac);
goto errout;
} else {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 304c712dbe12..7fca76b6cd61 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2277,6 +2277,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
while (es->s_last_orphan) {
struct inode *inode;

+ /*
+ * We may have encountered an error during cleanup; if
+ * so, skip the rest.
+ */
+ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
+ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+ es->s_last_orphan = 0;
+ break;
+ }
+
inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
if (IS_ERR(inode)) {
es->s_last_orphan = 0;
@@ -3415,6 +3425,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}

+ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
+ ext4_msg(sb, KERN_ERR,
+ "Number of reserved GDT blocks insanely large: %d",
+ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
+ goto failed_mount;
+ }
+
if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
if (blocksize != PAGE_SIZE) {
ext4_msg(sb, KERN_ERR,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index dcad5e210525..3c7675fa664f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
fuse_sync_writes(inode);
inode_unlock(inode);

+ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
+ err = -ENOSPC;
+ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
+ err = -EIO;
+ if (err)
+ return err;
+
req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
@@ -462,6 +471,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
goto out;

fuse_sync_writes(inode);
+
+ /*
+ * Due to implementation of fuse writeback
+ * filemap_write_and_wait_range() does not catch errors.
+ * We have to do this directly after fuse_sync_writes()
+ */
+ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
+ err = -ENOSPC;
+ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
+ err = -EIO;
+ if (err)
+ goto out;
+
err = sync_inode_metadata(inode, 1);
if (err)
goto out;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1ce67668a8e1..d302a5fff9ba 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+ FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
req->in.h.opcode = FUSE_INIT;
diff --git a/fs/inode.c b/fs/inode.c
index 721fa18ead59..fd832eba4596 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -344,7 +344,7 @@ EXPORT_SYMBOL(inc_nlink);
void address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
spin_lock_init(&mapping->tree_lock);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 36661acaf33b..5e2c8c814e1b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -78,6 +78,15 @@
#define VSD_FIRST_SECTOR_OFFSET 32768
#define VSD_MAX_SECTOR_OFFSET 0x800000

+/*
+ * Maximum number of Terminating Descriptor / Logical Volume Integrity
+ * Descriptor redirections. The chosen numbers are arbitrary - just that we
+ * hopefully don't limit any real use of rewritten inode on write-once media
+ * but avoid looping for too long on corrupted media.
+ */
+#define UDF_MAX_TD_NESTING 64
+#define UDF_MAX_LVID_NESTING 1000
+
enum { UDF_MAX_LINKS = 0xffff };

/* These are the "meat" - everything else is stuffing */
@@ -1541,42 +1550,52 @@ out_bh:
}

/*
- * udf_load_logicalvolint
- *
+ * Find the prevailing Logical Volume Integrity Descriptor.
*/
static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc)
{
- struct buffer_head *bh = NULL;
+ struct buffer_head *bh, *final_bh;
uint16_t ident;
struct udf_sb_info *sbi = UDF_SB(sb);
struct logicalVolIntegrityDesc *lvid;
+ int indirections = 0;
+
+ while (++indirections <= UDF_MAX_LVID_NESTING) {
+ final_bh = NULL;
+ while (loc.extLength > 0 &&
+ (bh = udf_read_tagged(sb, loc.extLocation,
+ loc.extLocation, &ident))) {
+ if (ident != TAG_IDENT_LVID) {
+ brelse(bh);
+ break;
+ }
+
+ brelse(final_bh);
+ final_bh = bh;

- while (loc.extLength > 0 &&
- (bh = udf_read_tagged(sb, loc.extLocation,
- loc.extLocation, &ident)) &&
- ident == TAG_IDENT_LVID) {
- sbi->s_lvid_bh = bh;
- lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+ loc.extLength -= sb->s_blocksize;
+ loc.extLocation++;
+ }

- if (lvid->nextIntegrityExt.extLength)
- udf_load_logicalvolint(sb,
- leea_to_cpu(lvid->nextIntegrityExt));
+ if (!final_bh)
+ return;

- if (sbi->s_lvid_bh != bh)
- brelse(bh);
- loc.extLength -= sb->s_blocksize;
- loc.extLocation++;
+ brelse(sbi->s_lvid_bh);
+ sbi->s_lvid_bh = final_bh;
+
+ lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data;
+ if (lvid->nextIntegrityExt.extLength == 0)
+ return;
+
+ loc = leea_to_cpu(lvid->nextIntegrityExt);
}
- if (sbi->s_lvid_bh != bh)
- brelse(bh);
+
+ udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n",
+ UDF_MAX_LVID_NESTING);
+ brelse(sbi->s_lvid_bh);
+ sbi->s_lvid_bh = NULL;
}

-/*
- * Maximum number of Terminating Descriptor redirections. The chosen number is
- * arbitrary - just that we hopefully don't limit any real use of rewritten
- * inode on write-once media but avoid looping for too long on corrupted media.
- */
-#define UDF_MAX_TD_NESTING 64

/*
* Process a main/reserve volume descriptor sequence.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 78181a88903b..54355a7e46de 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4156,6 +4156,13 @@ static inline void netif_keep_dst(struct net_device *dev)
dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
}

+/* return true if dev can't cope with mtu frames that need vlan tag insertion */
+static inline bool netif_reduces_vlan_mtu(struct net_device *dev)
+{
+ /* TODO: reserve and use an additional IFF bit, if we get more users */
+ return dev->priv_flags & IFF_MACSEC;
+}
+
extern struct pernet_operations __net_initdata loopback_net_ops;

/* Logging, debugging and troubleshooting/diagnostic helpers. */
diff --git a/ipc/msg.c b/ipc/msg.c
index 1471db9a7e61..c6521c205cb4 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
rcu_read_lock();
ipc_lock_object(&msq->q_perm);

- ipc_rcu_putref(msq, ipc_rcu_free);
+ ipc_rcu_putref(msq, msg_rcu_free);
/* raced with RMID? */
if (!ipc_valid_object(&msq->q_perm)) {
err = -EIDRM;
diff --git a/ipc/sem.c b/ipc/sem.c
index b3757ea0694b..5d2f875e8e2e 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -449,7 +449,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
static inline void sem_lock_and_putref(struct sem_array *sma)
{
sem_lock(sma, NULL, -1);
- ipc_rcu_putref(sma, ipc_rcu_free);
+ ipc_rcu_putref(sma, sem_rcu_free);
}

static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -1392,7 +1392,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
rcu_read_unlock();
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if (sem_io == NULL) {
- ipc_rcu_putref(sma, ipc_rcu_free);
+ ipc_rcu_putref(sma, sem_rcu_free);
return -ENOMEM;
}

@@ -1426,20 +1426,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
if (nsems > SEMMSL_FAST) {
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if (sem_io == NULL) {
- ipc_rcu_putref(sma, ipc_rcu_free);
+ ipc_rcu_putref(sma, sem_rcu_free);
return -ENOMEM;
}
}

if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
- ipc_rcu_putref(sma, ipc_rcu_free);
+ ipc_rcu_putref(sma, sem_rcu_free);
err = -EFAULT;
goto out_free;
}

for (i = 0; i < nsems; i++) {
if (sem_io[i] > SEMVMX) {
- ipc_rcu_putref(sma, ipc_rcu_free);
+ ipc_rcu_putref(sma, sem_rcu_free);
err = -ERANGE;
goto out_free;
}
@@ -1731,7 +1731,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
/* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
if (!new) {
- ipc_rcu_putref(sma, ipc_rcu_free);
+ ipc_rcu_putref(sma, sem_rcu_free);
return ERR_PTR(-ENOMEM);
}

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1624c4117961..9b9be3ffa1f6 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -228,10 +228,11 @@ radix_tree_node_alloc(struct radix_tree_root *root)

/*
* Even if the caller has preloaded, try to allocate from the
- * cache first for the new node to get accounted.
+ * cache first for the new node to get accounted to the memory
+ * cgroup.
*/
ret = kmem_cache_alloc(radix_tree_node_cachep,
- gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN);
+ gfp_mask | __GFP_NOWARN);
if (ret)
goto out;

@@ -254,8 +255,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
kmemleak_update_trace(ret);
goto out;
}
- ret = kmem_cache_alloc(radix_tree_node_cachep,
- gfp_mask | __GFP_ACCOUNT);
+ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
BUG_ON(radix_tree_is_indirect_ptr(ret));
return ret;
@@ -302,6 +302,12 @@ static int __radix_tree_preload(gfp_t gfp_mask)
struct radix_tree_node *node;
int ret = -ENOMEM;

+ /*
+ * Nodes preloaded by one cgroup can be be used by another cgroup, so
+ * they should never be accounted to any particular memory cgroup.
+ */
+ gfp_mask &= ~__GFP_ACCOUNT;
+
preempt_disable();
rtp = this_cpu_ptr(&radix_tree_preloads);
while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bf860dbdd26e..eb2d761a83a2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4064,14 +4064,32 @@ static struct cftype mem_cgroup_legacy_files[] = {

static DEFINE_IDR(mem_cgroup_idr);

-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
{
- atomic_inc(&memcg->id.ref);
+ atomic_add(n, &memcg->id.ref);
}

-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
{
- if (atomic_dec_and_test(&memcg->id.ref)) {
+ while (!atomic_inc_not_zero(&memcg->id.ref)) {
+ /*
+ * The root cgroup cannot be destroyed, so it's refcount must
+ * always be >= 1.
+ */
+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+ VM_BUG_ON(1);
+ break;
+ }
+ memcg = parent_mem_cgroup(memcg);
+ if (!memcg)
+ memcg = root_mem_cgroup;
+ }
+ return memcg;
+}
+
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+{
+ if (atomic_sub_and_test(n, &memcg->id.ref)) {
idr_remove(&mem_cgroup_idr, memcg->id.id);
memcg->id.id = 0;

@@ -4080,6 +4098,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg)
}
}

+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+ mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+ mem_cgroup_id_put_many(memcg, 1);
+}
+
/**
* mem_cgroup_from_id - look up a memcg from a memcg id
* @id: the memcg id to look up
@@ -4716,6 +4744,8 @@ static void __mem_cgroup_clear_mc(void)
if (!mem_cgroup_is_root(mc.from))
page_counter_uncharge(&mc.from->memsw, mc.moved_swap);

+ mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
/*
* we charged both to->memory and to->memsw, so we
* should uncharge to->memory.
@@ -4723,9 +4753,9 @@ static void __mem_cgroup_clear_mc(void)
if (!mem_cgroup_is_root(mc.to))
page_counter_uncharge(&mc.to->memory, mc.moved_swap);

- css_put_many(&mc.from->css, mc.moved_swap);
+ mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+ css_put_many(&mc.to->css, mc.moved_swap);

- /* we've already done css_get(mc.to) */
mc.moved_swap = 0;
}
memcg_oom_recover(from);
@@ -5785,7 +5815,7 @@ subsys_initcall(mem_cgroup_init);
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *swap_memcg;
unsigned short oldid;

VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -5800,16 +5830,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (!memcg)
return;

- mem_cgroup_id_get(memcg);
- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ /*
+ * In case the memcg owning these pages has been offlined and doesn't
+ * have an ID allocated to it anymore, charge the closest online
+ * ancestor for the swap instead and transfer the memory+swap charge.
+ */
+ swap_memcg = mem_cgroup_id_get_online(memcg);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(memcg, true);
+ mem_cgroup_swap_statistics(swap_memcg, true);

page->mem_cgroup = NULL;

if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);

+ if (memcg != swap_memcg) {
+ if (!mem_cgroup_is_root(swap_memcg))
+ page_counter_charge(&swap_memcg->memsw, 1);
+ page_counter_uncharge(&memcg->memsw, 1);
+ }
+
/*
* Interrupts should be disabled here because the caller holds the
* mapping->tree_lock lock which is taken with interrupts-off. It is
@@ -5848,11 +5889,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
if (!memcg)
return 0;

+ memcg = mem_cgroup_id_get_online(memcg);
+
if (!mem_cgroup_is_root(memcg) &&
- !page_counter_try_charge(&memcg->swap, 1, &counter))
+ !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+ mem_cgroup_id_put(memcg);
return -ENOMEM;
+ }

- mem_cgroup_id_get(memcg);
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
mem_cgroup_swap_statistics(memcg, true);
diff --git a/mm/mempool.c b/mm/mempool.c
index 9b7a14a791cc..fd3a393b9eea 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -310,7 +310,7 @@ EXPORT_SYMBOL(mempool_resize);
* returns NULL. Note that due to preallocation, this function
* *never* fails when called from process contexts. (it might
* fail if called from an IRQ context.)
- * Note: neither __GFP_NOMEMALLOC nor __GFP_ZERO are supported.
+ * Note: using __GFP_ZERO is not supported.
*/
void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
@@ -319,27 +319,16 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
wait_queue_t wait;
gfp_t gfp_temp;

- /* If oom killed, memory reserves are essential to prevent livelock */
- VM_WARN_ON_ONCE(gfp_mask & __GFP_NOMEMALLOC);
- /* No element size to zero on allocation */
VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
-
might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);

+ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
gfp_mask |= __GFP_NOWARN; /* failures are OK */

gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);

repeat_alloc:
- if (likely(pool->curr_nr)) {
- /*
- * Don't allocate from emergency reserves if there are
- * elements available. This check is racy, but it will
- * be rechecked each loop.
- */
- gfp_temp |= __GFP_NOMEMALLOC;
- }

element = pool->alloc(gfp_temp, pool->pool_data);
if (likely(element != NULL))
@@ -363,12 +352,11 @@ repeat_alloc:
* We use gfp mask w/o direct reclaim or IO for the first round. If
* alloc failed with that and @pool was empty, retry immediately.
*/
- if ((gfp_temp & ~__GFP_NOMEMALLOC) != gfp_mask) {
+ if (gfp_temp != gfp_mask) {
spin_unlock_irqrestore(&pool->lock, flags);
gfp_temp = gfp_mask;
goto repeat_alloc;
}
- gfp_temp = gfp_mask;

/* We must not sleep if !__GFP_DIRECT_RECLAIM */
if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e7e62570bdb8..3a573a2dcee2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -146,10 +146,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,

static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
{
- /* TODO: gotta make sure the underlying layer can handle it,
- * maybe an IFF_VLAN_CAPABLE flag for devices?
- */
- if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+ unsigned int max_mtu = real_dev->mtu;
+
+ if (netif_reduces_vlan_mtu(real_dev))
+ max_mtu -= VLAN_HLEN;
+ if (max_mtu < new_mtu)
return -ERANGE;

dev->mtu = new_mtu;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c92b52f37d38..1270207f3d7c 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -118,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev;
+ unsigned int max_mtu;
__be16 proto;
int err;

@@ -144,9 +145,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;

+ max_mtu = netif_reduces_vlan_mtu(real_dev) ? real_dev->mtu - VLAN_HLEN :
+ real_dev->mtu;
if (!tb[IFLA_MTU])
- dev->mtu = real_dev->mtu;
- else if (dev->mtu > real_dev->mtu)
+ dev->mtu = max_mtu;
+ else if (dev->mtu > max_mtu)
return -EINVAL;

err = vlan_changelink(dev, tb, data);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 160797722228..b32f5a4750bf 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -213,6 +213,16 @@ drop:
}
EXPORT_SYMBOL_GPL(br_handle_frame_finish);

+static void __br_handle_local_finish(struct sk_buff *skb)
+{
+ struct net_bridge_port *p = br_port_get_rcu(skb->dev);
+ u16 vid = 0;
+
+ /* check if vlan is allowed, to avoid spoofing */
+ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
+ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
+}
+
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -279,6 +289,14 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
case 0x01: /* IEEE MAC (Pause) */
goto drop;

+ case 0x0E: /* 802.1AB LLDP */
+ fwd_mask |= p->br->group_fwd_mask;
+ if (fwd_mask & (1u << dest[5]))
+ goto forward;
+ *pskb = skb;
+ __br_handle_local_finish(skb);
+ return RX_HANDLER_PASS;
+
default:
/* Allow selective forwarding for most other protocols */
fwd_mask |= p->br->group_fwd_mask;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c124c3c12f7c..e2e78843301c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -87,7 +87,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);

/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 100;
+int sysctl_tcp_challenge_ack_limit = 1000;

int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
@@ -3423,6 +3423,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
return flag;
}

+static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+ u32 *last_oow_ack_time)
+{
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ NET_INC_STATS_BH(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+ }
+
+ *last_oow_ack_time = tcp_time_stamp;
+
+ return false; /* not rate-limited: go ahead, send dupack now! */
+}
+
/* Return true if we're currently rate-limiting out-of-window ACKs and
* thus shouldn't send a dupack right now. We rate-limit dupacks in
* response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
@@ -3436,21 +3453,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
/* Data packets without SYNs are not likely part of an ACK loop. */
if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
!tcp_hdr(skb)->syn)
- goto not_rate_limited;
-
- if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
-
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
- NET_INC_STATS_BH(net, mib_idx);
- return true; /* rate-limited: don't send yet! */
- }
- }
-
- *last_oow_ack_time = tcp_time_stamp;
+ return false;

-not_rate_limited:
- return false; /* not rate-limited: go ahead, send dupack now! */
+ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
}

/* RFC 5961 7 [ACK Throttling] */
@@ -3460,21 +3465,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
static u32 challenge_timestamp;
static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
- u32 now;
+ u32 count, now;

/* First check our per-socket dupack rate limit. */
- if (tcp_oow_rate_limited(sock_net(sk), skb,
- LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
- &tp->last_oow_ack_time))
+ if (__tcp_oow_rate_limited(sock_net(sk),
+ LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+ &tp->last_oow_ack_time))
return;

- /* Then check the check host-wide RFC 5961 rate limit. */
+ /* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
+ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+
challenge_timestamp = now;
- challenge_count = 0;
+ WRITE_ONCE(challenge_count, half +
+ prandom_u32_max(sysctl_tcp_challenge_ack_limit));
}
- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
+ count = READ_ONCE(challenge_count);
+ if (count > 0) {
+ WRITE_ONCE(challenge_count, count - 1);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 79a03b87a771..7b8e903b2a97 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -236,7 +236,8 @@ void tcp_select_initial_window(int __space, __u32 mss,
/* Set window scaling on max possible window
* See RFC1323 for an explanation of the limit to 14
*/
- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+ space = max_t(u32, space, sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > 65535 && (*rcv_wscale) < 14) {
space >>= 1;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8ec4b3089e20..e3fc0cdf82a9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3563,6 +3563,10 @@ restart:
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
+ } else {
+ if (idev->cnf.forwarding)
+ addrconf_leave_anycast(ifa);
+ addrconf_leave_solict(ifa->idev, &ifa->addr);
}

write_lock_bh(&idev->lock);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 923abd6b3064..8d2f7c9b491d 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
}

/* Check if we have opened a local TSAP */
- if (!self->tsap)
- irda_open_tsap(self, LSAP_ANY, addr->sir_name);
+ if (!self->tsap) {
+ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
+ if (err)
+ goto out;
+ }

/* Move to connecting socket, start sending Connect Requests */
sock->state = SS_CONNECTING;
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index ad4fa49ad1db..9068369f8a1b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -331,6 +331,7 @@ static int aa_fs_seq_hash_show(struct seq_file *seq, void *v)
seq_printf(seq, "%.2x", profile->hash[i]);
seq_puts(seq, "\n");
}
+ aa_put_profile(profile);

return 0;
}