[RFC][PATCH -v2 2/4] locking/mutex: Rework mutex::owner

From: Peter Zijlstra
Date: Thu Aug 25 2016 - 14:47:18 EST


There are a number of iffy bits in the mutex code because mutex::count and
mutex::owner are two different fields; this is also the reason
MUTEX_SPIN_ON_OWNER and DEBUG_MUTEXES are mutually exclusive.

Cure this by folding them into a single atomic_long_t field.

This necessarily kills all the architecture-specific mutex code.
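
For illustration, the new lock word packs the owner task pointer together
with two low flag bits into a single atomic_long_t. A minimal userspace
C11 sketch of that encoding (the helper names mirror the patch below; the
key assumption is that task_struct pointers are at least 4-byte aligned,
leaving bits 0-1 free):

  #include <stdatomic.h>
  #include <stdint.h>
  #include <assert.h>

  #define MUTEX_FLAG_WAITERS	0x01UL
  #define MUTEX_FLAGS		0x03UL

  struct task { long pad; };	/* stand-in for struct task_struct */

  static struct task *owner_task(unsigned long owner)
  {
  	return (struct task *)(owner & ~MUTEX_FLAGS);
  }

  static unsigned long owner_flags(unsigned long owner)
  {
  	return owner & MUTEX_FLAGS;
  }

  int main(void)
  {
  	static struct task t;
  	atomic_ulong owner = 0;		/* 0: unlocked, no flags */

  	assert(((uintptr_t)&t & MUTEX_FLAGS) == 0);

  	/* lock: install the owner pointer */
  	atomic_store(&owner, (unsigned long)&t);
  	/* a blocked waiter marks contention without touching the pointer */
  	atomic_fetch_or(&owner, MUTEX_FLAG_WAITERS);

  	assert(owner_task(atomic_load(&owner)) == &t);
  	assert(owner_flags(atomic_load(&owner)) == MUTEX_FLAG_WAITERS);
  	return 0;
  }

Because both halves live in one word, a single atomic operation can
observe or update owner and contention state together, which is what
makes combining MUTEX_SPIN_ON_OWNER and DEBUG_MUTEXES possible again.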

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/alpha/include/asm/mutex.h | 9 -
arch/arc/include/asm/mutex.h | 18 --
arch/arm/include/asm/mutex.h | 21 --
arch/arm64/include/asm/Kbuild | 1
arch/avr32/include/asm/mutex.h | 9 -
arch/blackfin/include/asm/Kbuild | 1
arch/c6x/include/asm/mutex.h | 6
arch/cris/include/asm/mutex.h | 9 -
arch/frv/include/asm/mutex.h | 9 -
arch/h8300/include/asm/mutex.h | 9 -
arch/hexagon/include/asm/mutex.h | 8
arch/ia64/include/asm/mutex.h | 90 ----------
arch/m32r/include/asm/mutex.h | 9 -
arch/m68k/include/asm/Kbuild | 1
arch/metag/include/asm/Kbuild | 1
arch/microblaze/include/asm/mutex.h | 1
arch/mips/include/asm/Kbuild | 1
arch/mn10300/include/asm/mutex.h | 16 -
arch/nios2/include/asm/mutex.h | 1
arch/openrisc/include/asm/mutex.h | 27 ---
arch/parisc/include/asm/Kbuild | 1
arch/powerpc/include/asm/mutex.h | 132 ---------------
arch/s390/include/asm/mutex.h | 9 -
arch/score/include/asm/mutex.h | 6
arch/sh/include/asm/mutex-llsc.h | 109 ------------
arch/sh/include/asm/mutex.h | 12 -
arch/sparc/include/asm/Kbuild | 1
arch/tile/include/asm/Kbuild | 1
arch/um/include/asm/Kbuild | 1
arch/unicore32/include/asm/mutex.h | 20 --
arch/x86/include/asm/mutex.h | 5
arch/x86/include/asm/mutex_32.h | 110 ------------
arch/x86/include/asm/mutex_64.h | 127 --------------
arch/xtensa/include/asm/mutex.h | 9 -
include/asm-generic/mutex-dec.h | 88 ----------
include/asm-generic/mutex-null.h | 19 --
include/asm-generic/mutex-xchg.h | 120 --------------
include/asm-generic/mutex.h | 9 -
include/linux/mutex-debug.h | 24 --
include/linux/mutex.h | 46 +++--
kernel/locking/mutex-debug.c | 13 -
kernel/locking/mutex-debug.h | 10 -
kernel/locking/mutex.c | 307 ++++++++++++++----------------------
kernel/locking/mutex.h | 26 ---
kernel/sched/core.c | 2
45 files changed, 155 insertions(+), 1299 deletions(-)

--- a/arch/alpha/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/arc/include/asm/mutex.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * xchg() based mutex fast path maintains a state of 0 or 1, as opposed to
- * atomic dec based which can "count" any number of lock contenders.
- * This ideally needs to be fixed in core, but for now switching to dec ver.
- */
-#if defined(CONFIG_SMP) && (CONFIG_NR_CPUS > 2)
-#include <asm-generic/mutex-dec.h>
-#else
-#include <asm-generic/mutex-xchg.h>
-#endif
--- a/arch/arm/include/asm/mutex.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * arch/arm/include/asm/mutex.h
- *
- * ARM optimized mutex locking primitives
- *
- * Please look into asm-generic/mutex-xchg.h for a formal definition.
- */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
-/*
- * On pre-ARMv6 hardware this results in a swp-based implementation,
- * which is the most efficient. For ARMv6+, we have exclusive memory
- * accessors and use atomic_dec to avoid the extra xchg operations
- * on the locking slowpaths.
- */
-#if __LINUX_ARM_ARCH__ < 6
-#include <asm-generic/mutex-xchg.h>
-#else
-#include <asm-generic/mutex-dec.h>
-#endif
-#endif /* _ASM_MUTEX_H */
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -26,7 +26,6 @@ generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += msgbuf.h
generic-y += msi.h
-generic-y += mutex.h
generic-y += pci.h
generic-y += poll.h
generic-y += preempt.h
--- a/arch/avr32/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -24,7 +24,6 @@ generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += msgbuf.h
-generic-y += mutex.h
generic-y += param.h
generic-y += percpu.h
generic-y += pgalloc.h
--- a/arch/c6x/include/asm/mutex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_C6X_MUTEX_H
-#define _ASM_C6X_MUTEX_H
-
-#include <asm-generic/mutex-null.h>
-
-#endif /* _ASM_C6X_MUTEX_H */
--- a/arch/cris/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/frv/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/h8300/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/hexagon/include/asm/mutex.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-#include <asm-generic/mutex-xchg.h>
--- a/arch/ia64/include/asm/mutex.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * ia64 implementation of the mutex fastpath.
- *
- * Copyright (C) 2006 Ken Chen <kenneth.w.chen@xxxxxxxxx>
- *
- */
-
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
- fail_fn(count);
-}
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
- return -1;
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the count from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, then the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- int ret = ia64_fetchadd4_rel(count, 1);
- if (unlikely(ret < 0))
- fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- *
- * If the architecture has no effective trylock variant, it should call the
- * <fail_fn> spinlock-based trylock variant unconditionally.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
- return 1;
- return 0;
-}
-
-#endif
--- a/arch/m32r/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
-generic-y += mutex.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += resource.h
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -27,7 +27,6 @@ generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msgbuf.h
-generic-y += mutex.h
generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
--- a/arch/microblaze/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -9,7 +9,6 @@ generic-y += irq_work.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
-generic-y += mutex.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
--- a/arch/mn10300/include/asm/mutex.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 Mutex fastpath
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@xxxxxxxxxx)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- *
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-#include <asm-generic/mutex-null.h>
--- a/arch/nios2/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
--- a/arch/openrisc/include/asm/mutex.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * OpenRISC Linux
- *
- * Linux architectural port borrowing liberally from similar works of
- * others. All original copyrights apply as per the original source
- * declaration.
- *
- * OpenRISC implementation:
- * Copyright (C) 2003 Matjaz Breskvar <phoenix@xxxxxxxxx>
- * Copyright (C) 2010-2011 Jonas Bonn <jonas@xxxxxxxxxxxx>
- * et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
-generic-y += mutex.h
generic-y += param.h
generic-y += percpu.h
generic-y += poll.h
--- a/arch/powerpc/include/asm/mutex.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm
- */
-#ifndef _ASM_POWERPC_MUTEX_H
-#define _ASM_POWERPC_MUTEX_H
-
-static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new)
-{
- int t;
-
- __asm__ __volatile__ (
-"1: lwarx %0,0,%1 # mutex trylock\n\
- cmpw 0,%0,%2\n\
- bne- 2f\n"
- PPC405_ERR77(0,%1)
-" stwcx. %3,0,%1\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- "\n\
-2:"
- : "=&r" (t)
- : "r" (&v->counter), "r" (old), "r" (new)
- : "cc", "memory");
-
- return t;
-}
-
-static inline int __mutex_dec_return_lock(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%1 # mutex lock\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "memory");
-
- return t;
-}
-
-static inline int __mutex_inc_return_unlock(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%1 # mutex unlock\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "memory");
-
- return t;
-}
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(__mutex_dec_return_lock(count) < 0))
- fail_fn(count);
-}
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(__mutex_dec_return_lock(count) < 0))
- return -1;
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the count from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(__mutex_inc_return_unlock(count) <= 0))
- fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to 0, and return 1 (success), or if the count
- * was not 1, then return 0 (failure).
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
- return 1;
- return 0;
-}
-
-#endif
--- a/arch/s390/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/score/include/asm/mutex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_MUTEX_H
-#define _ASM_SCORE_MUTEX_H
-
-#include <asm-generic/mutex-dec.h>
-
-#endif /* _ASM_SCORE_MUTEX_H */
--- a/arch/sh/include/asm/mutex-llsc.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * arch/sh/include/asm/mutex-llsc.h
- *
- * SH-4A optimized mutex locking primitives
- *
- * Please look into asm-generic/mutex-xchg.h for a formal definition.
- */
-#ifndef __ASM_SH_MUTEX_LLSC_H
-#define __ASM_SH_MUTEX_LLSC_H
-
-/*
- * Attempting to lock a mutex on SH4A is done like in ARMv6+ architecure.
- * with a bastardized atomic decrement (it is not a reliable atomic decrement
- * but it satisfies the defined semantics for our purpose, while being
- * smaller and faster than a real atomic decrement or atomic swap.
- * The idea is to attempt decrementing the lock value only once. If once
- * decremented it isn't zero, or if its store-back fails due to a dispute
- * on the exclusive store, we simply bail out immediately through the slow
- * path where the lock will be reattempted until it succeeds.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- int __done, __res;
-
- __asm__ __volatile__ (
- "movli.l @%2, %0 \n"
- "add #-1, %0 \n"
- "movco.l %0, @%2 \n"
- "movt %1 \n"
- : "=&z" (__res), "=&r" (__done)
- : "r" (&(count)->counter)
- : "t");
-
- if (unlikely(!__done || __res != 0))
- fail_fn(count);
-}
-
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
- int __done, __res;
-
- __asm__ __volatile__ (
- "movli.l @%2, %0 \n"
- "add #-1, %0 \n"
- "movco.l %0, @%2 \n"
- "movt %1 \n"
- : "=&z" (__res), "=&r" (__done)
- : "r" (&(count)->counter)
- : "t");
-
- if (unlikely(!__done || __res != 0))
- __res = -1;
-
- return __res;
-}
-
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- int __done, __res;
-
- __asm__ __volatile__ (
- "movli.l @%2, %0 \n\t"
- "add #1, %0 \n\t"
- "movco.l %0, @%2 \n\t"
- "movt %1 \n\t"
- : "=&z" (__res), "=&r" (__done)
- : "r" (&(count)->counter)
- : "t");
-
- if (unlikely(!__done || __res <= 0))
- fail_fn(count);
-}
-
-/*
- * If the unlock was done on a contended lock, or if the unlock simply fails
- * then the mutex remains locked.
- */
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/*
- * For __mutex_fastpath_trylock we do an atomic decrement and check the
- * result and put it in the __res variable.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- int __res, __orig;
-
- __asm__ __volatile__ (
- "1: movli.l @%2, %0 \n\t"
- "dt %0 \n\t"
- "movco.l %0,@%2 \n\t"
- "bf 1b \n\t"
- "cmp/eq #0,%0 \n\t"
- "bt 2f \n\t"
- "mov #0, %1 \n\t"
- "bf 3f \n\t"
- "2: mov #1, %1 \n\t"
- "3: "
- : "=&z" (__orig), "=&r" (__res)
- : "r" (&count->counter)
- : "t");
-
- return __res;
-}
-#endif /* __ASM_SH_MUTEX_LLSC_H */
--- a/arch/sh/include/asm/mutex.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-#if defined(CONFIG_CPU_SH4A)
-#include <asm/mutex-llsc.h>
-#else
-#include <asm-generic/mutex-dec.h>
-#endif
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += module.h
-generic-y += mutex.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += serial.h
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -21,7 +21,6 @@ generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msgbuf.h
-generic-y += mutex.h
generic-y += param.h
generic-y += parport.h
generic-y += poll.h
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
-generic-y += mutex.h
generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
--- a/arch/unicore32/include/asm/mutex.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * linux/arch/unicore32/include/asm/mutex.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * UniCore optimized mutex locking primitives
- *
- * Please look into asm-generic/mutex-xchg.h for a formal definition.
- */
-#ifndef __UNICORE_MUTEX_H__
-#define __UNICORE_MUTEX_H__
-
-# include <asm-generic/mutex-xchg.h>
-#endif
--- a/arch/x86/include/asm/mutex.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_X86_32
-# include <asm/mutex_32.h>
-#else
-# include <asm/mutex_64.h>
-#endif
--- a/arch/x86/include/asm/mutex_32.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@xxxxxxxxxx>
- */
-#ifndef _ASM_X86_MUTEX_32_H
-#define _ASM_X86_MUTEX_32_H
-
-#include <asm/alternative.h>
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fn> if it
- * wasn't 1 originally. This function MUST leave the value lower than 1
- * even when the "1" assertion wasn't true.
- */
-#define __mutex_fastpath_lock(count, fail_fn) \
-do { \
- unsigned int dummy; \
- \
- typecheck(atomic_t *, count); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " decl (%%eax)\n" \
- " jns 1f \n" \
- " call " #fail_fn "\n" \
- "1:\n" \
- : "=a" (dummy) \
- : "a" (count) \
- : "memory", "ecx", "edx"); \
-} while (0)
-
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_dec_return(count) < 0))
- return -1;
- else
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * try to promote the mutex from 0 to 1. if it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value
- * to 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-#define __mutex_fastpath_unlock(count, fail_fn) \
-do { \
- unsigned int dummy; \
- \
- typecheck(atomic_t *, count); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " incl (%%eax)\n" \
- " jg 1f\n" \
- " call " #fail_fn "\n" \
- "1:\n" \
- : "=a" (dummy) \
- : "a" (count) \
- : "memory", "ecx", "edx"); \
-} while (0)
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
- int (*fail_fn)(atomic_t *))
-{
- /* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
- return 1;
-
- return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_32_H */
--- a/arch/x86/include/asm/mutex_64.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@xxxxxxxxxx>
- */
-#ifndef _ASM_X86_MUTEX_64_H
-#define _ASM_X86_MUTEX_64_H
-
-/**
- * __mutex_fastpath_lock - decrement and call function if negative
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is negative
- *
- * Atomically decrements @v and calls <fail_fn> if the result is negative.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_lock(atomic_t *v,
- void (*fail_fn)(atomic_t *))
-{
- asm_volatile_goto(LOCK_PREFIX " decl %0\n"
- " jns %l[exit]\n"
- : : "m" (v->counter)
- : "memory", "cc"
- : exit);
- fail_fn(v);
-exit:
- return;
-}
-#else
-#define __mutex_fastpath_lock(v, fail_fn) \
-do { \
- unsigned long dummy; \
- \
- typecheck(atomic_t *, v); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " decl (%%rdi)\n" \
- " jns 1f \n" \
- " call " #fail_fn "\n" \
- "1:" \
- : "=D" (dummy) \
- : "D" (v) \
- : "rax", "rsi", "rdx", "rcx", \
- "r8", "r9", "r10", "r11", "memory"); \
-} while (0)
-#endif
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_dec_return(count) < 0))
- return -1;
- else
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - increment and call function if nonpositive
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is nonpositive
- *
- * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_unlock(atomic_t *v,
- void (*fail_fn)(atomic_t *))
-{
- asm_volatile_goto(LOCK_PREFIX " incl %0\n"
- " jg %l[exit]\n"
- : : "m" (v->counter)
- : "memory", "cc"
- : exit);
- fail_fn(v);
-exit:
- return;
-}
-#else
-#define __mutex_fastpath_unlock(v, fail_fn) \
-do { \
- unsigned long dummy; \
- \
- typecheck(atomic_t *, v); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " incl (%%rdi)\n" \
- " jg 1f\n" \
- " call " #fail_fn "\n" \
- "1:" \
- : "=D" (dummy) \
- : "D" (v) \
- : "rax", "rsi", "rdx", "rcx", \
- "r8", "r9", "r10", "r11", "memory"); \
-} while (0)
-#endif
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to 0 and return 1 (success), or return 0 (failure)
- * if it wasn't 1 originally. [the fallback function is never used on
- * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.]
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
- int (*fail_fn)(atomic_t *))
-{
- if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
- return 1;
-
- return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_64_H */
--- a/arch/xtensa/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/include/asm-generic/mutex-dec.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * include/asm-generic/mutex-dec.h
- *
- * Generic implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- */
-#ifndef _ASM_GENERIC_MUTEX_DEC_H
-#define _ASM_GENERIC_MUTEX_DEC_H
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(atomic_dec_return_acquire(count) < 0))
- fail_fn(count);
-}
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_dec_return_acquire(count) < 0))
- return -1;
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the count from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, then the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(atomic_inc_return_release(count) <= 0))
- fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- *
- * If the architecture has no effective trylock variant, it should call the
- * <fail_fn> spinlock-based trylock variant unconditionally.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
- return 1;
- return 0;
-}
-
-#endif
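
The kerneldoc above documents the contract every dec-based fastpath had
to honour: count is 1 when unlocked, 0 when locked, and negative when
locked with possible waiters, and a failed lock MUST leave the count
below 1 so the owner's unlock takes the wakeup slowpath. As a rough
userspace model of that protocol in C11 atomics (hypothetical names,
memory ordering simplified to the sequentially consistent default):

  #include <stdatomic.h>
  #include <stdbool.h>

  /* count: 1 unlocked, 0 locked, negative: locked, possible waiters */

  static bool fastpath_lock(atomic_int *count)
  {
  	/* 1 -> 0 acquires; any other old value leaves count < 1 */
  	return atomic_fetch_sub(count, 1) == 1;
  }

  static bool fastpath_unlock(atomic_int *count)
  {
  	/*
  	 * 0 -> 1 releases; a non-positive old value means someone
  	 * pushed the count below 0, so the slowpath must run and
  	 * wake any waiters.
  	 */
  	return atomic_fetch_add(count, 1) == 0;
  }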
--- a/include/asm-generic/mutex-null.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * include/asm-generic/mutex-null.h
- *
- * Generic implementation of the mutex fastpath, based on NOP :-)
- *
- * This is used by the mutex-debugging infrastructure, but it can also
- * be used by architectures that (for whatever reason) want to use the
- * spinlock based slowpath.
- */
-#ifndef _ASM_GENERIC_MUTEX_NULL_H
-#define _ASM_GENERIC_MUTEX_NULL_H
-
-#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count)
-#define __mutex_fastpath_lock_retval(count) (-1)
-#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count)
-#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count)
-#define __mutex_slowpath_needs_to_unlock() 1
-
-#endif
--- a/include/asm-generic/mutex-xchg.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * include/asm-generic/mutex-xchg.h
- *
- * Generic implementation of the mutex fastpath, based on xchg().
- *
- * NOTE: An xchg based implementation might be less optimal than an atomic
- * decrement/increment based implementation. If your architecture
- * has a reasonable atomic dec/inc then you should probably use
- * asm-generic/mutex-dec.h instead, or you could open-code an
- * optimized version in asm/mutex.h.
- */
-#ifndef _ASM_GENERIC_MUTEX_XCHG_H
-#define _ASM_GENERIC_MUTEX_XCHG_H
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
- * wasn't 1 originally. This function MUST leave the value lower than 1
- * even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(atomic_xchg(count, 0) != 1))
- /*
- * We failed to acquire the lock, so mark it contended
- * to ensure that any waiting tasks are woken up by the
- * unlock slow path.
- */
- if (likely(atomic_xchg_acquire(count, -1) != 1))
- fail_fn(count);
-}
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_xchg_acquire(count, 0) != 1))
- if (likely(atomic_xchg(count, -1) != 1))
- return -1;
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * try to promote the mutex from 0 to 1. if it wasn't 0, call <function>
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than one.
- * If the implementation sets it to a value of lower than one, the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(atomic_xchg_release(count, 1) != 0))
- fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock() 0
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: spinlock based trylock implementation
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- *
- * If the architecture has no effective trylock variant, it should call the
- * <fail_fn> spinlock-based trylock variant unconditionally.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- int prev;
-
- if (atomic_read(count) != 1)
- return 0;
-
- prev = atomic_xchg_acquire(count, 0);
- if (unlikely(prev < 0)) {
- /*
- * The lock was marked contended so we must restore that
- * state. If while doing so we get back a prev value of 1
- * then we just own it.
- *
- * [ In the rare case of the mutex going to 1, to 0, to -1
- * and then back to 0 in this few-instructions window,
- * this has the potential to trigger the slowpath for the
- * owner's unlock path needlessly, but that's not a problem
- * in practice. ]
- */
- prev = atomic_xchg_acquire(count, prev);
- if (prev < 0)
- prev = 0;
- }
-
- return prev;
-}
-
-#endif
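
The restore dance in the trylock above is easy to miss. One possible
schedule showing why the second xchg is needed (plain illustration, not
code from the tree):

  	CPU1 (trylock)				others			count
  	atomic_read() == 1						 1
  						CPU0 locks (xchg 0)	 0
  						CPU2 blocks (xchg -1)	-1
  	xchg(count, 0) -> prev == -1					 0
  	xchg(count, -1) -> restore, returns 0				-1
  	prev < 0, so return 0 (failure)

Without the restore, the contended (-1) marking would be lost and CPU0's
eventual unlock would skip the slowpath, leaving CPU2 asleep. This is
also the window the comment calls out where the restoring xchg can
return 1: if CPU0 had unlocked between the two exchanges, the trylock
would end up owning the mutex after all.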
--- a/include/asm-generic/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __ASM_GENERIC_MUTEX_H
-#define __ASM_GENERIC_MUTEX_H
-/*
- * Pull in the generic implementation for the mutex fastpath,
- * which is a reasonable default on many architectures.
- */
-
-#include <asm-generic/mutex-dec.h>
-#endif /* __ASM_GENERIC_MUTEX_H */
--- a/include/linux/mutex-debug.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __LINUX_MUTEX_DEBUG_H
-#define __LINUX_MUTEX_DEBUG_H
-
-#include <linux/linkage.h>
-#include <linux/lockdep.h>
-#include <linux/debug_locks.h>
-
-/*
- * Mutexes - debugging helpers:
- */
-
-#define __DEBUG_MUTEX_INITIALIZER(lockname) \
- , .magic = &lockname
-
-#define mutex_init(mutex) \
-do { \
- static struct lock_class_key __key; \
- \
- __mutex_init((mutex), #mutex, &__key); \
-} while (0)
-
-extern void mutex_destroy(struct mutex *lock);
-
-#endif
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -18,6 +18,7 @@
#include <linux/atomic.h>
#include <asm/processor.h>
#include <linux/osq_lock.h>
+#include <linux/debug_locks.h>

/*
* Simple, straightforward mutexes with strict semantics:
@@ -48,16 +49,12 @@
* locks and tasks (and only those tasks)
*/
struct mutex {
- /* 1: unlocked, 0: locked, negative: locked, possible waiters */
- atomic_t count;
+ atomic_long_t owner;
spinlock_t wait_lock;
- struct list_head wait_list;
-#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
- struct task_struct *owner;
-#endif
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
struct optimistic_spin_queue osq; /* Spinner MCS lock */
#endif
+ struct list_head wait_list;
#ifdef CONFIG_DEBUG_MUTEXES
void *magic;
#endif
@@ -66,6 +63,11 @@ struct mutex {
#endif
};

+static inline struct task_struct *__mutex_owner(struct mutex *lock)
+{
+ return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
+}
+
/*
* This is the control structure for tasks blocked on mutex,
* which resides on the blocked task's kernel stack:
@@ -79,9 +81,20 @@ struct mutex_waiter {
};

#ifdef CONFIG_DEBUG_MUTEXES
-# include <linux/mutex-debug.h>
+
+#define __DEBUG_MUTEX_INITIALIZER(lockname) \
+ , .magic = &lockname
+
+extern void mutex_destroy(struct mutex *lock);
+
#else
+
# define __DEBUG_MUTEX_INITIALIZER(lockname)
+
+static inline void mutex_destroy(struct mutex *lock) {}
+
+#endif
+
/**
* mutex_init - initialize the mutex
* @mutex: the mutex to be initialized
@@ -90,14 +103,12 @@ struct mutex_waiter {
*
* It is not allowed to initialize an already locked mutex.
*/
-# define mutex_init(mutex) \
-do { \
- static struct lock_class_key __key; \
- \
- __mutex_init((mutex), #mutex, &__key); \
+#define mutex_init(mutex) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __mutex_init((mutex), #mutex, &__key); \
} while (0)
-static inline void mutex_destroy(struct mutex *lock) {}
-#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
@@ -107,7 +118,7 @@ static inline void mutex_destroy(struct
#endif

#define __MUTEX_INITIALIZER(lockname) \
- { .count = ATOMIC_INIT(1) \
+ { .owner = ATOMIC_LONG_INIT(0) \
, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
__DEBUG_MUTEX_INITIALIZER(lockname) \
@@ -127,7 +138,10 @@ extern void __mutex_init(struct mutex *l
*/
static inline int mutex_is_locked(struct mutex *lock)
{
- return atomic_read(&lock->count) != 1;
+ /*
+ * XXX think about spin_is_locked
+ */
+ return __mutex_owner(lock) != NULL;
}

/*
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -73,21 +73,8 @@ void debug_mutex_unlock(struct mutex *lo
{
if (likely(debug_locks)) {
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-
- if (!lock->owner)
- DEBUG_LOCKS_WARN_ON(!lock->owner);
- else
- DEBUG_LOCKS_WARN_ON(lock->owner != current);
-
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
}
-
- /*
- * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
- * mutexes so that we can do it here after we've verified state.
- */
- mutex_clear_owner(lock);
- atomic_set(&lock->count, 1);
}

void debug_mutex_init(struct mutex *lock, const char *name,
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -27,16 +27,6 @@ extern void debug_mutex_unlock(struct mu
extern void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);

-static inline void mutex_set_owner(struct mutex *lock)
-{
- WRITE_ONCE(lock->owner, current);
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
- WRITE_ONCE(lock->owner, NULL);
-}
-
#define spin_lock_mutex(lock, flags) \
do { \
struct mutex *l = container_of(lock, struct mutex, wait_lock); \
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -33,35 +33,90 @@
*/
#ifdef CONFIG_DEBUG_MUTEXES
# include "mutex-debug.h"
-# include <asm-generic/mutex-null.h>
-/*
- * Must be 0 for the debug case so we do not do the unlock outside of the
- * wait_lock region. debug_mutex_unlock() will do the actual unlock in this
- * case.
- */
-# undef __mutex_slowpath_needs_to_unlock
-# define __mutex_slowpath_needs_to_unlock() 0
#else
# include "mutex.h"
-# include <asm/mutex.h>
#endif

void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
- atomic_set(&lock->count, 1);
+ atomic_long_set(&lock->owner, 0);
spin_lock_init(&lock->wait_lock);
INIT_LIST_HEAD(&lock->wait_list);
- mutex_clear_owner(lock);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
osq_lock_init(&lock->osq);
#endif

debug_mutex_init(lock, name, key);
}
-
EXPORT_SYMBOL(__mutex_init);

+#define MUTEX_FLAG_WAITERS 0x01
+
+#define MUTEX_FLAGS 0x03
+
+static inline struct task_struct *__owner_task(unsigned long owner)
+{
+ return (struct task_struct *)(owner & ~MUTEX_FLAGS);
+}
+
+static inline unsigned long __owner_flags(unsigned long owner)
+{
+ return owner & MUTEX_FLAGS;
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+ unsigned long owner, curr = (unsigned long)current;
+
+ owner = atomic_long_read(&lock->owner);
+ for (;;) { /* must loop, can race against a flag */
+ unsigned long old;
+
+ if (__owner_task(owner)) {
+ if ((unsigned long)__owner_task(owner) == curr)
+ return true;
+
+ return false;
+ }
+
+ curr |= __owner_flags(owner);
+ old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr);
+ if (old == owner)
+ return true;
+
+ owner = old;
+ }
+}
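+
+/*
+ * [A self-contained C11 model of the loop above, as comments so the
+ *  patch context stays readable; names are assumed, the kernel version
+ *  operates on lock->owner directly. The cmpxchg can fail not only
+ *  because someone took the lock, but also because a waiter flipped a
+ *  flag bit, in which case we retry with the updated word while
+ *  carrying the flags forward:
+ *
+ *	static bool trylock(atomic_ulong *owner_word, unsigned long curr)
+ *	{
+ *		unsigned long owner = atomic_load_explicit(owner_word,
+ *						memory_order_relaxed);
+ *		for (;;) {
+ *			unsigned long new;
+ *
+ *			if (owner & ~MUTEX_FLAGS)	// already owned
+ *				return (owner & ~MUTEX_FLAGS) == curr;
+ *
+ *			// claim the lock, but keep any flag bits
+ *			new = curr | (owner & MUTEX_FLAGS);
+ *			if (atomic_compare_exchange_weak_explicit(
+ *					owner_word, &owner, new,
+ *					memory_order_acquire,
+ *					memory_order_relaxed))
+ *				return true;
+ *			// 'owner' was reloaded by the failed CAS; retry
+ *		}
+ *	}
+ */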
+
+/*
+ * Optimistic trylock that only works in the uncontended case. Make sure to
+ * follow with a __mutex_trylock() before failing.
+ */
+static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
+{
+ unsigned long owner, curr = (unsigned long)current;
+
+ owner = atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr);
+ if (!owner)
+ return true;
+
+ return false;
+}
+
+static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag)
+{
+ atomic_long_or(flag, &lock->owner);
+}
+
+static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag)
+{
+ atomic_long_andnot(flag, &lock->owner);
+}
+
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* We split the mutex lock/unlock logic into separate fastpath and
@@ -69,7 +124,7 @@ EXPORT_SYMBOL(__mutex_init);
* We also put the fastpath first in the kernel image, to make sure the
* branch is predicted by the CPU as default-untaken.
*/
-__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
+static void __sched __mutex_lock_slowpath(struct mutex *lock);

/**
* mutex_lock - acquire the mutex
@@ -95,14 +150,10 @@ __visible void __sched __mutex_lock_slow
void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
- /*
- * The locking fastpath is the 1->0 transition from
- * 'unlocked' into 'locked' state.
- */
- __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
- mutex_set_owner(lock);
-}

+ if (!__mutex_trylock_fast(lock))
+ __mutex_lock_slowpath(lock);
+}
EXPORT_SYMBOL(mutex_lock);
#endif

@@ -176,7 +227,7 @@ ww_mutex_set_context_fastpath(struct ww_
/*
* Check if lock is contended, if not there is nobody to wake up
*/
- if (likely(atomic_read(&lock->base.count) == 0))
+ if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
return;

/*
@@ -227,7 +278,7 @@ bool mutex_spin_on_owner(struct mutex *l
bool ret = true;

rcu_read_lock();
- while (lock->owner == owner) {
+ while (__mutex_owner(lock) == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
* checking lock->owner still matches owner. If that fails,
@@ -260,7 +311,7 @@ static inline int mutex_can_spin_on_owne
return 0;

rcu_read_lock();
- owner = READ_ONCE(lock->owner);
+ owner = __mutex_owner(lock);
if (owner)
retval = owner->on_cpu;
rcu_read_unlock();
@@ -272,15 +323,6 @@ static inline int mutex_can_spin_on_owne
}

/*
- * Atomically try to take the lock when it is available
- */
-static inline bool mutex_try_to_acquire(struct mutex *lock)
-{
- return !mutex_is_locked(lock) &&
- (atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
-}
-
-/*
* Optimistic spinning.
*
* We try to spin for acquisition when we find that the lock owner
@@ -342,12 +384,12 @@ static bool mutex_optimistic_spin(struct
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
- owner = READ_ONCE(lock->owner);
+ owner = __mutex_owner(lock);
if (owner && !mutex_spin_on_owner(lock, owner))
break;

/* Try to acquire the mutex if it is unlocked. */
- if (mutex_try_to_acquire(lock)) {
+ if (__mutex_trylock(lock)) {
lock_acquired(&lock->dep_map, ip);

if (use_ww_ctx) {
@@ -357,7 +399,6 @@ static bool mutex_optimistic_spin(struct
ww_mutex_set_context_fastpath(ww, ww_ctx);
}

- mutex_set_owner(lock);
osq_unlock(&lock->osq);
return true;
}
@@ -406,8 +447,7 @@ static bool mutex_optimistic_spin(struct
}
#endif

-__visible __used noinline
-void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock);

/**
* mutex_unlock - release the mutex
@@ -422,21 +462,16 @@ void __sched __mutex_unlock_slowpath(ato
*/
void __sched mutex_unlock(struct mutex *lock)
{
- /*
- * The unlocking fastpath is the 0->1 transition from 'locked'
- * into 'unlocked' state:
- */
-#ifndef CONFIG_DEBUG_MUTEXES
- /*
- * When debugging is enabled we must not clear the owner before time,
- * the slow path will always be taken, and that clears the owner field
- * after verifying that it was indeed current.
- */
- mutex_clear_owner(lock);
+ unsigned long owner;
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(__mutex_owner(lock) != current);
#endif
- __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
-}

+ owner = atomic_long_fetch_and(MUTEX_FLAGS, &lock->owner);
+ if (__owner_flags(owner))
+ __mutex_unlock_slowpath(lock);
+}
EXPORT_SYMBOL(mutex_unlock);
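
In C11 terms the new unlock fastpath amounts to a single fetch-and that
NULLs the owner pointer while reporting whether any flag bits were set
(a sketch with assumed names; the kernel's atomic_long_fetch_and is a
value-returning atomic and hence fully ordered):

  /* returns the flags that were set; nonzero means take the slowpath */
  static unsigned long unlock_word(atomic_ulong *owner_word)
  {
  	unsigned long old;

  	/* keep only bits 0-1: the pointer goes away, flags survive */
  	old = atomic_fetch_and_explicit(owner_word, MUTEX_FLAGS,
  					memory_order_release);
  	return old & MUTEX_FLAGS;
  }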

/**
@@ -465,15 +500,7 @@ void __sched ww_mutex_unlock(struct ww_m
lock->ctx = NULL;
}

-#ifndef CONFIG_DEBUG_MUTEXES
- /*
- * When debugging is enabled we must not clear the owner before time,
- * the slow path will always be taken, and that clears the owner field
- * after verifying that it was indeed current.
- */
- mutex_clear_owner(&lock->base);
-#endif
- __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
+ mutex_unlock(&lock->base);
}
EXPORT_SYMBOL(ww_mutex_unlock);

@@ -520,7 +547,7 @@ __mutex_lock_common(struct mutex *lock,
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);

- if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
+ if (__mutex_trylock(lock) || mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
/* got the lock, yay! */
preempt_enable();
return 0;
@@ -529,11 +556,9 @@ __mutex_lock_common(struct mutex *lock,
spin_lock_mutex(&lock->wait_lock, flags);

/*
- * Once more, try to acquire the lock. Only try-lock the mutex if
- * it is unlocked to reduce unnecessary xchg() operations.
+ * Once more, try to acquire the lock.
*/
- if (!mutex_is_locked(lock) &&
- (atomic_xchg_acquire(&lock->count, 0) == 1))
+ if (__mutex_trylock(lock))
goto skip_wait;

debug_mutex_lock_common(lock, &waiter);
@@ -543,21 +568,20 @@ __mutex_lock_common(struct mutex *lock,
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;

+ if (list_first_entry(&lock->wait_list, struct mutex_waiter, list) == &waiter) {
+ __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
+ /*
+ * We must be sure to set WAITERS before attempting the trylock
+ * below, such that mutex_unlock() must either see our WAITERS
+ * or we see its unlock.
+ */
+ smp_mb__after_atomic();
+ }
+
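
The barrier here closes a classic store-buffering race between a waiter
announcing itself and the owner unlocking:

  	waiter					owner
  	------					-----
  	set MUTEX_FLAG_WAITERS			clear owner word
  	smp_mb__after_atomic()			(fetch_and is fully ordered)
  	__mutex_trylock(): load owner word	check returned flags

With full ordering on both sides, at least one CPU must observe the
other's store: either the trylock sees the lock free and takes it, or
the unlock sees MUTEX_FLAG_WAITERS and runs the wakeup slowpath. Without
the barrier both loads could miss, and the waiter would sleep with
nobody left to wake it.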
lock_contended(&lock->dep_map, ip);

for (;;) {
- /*
- * Lets try to take the lock again - this is needed even if
- * we get here for the first time (shortly after failing to
- * acquire the lock), to make sure that we get a wakeup once
- * it's unlocked. Later on, if we sleep, this is the
- * operation that gives us the lock. We xchg it to -1, so
- * that when we release the lock, we properly wake up the
- * other waiters. We only attempt the xchg if the count is
- * non-negative in order to avoid unnecessary xchg operations:
- */
- if (atomic_read(&lock->count) >= 0 &&
- (atomic_xchg_acquire(&lock->count, -1) == 1))
+ if (__mutex_trylock(lock))
break;

/*
@@ -587,13 +611,13 @@ __mutex_lock_common(struct mutex *lock,
mutex_remove_waiter(lock, &waiter, task);
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
- atomic_set(&lock->count, 0);
+ __mutex_clear_flag(lock, MUTEX_FLAG_WAITERS);
+
debug_mutex_free_waiter(&waiter);

skip_wait:
/* got the lock - cleanup and rejoice! */
lock_acquired(&lock->dep_map, ip);
- mutex_set_owner(lock);

if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
@@ -631,7 +655,6 @@ _mutex_lock_nest_lock(struct mutex *lock
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
0, nest, _RET_IP_, NULL, 0);
}
-
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);

int __sched
@@ -650,7 +673,6 @@ mutex_lock_interruptible_nested(struct m
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
subclass, NULL, _RET_IP_, NULL, 0);
}
-
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);

static inline int
@@ -715,29 +737,13 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interr
/*
* Release the lock, slowpath:
*/
-static inline void
-__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock)
{
unsigned long flags;
WAKE_Q(wake_q);

- /*
- * As a performance measurement, release the lock before doing other
- * wakeup related duties to follow. This allows other tasks to acquire
- * the lock sooner, while still handling cleanups in past unlock calls.
- * This can be done as we do not enforce strict equivalence between the
- * mutex counter and wait_list.
- *
- *
- * Some architectures leave the lock unlocked in the fastpath failure
- * case, others need to leave it locked. In the later case we have to
- * unlock it here - as the lock counter is currently 0 or negative.
- */
- if (__mutex_slowpath_needs_to_unlock())
- atomic_set(&lock->count, 1);
-
spin_lock_mutex(&lock->wait_lock, flags);
- mutex_release(&lock->dep_map, nested, _RET_IP_);
+ mutex_release(&lock->dep_map, 0, _RET_IP_);
debug_mutex_unlock(lock);

if (!list_empty(&lock->wait_list)) {
@@ -754,17 +760,6 @@ __mutex_unlock_common_slowpath(struct mu
wake_up_q(&wake_q);
}

-/*
- * Release the lock, slowpath:
- */
-__visible void
-__mutex_unlock_slowpath(atomic_t *lock_count)
-{
- struct mutex *lock = container_of(lock_count, struct mutex, count);
-
- __mutex_unlock_common_slowpath(lock, 1);
-}
-
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* Here come the less common (and hence less performance-critical) APIs:
@@ -789,38 +784,29 @@ __mutex_lock_interruptible_slowpath(stru
*/
int __sched mutex_lock_interruptible(struct mutex *lock)
{
- int ret;
-
might_sleep();
- ret = __mutex_fastpath_lock_retval(&lock->count);
- if (likely(!ret)) {
- mutex_set_owner(lock);
+
+ if (__mutex_trylock_fast(lock))
return 0;
- } else
- return __mutex_lock_interruptible_slowpath(lock);
+
+ return __mutex_lock_interruptible_slowpath(lock);
}

EXPORT_SYMBOL(mutex_lock_interruptible);

int __sched mutex_lock_killable(struct mutex *lock)
{
- int ret;
-
might_sleep();
- ret = __mutex_fastpath_lock_retval(&lock->count);
- if (likely(!ret)) {
- mutex_set_owner(lock);
+
+ if (__mutex_trylock_fast(lock))
return 0;
- } else
- return __mutex_lock_killable_slowpath(lock);
+
+ return __mutex_lock_killable_slowpath(lock);
}
EXPORT_SYMBOL(mutex_lock_killable);

-__visible void __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
+static void __sched __mutex_lock_slowpath(struct mutex *lock)
{
- struct mutex *lock = container_of(lock_count, struct mutex, count);
-
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
NULL, _RET_IP_, NULL, 0);
}
@@ -856,37 +842,6 @@ __ww_mutex_lock_interruptible_slowpath(s

#endif

-/*
- * Spinlock based trylock, we take the spinlock and check whether we
- * can get the lock:
- */
-static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
-{
- struct mutex *lock = container_of(lock_count, struct mutex, count);
- unsigned long flags;
- int prev;
-
- /* No need to trylock if the mutex is locked. */
- if (mutex_is_locked(lock))
- return 0;
-
- spin_lock_mutex(&lock->wait_lock, flags);
-
- prev = atomic_xchg_acquire(&lock->count, -1);
- if (likely(prev == 1)) {
- mutex_set_owner(lock);
- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
- }
-
- /* Set it back to 0 if there are no waiters: */
- if (likely(list_empty(&lock->wait_list)))
- atomic_set(&lock->count, 0);
-
- spin_unlock_mutex(&lock->wait_lock, flags);
-
- return prev == 1;
-}
-
/**
* mutex_trylock - try to acquire the mutex, without waiting
* @lock: the mutex to be acquired
@@ -903,13 +858,7 @@ static inline int __mutex_trylock_slowpa
*/
int __sched mutex_trylock(struct mutex *lock)
{
- int ret;
-
- ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
- if (ret)
- mutex_set_owner(lock);
-
- return ret;
+ return __mutex_trylock(lock);
}
EXPORT_SYMBOL(mutex_trylock);

@@ -917,36 +866,28 @@ EXPORT_SYMBOL(mutex_trylock);
int __sched
__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- int ret;
-
might_sleep();

- ret = __mutex_fastpath_lock_retval(&lock->base.count);
-
- if (likely(!ret)) {
+ if (__mutex_trylock_fast(&lock->base)) {
ww_mutex_set_context_fastpath(lock, ctx);
- mutex_set_owner(&lock->base);
- } else
- ret = __ww_mutex_lock_slowpath(lock, ctx);
- return ret;
+ return 0;
+ }
+
+ return __ww_mutex_lock_slowpath(lock, ctx);
}
EXPORT_SYMBOL(__ww_mutex_lock);

int __sched
__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- int ret;
-
might_sleep();

- ret = __mutex_fastpath_lock_retval(&lock->base.count);
-
- if (likely(!ret)) {
+ if (__mutex_trylock_fast(&lock->base)) {
ww_mutex_set_context_fastpath(lock, ctx);
- mutex_set_owner(&lock->base);
- } else
- ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx);
- return ret;
+ return 0;
+ }
+
+ return __ww_mutex_lock_interruptible_slowpath(lock, ctx);
}
EXPORT_SYMBOL(__ww_mutex_lock_interruptible);

--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -16,32 +16,6 @@
#define mutex_remove_waiter(lock, waiter, task) \
__list_del((waiter)->list.prev, (waiter)->list.next)

-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-/*
- * The mutex owner can get read and written to locklessly.
- * We should use WRITE_ONCE when writing the owner value to
- * avoid store tearing, otherwise, a thread could potentially
- * read a partially written and incomplete owner value.
- */
-static inline void mutex_set_owner(struct mutex *lock)
-{
- WRITE_ONCE(lock->owner, current);
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
- WRITE_ONCE(lock->owner, NULL);
-}
-#else
-static inline void mutex_set_owner(struct mutex *lock)
-{
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
-}
-#endif
-
#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
#define debug_mutex_free_waiter(waiter) do { } while (0)
#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -75,11 +75,11 @@
#include <linux/compiler.h>
#include <linux/frame.h>
#include <linux/prefetch.h>
+#include <linux/mutex.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
-#include <asm/mutex.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif